Adds hyperscan support to pm operator

This commit is contained in:
Wang Xiang W 2021-04-08 09:21:19 +00:00 committed by Felipe Zimmerle
parent bed6e00324
commit 54312497a9
8 changed files with 407 additions and 158 deletions

View File

@ -245,6 +245,7 @@ UTILS = \
utils/decode.cc \
utils/geo_lookup.cc \
utils/https_client.cc \
utils/hyperscan.cc \
utils/ip_tree.cc \
utils/md5.cc \
utils/msc_tree.cc \

View File

@ -25,11 +25,6 @@
#include <list>
#include <memory>
#ifdef WITH_HS
#include <hs.h>
#endif
#include "src/operators/operator.h"
#ifndef WITH_HS
#include "src/utils/acmp.h"
@ -41,6 +36,7 @@ namespace operators {
Pm::~Pm() {
#ifdef WITH_HS
m_hs = NULL;
#else
acmp_node_t *root = m_p->root_node;
@ -48,10 +44,10 @@ Pm::~Pm() {
free(m_p);
m_p = NULL;
#endif
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_destroy(&m_lock);
#endif
#endif
}
#ifndef WITH_HS
@ -95,14 +91,20 @@ void Pm::postOrderTraversal(acmp_btree_node_t *node) {
bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule,
const std::string &input, std::shared_ptr<RuleMessage> ruleMessage) {
int rc = 0;
const char *match = NULL;
#ifdef WITH_HS
return 0;
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_lock(&m_lock);
#endif
rc = m_hs->search(input.c_str(), input.length(), &match);
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_unlock(&m_lock);
#endif
#else
int rc;
ACMPT pt;
pt.parser = m_p;
pt.ptr = NULL;
const char *match = NULL;
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_lock(&m_lock);
#endif
@ -110,7 +112,7 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule,
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_unlock(&m_lock);
#endif
#endif
if (rc >= 0 && transaction) {
std::string match_(match?match:"");
logOffset(ruleMessage, rc - match_.size() + 1, match_.size());
@ -125,16 +127,138 @@ bool Pm::evaluate(Transaction *transaction, RuleWithActions *rule,
}
return rc >= 0;
#endif
return 0;
}
/**
 * Normalizes the argument of the pm operator into a plain phrase list.
 *
 * Leading blanks are skipped and, when the remaining text is wrapped in a
 * pair of double quotes, the quotes are dropped. The rest is decoded in place:
 *   - '|' toggles binary mode, in which every pair of hex digits becomes one
 *     byte and any other character (e.g. a separating blank) is ignored;
 *   - outside binary mode a backslash escapes ':', ';', '\' and '"'; any
 *     other escaped character is rejected.
 *
 * @param op_parm   NUL-terminated operator argument.
 * @param op_len    Upper bound used while skipping the leading blanks.
 * @param error_msg Receives a pointer to a static message on failure.
 * @return Decoded, NUL-terminated content allocated with strdup() (release
 *         it with free()), or NULL on failure.
 *
 * NOTE: the result is a C string, so a decoded NUL byte ("|00|") ends the
 * content as seen by the caller.
 */
static
char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) {
    // Static storage: the pointer handed back through error_msg must stay
    // valid after this function returns.
    static const char kAllocError[] =
        "Error allocating memory for pattern matching content.";

    size_t offset = 0;
    while (offset < op_len && (op_parm[offset] == ' ' || op_parm[offset] == '\t')) {
        offset++;
    }

    size_t len = strlen(op_parm + offset);

    // Both quotes must be distinct characters, hence the length check: a lone
    // '"' is ordinary content, not an empty quoted string.
    const bool quoted = len >= 2 && op_parm[offset] == '\"'
        && op_parm[offset + len - 1] == '\"';

    char *parm = strdup(op_parm + offset + (quoted ? 1 : 0));
    if (parm == NULL) {
        *error_msg = kAllocError;
        return NULL;
    }
    if (quoted) {
        len -= 2;
        parm[len] = '\0';
    }

    if (len == 0) {
        *error_msg = "Content length is 0.";
        free(parm);
        return NULL;
    }

    // Decode in place. The write index never overtakes the read index because
    // every output byte consumes at least one input byte.
    size_t out = 0;
    bool bin = false;
    bool esc = false;
    char bin_parm[3] = { 0 };
    unsigned char bin_offset = 0;

    for (size_t i = 0; i < len; i++) {
        const char ch = parm[i];

        if (ch == '|') {
            bin = !bin;
        } else if (!esc && ch == '\\') {
            esc = true;
        } else if (bin) {
            const bool is_hex = (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
            if (is_hex) {
                bin_parm[bin_offset++] = ch;
                if (bin_offset == 2) {
                    parm[out++] = static_cast<char>(
                        strtol(bin_parm, NULL, 16) & 0xFF);
                    bin_offset = 0;
                }
            }
            // Anything else inside |...| (typically a blank) is skipped.
        } else if (esc) {
            if (ch == ':' || ch == ';' || ch == '\\' || ch == '\"') {
                parm[out++] = ch;
            } else {
                *error_msg = "Unsupported escape sequence.";
                free(parm);
                return NULL;
            }
            esc = false;
        } else {
            parm[out++] = ch;
        }
    }

    // Cut off the stale tail left behind once decoding shortened the text.
    parm[out] = '\0';

    if (out == 0) {
        *error_msg = "Content length is 0.";
        free(parm);
        return NULL;
    }

    return parm;
}
bool Pm::init(const std::string &file, std::string *error) {
#ifdef WITH_HS
fprintf(stdout, "Sopport for HS is on the way: %s\n", hs_version());
#else
std::vector<std::string> vec;
std::istringstream *iss;
const char *err = NULL;
@ -154,12 +278,25 @@ bool Pm::init(const std::string &file, std::string *error) {
back_inserter(vec));
for (auto &a : vec) {
#ifdef WITH_HS
m_hs->addPattern(a.c_str(), a.length());
}
if (m_hs->compile(error) == false) {
if (content) {
free(content);
content = NULL;
}
delete iss;
return false;
}
#else
acmp_add_pattern(m_p, a.c_str(), NULL, NULL, a.length());
}
while (m_p->is_failtree_done == 0) {
acmp_prepare(m_p);
}
#endif
if (content) {
free(content);
@ -167,7 +304,6 @@ bool Pm::init(const std::string &file, std::string *error) {
}
delete iss;
#endif
return true;
}

View File

@ -22,8 +22,11 @@
#include <utility>
#include "src/operators/operator.h"
#ifdef WITH_HS
#include "src/utils/hyperscan.h"
#else
#include "src/utils/acmp.h"
#endif
namespace modsecurity {
namespace operators {
@ -34,15 +37,13 @@ class Pm : public Operator {
/** @ingroup ModSecurity_Operator */
explicit Pm(std::unique_ptr<RunTimeString> param)
: Operator("Pm", std::move(param)) {
#ifdef WITH_HS
#else
#ifndef WITH_HS
m_p = acmp_create(0);
#endif
}
explicit Pm(const std::string &n, std::unique_ptr<RunTimeString> param)
: Operator(n, std::move(param)) {
#ifdef WITH_HS
#else
#ifndef WITH_HS
m_p = acmp_create(0);
#endif
}
@ -59,16 +60,17 @@ class Pm : public Operator {
#endif
protected:
#ifndef WITH_HS
#ifdef WITH_HS
std::shared_ptr<Utils::HyperscanPm> m_hs =
std::make_shared<Utils::HyperscanPm>();
#else
ACMP *m_p;
#endif
private:
#ifndef WITH_HS
#ifdef MODSEC_MUTEX_ON_PM
pthread_mutex_t m_lock;
#endif
#endif
};

View File

@ -69,13 +69,18 @@ bool PmFromFile::init(const std::string &config, std::string *error) {
for (std::string line; std::getline(*iss, line); ) {
if (isComment(line) == false) {
#ifdef WITH_HS
m_hs->addPattern(line.c_str(), line.length());
}
}
if (m_hs->compile(error) == false) {
delete iss;
return false;
}
#else
acmp_add_pattern(m_p, line.c_str(), NULL, NULL, line.length());
#endif
}
}
}
#ifndef WITH_HS
while (m_p->is_failtree_done == 0) {
acmp_prepare(m_p);
}

View File

@ -35,134 +35,6 @@
*/
extern "C" {
/**
 * Normalizes the argument of the pm operator into a plain phrase list.
 *
 * Leading blanks are skipped and, when the remaining text is wrapped in a
 * pair of double quotes, the quotes are dropped. The rest is decoded in place:
 *   - '|' toggles binary mode, in which every pair of hex digits becomes one
 *     byte and any other character (e.g. a separating blank) is ignored;
 *   - outside binary mode a backslash escapes ':', ';', '\' and '"'; any
 *     other escaped character is rejected.
 *
 * @param op_parm   NUL-terminated operator argument.
 * @param op_len    Upper bound used while skipping the leading blanks.
 * @param error_msg Receives a pointer to a static message on failure.
 * @return Decoded, NUL-terminated content allocated with strdup() (release
 *         it with free()), or NULL on failure.
 *
 * NOTE: the result is a C string, so a decoded NUL byte ("|00|") ends the
 * content as seen by the caller.
 */
char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg) {
    // Static storage: the pointer handed back through error_msg must stay
    // valid after this function returns.
    static const char kAllocError[] =
        "Error allocating memory for pattern matching content.";

    size_t offset = 0;
    while (offset < op_len && (op_parm[offset] == ' ' || op_parm[offset] == '\t')) {
        offset++;
    }

    size_t len = strlen(op_parm + offset);

    // Both quotes must be distinct characters, hence the length check: a lone
    // '"' is ordinary content, not an empty quoted string.
    const bool quoted = len >= 2 && op_parm[offset] == '\"'
        && op_parm[offset + len - 1] == '\"';

    char *parm = strdup(op_parm + offset + (quoted ? 1 : 0));
    if (parm == NULL) {
        *error_msg = kAllocError;
        return NULL;
    }
    if (quoted) {
        len -= 2;
        parm[len] = '\0';
    }

    if (len == 0) {
        *error_msg = "Content length is 0.";
        free(parm);
        return NULL;
    }

    // Decode in place. The write index never overtakes the read index because
    // every output byte consumes at least one input byte.
    size_t out = 0;
    bool bin = false;
    bool esc = false;
    char bin_parm[3] = { 0 };
    unsigned char bin_offset = 0;

    for (size_t i = 0; i < len; i++) {
        const char ch = parm[i];

        if (ch == '|') {
            bin = !bin;
        } else if (!esc && ch == '\\') {
            esc = true;
        } else if (bin) {
            const bool is_hex = (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
            if (is_hex) {
                bin_parm[bin_offset++] = ch;
                if (bin_offset == 2) {
                    parm[out++] = static_cast<char>(
                        strtol(bin_parm, NULL, 16) & 0xFF);
                    bin_offset = 0;
                }
            }
            // Anything else inside |...| (typically a blank) is skipped.
        } else if (esc) {
            if (ch == ':' || ch == ';' || ch == '\\' || ch == '\"') {
                parm[out++] = ch;
            } else {
                *error_msg = "Unsupported escape sequence.";
                free(parm);
                return NULL;
            }
            esc = false;
        } else {
            parm[out++] = ch;
        }
    }

    // Cut off the stale tail left behind once decoding shortened the text.
    parm[out] = '\0';

    if (out == 0) {
        *error_msg = "Content length is 0.";
        free(parm);
        return NULL;
    }

    return parm;
}
/*
*******************************************************************************
*******************************************************************************

View File

@ -189,8 +189,6 @@ int acmp_process_quick(ACMPT *acmpt, const char **match, const char *data, size_
*/
int acmp_prepare(ACMP *parser);
char *parse_pm_content(const char *op_parm, unsigned short int op_len, const char **error_msg);
}
#endif /*ACMP_H_*/

179
src/utils/hyperscan.cc Normal file
View File

@ -0,0 +1,179 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/
#include <string>
#include <vector>
#ifdef WITH_HS
#include "hyperscan.h"
namespace modsecurity {
namespace Utils {
// Turn a literal into its hex-escaped spelling: each of the patLen input
// bytes is written as a four-character "\xNN" sequence (lower-case digits).
static
std::string makeHex(const char *pat, const size_t patLen) {
    std::string escaped;
    size_t pos = 0;
    while (pos < patLen) {
        // Four visible characters plus the terminating NUL.
        char chunk[5];
        snprintf(chunk, sizeof(chunk), "\\x%02x",
            static_cast<unsigned char>(pat[pos]));
        escaped.append(chunk);
        ++pos;
    }
    return escaped;
}
// Stores a copy of the literal together with its length and identifier.
// The copy is taken with the explicit length so that `pattern` and `len`
// always agree, even when the input contains a NUL byte or is not
// NUL-terminated.
HyperscanPattern::HyperscanPattern(const char *pat, size_t patLen,
    unsigned int patId) :
    pattern(pat, patLen), len(patLen), id(patId) {}
// Hands the compiled database and the scratch space back to Hyperscan;
// either may still be unset when compile() never ran or failed early.
HyperscanPm::~HyperscanPm() {
    if (db != nullptr) {
        hs_free_database(db);
    }

    if (scratch != nullptr) {
        hs_free_scratch(scratch);
    }
}
void HyperscanPm::addPattern(const char *pat, size_t patLen) {
if (patLen == 0) {
return;
}
HyperscanPattern p(pat, patLen, num_patterns++);
patterns.emplace_back(p);
}
bool HyperscanPm::compile(std::string *error) {
if (patterns.empty()) {
return false;
}
if (hs_valid_platform() != HS_SUCCESS )
{
error->assign("This host does not support Hyperscan.");
return false;
}
// Convert literal to its hex-escaped format.
std::vector<std::string> hexPats;
for (const auto &p : patterns) {
hexPats.emplace_back(makeHex(p.pattern.c_str(), p.len));
}
// The Hyperscan compiler takes its patterns in a group of arrays.
std::vector<const char *> pats;
std::vector<unsigned> flags(num_patterns, HS_FLAG_CASELESS);
std::vector<unsigned> ids;
int i = 0;
for (const auto &p : patterns) {
pats.emplace_back(hexPats[i++].c_str());
ids.emplace_back(p.id);
}
hs_compile_error_t *compile_error = NULL;
hs_error_t hs_error = hs_compile_multi(&pats[0], &flags[0], &ids[0],
num_patterns, HS_MODE_BLOCK, NULL, &db, &compile_error);
if (compile_error != NULL) {
std::string message(compile_error->message);
std::string expression = std::to_string(compile_error->expression);
error->assign("hs_compile_multi() failed: " + message +
"(expression: " + expression + ")");
hs_free_compile_error(compile_error);
return false;
}
if (hs_error != HS_SUCCESS) {
error->assign("hs_compile_multi() failed: error " +
std::to_string(hs_error));
return false;
}
// Allocate Hyperscan scratch space for this database.
hs_error = hs_alloc_scratch(db, &scratch);
if (hs_error != HS_SUCCESS) {
error->assign("hs_alloc_scratch() failed: error " +
std::to_string(hs_error));
return false;
}
size_t scratch_size = 0;
hs_error = hs_scratch_size(scratch, &scratch_size);
if (hs_error != HS_SUCCESS) {
error->assign("hs_scratch_size() failed: error " +
std::to_string(hs_error));
return false;
}
size_t db_size = 0;
hs_error = hs_database_size(db, &db_size);
if (hs_error != HS_SUCCESS) {
error->assign("hs_database_size() failed: error " +
std::to_string(hs_error));
return false;
}
return true;
}
// State shared between HyperscanPm::search() and the match callback that
// Hyperscan invokes while scanning.
struct HyperscanCallbackContext {
    HyperscanPm *pm = nullptr;        // matcher that owns the pattern table
    unsigned int num_matches = 0;     // matches reported so far
    unsigned int offset = 0;          // value recorded by the last callback
    const char **match = nullptr;     // where the matched pattern is stored
};
// Callback handed to hs_scan(); runs once per reported match. It counts the
// match, records `to - 1` as the offset and publishes the text of the
// pattern whose identifier matched.
// NOTE(review): Hyperscan documents `to` as the offset just after the match,
// which would make `to - 1` the last matched byte — confirm against the
// Hyperscan API reference.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
    unsigned int flags, void *hs_ctx) {
    auto *state = static_cast<HyperscanCallbackContext *>(hs_ctx);

    state->num_matches += 1;
    state->offset = static_cast<unsigned int>(to) - 1;
    *state->match = state->pm->getPatternById(id);

    // A non-zero return terminates matching.
    return 1;
}
int HyperscanPm::search(const char *t, unsigned int tlen, const char **match) {
HyperscanCallbackContext ctx;
ctx.pm = this;
ctx.num_matches = 0;
ctx.offset = 0;
ctx.match = match;
hs_error_t error = hs_scan(db, t, tlen, 0, scratch, onMatch, &ctx);
if (error != HS_SCAN_TERMINATED) {
// TODO add debug output
return -1;
}
return ctx.num_matches > 0 ? ctx.offset : -1;
}
const char *HyperscanPm::getPatternById(unsigned int patId) const {
return patterns[patId].pattern.c_str();
}
} // namespace Utils
} // namespace modsecurity
#endif

56
src/utils/hyperscan.h Normal file
View File

@ -0,0 +1,56 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2021 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/
#include <string>
#include <vector>
#ifdef WITH_HS
#include <hs.h>
namespace modsecurity {
namespace Utils {
// One literal registered with HyperscanPm, kept together with the length
// and identifier it was registered under.
struct HyperscanPattern {
    HyperscanPattern(const char *pat, size_t patLen, unsigned int patId);

    // Copy of the literal text.
    std::string pattern;
    // Length supplied when the literal was registered.
    size_t len;
    // Actual pattern id.
    unsigned int id;
};
// Multi-literal matcher backed by Hyperscan.
//
// Usage: register literals with addPattern(), call compile() once, then
// call search() for every buffer to scan.
//
// The object owns the compiled database and the scratch space and frees
// them in its destructor, so it must not be copied.
//
// NOTE(review): one scratch space is shared by every search() call on the
// same object; verify that callers serialize concurrent searches.
class HyperscanPm {
 public:
    HyperscanPm() = default;
    ~HyperscanPm();

    // Copying would leave two objects freeing the same Hyperscan handles.
    HyperscanPm(const HyperscanPm &) = delete;
    HyperscanPm &operator=(const HyperscanPm &) = delete;

    // Registers a literal of patLen bytes; empty literals are ignored.
    void addPattern(const char *pat, size_t patLen);

    // Builds the database and scratch space; on failure returns false and
    // describes the problem in *error.
    bool compile(std::string *error);

    // Scans tlen bytes of t. Returns the recorded match offset and sets
    // *match on a hit, or returns -1 otherwise.
    int search(const char *t, unsigned int tlen, const char **match);

    // Returns the text of the pattern registered under patId.
    const char *getPatternById(unsigned int patId) const;

 private:
    hs_database_t *db = nullptr;
    // Scratch space for Hyperscan.
    hs_scratch_t *scratch = nullptr;
    unsigned int num_patterns = 0;  // number of elements
    std::vector<HyperscanPattern> patterns;
};
} // namespace Utils
} // namespace modsecurity
#endif // WITH_HS