Update Regex util to support match limits

If the rx or rxGlobal operator encounters a regex error,
the RX_ERROR and RX_ERROR_RULE_ID variables are set.
RX_ERROR contains a simple error code which can be either
OTHER or MATCH_LIMIT. RX_ERROR_RULE_ID unsurprisingly
contains the ID of the rule associated with the error.
More than one rule may encounter regex errors,
but only the first error is reflected in these variables.
This commit is contained in:
Brandon Payton 2022-03-22 11:16:22 -04:00
parent 7b1cf0e99e
commit 8c269d31c5
17 changed files with 7760 additions and 7359 deletions

View File

@ -379,6 +379,7 @@ class RulesSetProperties {
from->m_responseBodyLimitAction,
PropertyNotSetBodyLimitAction);
to->m_pcreMatchLimit.merge(&from->m_pcreMatchLimit);
to->m_uploadFileLimit.merge(&from->m_uploadFileLimit);
to->m_uploadFileMode.merge(&from->m_uploadFileMode);
to->m_uploadDirectory.merge(&from->m_uploadDirectory);
@ -470,6 +471,7 @@ class RulesSetProperties {
ConfigDouble m_requestBodyLimit;
ConfigDouble m_requestBodyNoFilesLimit;
ConfigDouble m_responseBodyLimit;
ConfigInt m_pcreMatchLimit;
ConfigInt m_uploadFileLimit;
ConfigInt m_uploadFileMode;
DebugLog *m_debugLog;

View File

@ -184,6 +184,8 @@ class TransactionAnchoredVariables {
m_variableUniqueID(t, "UNIQUE_ID"),
m_variableUrlEncodedError(t, "URLENCODED_ERROR"),
m_variableUserID(t, "USERID"),
m_variableRxError(t, "RX_ERROR"),
m_variableRxErrorRuleID(t, "RX_ERROR_RULE_ID"),
m_variableArgs(t, "ARGS"),
m_variableArgsGet(t, "ARGS_GET"),
m_variableArgsPost(t, "ARGS_POST"),
@ -265,6 +267,8 @@ class TransactionAnchoredVariables {
AnchoredVariable m_variableUniqueID;
AnchoredVariable m_variableUrlEncodedError;
AnchoredVariable m_variableUserID;
AnchoredVariable m_variableRxError;
AnchoredVariable m_variableRxErrorRuleID;
AnchoredSetVariable m_variableArgs;
AnchoredSetVariable m_variableArgsGet;

View File

@ -51,12 +51,41 @@ bool Rx::evaluate(Transaction *transaction, RuleWithActions *rule,
re = m_re;
}
std::vector<Utils::SMatchCapture> captures;
if (re->hasError()) {
ms_dbg_a(transaction, 3, "Error with regular expression: \"" + re->pattern + "\"");
return false;
}
re->searchOneMatch(input, captures);
Utils::RegexResult regex_result;
std::vector<Utils::SMatchCapture> captures;
if (transaction && transaction->m_rules->m_pcreMatchLimit.m_set) {
unsigned long match_limit = transaction->m_rules->m_pcreMatchLimit.m_value;
regex_result = re->searchOneMatch(input, captures, match_limit);
} else {
regex_result = re->searchOneMatch(input, captures);
}
// FIXME: DRY regex error reporting. This logic is currently duplicated in other operators.
if (regex_result != Utils::RegexResult::Ok) {
std::string regex_error_str = "OTHER";
if (regex_result == Utils::RegexResult::ErrorMatchLimit) {
regex_error_str = "MATCH_LIMIT";
}
ms_dbg_a(transaction, 1, "rx: regex error '" + regex_error_str + "' for pattern '" + re->pattern + "'");
// Only expose the first regex error to indicate there is an issue
if (rule && transaction && transaction->m_variableRxError.m_value.empty()) {
transaction->m_variableRxError.set(regex_error_str, transaction->m_variableOffset);
transaction->m_variableRxErrorRuleID.set(
std::to_string(rule->m_ruleId),
transaction->m_variableOffset
);
}
return false;
}
if (rule && rule->hasCaptureAction() && transaction) {
for (const Utils::SMatchCapture& capture : captures) {

View File

@ -51,8 +51,36 @@ bool RxGlobal::evaluate(Transaction *transaction, RuleWithActions *rule,
re = m_re;
}
Utils::RegexResult regex_result;
std::vector<Utils::SMatchCapture> captures;
re->searchGlobal(input, captures);
if (transaction && transaction->m_rules->m_pcreMatchLimit.m_set) {
unsigned long match_limit = transaction->m_rules->m_pcreMatchLimit.m_value;
regex_result = re->searchGlobal(input, captures, match_limit);
} else {
regex_result = re->searchGlobal(input, captures);
}
// FIXME: DRY regex error reporting. This logic is currently duplicated in other operators.
if (regex_result != Utils::RegexResult::Ok) {
std::string regex_error_str = "OTHER";
if (regex_result == Utils::RegexResult::ErrorMatchLimit) {
regex_error_str = "MATCH_LIMIT";
}
ms_dbg_a(transaction, 1, "rxGlobal: regex error '" + regex_error_str + "' for pattern '" + re->pattern + "'");
// Only expose the first regex error to indicate there is an issue
if (rule && transaction && transaction->m_variableRxError.m_value.empty()) {
transaction->m_variableRxError.set(regex_error_str, transaction->m_variableOffset);
transaction->m_variableRxErrorRuleID.set(
std::to_string(rule->m_ruleId),
transaction->m_variableOffset
);
}
return false;
}
if (rule && rule->hasCaptureAction() && transaction) {
for (const Utils::SMatchCapture& capture : captures) {

View File

@ -1,4 +1,4 @@
// A Bison parser, made by GNU Bison 3.7.6.
// A Bison parser, made by GNU Bison 3.8.2.
// Locations for Bison parsers in C++

View File

@ -1,4 +1,4 @@
// A Bison parser, made by GNU Bison 3.7.6.
// A Bison parser, made by GNU Bison 3.8.2.
// Starting with Bison 3.2, this file is useless: the structure it
// used to define is now defined in "location.hh".

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -235,6 +235,8 @@ class Driver;
#include "src/variables/response_protocol.h"
#include "src/variables/response_status.h"
#include "src/variables/rule.h"
#include "src/variables/rx_error.h"
#include "src/variables/rx_error_rule_id.h"
#include "src/variables/server_addr.h"
#include "src/variables/server_name.h"
#include "src/variables/server_port.h"
@ -411,6 +413,8 @@ using namespace modsecurity::operators;
VARIABLE_RESPONSE_HEADERS_NAMES
VARIABLE_RESPONSE_PROTOCOL "RESPONSE_PROTOCOL"
VARIABLE_RESPONSE_STATUS "RESPONSE_STATUS"
VARIABLE_RX_ERROR "RX_ERROR"
VARIABLE_RX_ERROR_RULE_ID "RX_ERROR_RULE_ID"
VARIABLE_SERVER_ADDR "SERVER_ADDR"
VARIABLE_SERVER_NAME "SERVER_NAME"
VARIABLE_SERVER_PORT "SERVER_PORT"
@ -1648,10 +1652,10 @@ expression:
YYERROR;
*/
| CONFIG_DIR_PCRE_MATCH_LIMIT
/* Parser error disabled to avoid breaking default installations with modsecurity.conf-recommended
driver.error(@0, "SecPcreMatchLimit is not currently supported. Default PCRE values are being used for now");
YYERROR;
*/
{
driver.m_pcreMatchLimit.m_set = true;
driver.m_pcreMatchLimit.m_value = atoi($1.c_str());
}
| CONGIG_DIR_RESPONSE_BODY_MP
{
std::istringstream buf($1);
@ -2477,6 +2481,14 @@ var:
{
VARIABLE_CONTAINER($$, new variables::ResponseStatus());
}
| VARIABLE_RX_ERROR
{
VARIABLE_CONTAINER($$, new variables::RxError());
}
| VARIABLE_RX_ERROR_RULE_ID
{
VARIABLE_CONTAINER($$, new variables::RxErrorRuleID());
}
| VARIABLE_SERVER_ADDR
{
VARIABLE_CONTAINER($$, new variables::ServerAddr());

File diff suppressed because it is too large Load Diff

View File

@ -231,6 +231,8 @@ VARIABLE_RESPONSE_CONTENT_TYPE (?i:RESPONSE_CONTENT_TYPE)
VARIABLE_RESPONSE_HEADERS_NAMES (?i:RESPONSE_HEADERS_NAMES)
VARIABLE_RESPONSE_PROTOCOL (?i:RESPONSE_PROTOCOL)
VARIABLE_RESPONSE_STATUS (?i:RESPONSE_STATUS)
VARIABLE_RX_ERROR (?i:RX_ERROR)
VARIABLE_RX_ERROR_RULE_ID (?i:RX_ERROR_RULE_ID)
VARIABLE_SERVER_ADDR (?i:SERVER_ADDR)
VARIABLE_SERVER_NAME (?i:SERVER_NAME)
VARIABLE_SERVER_PORT (?i:SERVER_PORT)
@ -959,6 +961,8 @@ EQUALS_MINUS (?i:=\-)
{VARIABLE_RESPONSE_HEADERS_NAMES}[:.] { BEGINX(EXPECTING_VAR_PARAMETER); return p::make_VARIABLE_RESPONSE_HEADERS_NAMES(*driver.loc.back()); }
{VARIABLE_RESPONSE_PROTOCOL} { return p::make_VARIABLE_RESPONSE_PROTOCOL(*driver.loc.back()); }
{VARIABLE_RESPONSE_STATUS} { return p::make_VARIABLE_RESPONSE_STATUS(*driver.loc.back()); }
{VARIABLE_RX_ERROR} { return p::make_VARIABLE_RX_ERROR(*driver.loc.back()); }
{VARIABLE_RX_ERROR_RULE_ID} { return p::make_VARIABLE_RX_ERROR_RULE_ID(*driver.loc.back()); }
{VARIABLE_SERVER_ADDR} { return p::make_VARIABLE_SERVER_ADDR(*driver.loc.back()); }
{VARIABLE_SERVER_NAME} { return p::make_VARIABLE_SERVER_NAME(*driver.loc.back()); }
{VARIABLE_SERVER_PORT} { return p::make_VARIABLE_SERVER_PORT(*driver.loc.back()); }

View File

@ -1,4 +1,4 @@
// A Bison parser, made by GNU Bison 3.7.6.
// A Bison parser, made by GNU Bison 3.8.2.
// Starting with Bison 3.2, this file is useless: the structure it
// used to define is now defined with the parser itself.

View File

@ -25,12 +25,37 @@
#ifndef WITH_PCRE2
#if PCRE_HAVE_JIT
#define pcre_study_opt PCRE_STUDY_JIT_COMPILE
// NOTE: Add PCRE_STUDY_EXTRA_NEEDED so studying always yields a pcre_extra structure
// and we can selectively override match limits using a copy of that structure at runtime.
#define pcre_study_opt PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_EXTRA_NEEDED
#else
#define pcre_study_opt 0
// NOTE: Add PCRE_STUDY_EXTRA_NEEDED so studying always yields a pcre_extra structure
// and we can selectively override match limits using a copy of that structure at runtime.
#define pcre_study_opt PCRE_STUDY_EXTRA_NEEDED
#endif
#endif
#ifdef WITH_PCRE2
/**
 * Scope-bound owner of a pcre2_match_context.
 *
 * The context is created in the constructor and released in the destructor,
 * so a local instance cleans up on every exit path of the enclosing function.
 * The object converts implicitly to the raw pointer so it can be handed
 * directly to PCRE2 functions.
 *
 * NOTE(review): pcre2_match_context_create() can presumably return NULL on
 * allocation failure; this wrapper does not check for that - confirm whether
 * callers need a guard before configuring the context.
 */
class Pcre2MatchContextPtr {
 public:
    Pcre2MatchContextPtr()
        : m_match_context(pcre2_match_context_create(nullptr)) {}

    // Non-copyable: two owners of the same raw context would free it twice.
    // Copy assignment must be deleted as well, otherwise the compiler still
    // generates it even though the copy constructor is deleted.
    Pcre2MatchContextPtr(const Pcre2MatchContextPtr&) = delete;
    Pcre2MatchContextPtr& operator=(const Pcre2MatchContextPtr&) = delete;

    ~Pcre2MatchContextPtr() {
        pcre2_match_context_free(m_match_context);
    }

    // Implicit conversion so the wrapper can be passed where PCRE2 expects
    // a pcre2_match_context pointer.
    operator pcre2_match_context*() const {
        return m_match_context;
    }

 private:
    pcre2_match_context *m_match_context;
};
#endif
namespace modsecurity {
namespace Utils {
@ -163,24 +188,42 @@ std::list<SMatch> Regex::searchAll(const std::string& s) const {
return retList;
}
bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
/**
 * Convenience overload of searchOneMatch() for callers that have no explicit
 * match limit: it looks up the default limit via get_default_match_limit()
 * and forwards all arguments to the three-argument overload.
 *
 * @param s         subject string, forwarded unchanged.
 * @param captures  capture output vector, forwarded unchanged.
 * @return the RegexResult produced by the three-argument overload.
 */
RegexResult Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
    const unsigned long default_limit = get_default_match_limit();
    return searchOneMatch(s, captures, default_limit);
}
RegexResult Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures, unsigned long match_limit) const {
#ifdef WITH_PCRE2
Pcre2MatchContextPtr match_context;
// TODO: What if setting the match limit fails?
pcre2_set_match_limit(match_context, match_limit);
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL);
int rc = 0;
if (m_pcje == 0) {
rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, NULL);
rc = pcre2_jit_match(m_pc, pcre2_s, s.length(), 0, 0, match_data, match_context);
}
if (m_pcje != 0 || rc == PCRE2_ERROR_JIT_STACKLIMIT) {
rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, PCRE2_NO_JIT, match_data, NULL);
rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, PCRE2_NO_JIT, match_data, match_context);
}
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
#else
const char *subject = s.c_str();
int ovector[OVECCOUNT];
pcre_extra local_pce;
pcre_extra *pce = NULL;
int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);
if (m_pce != NULL) {
local_pce = *m_pce;
local_pce.match_limit = match_limit;
local_pce.flags |= PCRE_EXTRA_MATCH_LIMIT;
pce = &local_pce;
}
int rc = pcre_exec(m_pc, pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);
#endif
for (int i = 0; i < rc; i++) {
@ -197,12 +240,22 @@ bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& cap
#ifdef WITH_PCRE2
pcre2_match_data_free(match_data);
#endif
return (rc > 0);
return to_regex_result(rc);
}
bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const {
/**
 * Convenience overload of searchGlobal() for callers that have no explicit
 * match limit: it looks up the default limit via get_default_match_limit()
 * and forwards all arguments to the three-argument overload.
 *
 * @param s         subject string, forwarded unchanged.
 * @param captures  capture output vector, forwarded unchanged.
 * @return the RegexResult produced by the three-argument overload.
 */
RegexResult Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const {
    const unsigned long default_limit = get_default_match_limit();
    return searchGlobal(s, captures, default_limit);
}
RegexResult Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures, unsigned long match_limit) const {
const char *subject = s.c_str();
bool prev_match_zero_length = false;
#ifdef WITH_PCRE2
Pcre2MatchContextPtr match_context;
// TODO: What if setting the match limit fails?
pcre2_set_match_limit(match_context, match_limit);
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
PCRE2_SIZE startOffset = 0;
@ -213,11 +266,20 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
pcre2_options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
int rc = pcre2_match(m_pc, pcre2_s, s.length(),
startOffset, pcre2_options, match_data, NULL);
startOffset, pcre2_options, match_data, match_context);
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
#else
const char *subject = s.c_str();
pcre_extra local_pce;
pcre_extra *pce = NULL;
if (m_pce != NULL) {
local_pce = *m_pce;
local_pce.match_limit = match_limit;
local_pce.flags |= PCRE_EXTRA_MATCH_LIMIT;
pce = &local_pce;
}
int startOffset = 0;
while (startOffset <= s.length()) {
@ -226,7 +288,12 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
if (prev_match_zero_length) {
pcre_options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
}
int rc = pcre_exec(m_pc, m_pce, subject, s.length(), startOffset, pcre_options, ovector, OVECCOUNT);
int rc = pcre_exec(m_pc, pce, subject, s.length(), startOffset, pcre_options, ovector, OVECCOUNT);
RegexResult regex_result = to_regex_result(rc);
if (regex_result != RegexResult::Ok) {
return regex_result;
}
#endif
if (rc > 0) {
@ -278,7 +345,7 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
#ifdef WITH_PCRE2
pcre2_match_data_free(match_data);
#endif
return (captures.size() > 0);
return RegexResult::Ok;
}
int Regex::search(const std::string& s, SMatch *match) const {
@ -340,5 +407,43 @@ int Regex::search(const std::string& s) const {
#endif
}
/**
 * Returns the default match limit reported by the PCRE library this build
 * is compiled against.
 *
 * @return the library's configured match limit, or 10000000 when the
 *         library reports an error for the configuration query.
 */
unsigned long Regex::get_default_match_limit() const {
    // Value used when the library cannot report its configured default.
    const unsigned long fallback_match_limit = 10000000;
#ifdef WITH_PCRE2
    // pcre2_config() stores PCRE2_CONFIG_MATCHLIMIT as a uint32_t. Writing it
    // into an uninitialized unsigned long would leave the remaining bytes
    // indeterminate on platforms where unsigned long is wider than 32 bits,
    // so query into a correctly sized variable and widen on return.
    uint32_t configured_limit = 0;
    const int ret = pcre2_config(PCRE2_CONFIG_MATCHLIMIT, &configured_limit);
#else
    unsigned long configured_limit = 0;
    const int ret = pcre_config(PCRE_CONFIG_MATCH_LIMIT, &configured_limit);
#endif
    if (ret < 0) {
        return fallback_match_limit;
    }
    return configured_limit;
}
/**
 * Translates the integer returned by the PCRE/PCRE2 match call into a
 * RegexResult.
 *
 * @param pcre_exec_result  raw return code of the match function.
 * @return RegexResult::Ok for a positive code or the library's "no match"
 *         code, RegexResult::ErrorMatchLimit for the library's match-limit
 *         error, and RegexResult::ErrorOther for everything else.
 */
RegexResult Regex::to_regex_result(int pcre_exec_result) const {
    // Select the library-specific codes once so the checks below read the
    // same for both PCRE flavours.
#ifdef WITH_PCRE2
    const int no_match_code = PCRE2_ERROR_NOMATCH;
    const int match_limit_code = PCRE2_ERROR_MATCHLIMIT;
#else
    const int no_match_code = PCRE_ERROR_NOMATCH;
    const int match_limit_code = PCRE_ERROR_MATCHLIMIT;
#endif

    const bool matched = pcre_exec_result > 0;
    if (matched || pcre_exec_result == no_match_code) {
        return RegexResult::Ok;
    }

    if (pcre_exec_result == match_limit_code) {
        return RegexResult::ErrorMatchLimit;
    }

    // Note that this can include the case where the PCRE result was zero.
    // Zero is returned if the offset vector is not large enough and can be considered an error.
    return RegexResult::ErrorOther;
}
} // namespace Utils
} // namespace modsecurity

View File

@ -34,6 +34,12 @@ namespace Utils {
#define OVECCOUNT 900
// Outcome of a regex search as reported by Regex::searchOneMatch() and
// Regex::searchGlobal(). "Ok" does not imply that a match was found; it only
// means the search finished without an error.
enum class RegexResult {
// The search completed: either a match was found or there was no match.
Ok,
// The PCRE library aborted the search because the match limit was reached.
ErrorMatchLimit,
// Any other failure code from the PCRE library, including a zero result.
ErrorOther,
};
class SMatch {
public:
SMatch() :
@ -76,13 +82,18 @@ class Regex {
return (m_pc == NULL);
}
std::list<SMatch> searchAll(const std::string& s) const;
bool searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const;
bool searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const;
RegexResult searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const;
RegexResult searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures, unsigned long match_limit ) const;
RegexResult searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const;
RegexResult searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures, unsigned long match_limit ) const;
int search(const std::string &s, SMatch *match) const;
int search(const std::string &s) const;
const std::string pattern;
private:
unsigned long get_default_match_limit() const;
RegexResult to_regex_result( int pcre_exec_result ) const;
#if WITH_PCRE2
pcre2_code *m_pc;
int m_pcje;

39
src/variables/rx_error.h Normal file
View File

@ -0,0 +1,39 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2015 - 2022 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/
#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <utility>
#ifndef SRC_VARIABLES_RX_ERROR_H_
#define SRC_VARIABLES_RX_ERROR_H_
#include "src/variables/variable.h"
namespace modsecurity {
class Transaction;
namespace variables {
// Declares the RxError variable for the RX_ERROR name, tied to the
// transaction member m_variableRxError. The rx / rxGlobal operators store
// "OTHER" or "MATCH_LIMIT" there for the first regex error they encounter.
// NOTE(review): DEFINE_VARIABLE comes from src/variables/variable.h and is
// not visible here - presumably it generates the variable class; confirm.
DEFINE_VARIABLE(RxError, RX_ERROR, m_variableRxError)
} // namespace variables
} // namespace modsecurity
#endif // SRC_VARIABLES_RX_ERROR_H_

View File

@ -0,0 +1,39 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2015 - 2022 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/
#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <utility>
#ifndef SRC_VARIABLES_RX_ERROR_RULE_ID_H_
#define SRC_VARIABLES_RX_ERROR_RULE_ID_H_
#include "src/variables/variable.h"
namespace modsecurity {
class Transaction;
namespace variables {
// Declares the RxErrorRuleID variable for the RX_ERROR_RULE_ID name, tied to
// the transaction member m_variableRxErrorRuleID. The rx / rxGlobal operators
// store the ID of the rule that hit the first regex error there.
// NOTE(review): DEFINE_VARIABLE comes from src/variables/variable.h and is
// not visible here - presumably it generates the variable class; confirm.
DEFINE_VARIABLE(RxErrorRuleID, RX_ERROR_RULE_ID, m_variableRxErrorRuleID)
} // namespace variables
} // namespace modsecurity
#endif // SRC_VARIABLES_RX_ERROR_RULE_ID_H_

View File

@ -282,6 +282,10 @@ class VariableMonkeyResolution {
t->m_variableUrlEncodedError.evaluate(l);
} else if (comp(variable, "USERID")) {
t->m_variableUserID.evaluate(l);
} else if (comp(variable, "RX_ERROR")) {
t->m_variableRxError.evaluate(l);
} else if (comp(variable, "RX_ERROR_RULE_ID")) {
t->m_variableRxErrorRuleID.evaluate(l);
} else {
throw std::invalid_argument("Variable not found.");
}
@ -462,6 +466,10 @@ class VariableMonkeyResolution {
} else if (comp(variable, "GLOBAL")) {
vv = t->m_collections.m_global_collection->resolveFirst("",
t->m_collections.m_global_collection_key, t->m_rules->m_secWebAppId.m_value);
} else if (comp(variable, "RX_ERROR")) {
vv = t->m_variableRxError.resolveFirst();
} else if (comp(variable, "RX_ERROR_RULE_ID")) {
vv = t->m_variableRxErrorRuleID.resolveFirst();
} else {
throw std::invalid_argument("Variable not found.");
}