Support PCRE2

This commit is contained in:
Martin Vierula
2022-04-13 09:37:54 -07:00
parent 5519f6cfae
commit f84614fe06
9 changed files with 380 additions and 18 deletions

View File

@@ -324,6 +324,7 @@ libmodsecurity_la_CPPFLAGS = \
$(YAJL_CFLAGS) \
$(LMDB_CFLAGS) \
$(PCRE_CFLAGS) \
$(PCRE2_CFLAGS) \
$(SSDEEP_CFLAGS) \
$(MAXMIND_CFLAGS) \
$(LUA_CFLAGS) \
@@ -339,6 +340,7 @@ libmodsecurity_la_LDFLAGS = \
$(LMDB_LDFLAGS) \
$(LUA_LDFLAGS) \
$(PCRE_LDFLAGS) \
$(PCRE2_LDFLAGS) \
$(SSDEEP_LDFLAGS) \
$(MAXMIND_LDFLAGS) \
$(YAJL_LDFLAGS) \
@@ -355,6 +357,7 @@ libmodsecurity_la_LIBADD = \
../others/libinjection.la \
../others/libmbedtls.la \
$(PCRE_LDADD) \
$(PCRE2_LDADD) \
$(MAXMIND_LDADD) \
$(SSDEEP_LDADD) \
$(YAJL_LDADD)

View File

@@ -15,24 +15,28 @@
#include "src/operators/verify_cc.h"
#include <pcre.h>
#include <iostream>
#include <cstring>
#include <vector>
#include "src/operators/operator.h"
#ifndef WITH_PCRE2
#if PCRE_HAVE_JIT
#define pcre_study_opt PCRE_STUDY_JIT_COMPILE
#else
#define pcre_study_opt 0
#endif
#endif
namespace modsecurity {
namespace operators {
VerifyCC::~VerifyCC() {
#if WITH_PCRE2
    // init() allocates a match-data block alongside the compiled pattern, so
    // both must be released here; freeing only the pattern leaks the match
    // data for every VerifyCC instance. The PCRE2 free functions are no-ops
    // on NULL, which covers operators whose init() failed or never ran.
    pcre2_match_data_free(m_match_data);
    m_match_data = NULL;
    pcre2_code_free(m_pc);
    m_pc = NULL;
#else
if (m_pc != NULL) {
pcre_free(m_pc);
m_pc = NULL;
@@ -45,6 +49,7 @@ VerifyCC::~VerifyCC() {
#endif
m_pce = NULL;
}
#endif
}
/**
@@ -90,6 +95,22 @@ int VerifyCC::luhnVerify(const char *ccnumber, int len) {
bool VerifyCC::init(const std::string &param2, std::string *error) {
#ifdef WITH_PCRE2
    // Compile the operator parameter with PCRE2 and pre-allocate the
    // match-data block that evaluate() passes to pcre2_match().
    PCRE2_SPTR pcre2_pattern = reinterpret_cast<PCRE2_SPTR>(m_param.c_str());
    uint32_t pcre2_options = (PCRE2_DOTALL|PCRE2_MULTILINE);
    int errornumber = 0;
    PCRE2_SIZE erroroffset = 0;
    m_pc = pcre2_compile(pcre2_pattern, PCRE2_ZERO_TERMINATED,
        pcre2_options, &errornumber, &erroroffset, NULL);
    if (m_pc == NULL) {
        // Hand the compile diagnostic back to the caller instead of failing
        // silently. The buffer is zero-filled so an unknown error code
        // yields an empty message rather than uninitialised bytes.
        if (error != NULL) {
            PCRE2_UCHAR message[256] = {0};
            pcre2_get_error_message(errornumber, message, sizeof(message));
            error->assign(reinterpret_cast<const char *>(message));
        }
        return false;
    }
    m_match_data = pcre2_match_data_create_from_pattern(m_pc, NULL);
    if (m_match_data == NULL) {
        if (error != NULL) {
            error->assign("Failed to allocate PCRE2 match data");
        }
        return false;
    }
#else
const char *errptr = NULL;
int erroffset = 0;
@@ -112,6 +133,7 @@ bool VerifyCC::init(const std::string &param2, std::string *error) {
error->assign(errptr);
return false;
}
#endif
return true;
}
@@ -119,11 +141,25 @@ bool VerifyCC::init(const std::string &param2, std::string *error) {
bool VerifyCC::evaluate(Transaction *t, RuleWithActions *rule,
const std::string& i, std::shared_ptr<RuleMessage> ruleMessage) {
#ifdef WITH_PCRE2
PCRE2_SIZE offset = 0;
size_t target_length = i.length();
PCRE2_SPTR pcre2_i = reinterpret_cast<PCRE2_SPTR>(i.c_str());
for (offset = 0; offset < target_length; offset++) {
int ret = pcre2_match(m_pc, pcre2_i, target_length, offset, 0, m_match_data, NULL);
/* If there was no match, then we are done. */
if (ret < 0) {
break;
}
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(m_match_data);
#else
int offset = 0;
int target_length = i.length();
for (offset = 0; offset < target_length; offset++) {
std::string match;
int ovector[33];
memset(ovector, 0, sizeof(ovector));
int ret = pcre_exec(m_pc, m_pce, i.c_str(), i.size(), offset,
@@ -136,8 +172,9 @@ bool VerifyCC::evaluate(Transaction *t, RuleWithActions *rule,
if (ret < 0) {
return false;
}
#endif
if (ret > 0) {
match = std::string(i, ovector[0], ovector[1] - ovector[0]);
std::string match = std::string(i, ovector[0], ovector[1] - ovector[0]);
int is_cc = luhnVerify(match.c_str(), match.size());
if (is_cc) {
if (t) {

View File

@@ -16,7 +16,14 @@
#ifndef SRC_OPERATORS_VERIFY_CC_H_
#define SRC_OPERATORS_VERIFY_CC_H_
#if WITH_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#else
#include <pcre.h>
#endif
#include <string>
#include <memory>
#include <utility>
@@ -32,7 +39,11 @@ class VerifyCC : public Operator {
/**
 * Registers the operator under the name "VerifyCC", forwarding the rule
 * parameter to the Operator base class. The regex handles start out as
 * NULL; init() is what assigns them.
 */
explicit VerifyCC(std::unique_ptr<RunTimeString> param)
    : Operator("VerifyCC", std::move(param)),
#if WITH_PCRE2
      m_pc(NULL),
      m_match_data(NULL)
#else
      m_pc(NULL),
      m_pce(NULL)
#endif
{ }
~VerifyCC();
bool evaluate(Transaction *t, RuleWithActions *rule,
@@ -40,8 +51,13 @@ class VerifyCC : public Operator {
std::shared_ptr<RuleMessage> ruleMessage) override;
bool init(const std::string &param, std::string *error) override;
private:
#if WITH_PCRE2
// Compiled PCRE2 pattern; assigned from pcre2_compile() in init().
pcre2_code *m_pc;
// Match-data block created from m_pc in init() and passed to pcre2_match()
// on every evaluate() call.
pcre2_match_data *m_match_data;
#else
// PCRE1 pattern handle handed to pcre_exec() in evaluate().
pcre *m_pc;
// PCRE1 extra data handed to pcre_exec() together with m_pc.
// NOTE(review): presumably the pcre_study() result -- confirm in init().
pcre_extra *m_pce;
#endif
static int luhnVerify(const char *ccnumber, int len);
};

View File

@@ -15,7 +15,6 @@
#include "src/utils/regex.h"
#include <pcre.h>
#include <string>
#include <list>
@@ -24,17 +23,27 @@
#include "src/utils/geo_lookup.h"
#ifndef WITH_PCRE2
#if PCRE_HAVE_JIT
#define pcre_study_opt PCRE_STUDY_JIT_COMPILE
#else
#define pcre_study_opt 0
#endif
#endif
namespace modsecurity {
namespace Utils {
// Helper function to tell us if the current config indicates CRLF is a valid newline sequence
bool crlfIsNewline() {
#if WITH_PCRE2
uint32_t newline = 0;
pcre2_config(PCRE2_CONFIG_NEWLINE, &newline);
bool crlf_is_newline =
newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
#else
int d = 0;
pcre_config(PCRE_CONFIG_NEWLINE, &d);
@@ -48,12 +57,26 @@ bool crlfIsNewline() {
option_bits == PCRE_NEWLINE_ANY ||
option_bits == PCRE_NEWLINE_CRLF ||
option_bits == PCRE_NEWLINE_ANYCRLF;
#endif
return crlf_is_newline;
}
Regex::Regex(const std::string& pattern_, bool ignoreCase)
: pattern(pattern_.empty() ? ".*" : pattern_) {
#if WITH_PCRE2
PCRE2_SPTR pcre2_pattern = reinterpret_cast<PCRE2_SPTR>(pattern.c_str());
uint32_t pcre2_options = (PCRE2_DOTALL|PCRE2_MULTILINE);
if (ignoreCase) {
pcre2_options |= PCRE2_CASELESS;
}
int errornumber = 0;
PCRE2_SIZE erroroffset = 0;
m_pc = pcre2_compile(pcre2_pattern, PCRE2_ZERO_TERMINATED,
pcre2_options, &errornumber, &erroroffset, NULL);
if (m_pc != NULL) {
m_match_data = pcre2_match_data_create_from_pattern(m_pc, NULL);
}
#else
const char *errptr = NULL;
int erroffset;
int flags = (PCRE_DOTALL|PCRE_MULTILINE);
@@ -65,10 +88,15 @@ Regex::Regex(const std::string& pattern_, bool ignoreCase)
&errptr, &erroffset, NULL);
m_pce = pcre_study(m_pc, pcre_study_opt, &errptr);
#endif
}
Regex::~Regex() {
#if WITH_PCRE2
pcre2_match_data_free(m_match_data);
pcre2_code_free(m_pc);
#else
if (m_pc != NULL) {
pcre_free(m_pc);
m_pc = NULL;
@@ -81,29 +109,39 @@ Regex::~Regex() {
#endif
m_pce = NULL;
}
#endif
}
std::list<SMatch> Regex::searchAll(const std::string& s) const {
const char *subject = s.c_str();
const std::string tmpString = std::string(s.c_str(), s.size());
int ovector[OVECCOUNT];
int rc, i, offset = 0;
std::list<SMatch> retList;
int rc;
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
PCRE2_SIZE offset = 0;
do {
rc = pcre2_match(m_pc, pcre2_s, s.length(),
offset, 0, m_match_data, NULL);
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(m_match_data);
#else
const char *subject = s.c_str();
int ovector[OVECCOUNT];
int offset = 0;
do {
rc = pcre_exec(m_pc, m_pce, subject,
s.size(), offset, 0, ovector, OVECCOUNT);
for (i = 0; i < rc; i++) {
#endif
for (int i = 0; i < rc; i++) {
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
if (end > s.size()) {
rc = 0;
rc = -1;
break;
}
std::string match = std::string(tmpString, start, len);
std::string match = std::string(s, start, len);
offset = start + len;
retList.push_front(SMatch(match, start));
@@ -118,10 +156,16 @@ std::list<SMatch> Regex::searchAll(const std::string& s) const {
}
bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& captures) const {
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
int rc = pcre2_match(m_pc, pcre2_s, s.length(), 0, 0, m_match_data, NULL);
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(m_match_data);
#else
const char *subject = s.c_str();
int ovector[OVECCOUNT];
int rc = pcre_exec(m_pc, m_pce, subject, s.size(), 0, 0, ovector, OVECCOUNT);
#endif
for (int i = 0; i < rc; i++) {
size_t start = ovector[2*i];
@@ -138,9 +182,22 @@ bool Regex::searchOneMatch(const std::string& s, std::vector<SMatchCapture>& cap
}
bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captures) const {
const char *subject = s.c_str();
bool prev_match_zero_length = false;
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
PCRE2_SIZE startOffset = 0;
while (startOffset <= s.length()) {
uint32_t pcre2_options = 0;
if (prev_match_zero_length) {
pcre2_options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
int rc = pcre2_match(m_pc, pcre2_s, s.length(),
startOffset, pcre2_options, m_match_data, NULL);
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(m_match_data);
#else
const char *subject = s.c_str();
int startOffset = 0;
while (startOffset <= s.length()) {
@@ -151,6 +208,7 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
}
int rc = pcre_exec(m_pc, m_pce, subject, s.length(), startOffset, pcre_options, ovector, OVECCOUNT);
#endif
if (rc > 0) {
size_t firstGroupForThisFullMatch = captures.size();
for (int i = 0; i < rc; i++) {
@@ -169,8 +227,13 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
startOffset = end;
prev_match_zero_length = false;
} else {
// zero-length match; modify next match attempt to avoid infinite loop
prev_match_zero_length = true;
if ( startOffset == s.length()) {
// zero-length match at end of string; force end of while-loop
startOffset++;
} else {
// zero-length match mid-string; adjust next match attempt
prev_match_zero_length = true;
}
}
}
}
@@ -196,11 +259,20 @@ bool Regex::searchGlobal(const std::string& s, std::vector<SMatchCapture>& captu
}
int Regex::search(const std::string& s, SMatch *match) const {
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.c_str());
int ret = pcre2_match(m_pc, pcre2_s, s.length(),
0, 0, m_match_data, NULL) > 0;
if (ret > 0) { // match
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(m_match_data);
#else
int ovector[OVECCOUNT];
int ret = pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
if (ret > 0) {
#endif
*match = SMatch(
std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
0);
@@ -210,9 +282,19 @@ int Regex::search(const std::string& s, SMatch *match) const {
}
/**
 * Tests whether the compiled pattern matches anywhere in `s`, scanning from
 * offset 0 with no extra match options.
 *
 * @param s  Subject string; its full length is passed to the matcher.
 * @return 1 when the matcher reports a positive result, 0 for any
 *         non-positive result (no match or a matcher error code).
 */
int Regex::search(const std::string& s) const {
#ifdef WITH_PCRE2
    // PCRE2 expects the subject as a pointer to unsigned code units.
    PCRE2_SPTR subject = reinterpret_cast<PCRE2_SPTR>(s.c_str());
    const int matched = pcre2_match(m_pc, subject, s.length(),
        0, 0, m_match_data, NULL);
#else
    int ovector[OVECCOUNT];
    const int matched = pcre_exec(m_pc, m_pce, s.c_str(),
        s.size(), 0, 0, ovector, OVECCOUNT);
#endif
    return (matched > 0) ? 1 : 0;
}
} // namespace Utils

View File

@@ -12,8 +12,12 @@
* directly using the email address security@modsecurity.org.
*
*/
#if WITH_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#else
#include <pcre.h>
#endif
#include <iostream>
#include <fstream>
@@ -76,8 +80,13 @@ class Regex {
const std::string pattern;
private:
#if WITH_PCRE2
    // Compiled PCRE2 pattern and the match-data block derived from it.
    // Both default to NULL, matching the PCRE1 members below: the
    // constructor only assigns m_match_data when pcre2_compile() succeeds,
    // and ~Regex() frees both unconditionally, so without these defaults a
    // failed compile would hand an indeterminate pointer to
    // pcre2_match_data_free().
    pcre2_code *m_pc = NULL;
    pcre2_match_data *m_match_data = NULL;
#else
    // PCRE1 compiled pattern and the study data produced by pcre_study().
    pcre *m_pc = NULL;
    pcre_extra *m_pce = NULL;
#endif
};