Refactor Regex classes further

This commit changes the Regex interface rather dramatically.

Most importantly, the RegexMatch class now contains a list of matched groups,
with group(0) being the entire match, group(1) the first capturing group,
and so on.

Secondly, searchAll now returns a list of RegexMatch objects instead
of a reversed, flattened list of groups from all matches.
This commit is contained in:
WGH 2019-01-29 21:29:44 +03:00 committed by Felipe Zimmerle
parent a2dc896520
commit 55b81f0e10
No known key found for this signature in database
GPG Key ID: E6DFB08CE8B11277
12 changed files with 167 additions and 159 deletions

View File

@ -228,9 +228,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
const unsigned char *buf;
size_t jsonSize;
std::list<regex::RegexMatch> vars = variables.searchAll(matchString);
std::list<regex::RegexMatch> ops = operators.searchAll(matchString);
std::list<regex::RegexMatch> trans = transformations.searchAll(matchString);
auto vars = variables.searchAll(matchString);
auto ops = operators.searchAll(matchString);
auto trans = transformations.searchAll(matchString);
g = yajl_gen_alloc(NULL);
if (g == NULL) {
@ -255,14 +255,12 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("highlight"));
yajl_gen_array_open(g);
while (vars.size() > 0) {
for (const auto &m : vars) {
std::string value;
yajl_gen_map_open(g);
vars.pop_back();
const std::string &startingAt = vars.back().str();
vars.pop_back();
const std::string &size = vars.back().str();
vars.pop_back();
const std::string &startingAt = m.group(1).string;
const std::string &size = m.group(2).string;
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
strlen("startingAt"));
@ -298,11 +296,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
yajl_gen_map_open(g);
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("value"),
strlen("value"));
yajl_gen_string(g, reinterpret_cast<const unsigned char*>(varValue.c_str()),
yajl_gen_string(g, reinterpret_cast<const unsigned char*>(varValue.data()),
varValue.size());
yajl_gen_map_close(g);
while (trans.size() > 0) {
for (const auto &m : trans) {
modsecurity::actions::transformations::Transformation *t;
std::string varValueRes;
yajl_gen_map_open(g);
@ -310,20 +308,22 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
reinterpret_cast<const unsigned char*>("transformation"),
strlen("transformation"));
const std::string &transformation = m.group(0).string;
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(trans.back().str().c_str()),
trans.back().str().size());
reinterpret_cast<const unsigned char*>(transformation.data()),
transformation.size());
t = modsecurity::actions::transformations::Transformation::instantiate(
trans.back().str().c_str());
transformation.c_str());
varValueRes = t->evaluate(varValue, NULL);
varValue.assign(varValueRes);
trans.pop_back();
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("value"),
strlen("value"));
yajl_gen_string(g, reinterpret_cast<const unsigned char*>(
varValue.c_str()),
varValue.data()),
varValue.size());
yajl_gen_map_close(g);
@ -337,26 +337,23 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
yajl_gen_map_open(g);
while (ops.size() > 0) {
for (const auto &m : ops) {
std::string value;
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("highlight"),
strlen("highlight"));
yajl_gen_map_open(g);
ops.pop_back();
std::string startingAt = ops.back().str();
ops.pop_back();
std::string size = ops.back().str();
ops.pop_back();
const std::string &startingAt = m.group(1).string;
const std::string &size = m.group(2).string;
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
strlen("startingAt"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(startingAt.c_str()),
reinterpret_cast<const unsigned char*>(startingAt.data()),
startingAt.size());
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("size"),
strlen("size"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(size.c_str()),
reinterpret_cast<const unsigned char*>(size.data()),
size.size());
yajl_gen_map_close(g);

View File

@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) {
bool Rx::evaluate(Transaction *transaction, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
std::list<RegexMatch> matches;
Regex *re;
if (m_param.empty() && !m_string->m_containsMacro) {
@ -52,33 +51,30 @@ bool Rx::evaluate(Transaction *transaction, Rule *rule,
re = m_re;
}
matches = re->searchAll(input);
if (rule && rule->m_containsCaptureAction && transaction) {
int i = 0;
matches.reverse();
for (const RegexMatch& a : matches) {
regex::RegexMatch m;
bool matched = re->search(input, &m, 9);
if (matched && rule && rule->m_containsCaptureAction && transaction) {
for (int i = 0; i < m.num_groups(); i++) {
auto key = std::to_string(i);
const std::string &value = m.group(i).string;
transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
std::to_string(i), a.str());
key, value);
ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
std::to_string(i) + ": " + a.str());
transaction->m_matched.push_back(a.str());
i++;
key + ": " + value);
transaction->m_matched.push_back(value);
}
}
for (const auto & i : matches) {
logOffset(ruleMessage, i.offset(), i.str().size());
for (int i = 0; i < m.num_groups(); i++) {
const regex::MatchGroup &g = m.group(i);
logOffset(ruleMessage, g.offset, g.string.size());
}
if (m_string->m_containsMacro) {
delete re;
}
if (matches.size() > 0) {
return true;
}
return false;
return matched;
}

View File

@ -119,7 +119,6 @@ bool VerifyCPF::verify(const char *cpfnumber, int len) {
bool VerifyCPF::evaluate(Transaction *t, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
std::list<RegexMatch> matches;
bool is_cpf = false;
int i;
@ -128,18 +127,17 @@ bool VerifyCPF::evaluate(Transaction *t, Rule *rule,
}
for (i = 0; i < input.size() - 1 && is_cpf == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_cpf = verify(i.str().c_str(), i.str().size());
auto matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto &m : matches) {
const regex::MatchGroup &g = m.group(0);
is_cpf = verify(g.string.data(), g.string.size());
if (is_cpf) {
logOffset(ruleMessage, i.offset(), i.str().size());
logOffset(ruleMessage, g.offset, g.string.size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", i.str());
ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \
i.str());
"0", g.string);
ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + g.string);
}
goto out;
}
}

View File

@ -110,7 +110,6 @@ invalid:
bool VerifySSN::evaluate(Transaction *t, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
std::list<RegexMatch> matches;
bool is_ssn = false;
int i;
@ -119,18 +118,17 @@ bool VerifySSN::evaluate(Transaction *t, Rule *rule,
}
for (i = 0; i < input.size() - 1 && is_ssn == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_ssn = verify(i.str().c_str(), i.str().size());
auto matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto &m : matches) {
const regex::MatchGroup &g = m.group(0);
is_ssn = verify(g.string.data(), g.string.size());
if (is_ssn) {
logOffset(ruleMessage, i.offset(), i.str().size());
logOffset(ruleMessage, g.offset, g.string.size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", i.str());
ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \
i.str());
"0", g.string);
ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + g.string);
}
goto out;
}
}

View File

@ -30,9 +30,8 @@ public:
virtual bool ok() const = 0;
virtual std::list<RegexMatch> searchAll(const std::string& s) const = 0;
virtual int search(const std::string &s, RegexMatch *m) const = 0;
virtual int search(const std::string &s) const = 0;
virtual std::vector<RegexMatch> searchAll(const std::string& s, bool overlapping = false) const = 0;
virtual bool search(const std::string &s, RegexMatch *m = nullptr, ssize_t max_groups = -1) const = 0;
virtual const std::string& getPattern() const = 0;
};

View File

@ -17,7 +17,6 @@
#include <iostream>
#include <fstream>
#include <string>
#include <list>
#include "src/regex/backend/pcre.h"
@ -46,6 +45,8 @@ Pcre::Pcre(const std::string& pattern_)
&errptr, &erroffset, NULL);
m_pce = pcre_study(m_pc, pcre_study_opt, &errptr);
pcre_fullinfo(m_pc, m_pce, PCRE_INFO_CAPTURECOUNT, &m_capture_count);
}
@ -64,60 +65,76 @@ Pcre::~Pcre() {
}
}
std::list<RegexMatch> Pcre::searchAll(const std::string& s) const {
const char *subject = s.c_str();
const std::string tmpString = std::string(s.c_str(), s.size());
int ovector[OVECCOUNT];
int rc, i, offset = 0;
std::list<RegexMatch> retList;
do {
rc = pcre_exec(m_pc, m_pce, subject,
s.size(), offset, 0, ovector, OVECCOUNT);
for (i = 0; i < rc; i++) {
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
if (end > s.size()) {
rc = 0;
break;
}
std::string match = std::string(tmpString, start, len);
offset = start + len;
retList.push_front(RegexMatch(match, start));
}
offset = ovector[1]; // end
if (offset == ovector[0]) { // start == end (size == 0)
offset++;
}
} while (rc > 0);
return retList;
}
int Pcre::search(const std::string& s, RegexMatch *match) const {
int ovector[OVECCOUNT];
int ret = pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
if (ret > 0) {
*match = RegexMatch(
std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
0);
// Runs the compiled pattern once against the first `n` bytes of `s`, starting
// the search at byte `offset`, and optionally reports the captured groups.
//
//   pc, pce             compiled pattern and its study data (given to pcre_exec)
//   pcre_capture_count  number of capturing groups in the pattern
//   s, n                subject buffer and its length in bytes
//   m                   receives the groups on a match; may be nullptr, in
//                       which case no groups are collected
//   max_groups          upper bound on the groups copied into *m (group 0 is
//                       the whole match); negative means "all groups reported
//                       by pcre_exec", 0 means "none" (*m is left untouched)
//   offset              byte position in `s` at which the search starts
//
// Returns true when pcre_exec reports a match, false otherwise (including any
// pcre_exec error code).
//
// A group that did not take part in the match (pcre_exec marks it with -1
// offsets) is still stored, as an empty string at offset 0, so that group
// indices stay aligned with the pattern's group numbers.
static bool do_match(
    pcre *pc,
    pcre_extra *pce,
    int pcre_capture_count,
    const char *s,
    size_t n,
    RegexMatch *m,
    ssize_t max_groups,
    size_t offset)
{
    // Without a compiled pattern the capture count is meaningless; bail out
    // before it is used to size the offsets buffer.
    if (pc == nullptr || pcre_capture_count < 0) {
        return false;
    }

    if (m == nullptr) {
        max_groups = 0;
    }

    // "+1" is required for full match (aka group 0).
    // Heap-backed buffer: a variable-length array is not standard C++.
    std::vector<int> ovector((static_cast<size_t>(pcre_capture_count) + 1) * 3);
    const int ovecsize = static_cast<int>(ovector.size());

    const int ret = pcre_exec(pc, pce, s, n, offset, 0, ovector.data(), ovecsize);
    if (ret <= 0) {
        return false;
    }

    if (max_groups < 0) {
        max_groups = ret;
    }

    if (max_groups > 0) {
        const size_t ngroups = std::min<size_t>(max_groups, ret);
        RegexMatch::MatchGroupContainer groups;
        groups.reserve(ngroups);
        for (size_t i = 0; i < ngroups; i++) {
            const int start = ovector[2*i];
            const int end = ovector[2*i+1];
            if (start < 0 || end < start) {
                // Unset (or inverted) group: never turn the -1 markers into
                // a pointer or a size_t offset.
                groups.push_back(MatchGroup{0, std::string()});
                continue;
            }
            std::string group(s + start, static_cast<size_t>(end - start));
            groups.push_back(
                MatchGroup{static_cast<size_t>(start), std::move(group)});
        }
        *m = RegexMatch(std::move(groups));
    }

    return true;
}
// Collects every match of the pattern in `s`, in the order they are found.
//
// Each iteration calls do_match() starting at `offset` and asks for all
// groups (max_groups = -1). After a match, `offset` is moved either to one
// byte past the start of that match (`overlapping` == true) or to the end of
// that match (`overlapping` == false). The loop ends at the first failed
// do_match() call.
//
// NOTE(review): the four lines between `size_t offset = 0;` and `while (1)`
// look like leftovers of the old `int Pcre::search(const std::string&)`
// overload rather than part of this function - confirm against the real file.
std::vector<RegexMatch> Pcre::searchAll(const std::string& s, bool overlapping) const {
std::vector<RegexMatch> res;
size_t offset = 0;
int Pcre::search(const std::string& s) const {
int ovector[OVECCOUNT];
return pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
while (1) {
RegexMatch m;
bool match = do_match(m_pc, m_pce, m_capture_count, s.data(), s.size(), &m, -1, offset);
// no (further) match from `offset` on: stop scanning
if (!match) break;
if (overlapping) {
// start just after the beginning of the last match
offset = m.group(0).offset + 1;
} else {
// start just at the end of the last match
offset = m.group(0).offset + m.group(0).string.size();
if (offset == m.group(0).offset) {
// empty match - advance by one to not match empty string repeatedly
offset++;
}
}
// `offset` is already computed, so the match can be moved into the result
res.push_back(std::move(m));
}
return res;
}
// Single search over `s`, starting at its first byte.
// `m` and `max_groups` are handed to do_match() unchanged, and its result
// is returned as-is.
bool Pcre::search(const std::string &s, RegexMatch *m, ssize_t max_groups) const {
    const char *subject = s.data();
    const size_t subject_len = s.size();
    const size_t start_offset = 0;

    return do_match(m_pc, m_pce, m_capture_count,
        subject, subject_len, m, max_groups, start_offset);
}
#endif

View File

@ -34,9 +34,6 @@ namespace backend {
#ifdef WITH_PCRE
#define OVECCOUNT 30
class Pcre : public Backend {
public:
explicit Pcre(const std::string& pattern_);
@ -50,9 +47,8 @@ class Pcre : public Backend {
return m_pc != NULL;
}
std::list<RegexMatch> searchAll(const std::string& s) const override;
int search(const std::string &s, RegexMatch *m) const override;
int search(const std::string &s) const override;
std::vector<RegexMatch> searchAll(const std::string& s, bool overlapping = false) const override;
bool search(const std::string &s, RegexMatch *m = nullptr, ssize_t max_groups = -1) const override;
virtual const std::string& getPattern() const override {
return pattern;
@ -60,6 +56,8 @@ class Pcre : public Backend {
private:
const std::string pattern;
int m_capture_count;
pcre *m_pc = NULL;
pcre_extra *m_pce = NULL;
};

View File

@ -44,9 +44,8 @@ class Re2 : public Backend {
return re.ok();
}
std::list<RegexMatch> searchAll(const std::string& s) const override;
int search(const std::string &s, RegexMatch *m) const override;
int search(const std::string &s) const override;
std::vector<RegexMatch> searchAll(const std::string& s, bool overlapping = false) const override;
bool search(const std::string &s, RegexMatch *m = nullptr, ssize_t max_groups = -1) const override;
virtual const std::string& getPattern() const override {
return re.pattern();

View File

@ -49,14 +49,12 @@ public:
return backend->ok();
}
std::list<RegexMatch> searchAll(const std::string& s) const override {
return backend->searchAll(s);
std::vector<RegexMatch> searchAll(const std::string& s, bool overlapping = false) const override {
return backend->searchAll(s, overlapping);
}
int search(const std::string &s, RegexMatch *m) const override {
return backend->search(s, m);
}
int search(const std::string &s) const override {
return backend->search(s);
bool search(const std::string &s, RegexMatch *m = nullptr, ssize_t max_groups = -1) const override {
return backend->search(s, m, max_groups);
}
const std::string& getPattern() const override {

View File

@ -17,28 +17,38 @@
#ifndef SRC_REGEX_REGEX_MATCH_H_
#define SRC_REGEX_REGEX_MATCH_H_
#include <vector>
#include <string>
namespace modsecurity {
namespace regex {
class RegexMatch {
public:
RegexMatch() :
m_match(),
m_offset(0) { }
RegexMatch(const std::string &match, size_t offset) :
m_match(match),
m_offset(offset) { }
const std::string& str() const { return m_match; }
size_t offset() const { return m_offset; }
private:
std::string m_match;
size_t m_offset;
// One group of a regex match: the captured text and where it was found.
struct MatchGroup {
    size_t offset;       // offset of the group within the searched string
    std::string string;  // text of the group
};

// Result of a single regex match, stored as an indexable list of groups.
class RegexMatch {
 public:
    using MatchGroupContainer = std::vector<MatchGroup>;

    // Creates a match without any groups (num_groups() == 0).
    RegexMatch() : m_groups() {}

    // Takes over an already built list of groups.
    RegexMatch(MatchGroupContainer matched)
        : m_groups(std::move(matched)) {}

    // Number of groups stored in this match.
    size_t num_groups() const { return m_groups.size(); }

    // Returns group `i`. The index is not range-checked, so callers must
    // keep it below num_groups().
    const MatchGroup& group(size_t i) const {
        const MatchGroup &selected = m_groups[i];
        return selected;
    }

 private:
    MatchGroupContainer m_groups;
};
} // namespace regex
} // namespace modsecurity

View File

@ -198,23 +198,23 @@ class Rule_DictElementRegexp : public VariableRegex {
void evaluate(Transaction *t,
Rule *rule,
std::vector<const VariableValue *> *l) override {
if (m_r.search("id") > 0) {
if (m_r.search("id")) {
Rule_DictElement::id(t, rule, l);
return;
}
if (m_r.search("rev") > 0) {
if (m_r.search("rev")) {
Rule_DictElement::rev(t, rule, l);
return;
}
if (m_r.search("severity") > 0) {
if (m_r.search("severity")) {
Rule_DictElement::severity(t, rule, l);
return;
}
if (m_r.search("logdata") > 0) {
if (m_r.search("logdata")) {
Rule_DictElement::logData(t, rule, l);
return;
}
if (m_r.search("msg") > 0) {
if (m_r.search("msg")) {
Rule_DictElement::msg(t, rule, l);
return;
}

View File

@ -64,18 +64,16 @@ void json2bin(std::string *str) {
while (re.search(*str, &match)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);
sscanf(toBeReplaced.c_str(), "%x", &p);
replaceAll(str, match.str(), p);
std::string toBeReplaced = match.group(0).string;
sscanf(toBeReplaced.substr(2).c_str(), "%x", &p);
replaceAll(str, toBeReplaced, p);
}
while (re2.search(*str, &match)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);
sscanf(toBeReplaced.c_str(), "%4x", &p);
replaceAll(str, match.str(), p);
std::string toBeReplaced = match.group(0).string;
sscanf(toBeReplaced.substr(2).c_str(), "%4x", &p);
replaceAll(str, toBeReplaced, p);
}
/*