Refactor regex code

This commit fixes quite a few odd things in regex code:
 * Lack of encapsulation.
 * Non-method functions for matching without retrieving all groups.
 * Regex class being copyable without proper copy-constructor (potential UAF
   and double free due to pointer members m_pc and m_pce).
 * Redundant SMatch::m_length, which always equals match.size() anyway.
 * Weird SMatch::size_ member, which is initialized by only one of the three matching
   functions and equals the return value of that function anyway.
 * Several places in the code using a std::string value instead of a reference.
This commit is contained in:
WGH 2019-01-17 01:55:17 +03:00 committed by Felipe Zimmerle
parent e0a0fa05cc
commit ad28de4f14
10 changed files with 68 additions and 67 deletions

View File

@ -1,6 +1,8 @@
v3.0.4 - YYYY-MMM-DD (to be released) v3.0.4 - YYYY-MMM-DD (to be released)
------------------------------------- -------------------------------------
- Refactoring on Regex and SMatch classes.
[@WGH-]
- Fixed buffer overflow in Utils::Md5::hexdigest() - Fixed buffer overflow in Utils::Md5::hexdigest()
[Issue #2002 - @defanator] [Issue #2002 - @defanator]
- Implemented merge() method for ConfigInt, ConfigDouble, ConfigString - Implemented merge() method for ConfigInt, ConfigDouble, ConfigString

View File

@ -134,7 +134,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var,
//std::string name = std::string(var, var.find(":") + 2, //std::string name = std::string(var, var.find(":") + 2,
// var.size() - var.find(":") - 3); // var.size() - var.find(":") - 3);
//size_t keySize = col.size(); //size_t keySize = col.size();
Utils::Regex r = Utils::Regex(var); Utils::Regex r(var);
for (const auto& x : *this) { for (const auto& x : *this) {
//if (x.first.size() <= keySize + 1) { //if (x.first.size() <= keySize + 1) {

View File

@ -259,9 +259,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
std::string value; std::string value;
yajl_gen_map_open(g); yajl_gen_map_open(g);
vars.pop_back(); vars.pop_back();
std::string startingAt = vars.back().match; const std::string &startingAt = vars.back().str();
vars.pop_back(); vars.pop_back();
std::string size = vars.back().match; const std::string &size = vars.back().str();
vars.pop_back(); vars.pop_back();
yajl_gen_string(g, yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"), reinterpret_cast<const unsigned char*>("startingAt"),
@ -311,11 +311,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("transformation")); strlen("transformation"));
yajl_gen_string(g, yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(trans.back().match.c_str()), reinterpret_cast<const unsigned char*>(trans.back().str().c_str()),
trans.back().match.size()); trans.back().str().size());
t = modsecurity::actions::transformations::Transformation::instantiate( t = modsecurity::actions::transformations::Transformation::instantiate(
trans.back().match.c_str()); trans.back().str().c_str());
varValueRes = t->evaluate(varValue, NULL); varValueRes = t->evaluate(varValue, NULL);
varValue.assign(varValueRes); varValue.assign(varValueRes);
trans.pop_back(); trans.pop_back();
@ -343,9 +343,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("highlight")); strlen("highlight"));
yajl_gen_map_open(g); yajl_gen_map_open(g);
ops.pop_back(); ops.pop_back();
std::string startingAt = ops.back().match; std::string startingAt = ops.back().str();
ops.pop_back(); ops.pop_back();
std::string size = ops.back().match; std::string size = ops.back().str();
ops.pop_back(); ops.pop_back();
yajl_gen_string(g, yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"), reinterpret_cast<const unsigned char*>("startingAt"),

View File

@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) {
bool Rx::evaluate(Transaction *transaction, Rule *rule, bool Rx::evaluate(Transaction *transaction, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) { const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
SMatch match;
std::list<SMatch> matches; std::list<SMatch> matches;
Regex *re; Regex *re;
@ -59,16 +58,16 @@ bool Rx::evaluate(Transaction *transaction, Rule *rule,
matches.reverse(); matches.reverse();
for (const SMatch& a : matches) { for (const SMatch& a : matches) {
transaction->m_collections.m_tx_collection->storeOrUpdateFirst( transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
std::to_string(i), a.match); std::to_string(i), a.str());
ms_dbg_a(transaction, 7, "Added regex subexpression TX." + ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
std::to_string(i) + ": " + a.match); std::to_string(i) + ": " + a.str());
transaction->m_matched.push_back(a.match); transaction->m_matched.push_back(a.str());
i++; i++;
} }
} }
for (const auto & i : matches) { for (const auto & i : matches) {
logOffset(ruleMessage, i.m_offset, i.m_length); logOffset(ruleMessage, i.offset(), i.str().size());
} }
if (m_string->m_containsMacro) { if (m_string->m_containsMacro) {

View File

@ -130,14 +130,14 @@ bool VerifyCPF::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_cpf == false; i++) { for (i = 0; i < input.size() - 1 && is_cpf == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size())); matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) { for (const auto & i : matches) {
is_cpf = verify(i.match.c_str(), i.match.size()); is_cpf = verify(i.str().c_str(), i.str().size());
if (is_cpf) { if (is_cpf) {
logOffset(ruleMessage, i.m_offset, i.m_length); logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) { if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst( t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match)); "0", i.str());
ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \ ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \
std::string(i.match)); i.str());
} }
goto out; goto out;

View File

@ -121,14 +121,14 @@ bool VerifySSN::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_ssn == false; i++) { for (i = 0; i < input.size() - 1 && is_ssn == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size())); matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) { for (const auto & i : matches) {
is_ssn = verify(i.match.c_str(), i.match.size()); is_ssn = verify(i.str().c_str(), i.str().size());
if (is_ssn) { if (is_ssn) {
logOffset(ruleMessage, i.m_offset, i.m_length); logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) { if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst( t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match)); "0", i.str());
ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \ ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \
std::string(i.match)); i.str());
} }
goto out; goto out;

View File

@ -39,15 +39,11 @@ namespace Utils {
Regex::Regex(const std::string& pattern_) Regex::Regex(const std::string& pattern_)
: pattern(pattern_), : pattern(pattern_.empty() ? ".*" : pattern_)
m_ovector {0} { {
const char *errptr = NULL; const char *errptr = NULL;
int erroffset; int erroffset;
if (pattern.empty() == true) {
pattern.assign(".*");
}
m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE, m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE,
&errptr, &erroffset, NULL); &errptr, &erroffset, NULL);
@ -71,7 +67,7 @@ Regex::~Regex() {
} }
std::list<SMatch> Regex::searchAll(const std::string& s) { std::list<SMatch> Regex::searchAll(const std::string& s) const {
const char *subject = s.c_str(); const char *subject = s.c_str();
const std::string tmpString = std::string(s.c_str(), s.size()); const std::string tmpString = std::string(s.c_str(), s.size());
int ovector[OVECCOUNT]; int ovector[OVECCOUNT];
@ -83,7 +79,6 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
s.size(), offset, 0, ovector, OVECCOUNT); s.size(), offset, 0, ovector, OVECCOUNT);
for (i = 0; i < rc; i++) { for (i = 0; i < rc; i++) {
SMatch match;
size_t start = ovector[2*i]; size_t start = ovector[2*i];
size_t end = ovector[2*i+1]; size_t end = ovector[2*i+1];
size_t len = end - start; size_t len = end - start;
@ -91,11 +86,9 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
rc = 0; rc = 0;
break; break;
} }
match.match = std::string(tmpString, start, len); std::string match = std::string(tmpString, start, len);
match.m_offset = start;
match.m_length = len;
offset = start + len; offset = start + len;
retList.push_front(match); retList.push_front(SMatch(match, start));
if (len == 0) { if (len == 0) {
rc = 0; rc = 0;
@ -107,24 +100,24 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
return retList; return retList;
} }
int regex_search(const std::string& s, SMatch *match, int Regex::search(const std::string& s, SMatch *match) const {
const Regex& regex) {
int ovector[OVECCOUNT]; int ovector[OVECCOUNT];
int ret = pcre_exec(regex.m_pc, regex.m_pce, s.c_str(), int ret = pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0; s.size(), 0, 0, ovector, OVECCOUNT) > 0;
if (ret > 0) { if (ret > 0) {
match->match = std::string(s, ovector[ret-1], *match = SMatch(
ovector[ret] - ovector[ret-1]); std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
match->size_ = ret; 0
);
} }
return ret; return ret;
} }
int regex_search(const std::string& s, const Regex& regex) { int Regex::search(const std::string& s) const {
int ovector[OVECCOUNT]; int ovector[OVECCOUNT];
return pcre_exec(regex.m_pc, regex.m_pce, s.c_str(), return pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0; s.size(), 0, 0, ovector, OVECCOUNT) > 0;
} }

View File

@ -31,39 +31,48 @@ namespace Utils {
class SMatch { class SMatch {
public: public:
SMatch() : size_(0), SMatch()
m_offset(0), : m_match(), m_offset(0)
m_length(0), {}
match("") { }
size_t size() const { return size_; }
std::string str() const { return match; }
int size_; SMatch(const std::string &match, size_t offset)
int m_offset; : m_match(match), m_offset(offset)
int m_length; {}
std::string match;
const std::string& str() const { return m_match; }
size_t offset() const { return m_offset; }
private:
std::string m_match;
size_t m_offset;
}; };
class Regex { class Regex {
public: public:
explicit Regex(const std::string& pattern_); explicit Regex(const std::string& pattern_);
~Regex(); ~Regex();
std::string pattern;
// m_pc and m_pce can't be easily copied
Regex(const Regex&) = delete;
Regex& operator=(const Regex&) = delete;
std::list<SMatch> searchAll(const std::string& s) const;
int search(const std::string &s, SMatch *m) const;
int search(const std::string &s) const;
const std::string pattern;
private:
pcre *m_pc = NULL; pcre *m_pc = NULL;
pcre_extra *m_pce = NULL; pcre_extra *m_pce = NULL;
int m_ovector[OVECCOUNT];
std::list<SMatch> searchAll(const std::string& s);
}; };
static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) {
return regex.search(s, match);
}
int regex_search(const std::string& s, SMatch *m, static inline int regex_search(const std::string& s, const Regex& regex) {
const Regex& regex); return regex.search(s);
}
int regex_search(const std::string& s, const Regex& r);
} // namespace Utils } // namespace Utils
} // namespace modsecurity } // namespace modsecurity

View File

@ -202,7 +202,7 @@ void perform_unit_test(ModSecurityTest<RegressionTest> *test,
SMatch match; SMatch match;
std::string s = modsec_rules->getParserError(); std::string s = modsec_rules->getParserError();
if (regex_search(s, &match, re) && match.size() >= 1) { if (regex_search(s, &match, re)) {
if (test->m_automake_output) { if (test->m_automake_output) {
std::cout << ":test-result: PASS " << filename \ std::cout << ":test-result: PASS " << filename \
<< ":" << t->name << std::endl; << ":" << t->name << std::endl;

View File

@ -62,8 +62,7 @@ void json2bin(std::string *str) {
modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})"); modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})");
modsecurity::Utils::SMatch match; modsecurity::Utils::SMatch match;
while (modsecurity::Utils::regex_search(*str, &match, re) while (modsecurity::Utils::regex_search(*str, &match, re)) {
&& match.size() > 0) {
unsigned int p; unsigned int p;
std::string toBeReplaced = match.str(); std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2); toBeReplaced.erase(0, 2);
@ -71,8 +70,7 @@ void json2bin(std::string *str) {
replaceAll(str, match.str(), p); replaceAll(str, match.str(), p);
} }
while (modsecurity::Utils::regex_search(*str, &match, re2) while (modsecurity::Utils::regex_search(*str, &match, re2)) {
&& match.size() > 0) {
unsigned int p; unsigned int p;
std::string toBeReplaced = match.str(); std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2); toBeReplaced.erase(0, 2);