Refactor regex code

This commit fixes quite a few odd things in regex code:
 * Lack of encapsulation.
 * Non-method functions for matching without retrieving all groups.
 * Regex class being copyable without proper copy-constructor (potential UAF
   and double free due to pointer members m_pc and m_pce).
 * Redundant SMatch::m_length, which always equals to match.size() anyway.
 * Weird SMatch::size_ member which is initialized only by one of the three matching
   functions, and equals to the return value of that function anyways.
 * Several places in code having std::string value instead of reference.
This commit is contained in:
WGH 2019-01-17 01:55:17 +03:00 committed by Felipe Zimmerle
parent e0a0fa05cc
commit ad28de4f14
10 changed files with 68 additions and 67 deletions

View File

@ -1,6 +1,8 @@
v3.0.4 - YYYY-MMM-DD (to be released)
-------------------------------------
- Refactoring on Regex and SMatch classes.
[@WGH-]
- Fixed buffer overflow in Utils::Md5::hexdigest()
[Issue #2002 - @defanator]
- Implemented merge() method for ConfigInt, ConfigDouble, ConfigString

View File

@ -134,7 +134,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var,
//std::string name = std::string(var, var.find(":") + 2,
// var.size() - var.find(":") - 3);
//size_t keySize = col.size();
Utils::Regex r = Utils::Regex(var);
Utils::Regex r(var);
for (const auto& x : *this) {
//if (x.first.size() <= keySize + 1) {

View File

@ -259,9 +259,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
std::string value;
yajl_gen_map_open(g);
vars.pop_back();
std::string startingAt = vars.back().match;
const std::string &startingAt = vars.back().str();
vars.pop_back();
std::string size = vars.back().match;
const std::string &size = vars.back().str();
vars.pop_back();
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
@ -311,11 +311,11 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("transformation"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(trans.back().match.c_str()),
trans.back().match.size());
reinterpret_cast<const unsigned char*>(trans.back().str().c_str()),
trans.back().str().size());
t = modsecurity::actions::transformations::Transformation::instantiate(
trans.back().match.c_str());
trans.back().str().c_str());
varValueRes = t->evaluate(varValue, NULL);
varValue.assign(varValueRes);
trans.pop_back();
@ -343,9 +343,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("highlight"));
yajl_gen_map_open(g);
ops.pop_back();
std::string startingAt = ops.back().match;
std::string startingAt = ops.back().str();
ops.pop_back();
std::string size = ops.back().match;
std::string size = ops.back().str();
ops.pop_back();
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),

View File

@ -38,7 +38,6 @@ bool Rx::init(const std::string &arg, std::string *error) {
bool Rx::evaluate(Transaction *transaction, Rule *rule,
const std::string& input, std::shared_ptr<RuleMessage> ruleMessage) {
SMatch match;
std::list<SMatch> matches;
Regex *re;
@ -59,16 +58,16 @@ bool Rx::evaluate(Transaction *transaction, Rule *rule,
matches.reverse();
for (const SMatch& a : matches) {
transaction->m_collections.m_tx_collection->storeOrUpdateFirst(
std::to_string(i), a.match);
std::to_string(i), a.str());
ms_dbg_a(transaction, 7, "Added regex subexpression TX." +
std::to_string(i) + ": " + a.match);
transaction->m_matched.push_back(a.match);
std::to_string(i) + ": " + a.str());
transaction->m_matched.push_back(a.str());
i++;
}
}
for (const auto & i : matches) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
}
if (m_string->m_containsMacro) {

View File

@ -130,14 +130,14 @@ bool VerifyCPF::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_cpf == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_cpf = verify(i.match.c_str(), i.match.size());
is_cpf = verify(i.str().c_str(), i.str().size());
if (is_cpf) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match));
"0", i.str());
ms_dbg_a(t, 7, "Added VerifyCPF match TX.0: " + \
std::string(i.match));
i.str());
}
goto out;

View File

@ -121,14 +121,14 @@ bool VerifySSN::evaluate(Transaction *t, Rule *rule,
for (i = 0; i < input.size() - 1 && is_ssn == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
for (const auto & i : matches) {
is_ssn = verify(i.match.c_str(), i.match.size());
is_ssn = verify(i.str().c_str(), i.str().size());
if (is_ssn) {
logOffset(ruleMessage, i.m_offset, i.m_length);
logOffset(ruleMessage, i.offset(), i.str().size());
if (rule && t && rule->m_containsCaptureAction) {
t->m_collections.m_tx_collection->storeOrUpdateFirst(
"0", std::string(i.match));
"0", i.str());
ms_dbg_a(t, 7, "Added VerifySSN match TX.0: " + \
std::string(i.match));
i.str());
}
goto out;

View File

@ -39,15 +39,11 @@ namespace Utils {
Regex::Regex(const std::string& pattern_)
: pattern(pattern_),
m_ovector {0} {
: pattern(pattern_.empty() ? ".*" : pattern_)
{
const char *errptr = NULL;
int erroffset;
if (pattern.empty() == true) {
pattern.assign(".*");
}
m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE,
&errptr, &erroffset, NULL);
@ -71,7 +67,7 @@ Regex::~Regex() {
}
std::list<SMatch> Regex::searchAll(const std::string& s) {
std::list<SMatch> Regex::searchAll(const std::string& s) const {
const char *subject = s.c_str();
const std::string tmpString = std::string(s.c_str(), s.size());
int ovector[OVECCOUNT];
@ -83,7 +79,6 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
s.size(), offset, 0, ovector, OVECCOUNT);
for (i = 0; i < rc; i++) {
SMatch match;
size_t start = ovector[2*i];
size_t end = ovector[2*i+1];
size_t len = end - start;
@ -91,11 +86,9 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
rc = 0;
break;
}
match.match = std::string(tmpString, start, len);
match.m_offset = start;
match.m_length = len;
std::string match = std::string(tmpString, start, len);
offset = start + len;
retList.push_front(match);
retList.push_front(SMatch(match, start));
if (len == 0) {
rc = 0;
@ -107,24 +100,24 @@ std::list<SMatch> Regex::searchAll(const std::string& s) {
return retList;
}
int regex_search(const std::string& s, SMatch *match,
const Regex& regex) {
int Regex::search(const std::string& s, SMatch *match) const {
int ovector[OVECCOUNT];
int ret = pcre_exec(regex.m_pc, regex.m_pce, s.c_str(),
int ret = pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
if (ret > 0) {
match->match = std::string(s, ovector[ret-1],
ovector[ret] - ovector[ret-1]);
match->size_ = ret;
*match = SMatch(
std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
0
);
}
return ret;
}
int regex_search(const std::string& s, const Regex& regex) {
int Regex::search(const std::string& s) const {
int ovector[OVECCOUNT];
return pcre_exec(regex.m_pc, regex.m_pce, s.c_str(),
return pcre_exec(m_pc, m_pce, s.c_str(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;
}

View File

@ -31,39 +31,48 @@ namespace Utils {
class SMatch {
public:
SMatch() : size_(0),
m_offset(0),
m_length(0),
match("") { }
size_t size() const { return size_; }
std::string str() const { return match; }
SMatch()
: m_match(), m_offset(0)
{}
int size_;
int m_offset;
int m_length;
std::string match;
SMatch(const std::string &match, size_t offset)
: m_match(match), m_offset(offset)
{}
const std::string& str() const { return m_match; }
size_t offset() const { return m_offset; }
private:
std::string m_match;
size_t m_offset;
};
class Regex {
public:
explicit Regex(const std::string& pattern_);
~Regex();
std::string pattern;
// m_pc and m_pce can't be easily copied
Regex(const Regex&) = delete;
Regex& operator=(const Regex&) = delete;
std::list<SMatch> searchAll(const std::string& s) const;
int search(const std::string &s, SMatch *m) const;
int search(const std::string &s) const;
const std::string pattern;
private:
pcre *m_pc = NULL;
pcre_extra *m_pce = NULL;
int m_ovector[OVECCOUNT];
std::list<SMatch> searchAll(const std::string& s);
};
static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) {
return regex.search(s, match);
}
int regex_search(const std::string& s, SMatch *m,
const Regex& regex);
int regex_search(const std::string& s, const Regex& r);
static inline int regex_search(const std::string& s, const Regex& regex) {
return regex.search(s);
}
} // namespace Utils
} // namespace modsecurity

View File

@ -202,7 +202,7 @@ void perform_unit_test(ModSecurityTest<RegressionTest> *test,
SMatch match;
std::string s = modsec_rules->getParserError();
if (regex_search(s, &match, re) && match.size() >= 1) {
if (regex_search(s, &match, re)) {
if (test->m_automake_output) {
std::cout << ":test-result: PASS " << filename \
<< ":" << t->name << std::endl;

View File

@ -62,8 +62,7 @@ void json2bin(std::string *str) {
modsecurity::Utils::Regex re2("\\\\u([a-z0-9A-Z]{4})");
modsecurity::Utils::SMatch match;
while (modsecurity::Utils::regex_search(*str, &match, re)
&& match.size() > 0) {
while (modsecurity::Utils::regex_search(*str, &match, re)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);
@ -71,8 +70,7 @@ void json2bin(std::string *str) {
replaceAll(str, match.str(), p);
}
while (modsecurity::Utils::regex_search(*str, &match, re2)
&& match.size() > 0) {
while (modsecurity::Utils::regex_search(*str, &match, re2)) {
unsigned int p;
std::string toBeReplaced = match.str();
toBeReplaced.erase(0, 2);