2023-02-15 19:09:38 +00:00

2075 lines
87 KiB
C++
Executable File

// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// #define WAF2_LOGGING_ENABLE (does performance impact)
#include "WaapAssetState.h"
#include "Waf2Regex.h"
#include "debug.h"
#include "Waf2Util.h"
#include "maybe_res.h"
#include "picojson.h"
#include "agent_core_utilities.h"
#include <algorithm>
#include <fstream>
#include <boost/regex.hpp>
#define MAX_CACHE_VALUE_SIZE 1024
USE_DEBUG_FLAG(D_WAAP_ASSET_STATE);
USE_DEBUG_FLAG(D_WAAP_SAMPLE_PREPROCESS);
USE_DEBUG_FLAG(D_WAAP_SAMPLE_SCAN);
USE_DEBUG_FLAG(D_WAAP_EVASIONS);
// Short aliases for the picojson JSON types (object, generic value, array).
typedef picojson::value::object JsObj;
typedef picojson::value JsVal;
typedef picojson::value::array JsArr;
// Maps a string key to a list of string values.
typedef std::map<std::string, std::vector<std::string>> filtered_parameters_t;
#ifdef WAF2_LOGGING_ENABLE
// Debug helper (only built with WAF2_LOGGING_ENABLE): traces every entry of `v` under the heading
// `title`. Entries that are absent from `ignored_set` are prefixed with '+', entries present in it
// with '-'.
static void
print_filtered(std::string title, const std::set<std::string>& ignored_set, const std::vector<std::string>& v) {
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "--------------------------";
#if 0 // TODO:: may be useful for debug, but in general no need to print this on every scanned value...
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "Ignored " << title << " set:";
    for (const std::string &ignoredWord : ignored_set) {
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "*'" << ignoredWord << "'";
    }
#endif
    dbgTrace(D_WAAP_SAMPLE_SCAN) << title << " collected:";
    for (const std::string &entry : v) {
        // '+' marks an entry that is not ignored, '-' marks an ignored one
        const char *marker = (ignored_set.count(entry) == 0) ? "+'" : "-'";
        dbgTrace(D_WAAP_SAMPLE_SCAN) << marker << entry << "'";
    }
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "--------------------------";
}
// Debug helper (only built with WAF2_LOGGING_ENABLE): traces each key of `m` followed by every
// string stored under that key.
static void print_found_patterns(const Waap::Util::map_of_stringlists_t& m) {
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "-- found_patterns: ---------";
    for (const auto &entry : m) {
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "'" << entry.first << "'";
        for (const auto &pattern : entry.second) {
            dbgTrace(D_WAAP_SAMPLE_SCAN) << " `-> '" << pattern << "'";
        }
    }
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "--------------------------";
}
#endif
// Error flag shared by every SingleRegex constructed below.
// NOTE(review): presumably set by SingleRegex when a pattern fails to compile - confirm in Waf2Regex.h.
static bool err_hex = false;
// Character class for the byte that must neighbour an encoded token:
// a word character, '.', '%', '?', '*', '/' or '\'.
static const std::string path_traversal_chars_regex = "[\\w.%?*\\/\\\\]";
// Look-behind guard placed directly in front of a "0xHH" token. It inspects the two hex digits (and
// their "0x" / "%u" / "%" prefix) that may precede the token.
// NOTE(review): presumably meant to avoid matching inside a longer encoded run - confirm.
static const std::string evasion_hex_regex_unallowed_prefix_helper =
"(?:(?<!(?<!0x|%u)[0-9a-f][0-9a-f])|(?<!(?<!%)[0-9a-f][0-9a-f]))";
// Captures a "0x" followed by exactly two lowercase hex digits.
static const std::string evasion_hex_regex_helper = "(0x[0-9a-f][0-9a-f])";
// Matches a guarded "0xHH" token that is immediately followed OR immediately preceded by one of the
// path_traversal_chars_regex characters.
static const SingleRegex evasion_hex_regex(
evasion_hex_regex_unallowed_prefix_helper + evasion_hex_regex_helper + path_traversal_chars_regex +
"|" + path_traversal_chars_regex + evasion_hex_regex_unallowed_prefix_helper + evasion_hex_regex_helper,
err_hex,
"evasion_hex_regex");
// Captures "%c1%" followed by "1c", "9c", "pc" or "8s" (letters in either case).
// NOTE(review): these look like non-standard encodings of path separators - confirm.
static const std::string bad_hex_regex_helper = "(%[cC]1%(([19][cC])|([pP][cC])|(8[sS])))";
// The same token on its own, compiled as a boost::regex.
static const boost::regex bad_hex_regex(bad_hex_regex_helper);
// The token followed OR preceded by one of the path_traversal_chars_regex characters.
static const SingleRegex evasion_bad_hex_regex(
bad_hex_regex_helper + path_traversal_chars_regex +
"|" + path_traversal_chars_regex + bad_hex_regex_helper,
err_hex,
"evasion_bad_hex_regex");
// Captures "%c0%" followed by one of 5, 6, 2, a, f, e (letters in either case) and then 'e'/'E';
// this includes "%c0%2e" and "%c0%ae".
// NOTE(review): going by the name, these are treated as alternative encodings of '.' - confirm.
static const std::string utf_evasion_for_dot_helper =
"(%[cC]0%[562aAfFeE][eE])";
// The token followed OR preceded by one of the path_traversal_chars_regex characters.
static const SingleRegex utf_evasion_for_dot(
utf_evasion_for_dot_helper + path_traversal_chars_regex +
"|" + path_traversal_chars_regex + utf_evasion_for_dot_helper,
err_hex,
"utf_evasion_for_dot");
// The same token on its own, compiled as a boost::regex.
static const boost::regex utf_evasion_for_dot_regex(utf_evasion_for_dot_helper);
// Matches a double quote, optional whitespace, a comma, optional whitespace and another double quote.
static const std::string sqli_comma_evasion_regex_helper = "\"\\s*,\\s*\"";
static const boost::regex sqli_comma_evasion_regex(sqli_comma_evasion_regex_helper);
// Builds a new asset state on top of an existing one.
// The new object shares the signatures of `pWaapAssetState` and uses the same capacities for its three
// caches; it then merges the source's keyword scores, copies its type validator and registers a
// config-load callback that clears the rate-limiting, security-headers and error-limiting state.
// `pWaapAssetState` is dereferenced unconditionally and therefore must not be null.
// NOTE(review): the callback captures `this` and nothing visible here unregisters it - confirm the
// callback cannot be invoked after this object is destroyed.
WaapAssetState::WaapAssetState(const std::shared_ptr<WaapAssetState>& pWaapAssetState,
    const std::string& waapDataFileName,
    const std::string& id) :
    WaapAssetState(pWaapAssetState->m_Signatures,
        waapDataFileName,
        pWaapAssetState->m_cleanValuesCache.capacity(),
        pWaapAssetState->m_suspiciousValuesCache.capacity(),
        pWaapAssetState->m_sampleTypeCache.capacity(),
        id)
{
    WaapAssetState &source = *pWaapAssetState;

    // Carry over the learned scores, then refresh the local copies derived from them
    scoreBuilder.mergeScores(source.scoreBuilder);
    updateScores();

    m_typeValidator = source.m_typeValidator;

    registerConfigLoadCb(
        [this]()
        {
            clearRateLimitingState();
            clearSecurityHeadersState();
            clearErrorLimitingState();
        }
    );
}
// Main constructor.
// Stores the signatures, data file name and asset id, sizes the three value caches and creates the
// indicators filters manager. When an asset id is given and an I_AgentDetails provider exists, the
// manager is created with the path "<tenantId>/<assetId>"; otherwise it is created with empty path and
// asset id. Finally the keyword scores are loaded from the score builder.
WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
    const std::string& waapDataFileName,
    size_t cleanValuesCacheCapacity,
    size_t suspiciousValuesCacheCapacity,
    size_t sampleTypeCacheCapacity,
    const std::string& assetId) :
    m_Signatures(signatures),
    m_waapDataFileName(waapDataFileName),
    m_assetId(assetId),
    scoreBuilder(this),
    m_rateLimitingState(nullptr),
    m_errorLimitingState(nullptr),
    m_securityHeadersState(nullptr),
    m_filtersMngr(nullptr),
    m_typeValidator(getWaapDataDir() + "/waap.data"),
    m_cleanValuesCache(cleanValuesCacheCapacity),
    m_suspiciousValuesCache(suspiciousValuesCacheCapacity),
    m_sampleTypeCache(sampleTypeCacheCapacity)
{
    const bool canResolveTenant = !assetId.empty() && Singleton::exists<I_AgentDetails>();
    if (!canResolveTenant)
    {
        // No asset id or no agent details available: anonymous filters manager
        m_filtersMngr = std::make_shared<IndicatorsFiltersManager>("", "", this);
    }
    else
    {
        I_AgentDetails* agentDetails = Singleton::Consume<I_AgentDetails>::by<WaapComponent>();
        std::string path = agentDetails->getTenantId() + "/" + assetId;
        m_filtersMngr = std::make_shared<IndicatorsFiltersManager>(path, assetId, this);
    }

    // Load keyword scores - copy from ScoreBuilder
    updateScores();
}
// Destructor. The cleanup of m_Signatures->headers_re below is compiled out (#if 0), so the body
// currently does nothing.
WaapAssetState::~WaapAssetState() {
// TODO:: leaving this cleanup disabled may introduce a (not critical) memory leak.
// The code should be re-enabled after testing it well.
#if 0
// clean up the headers_re map to avoid memory leak
for (auto it = m_Signatures->headers_re.begin(); it != m_Signatures->headers_re.end(); ++it) {
delete it->second; // delete allocated Regex instances
}
#endif
}
// Returns the shared signatures object held by this asset state (a copy of the shared_ptr).
std::shared_ptr<Signatures> WaapAssetState::getSignatures() const
{
return m_Signatures;
}
// Forwards to reset() of the indicators filters manager (the object m_filtersMngr points to).
// No other member is touched here.
void WaapAssetState::reset()
{
m_filtersMngr->reset();
}
// Decodes UTF-8 style multi-byte sequences in `text` in place and keeps only what maps to ASCII.
//
// - Bytes <= 127 are copied through unchanged (and abort any sequence being collected).
// - A lead byte starts a sequence of 2..6 bytes; the following bytes are accumulated WITHOUT checking
//   that they carry the "10" continuation marker, mimicking lenient decoders. Overlong encodings are
//   therefore decoded too, e.g. 0xC0 0xAF yields '/'.
// - A completed sequence is emitted when it decodes to a value <= 127, or converted through
//   convertSpecialUnicode() when isSpecialUnicode() accepts it; any other sequence is dropped.
// - Bytes >= 128 that cannot start a sequence, and sequences cut short by an ASCII byte, are dropped.
//
// The string is shrunk to the number of bytes written.
void filterUnicode(std::string & text) {
    std::string::iterator it = text.begin();
    std::string::iterator result = it;
    uint32_t acc = 0;
    int bytes_left = 0;

    for (; it != text.end(); ++it) {
        unsigned char ch = (unsigned char)(*it);

        if (ch <= 127) {
            *result++ = ch;
            bytes_left = 0; // any character <= 127 stops collecting UTF8 code
        }
        else {
            if (bytes_left == 0) {
                // Start collecting a new code. Every branch assigns acc, so nothing can leak in
                // from an earlier sequence that was cut short by an ASCII byte.
                if ((ch & 0xE0) == 0xC0) { // 110X XXXX: two-byte sequence
                    // The overlong lead bytes 0xC0/0xC1 contribute no payload bits. The accumulator
                    // must still be reset explicitly: it may hold bits of an interrupted sequence,
                    // which would corrupt the decoded value and hide e.g. an encoded '/'.
                    acc = ((ch & 0x1E) != 0) ? (ch & 31) : 0;
                    bytes_left = 1;
                }
                else if ((ch & 0xF0) == 0xE0) { // 1110 XXXX: three-byte sequence
                    acc = ch & 15;
                    bytes_left = 2;
                }
                else if ((ch & 0xF8) == 0xF0) { // 1111 0XXX: four-byte sequence
                    acc = ch & 7;
                    bytes_left = 3;
                }
                else if ((ch & 0xFC) == 0xF8) { // 1111 10XX: five-byte sequence (an error by the standard)
                    acc = ch & 3;
                    bytes_left = 4;
                }
                else if ((ch & 0xFE) == 0xFC) { // 1111 110X: six-byte sequence (an error by the standard)
                    acc = ch & 1;
                    bytes_left = 5;
                }
                else {
                    // Not a valid lead byte: drop it
                    bytes_left = 0;
                }
            }
            else if (bytes_left > 0) {
                // A strict decoder would check that the following bytes contain "10" as their high
                // bits, but buggy decoders don't, so neither do we.
                acc = (acc << 6) | (ch & 0x3F);
                bytes_left--;

                if (bytes_left == 0) {
                    // finished collecting the utf8 code
                    if (acc <= 127) {
                        *result++ = static_cast<char>(acc);
                    }
                    else if (isSpecialUnicode(acc)) {
                        *result++ = convertSpecialUnicode(acc);
                    }
                    acc = 0;
                }
            }
        }
    }

    text.erase(result, text.end());
}
#if 0
// Disabled (compiled out) helper that overwrites every negative-valued char of `text` with `repl`,
// one output byte per input byte.
// NOTE(review): the `*it < 0` test only catches bytes >= 0x80 on platforms where plain char is
// signed - confirm before re-enabling.
//std::replace_if(text.begin(), text.end(), [](char c) { return !(c>=0); }, ' ');
inline void replaceUnicode(std::string & text, const char repl) {
std::string::iterator it = text.begin();
for (; it != text.end(); ++it) {
if (*it < 0) {
*it = repl;
}
}
}
#endif
// Python equivalent: text = re.sub(r'[^\x00-\x7F]+',' ', text)
//
// Decodes UTF-8 style multi-byte sequences in `text` in place.
//
// - Bytes <= 127 are copied through unchanged (and abort any sequence being collected).
// - A lead byte starts a sequence of 2..6 bytes; the following bytes are accumulated WITHOUT checking
//   that they carry the "10" continuation marker, mimicking lenient decoders. Overlong encodings are
//   therefore decoded too, e.g. 0xC0 0xAF yields '/'.
// - A completed sequence is emitted as-is when it decodes to a value <= 127, converted through
//   convertSpecialUnicode() when isSpecialUnicode() accepts it, and otherwise replaced by ONE `repl`.
// - Bytes >= 128 that cannot start a sequence, and sequences cut short by an ASCII byte, are dropped
//   without emitting `repl`.
//
// The string is shrunk to the number of bytes written.
void replaceUnicodeSequence(std::string & text, const char repl) {
    std::string::iterator it = text.begin();
    std::string::iterator result = it;
    uint32_t acc = 0;
    int bytes_left = 0;

    for (; it != text.end(); ++it) {
        unsigned char ch = (unsigned char)(*it);

        if (ch <= 127) {
            *result++ = ch;
            bytes_left = 0; // any character <= 127 stops collecting UTF8 code
        }
        else {
            if (bytes_left == 0) {
                // Start collecting a new code. Every branch assigns acc, so nothing can leak in
                // from an earlier sequence that was cut short by an ASCII byte.
                if ((ch & 0xE0) == 0xC0) { // 110X XXXX: two-byte sequence
                    // The overlong lead bytes 0xC0/0xC1 contribute no payload bits. The accumulator
                    // must still be reset explicitly: it may hold bits of an interrupted sequence,
                    // which would corrupt the decoded value and hide e.g. an encoded '/'.
                    acc = ((ch & 0x1E) != 0) ? (ch & 31) : 0;
                    bytes_left = 1;
                }
                else if ((ch & 0xF0) == 0xE0) { // 1110 XXXX: three-byte sequence
                    acc = ch & 15;
                    bytes_left = 2;
                }
                else if ((ch & 0xF8) == 0xF0) { // 1111 0XXX: four-byte sequence
                    acc = ch & 7;
                    bytes_left = 3;
                }
                else if ((ch & 0xFC) == 0xF8) { // 1111 10XX: five-byte sequence (an error by the standard)
                    acc = ch & 3;
                    bytes_left = 4;
                }
                else if ((ch & 0xFE) == 0xFC) { // 1111 110X: six-byte sequence (an error by the standard)
                    acc = ch & 1;
                    bytes_left = 5;
                }
                else {
                    // Not a valid lead byte: drop it
                    bytes_left = 0;
                }
            }
            else if (bytes_left > 0) {
                // A strict decoder would check that the following bytes contain "10" as their high
                // bits, but buggy decoders don't, so neither do we.
                acc = (acc << 6) | (ch & 0x3F);
                bytes_left--;

                if (bytes_left == 0) {
                    // finished collecting the utf8 code
                    if (acc <= 127) {
                        *result++ = static_cast<char>(acc);
                    }
                    else if (isSpecialUnicode(acc)) {
                        *result++ = convertSpecialUnicode(acc);
                    }
                    else {
                        *result++ = repl;
                    }
                    acc = 0;
                }
            }
        }
    }

    text.erase(result, text.end());
}
// Rewrites every 0xA0 byte ("non-breaking space") of `line` into a regular ASCII space, in place.
void
fixBreakingSpace(std::string &line)
{
    const char nonBreakingSpace = static_cast<char>(0xA0);
    std::replace(line.begin(), line.end(), nonBreakingSpace, ' ');
}
// Normalizes a raw sample value before it is scanned and returns the normalized copy; the input is
// not modified. The value goes through a fixed pipeline of decoding passes (non-breaking-space fix,
// unicode filtering, UTF-7 filtering, URL unquoting, HTML entity and backslash unescaping, unicode
// unescaping) and is lower-cased at the end. Several passes are deliberately repeated, because one
// decoding step can expose input for another. The order of the passes is significant - do not reorder.
// Every stage traces its intermediate result under D_WAAP_SAMPLE_PREPROCESS.
std::string unescape(const std::string & s) {
std::string text = s;
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (0) '" << text << "'";
// 0. Turn 0xA0 ("non-breaking space") bytes into regular spaces.
fixBreakingSpace(text);
// 1. remove all unicode characters from string. Basically,
// remove all characters whose ASCII code is >=128.
// Python equivalent: text.encode('ascii',errors='ignore')
filterUnicode(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (1) '" << text << "'";
// 1b. Run the UTF-7 filter (filterUTF7 is defined outside this file section).
text = filterUTF7(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (1) (after filterUTF7) '" << text << "'";
// 2. Replace %xx sequences by their single-character equivalents.
// Also replaces '+' symbol by space character.
// Python equivalent: text = urllib.unquote_plus(text)
text.erase(unquote_plus(text.begin(), text.end()), text.end());
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (2) '" << text << "'";
// URL-unquoting may have produced new 0xA0 bytes: convert them to spaces again.
fixBreakingSpace(text);
// 3. remove all unicode characters from string. Basically,
// remove all characters whose ASCII code is >=128.
// Python equivalent: text.encode('ascii',errors='ignore')
filterUnicode(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (3) '" << text << "'";
// 4. Decode HTML entities.
// Open question from the original author: should unicode html entities
// (python's htmlentitydefs module) be handled here as well?
// Python equivalent: text = HTMLParser.HTMLParser().unescape(text)
text.erase(escape_html(text.begin(), text.end()), text.end());
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (4) '" << text << "'";
// 5. Apply backslash escaping (like in C)
// Python equivalent: text = text.decode('string_escape')
text.erase(escape_backslashes(text.begin(), text.end()), text.end());
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (5) '" << text << "'";
// 6. remove all unicode characters from string. Basically,
// remove all characters whose ASCII code is >=128.
// Python equivalent: text.encode('ascii',errors='ignore')
filterUnicode(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (6) '" << text << "'";
// 7. Replace %xx sequences by their single-character equivalents.
// Also replaces '+' symbol by space character.
// Python equivalent: text = urllib.unquote_plus(text)
text.erase(unquote_plus(text.begin(), text.end()), text.end());
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (7) '" << text << "'";
// 7b. Run the in-place unicode unescape pass (unescapeUnicode is defined outside this file section).
unescapeUnicode(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "after unescapeUnicode '" << text << "'";
// 8. remove all unicode characters from string. Basically,
// remove all characters whose ASCII code is >=128.
// Python equivalent: text.encode('ascii',errors='ignore')
filterUnicode(text);
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (8) '" << text << "'";
// 9. (not implemented) The Python reference attempted a UTF-8 decode here:
//
//try:
// text = text.decode('utf-8')
//except:
// pass
// 10. Replace each remaining multi-byte (unicode) sequence with a single space
// Python equivalent: text = re.sub(r'[^\x00-\x7F]+',' ', text)
// TODO:: the Python reference actually does this:
// text = re.sub(r'[^\x00-\x7F]+',' ', text).encode("ascii","ignore")
replaceUnicodeSequence(text, ' ');
#if 0 // Removed Aug 25 2018. Reason for removal - breaks input containing ASCII zeros.
// 11. remove all unicode characters from string.
// Basically, remove all characters whose ASCII code is >=128.
// Python equivalent: text.encode('ascii',errors='ignore')
filterUnicode(text);
#endif
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (11) '" << text << "'";
// 12. finally, apply tolower() to all characters of a string
// std::for_each(text.begin(), text.end(), [](char &c) { c = tolower(c); });
for (std::string::iterator pC = text.begin(); pC != text.end(); ++pC) {
*pC = tolower(*pC);
}
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (12) '" << text << "'";
return text;
}
// Builds a printable summary of the distinct non-alphanumeric ASCII characters found in `value`.
// Characters are reported in order of first appearance and each one only once; alphanumeric
// characters and bytes above 127 are skipped. Control characters, backslash, quotes and '?' are
// written as C-style escapes ("\n", "\\", "\x1F", ...) for easier viewing and because binary data
// causes issues with ElasticSearch; everything else is copied verbatim.
inline std::string repr_uniq(const std::string & value) {
    struct Escape {
        unsigned char code;
        const char *text;
    };
    static const Escape escapes[] = {
        { 0x07, "\\a" },
        { 0x08, "\\b" },
        { 0x09, "\\t" },
        { 0x0A, "\\n" },
        { 0x0B, "\\v" },
        { 0x0C, "\\f" },
        { 0x0D, "\\r" },
        { 0x5C, "\\\\" },
        { 0x27, "\\\'" },
        { 0x22, "\\\"" },
        { 0x3F, "\\\?" }
    };
    static const char hexDigits[] = "0123456789ABCDEF";

    bool seen[128] = { false };
    std::string out;

    for (const char raw : value) {
        const unsigned char ch = static_cast<unsigned char>(raw);

        // Only ASCII, non-alphanumeric characters that were not reported yet
        if (ch > 127 || isalnum(ch) || seen[ch]) {
            continue;
        }
        seen[ch] = true;

        const char *named = nullptr;
        for (const Escape &candidate : escapes) {
            if (candidate.code == ch) {
                named = candidate.text;
                break;
            }
        }

        if (named != nullptr) {
            out += named;
        }
        else if (ch >= 32) {
            out += static_cast<char>(ch);
        }
        else {
            // Remaining control characters: "\xHH" with two uppercase hex digits
            out += "\\" "x";
            out += hexDigits[ch >> 4];
            out += hexDigits[ch & 0x0F];
        }
    }

    return out;
}
// True when `word` has at most two characters (the empty string included).
static bool isShortWord(const std::string &word)
{
    return word.length() < 3;
}
// True when `word` starts with '<' and is one to three characters long (e.g. "<a", "<br").
static bool isShortHtmlTag(const std::string &word)
{
    if (word.empty() || word.size() > 3) {
        return false;
    }
    return word.front() == '<';
}
void
WaapAssetState::checkRegex(
const SampleValue &sample,
const Regex & pattern,
std::vector<std::string>& keyword_matches,
Waap::Util::map_of_stringlists_t & found_patterns,
bool longTextFound,
bool binaryDataFound) const
{
dbgFlow(D_WAAP_SAMPLE_SCAN) << "checkRegex: line='" << sample.getSampleString() << "' patt='" <<
pattern.getName() << "' longTextFound=" << longTextFound << " binaryDataFound=" << binaryDataFound;
std::vector<RegexMatch> matches;
sample.findMatches(pattern, matches);
for (std::vector<RegexMatch>::const_iterator pMatch = matches.begin(); pMatch != matches.end(); ++pMatch) {
const RegexMatch& match = *pMatch;
// Get whole match (group[0], which is always present in any match)
std::string word = match.groups.front().value;
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: match='" << word << "':";
// Short words matched by regexes wont be detected in some cases like
// if enough binary data is present in the value.
if (binaryDataFound && word.size() <= 2) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "Will not add a short keyword '" << word <<
"' because binaryData was found";
continue;
}
for (std::vector<RegexMatch::MatchGroup>::const_iterator pGroup = match.groups.begin() + 1;
pGroup != match.groups.end();
++pGroup) {
std::string group = pGroup->name;
if (group == "") {
continue; // skip unnamed group
}
const std::string& value = pGroup->value;
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: group name='" << group <<
"' value='" << value << "', word='" << word << "':";
// python:
// if 'fast_reg' in group:
// if 'evasion' in group:
// word = repr(str(''.join(set(value))))
// else:
// word =group
if (group.find("fast_reg") != std::string::npos) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: found '*fast_reg*' in group name";
if (group.find("evasion") != std::string::npos) {
dbgTrace(D_WAAP_SAMPLE_SCAN) <<
"checkRegex: found both 'fast_reg' and 'evasion' in group name.";
word = "encoded_" + repr_uniq(value);
// Normally, the word added to the keyword_matches list contain the character sequence.
// However, sometimes (for example in case the sequence contained only unicode characters),
// after running repr_uniq() the word will remain empty string. In this case leave
// something meaningful/readable there.
if (word == "encoded_") {
dbgTrace(D_WAAP_SAMPLE_SCAN) <<
"checkRegex: empty word after repr_uniq: resetting word to 'character_encoding'"
" and group to 'evasion'.";
word = "character_encoding";
}
else if (Waap::Util::str_isalnum(word)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) <<
"checkRegex: isalnum word after repr_uniq: resetting group to 'evasion'.";
// If the found match is alphanumeric (we've seen strings like "640x480" match)
// we still should assume evasion but it doesn't need to include "fast_reg",
// which would cause unconditional report to stage2 and hit performance...
// This is why we remove the word "fast_reg" from the group name.
group = "evasion";
}
if (longTextFound) {
dbgTrace(D_WAAP_SAMPLE_SCAN) <<
"checkRegex: longTextFound so resetting group name to 'longtext'";
group = "longtext";
}
}
else {
word = group;
}
}
// In sequences detected as "longTextFound" or "longBinaryFound", do not add words in the
// "keyword_matches" list that:
// - starts with "encoded_"
// - or startswith("\")
// - or equal to "character_encoding"
if ((longTextFound || binaryDataFound) &&
(word == "character_encoding" || word.substr(0, 1) == "\\" || word.substr(0, 8) == "encoded_")) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "Not adding keyword '" << word << "' because longtext was found";
}
else if (binaryDataFound && (isShortWord(word) || isShortHtmlTag(word) ||
NGEN::Regex::regexMatch(__FILE__, __LINE__, group, m_Signatures->binary_data_kw_filter))) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "Not adding group='" << group << "', word='" << word <<
"' - due to binary data";
continue;
}
else if ((std::find(
keyword_matches.begin(),
keyword_matches.end(),
word) == keyword_matches.end())) {
// python: if (word not in current_matches): current_matches.append(word)
keyword_matches.push_back(word);
}
// python:
// if group not in found_patterns:
// found_patterns[group]=[]
if (found_patterns.find(group) == found_patterns.end()) {
found_patterns[group] = std::vector<std::string>();
}
// python:
// if value not in found_patterns[group]:
// found_patterns[group].append(value)
if (std::find(
found_patterns[group].begin(),
found_patterns[group].end(),
value
) == found_patterns[group].end()) {
found_patterns[group].push_back(value);
}
}
}
}
// TODO:: implement onload mechanism.
// Gate for the "only under load" shortcuts in WaapAssetState::apply(). It is initialised to false and
// nothing in the visible part of this file changes it.
static bool isOnLoad = 0;
// Scans `line` once and reports two counters through the output parameters:
//   repeat     - number of characters equal to one of the two characters right before them
//   wordsCount - number of positions where an ASCII letter follows a non-letter (or starts the line)
static void calcRepeatAndWordsCount(const std::string &line, unsigned int &repeat, unsigned int &wordsCount)
{
    repeat = 0;
    wordsCount = 0;

    // -1 means "no character seen yet"
    int last = -1;
    int beforeLast = -1;

    for (const char cur : line) {
        const bool repeatsRecent = (cur == last) || (cur == beforeLast);
        if (repeatsRecent) {
            ++repeat;
        }

        const bool startsWord = Waap::Util::isAlphaAsciiFast(cur) && !Waap::Util::isAlphaAsciiFast(last);
        if (startsWord) {
            ++wordsCount;
        }

        beforeLast = last;
        last = cur;
    }
}
// Derives the "repetition" and "probing" indicators for `line` and appends them to
// res.keyword_matches.
//
//   repetition - added once (guarded by detectedRepetition) when more than 100 repeated characters
//                are counted; a hint of a potential buffer overflow attack.
//   probing    - added once (guarded by detectedProbing) when there is at least one non-ignored
//                keyword and the number of such keywords plus two exceeds the number of words.
//
// "repetition" is appended before the keywords are counted, so it takes part in that count.
// `ignored_keywords` must not be null. `wordsCount` receives the word count of `line`.
static void calcRepetitionAndProbing(Waf2ScanResult &res, const std::set<std::string> *ignored_keywords,
    const std::string &line, bool &detectedRepetition, bool &detectedProbing, unsigned int &wordsCount)
{
    unsigned int repeat;
    calcRepeatAndWordsCount(line, repeat, wordsCount);

    const bool newRepetition = !detectedRepetition && repeat > 100;
    if (newRepetition) { // detect potential buffer overflow attacks
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "repetition detected: repeat=" << repeat;
        detectedRepetition = true;
        res.keyword_matches.push_back("repetition");
    }

    // python:
    // keywords_num = sum(1 for x in keyword_matches if x not in ignored_keywords)
    const size_t keywords_num = countNotInSet(res.keyword_matches, *ignored_keywords);

    dbgTrace(D_WAAP_SAMPLE_SCAN) << "wordsCount: " << wordsCount << ", repeat=" << repeat
        << ", keyword_matches(num=" << keywords_num << ", size=" << res.keyword_matches.size() << ")";

    const bool newProbing = !detectedProbing && keywords_num != 0 && keywords_num + 2 > wordsCount;
    if (newProbing) {
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "probing detected: keywords_num=" << keywords_num <<
            ", wordsCount=" << wordsCount;
        detectedProbing = true;
        res.keyword_matches.push_back("probing");
    }
}
// Drops keywords that are known to cause false alarms in long values without spaces.
// When res.unescaped_line matches the nospaces_long_value regex, every entry of res.keyword_matches
// that is listed in ignored_for_nospace_long_value is removed; otherwise the list is left untouched.
void
WaapAssetState::filterKeywordsDueToLongText(Waf2ScanResult &res) const
{
    // Test for long value without spaces (these can often cause false alarms)
    if (m_Signatures->nospaces_long_value_re.hasMatch(res.unescaped_line)) {
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "nospaces_long_value matched. may remove some keywords below...";

        // remove some keywords that are often present in such long lines
        std::vector<std::string> &v = res.keyword_matches;
        for (std::vector<std::string>::iterator it = v.begin(); it != v.end();) {
            const std::string &word = *it;
            if (m_Signatures->ignored_for_nospace_long_value.find(word) !=
                m_Signatures->ignored_for_nospace_long_value.end()) {
                dbgTrace(D_WAAP_SAMPLE_SCAN)
                    << "Removing keyword '"
                    << word
                    << "' because nospaces_long_value was found";
                it = v.erase(it);
            }
            else {
                ++it;
            }
        }
    }

#ifdef WAF2_LOGGING_ENABLE
    // Dump interesting statistics and scores.
    // The stage-specific ignore list is not known in this function, so an empty set is passed
    // (every keyword is printed with '+'), the same way the cached-result dump in apply() does it.
    print_filtered("keywords", std::set<std::string>(), res.keyword_matches);
    print_found_patterns(res.found_patterns);
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "keyword_matches.size()=" << res.keyword_matches.size();
#endif
}
// Decides whether `line` should be treated as binary data.
// When `binaryDataFound` is already true, or the line is 25 characters or shorter, the incoming flag
// is returned unchanged. Otherwise the line counts as binary when at least one third of its bytes
// fail isprint().
// The threshold is the same as used in base64 decoding (in function b64DecodeChunk).
// NOTE(review): the previous comment stated that bytes >= 128 (all non-ASCII UTF-8 characters) are
// assumed "printable", but isprint() in the default "C" locale reports them as non-printable, so they
// are counted here - confirm which behaviour is intended.
bool
checkBinaryData(const std::string &line, bool binaryDataFound)
{
    if (binaryDataFound || line.size() <= 25) {
        return binaryDataFound;
    }

    const size_t nonPrintableCharsCount = static_cast<size_t>(
        std::count_if(
            line.begin(),
            line.end(),
            [](char c) { return !isprint(static_cast<unsigned char>(c)); }
        )
    );

    dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkBinaryData('" << line << "'): non-printable=" <<
        nonPrintableCharsCount << ", len=" << line.size();

    if (nonPrintableCharsCount * 3 < line.size()) {
        return false;
    }

    dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkBinaryData('" << line << "'): detected BINARY DATA";
    return true;
}
bool
WaapAssetState::apply(
const std::string &line,
Waf2ScanResult &res,
const std::string &scanStage,
bool isBinaryData,
const Maybe<std::string> splitType) const
{
dbgTrace(D_WAAP_SAMPLE_SCAN)
<< "WaapAssetState::apply('"
<< line
<< "', scanStage="
<< scanStage
<< ", splitType='"
<< (splitType.ok() ? *splitType: "")
<< "'";
// Handle response scan stages
if (scanStage == "resp_body") {
res.clear();
SampleValue sample(line, nullptr);
checkRegex(sample,
m_Signatures->resp_body_words_regex_list,
res.keyword_matches,
res.found_patterns,
false,
false);
checkRegex(sample,
m_Signatures->resp_body_pattern_regex_list,
res.keyword_matches,
res.found_patterns,
false,
false);
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply(): response body " <<
(res.keyword_matches.empty() ? "is not" : "is") << " suspicious";
return !res.keyword_matches.empty();
}
if (scanStage == "resp_header") {
res.clear();
SampleValue sample(line, nullptr);
checkRegex(sample,
m_Signatures->resp_body_words_regex_list,
res.keyword_matches,
res.found_patterns,
false,
false);
checkRegex(sample,
m_Signatures->resp_body_pattern_regex_list,
res.keyword_matches,
res.found_patterns,
false,
false);
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply(): response header " <<
(res.keyword_matches.empty() ? "is not" : "is") << " suspicious";
return !res.keyword_matches.empty();
}
// Only cache values less or equal than MAX_CACHE_VALUE_SIZE
bool shouldCache = (line.size() <= MAX_CACHE_VALUE_SIZE);
if (shouldCache) {
// Handle cached clean values
CacheKey cache_key(line, scanStage, isBinaryData, splitType.ok() ? *splitType : "");
if (m_cleanValuesCache.exist(cache_key)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): not suspicious (cache)";
res.clear();
return false;
}
// Handle cached suspicious values (if found - fills out the "res" structure)
if (m_suspiciousValuesCache.get(cache_key, res)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): suspicious (cache)";
#ifdef WAF2_LOGGING_ENABLE
// Dump cached result
print_filtered("keywords", std::set<std::string>(), res.keyword_matches);
print_filtered("patterns", std::set<std::string>(), res.regex_matches);
print_found_patterns(res.found_patterns);
#endif
return true;
}
}
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): passed the cache check.";
const std::set<std::string>* ignored_keywords = &m_Signatures->global_ignored_keywords;
const std::set<std::string>* ignored_patterns = &m_Signatures->global_ignored_patterns;
bool isUrlScanStage = false;
bool isHeaderScanStage = false;
if ((scanStage.size() == 3 && scanStage == "url") || (scanStage.size() == 7 && scanStage == "referer")) {
if (m_Signatures->url_ignored_re.hasMatch(line)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): ignored for URL.";
if (shouldCache) {
m_cleanValuesCache.insert(CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""));
}
res.clear();
return false;
}
ignored_keywords = &m_Signatures->url_ignored_keywords;
ignored_patterns = &m_Signatures->url_ignored_patterns;
isUrlScanStage = true;
}
else if ((scanStage.size() == 6 && scanStage == "header") ||
(scanStage.size() == 6 && scanStage == "cookie")) {
if (m_Signatures->header_ignored_re.hasMatch(line)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): ignored for header.";
if (shouldCache) {
m_cleanValuesCache.insert(CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""));
}
res.clear();
return false;
}
ignored_keywords = &m_Signatures->header_ignored_keywords;
ignored_patterns = &m_Signatures->header_ignored_patterns;
isHeaderScanStage = true;
}
#if 0
// Removed by Pavel's request. Leaving here in case he'll want to add this back...
//// Pavel told me he wants to use "global" settings for cookie values, rather than cookie-specific ones here.
//else if (scanStage.size() == 6 && (scanStage == "cookie")) {
// if (cookie_ignored_re.hasMatch(line)) {
// dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): ignored for cookie.";
// if (shouldCache) {
// m_cleanValuesCache.insert(CacheKey(line, scanStage));
// }
// res.clear();
// return false;
// }
// ignored_keywords = &cookie_ignored_keywords;
// ignored_patterns = &cookie_ignored_patterns;
//}
#endif
// Only perform these checks under load
if (isOnLoad) {
// Skip values that are too short
if (line.length() < 3) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line <<
"'): skipping: did not pass the length check.";
if (shouldCache) {
m_cleanValuesCache.insert(CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""));
}
res.clear();
return false;
}
// Skip values where all characters are alphanumeric
bool allAlNum = true;
for (std::string::const_iterator pC = line.begin(); pC != line.end(); ++pC) {
if (!isalnum(*pC)) {
allAlNum = false;
break;
}
}
if (allAlNum) {
if (shouldCache) {
m_cleanValuesCache.insert(CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""));
}
res.clear();
return false;
}
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line << "'): passed the stateless checks.";
// Skip values that are longer than 10 characters, and match allowed_text_re regex
if (line.length() > 10) {
if (m_Signatures->allowed_text_re.hasMatch(line) > 0) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "WaapAssetState::apply('" << line <<
"'): matched on allowed_text - ignoring.";
if (shouldCache) {
m_cleanValuesCache.insert(
CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : "")
);
}
res.clear();
return false;
}
}
}
std::string unquote_line = line;
unquote_line.erase(unquote_plus(unquote_line.begin(), unquote_line.end()), unquote_line.end());
// If binary data type is detected outside the scanner - enable filtering specific matches/keywords
bool binaryDataFound =
checkBinaryData(unquote_line, isBinaryData) ||
checkBinaryData(line, isBinaryData);
// Complex unescape and then apply lowercase
res.unescaped_line = unescape(line);
dbgTrace(D_WAAP_SAMPLE_SCAN) << "unescapedLine: '" << res.unescaped_line << "'";
// Detect long text spans, and also any-length spans that end with file extensions such as ".jpg"
bool longTextFound = m_Signatures->longtext_re.hasMatch(res.unescaped_line);
if (longTextFound) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "longtext found";
}
dbgTrace(D_WAAP_SAMPLE_SCAN) << "doing first set of checkRegex calls...";
// Scan unescaped_line with aho-corasick once, and reuse it in multiple calls to checkRegex below
// This is done to improve performance of regex matching.
SampleValue unescapedLineSample(res.unescaped_line, m_Signatures->m_regexPreconditions);
checkRegex(
unescapedLineSample,
m_Signatures->specific_acuracy_keywords_regex,
res.keyword_matches,
res.found_patterns,
longTextFound,
binaryDataFound
);
checkRegex(unescapedLineSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns, longTextFound,
binaryDataFound);
filterKeywordsDueToLongText(res);
bool detectedRepetition = false;
bool detectedProbing = false;
unsigned int wordsCount = 0;
// Calculate repetition and/or probing indicators
if (!binaryDataFound) {
calcRepetitionAndProbing(res, ignored_keywords, res.unescaped_line, detectedRepetition, detectedProbing,
wordsCount);
}
// List of keywords to remove
std::vector<std::string> keywordsToRemove;
// Handle semicolon and pipe-split values.
// Specifically exclude split cookie values to avoid high-probability high-impact false positives.
// note: All-digits values triggers fp when prepended with separator, so they are excluded
if (scanStage != "cookie" && splitType.ok() && !Waap::Util::isAllDigits(res.unescaped_line)) {
dbgTrace(D_WAAP_EVASIONS) << "split value detected type='" << *splitType << "' value='" << line << "'";
// Split value detected eligible for special handling. Scan it after prepending the appropriate prefix
std::string unescaped;
std::set<std::string> keywords_to_filter {
"probing",
"os_cmd_sep_medium_acuracy"
};
if (*splitType == "sem") {
keywords_to_filter.insert(";");
unescaped = ";" + res.unescaped_line;
} else if (*splitType == "pipe") {
keywords_to_filter.insert("|");
unescaped = "|" + res.unescaped_line;
}
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
filterKeywordsDueToLongText(res);
// If only the filtered keywords were detected (no extras) - filter them. If any extra keyword is detected
// then leave everything
if (countNotInSet(res.keyword_matches, keywords_to_filter) == 0) {
for (const std::string &keyword_to_filter : keywords_to_filter) {
keywordsToRemove.push_back(keyword_to_filter);
}
}
if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
bool os_cmd_ev = Waap::Util::find_in_map_of_stringlists_keys("os_cmd_ev", res.found_patterns);
if (os_cmd_ev) {
dbgTrace(D_WAAP_EVASIONS) << "os command evasion found";
// Possible os command evasion detected: - clean up and scan with regexes again.
std::string unescaped;
size_t kwCount = res.keyword_matches.size();
size_t pos = 0;
size_t found;
do {
found = res.unescaped_line.find('[', pos);
if (found != std::string::npos)
{
unescaped += res.unescaped_line.substr(pos, found-pos);
if (found + 3 < res.unescaped_line.size() &&
res.unescaped_line[found+1] == res.unescaped_line[found+2] && res.unescaped_line[found+3] == ']')
{
unescaped += res.unescaped_line[found+1];
pos = found+4; // [aa]
}
else
{
unescaped += res.unescaped_line[found];
pos = found+1;
}
}
} while(found != std::string::npos);
unescaped += res.unescaped_line.substr(pos); // add tail
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("os_cmd_ev");
os_cmd_ev = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
bool quotes_ev = Waap::Util::find_in_map_of_stringlists_keys("quotes_ev", res.found_patterns);
if (quotes_ev) {
dbgTrace(D_WAAP_EVASIONS) << "quotes evasion found";
// Possible quotes evasion detected: - clean up and scan with regexes again.
std::string unescaped = m_Signatures->quotes_ev_pattern.sub(res.unescaped_line);
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("quotes_ev");
quotes_ev = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if (Waap::Util::containsInvalidUtf8(line)) {
dbgTrace(D_WAAP_EVASIONS) << "invalid utf-8 evasion found";
// Possible invalid utf-8 evasion detected: - clean up and scan with regexes again.
std::string unescaped = Waap::Util::unescapeInvalidUtf8(line);
size_t kwCount = res.keyword_matches.size();
unescaped = unescape(unescaped);
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
Maybe<std::string> broken_utf8_line = Waap::Util::containsBrokenUtf8(line, unquote_line);
if (broken_utf8_line.ok()) {
dbgTrace(D_WAAP_EVASIONS) << "broken-down utf-8 evasion found";
std::string unescaped = Waap::Util::unescapeBrokenUtf8(broken_utf8_line.unpack());
size_t kwCount = res.keyword_matches.size();
unescaped = unescape(unescaped);
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
bool comment_ev = Waap::Util::find_in_map_of_stringlists_keys("comment_ev", res.found_patterns);
if (comment_ev) {
// Possible comment evasion detected: - clean up and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "comment evasion found";
std::string unescaped = m_Signatures->comment_ev_pattern.sub(res.unescaped_line);
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("comment_ev");
comment_ev = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
bool quoutes_space_evasion = Waap::Util::find_in_map_of_stringlists_keys(
"quotes_space_ev_fast_reg",
res.found_patterns
);
if (quoutes_space_evasion) {
// Possible quotes space evasion detected: - clean up and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "quotes space evasion found";
std::string unescaped = m_Signatures->quotes_space_ev_pattern.sub(res.unescaped_line);
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("quotes_space_evasion");
quoutes_space_evasion = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if (Waap::Util::testUrlBareUtf8Evasion(line)) {
// Possible url_bare_utf8 evasion detected: - clean up and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "url_bare_utf8 evasion found";
// Revert the encoding and rescan again
// Insert additional '%' character after each sequence of three characters either "%C0" or "%c0".
std::string unescaped = line;
replaceAll(unescaped, "%c0", "%c0%");
replaceAll(unescaped, "%C0", "%C0%");
// Run the result through another pass of "unescape" which will now correctly urldecode and utf8-decode it
unescaped = unescape(unescaped);
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
boost::cmatch what;
if (boost::regex_search(res.unescaped_line.c_str(), what, sqli_comma_evasion_regex)) {
// Possible SQLi evasion detected (","): - clean up and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "Possible SQLi evasion detected (\",\"): - clean up and scan with regexes again.";
std::string unescaped = res.unescaped_line;
unescaped = boost::regex_replace(unescaped, sqli_comma_evasion_regex, "");
unescaped = unescape(unescaped);
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
if ((res.unescaped_line.find("0x") != std::string::npos) && evasion_hex_regex.hasMatch(res.unescaped_line)) {
dbgTrace(D_WAAP_EVASIONS) << "hex evasion found (in unescaped line)";
std::string unescaped = res.unescaped_line;
replaceAll(unescaped, "0x", "\\x");
unescapeUnicode(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescaped =='" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, false, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
false, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
false, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
for (const auto &kw : res.keyword_matches) {
if (kw.size() < 2 || str_contains(kw, "os_cmd_high_acuracy_fast_reg") ||
kw == "os_cmd_sep_medium_acuracy" || str_contains(kw, "regex_code_execution") ||
str_contains(kw, "regex_code_execution") || kw == "character_encoding" ||
str_contains(kw, "quotes_ev_fast_reg") || str_contains(kw, "encoded_") ||
str_contains(kw, "medium_acuracy") || str_contains(kw, "high_acuracy_fast_reg_xss"))
{
keywordsToRemove.push_back(kw);
}
}
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if ((line.find("0x") != std::string::npos) && evasion_hex_regex.hasMatch(line)) {
dbgTrace(D_WAAP_EVASIONS) << "hex evasion found (in raw line)";
std::string unescaped = line;
replaceAll(unescaped, "0x", "\\x");
unescapeUnicode(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescape == '" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, false, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
false, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
false, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
for (const auto &kw : res.keyword_matches) {
if (kw.size() < 2 || str_contains(kw, "os_cmd_high_acuracy_fast_reg") ||
kw == "os_cmd_sep_medium_acuracy" || str_contains(kw, "regex_code_execution") ||
str_contains(kw, "regex_code_execution") || kw == "character_encoding" ||
str_contains(kw, "quotes_ev_fast_reg") || str_contains(kw, "encoded_") ||
str_contains(kw, "medium_acuracy") || str_contains(kw, "high_acuracy_fast_reg_xss"))
{
keywordsToRemove.push_back(kw);
}
}
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if ((res.unescaped_line.find("%") != std::string::npos) && evasion_bad_hex_regex.hasMatch(res.unescaped_line)) {
dbgTrace(D_WAAP_EVASIONS) << "Bad hex evasion found (%c1%1c or %c1%9c in unescaped line)";
std::string unescaped = res.unescaped_line;
unescaped = boost::regex_replace(unescaped, bad_hex_regex, "/");
unescaped = unescape(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescaped =='" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if ((line.find("%") != std::string::npos) && evasion_bad_hex_regex.hasMatch(line)) {
dbgTrace(D_WAAP_EVASIONS) << "Bad hex evasion found (%c1%1c or %c1%9c in raw line)";
std::string unescaped = line;
unescaped = boost::regex_replace(unescaped, bad_hex_regex, "/");
unescaped = unescape(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescaped == '" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if ((res.unescaped_line.find("%") != std::string::npos) && utf_evasion_for_dot.hasMatch(res.unescaped_line)) {
dbgTrace(D_WAAP_EVASIONS) <<
"UTF evasion for dot found (%c0%*e) in unescaped line";
std::string unescaped = res.unescaped_line;
unescaped = boost::regex_replace(unescaped, utf_evasion_for_dot_regex, ".");
unescaped = unescape(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescaped == '" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
if ((line.find("%") != std::string::npos) && utf_evasion_for_dot.hasMatch(line)) {
dbgTrace(D_WAAP_EVASIONS) << "UTF evasion for dot found (%c0%*e) in raw line";
std::string unescaped = line;
unescaped = boost::regex_replace(unescaped, utf_evasion_for_dot_regex, ".");
unescaped = unescape(unescaped);
dbgTrace(D_WAAP_EVASIONS) << "unescaped == '" << unescaped << "'";
size_t kwCount = res.keyword_matches.size();
if (line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
// python: escape ='hi_acur_fast_reg_evasion' in found_patterns
bool escape = Waap::Util::find_in_map_of_stringlists_keys("evasion", res.found_patterns);
if (escape) {
// Possible evasion detected: remove unicode \u and \x sequences,
// delete all trash in un_escape_pattern, and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "escape pattern found";
std::string unescaped = res.unescaped_line;
dbgTrace(D_WAAP_EVASIONS) << "unescape'" << unescaped << "'";
replaceAll(unescaped, "0x", "\\x");
replaceAll(unescaped, "%u", "\\u");
std::string zero;
zero.push_back(0);
replaceAll(unescaped, zero, "");
unescapeUnicode(unescaped);
// from python: unescaped = un_escape_pattern.sub(r'',line) + ' ' + un_escape_pattern.sub(r' ',line)
// note: "line" in python is called "unescaped" in this code.
unescaped = m_Signatures->un_escape_pattern.sub(unescaped) + " " +
m_Signatures->un_escape_pattern.sub(unescaped, " ");
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("evasion");
escape = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
// Detect bash "backslash" evasions
// Note that the search for low binary ASCII codes such as 7 or 8 are done here because
// unescaped_line after unescape() contains post-processed string, where original \b was already converted to
// single character (ASCII 8).
// This should handle cases like /\bin/sh
unsigned char prev_uch = '\0';
for (char ch : res.unescaped_line) {
unsigned char uch = (unsigned char)ch;
if ((uch >= 0x07 && uch <= 0x0D) || (uch == '\\') || (uch == '/' && prev_uch == '/')) {
escape = true;
break;
}
prev_uch = uch;
}
if (escape) {
dbgTrace(D_WAAP_EVASIONS) << "try decoding bash evasions";
// Possible bash evasion detected: - clean up and scan with regexes again.
dbgTrace(D_WAAP_EVASIONS) << "unescape='" << res.unescaped_line << "'";
std::string unescaped;
unescaped.reserve(res.unescaped_line.size()); // preallocate to improve performance of += clauses below
// Partially revert the effect of the escape_backslashes() function, remove the '\' characters and
// squash string of successive forward slashes to single slash.
// This allows us to decode bash evasions like "/\b\i\n/////s\h"
char prev_ch = '\0';
for (char ch : res.unescaped_line) {
switch (ch) {
case 7: unescaped += "a"; break;
case 8: unescaped += "b"; break;
case 9: unescaped += "t"; break;
case 10: unescaped += "n"; break;
case 11: unescaped += "v"; break;
case 12: unescaped += "f"; break;
case 13: unescaped += "r"; break;
case '\\': break; // remove backslashes
default:
// squash strings of successive '/' characters into single '/' character
if (prev_ch == '/' && ch == '/') {
break;
}
unescaped += ch;
}
prev_ch = ch;
}
size_t kwCount = res.keyword_matches.size();
if (res.unescaped_line != unescaped) {
SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
res.found_patterns, longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
longTextFound, binaryDataFound);
checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
}
if (kwCount == res.keyword_matches.size()) {
// Remove the evasion keyword if no real evasion found
keywordsToRemove.push_back("evasion");
escape = false;
}
else if (!binaryDataFound) {
// Recalculate repetition and/or probing indicators
unsigned int newWordsCount = 0;
calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
newWordsCount);
// Take minimal words count because empirically it means evasion was probably succesfully decoded
wordsCount = std::min(wordsCount, newWordsCount);
}
}
// Remove evasion keywords that should not be reported because there's no real evasion found
if (!keywordsToRemove.empty()) {
dbgTrace(D_WAAP_SAMPLE_SCAN)
<< "Removing these keywords (probably due to evasions): "
<< Waap::Util::vecToString(keywordsToRemove);
}
for (const auto &value : keywordsToRemove) {
Waap::Util::remove_startswith(res.keyword_matches, value);
Waap::Util::remove_in_map_of_stringlists_keys(value, res.found_patterns);
}
// python:
// if headers:
// keyword_matches = [x for x in keyword_matches if x not in '\(/);$=']
if (isHeaderScanStage) {
removeItemsMatchingSubstringOf(res.keyword_matches, "\\(/);$=");
// For headers, also remove all ignored patterns entirely, not just ignore it from counts
for (const auto &ignored_pattern : *ignored_patterns) {
if (res.found_patterns.erase(ignored_pattern)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "Removed the found pattern in header: '" << ignored_pattern << "'";
}
}
}
// python:
// keywords_num = sum(1 for x in keyword_matches if x not in ignored_keywords)
size_t keywords_num = countNotInSet(res.keyword_matches, *ignored_keywords);
size_t regex_num = countNotInSet(res.regex_matches, *ignored_patterns);
bool forceReport = isUrlScanStage && Waap::Util::find_in_map_of_stringlists_keys("url", res.found_patterns);
if (forceReport) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "setting forceReport becacuse we are in url context and "
"'high_acuracy_fast_reg_evation' pattern is found!";
}
// python:
// if keywords_num >2 or ('acuracy' in patterns and not headers) or
// special_patten in patterns or 'probing' in keyword_matches or 'repetition' in keyword_matches:
if (keywords_num + regex_num > 2 ||
Waap::Util::find_in_map_of_stringlists_keys("acur", res.found_patterns) ||
forceReport ||
detectedRepetition ||
detectedProbing) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "pre-suspicion found.";
// apply regex signatures
checkRegex(unescapedLineSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
longTextFound, binaryDataFound);
// python:
// if len(regex_matches) and 'probing' not in keyword_matches:
// if len(keyword_matches+regex_matches)+2>words:
// keyword_matches.append('probing')
if (!binaryDataFound && res.regex_matches.size() > 0 && !detectedProbing) {
// if len(''.join(res.keyword_matches+res.regex_matches))>=alphanumeric_num {
if (res.keyword_matches.size() + res.regex_matches.size() + 2 > wordsCount) {
detectedProbing = true;
res.keyword_matches.push_back("probing");
}
}
// python:
// keywords_num = sum(1 for x in keyword_matches if x not in ignored_keywords)
keywords_num = countNotInSet(res.keyword_matches, *ignored_keywords);
regex_num = countNotInSet(res.regex_matches, *ignored_patterns);
// Regular (medium) acuracy contributes 1 to the score.
// High acuracy contributes 2 to the score.
int acuracy = 0;
// python:
// if 'acuracy' in patterns and not url:
if (Waap::Util::find_in_map_of_stringlists_keys("acur", res.found_patterns))
{
acuracy = 1;
// search for "high_acuracy" or "hi_acur" signature names
if (Waap::Util::find_in_map_of_stringlists_keys("high", res.found_patterns) ||
Waap::Util::find_in_map_of_stringlists_keys("hi_acur", res.found_patterns))
{
acuracy = 2;
}
}
// "Acuracy" contribution alone won't trigger suspicion yet. It needs additional boost
// of finding some keywords and/or matched regexes.
int score = keywords_num + acuracy + (2 * regex_num);
#ifdef WAF2_LOGGING_ENABLE
// Dump interesting statistics and scores
print_filtered("keywords", *ignored_keywords, res.keyword_matches);
print_filtered("patterns", *ignored_patterns, res.regex_matches);
print_found_patterns(res.found_patterns);
dbgTrace(D_WAAP_SAMPLE_SCAN) << "before decision: keywords(num=" << keywords_num << ", size=" <<
res.keyword_matches.size() << "); regex(num=" << regex_num << ", size=" << res.regex_matches.size() <<
"; acuracy=" << acuracy << "; score=" << score << "; forceReport=" << forceReport << "; probing=" <<
detectedProbing << "; repetition=" << detectedRepetition << "; 'fast_reg' in found_patterns: " <<
Waap::Util::find_in_map_of_stringlists_keys("fast_reg", res.found_patterns);
#endif
// python:
// if (keywords_num+acuracy+2*regex_num)>2 or special_patten in patterns or
// 'fast_reg' in patterns or 'probing' in keyword_matches or 'repetition' in keyword_matches:
if (score > 2 ||
forceReport ||
detectedProbing ||
detectedRepetition ||
Waap::Util::find_in_map_of_stringlists_keys("fast_reg", res.found_patterns)) {
dbgTrace(D_WAAP_SAMPLE_SCAN) << "apply(): suspicion found (score=" << score << ").";
if (shouldCache) {
m_suspiciousValuesCache.insert(
{CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""), res}
);
}
return true; // suspicion found
}
dbgTrace(D_WAAP_SAMPLE_SCAN) << "apply(): suspicion not found (score=" << score << ").";
}
dbgTrace(D_WAAP_SAMPLE_SCAN) << "apply(): not suspicious.";
if (shouldCache) {
m_cleanValuesCache.insert(CacheKey(line, scanStage, isBinaryData, splitType.ok() ? *splitType : ""));
}
res.clear();
return false;
}
// Refreshes the scores by delegating to scoreBuilder.snap().
// NOTE(review): presumably snap() publishes the scores accumulated so far - confirm against the
// score builder implementation.
void
WaapAssetState::updateScores()
{
    scoreBuilder.snap();
}
// Returns a copy of the stored WAAP data file name (m_waapDataFileName).
std::string
WaapAssetState::getWaapDataFileName() const
{
    return m_waapDataFileName;
}
// Gives the caller direct (mutable) access to m_filtered_keywords_verbose.
// The returned reference aliases the member, so changes made through it are visible to this object.
std::map<std::string, std::vector<std::string>> &
WaapAssetState::getFilterVerbose()
{
    return m_filtered_keywords_verbose;
}
// Derives the directory part of m_waapDataFileName: everything before the last '/' character.
// When the file name contains no '/' at all, the file name itself is returned unchanged.
// The result is also written to the D_WAAP_ASSET_STATE trace log.
std::string
WaapAssetState::getWaapDataDir() const
{
    const size_t slashPos = m_waapDataFileName.rfind('/');

    std::string dataDir;
    if (slashPos == std::string::npos) {
        // No directory separator - keep the whole name
        dataDir = m_waapDataFileName;
    } else {
        // Cut off the separator and the file component that follows it
        dataDir = m_waapDataFileName.substr(0, slashPos);
    }

    dbgTrace(D_WAAP_ASSET_STATE) << " signatures filters directory: " << dataDir;
    return dataDir;
}
// Hands the given configuration over to the filters manager by calling m_filtersMngr->loadPolicy().
// pConfig is forwarded as-is; no validation is performed here.
void
WaapAssetState::updateFilterManagerPolicy(IWaapConfig *pConfig)
{
    m_filtersMngr->loadPolicy(pConfig);
}
// Answers whether `keyword` belongs to parameter type `type`.
// The decision is fully delegated to m_typeValidator.isKeywordOfType().
bool
WaapAssetState::isKeywordOfType(const std::string &keyword, ParamType type) const
{
    return m_typeValidator.isKeywordOfType(keyword, type);
}
bool WaapAssetState::isBinarySampleType(const std::string & sample) const
{
// Binary data detection is based on existance of at least two ASCII NUL bytes
size_t nulBytePos = sample.find('\0', 0);
if (nulBytePos != std::string::npos) {
nulBytePos = sample.find('\0', nulBytePos+1);
if (nulBytePos != std::string::npos) {
dbgTrace(D_WAAP_ASSET_STATE) << "binary_input sample type detected (nul bytes)";
return true;
}
}
std::vector<RegexMatch> matches;
m_Signatures->format_magic_binary_re.findAllMatches(sample, matches);
if (!matches.empty()) {
dbgTrace(D_WAAP_ASSET_STATE) << "binary_input sample type detected (signature)";
return true;
}
return false;
}
// Reads one byte of `sample` at `offset` and advances `offset` past it.
// Returns an error (and leaves `offset` untouched) when `offset` is at or beyond the end of the sample.
static Maybe<uint8_t>
parse_wbxml_uint8(const std::string & sample, size_t &offset)
{
    if (offset < sample.size()) {
        return sample[offset++];
    }
    return genError("not wbxml");
}
// Reads a multi-byte encoded unsigned 32-bit integer from `sample`, starting at `offset`.
// Encoding: each byte contributes its low 7 bits, most significant group first; a set high bit (0x80)
// means another byte follows. At most 5 bytes are consumed.
// `offset` is advanced past every byte that was read, including on failure.
// Returns an error when:
//   - the sample ends before the terminating byte (high bit clear) is seen,
//   - no terminating byte appears within 5 bytes,
//   - the encoded value does not fit in 32 bits (5 x 7 = 35 payload bits may be present).
static Maybe<uint32_t>
parse_wbxml_mb_uint32(const std::string & sample, size_t &offset)
{
    static const int max_encoded_bytes = 5;
    uint32_t value = 0;
    for (int i = 0; i < max_encoded_bytes; i++) {
        Maybe<uint8_t> v = parse_wbxml_uint8(sample, offset);
        if (!v.ok()) return genError("not wbxml");
        // Shifting left by 7 would discard bits 25..31: refuse values that would silently wrap around
        if ((value >> 25) != 0) return genError("not wbxml");
        value = (value << 7) | (*v & 0x7F);
        if ((*v & 0x80) == 0) {
            return value;
        }
    }
    // Continuation bit still set after the maximum number of bytes
    return genError("not wbxml");
}
// Heuristically checks whether `sample` starts with a plausible WBXML header.
// The header fields are read in order: version byte, public id, charset, string table length.
// Returns true only if all of the following hold:
//   - the version byte is in the range 1..3,
//   - public id and charset decode as multi-byte integers,
//   - the charset is one of the values in the allow-list below,
//   - the string table length decodes and does not exceed the bytes remaining after the header.
bool WaapAssetState::isWBXMLSampleType(const std::string & sample) const
{
    // Only a subset of charsets is accepted
    static const uint32_t allowed_charsets[] = {0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 17, 106, 1000, 1015, 2026};

    size_t pos = 0;

    // Protocol version: only values 1-3 are supported, since these can be detected more or less reliably
    Maybe<uint8_t> version = parse_wbxml_uint8(sample, pos);
    if (!version.ok()) {
        return false;
    }
    if (*version < 0x01 || *version > 0x03) {
        return false;
    }

    // Public id: only has to be decodable
    Maybe<uint32_t> public_id = parse_wbxml_mb_uint32(sample, pos);
    if (!public_id.ok()) {
        return false;
    }

    // Charset (optional for v0, but v0 is not detected anyway)
    Maybe<uint32_t> charset = parse_wbxml_mb_uint32(sample, pos);
    if (!charset.ok()) {
        return false;
    }
    const uint32_t charset_value = *charset;
    const bool charset_allowed = std::any_of(
        std::begin(allowed_charsets),
        std::end(allowed_charsets),
        [charset_value](uint32_t allowed) { return allowed == charset_value; }
    );
    if (!charset_allowed) {
        return false;
    }

    // String table length: must fit in what is left of the sample
    Maybe<uint32_t> strtbl_len = parse_wbxml_mb_uint32(sample, pos);
    if (!strtbl_len.ok()) {
        return false;
    }
    return *strtbl_len <= sample.size() - pos;
}
// Classifies a sample and returns the set of type names that apply to it.
// - Every entry of m_Signatures->params_type_re whose regex matches the sample contributes its name.
// - "binary_input" is added when isBinarySampleType() reports the sample as binary.
// - When nothing applies, the result is {"unknown"}.
// Samples that resolved to "unknown" are remembered in m_sampleTypeCache so the regexes are not
// re-run for them. Only samples of up to MAX_CACHE_VALUE_SIZE bytes take part in the cache, both for
// lookup and for insertion; larger samples are always evaluated from scratch.
std::set<std::string> WaapAssetState::getSampleType(const std::string & sample) const
{
    std::set<std::string> types;
    const bool shouldCache = (sample.size() <= MAX_CACHE_VALUE_SIZE);

    // Fast path: the sample is already known to have no recognizable type
    if (shouldCache && m_sampleTypeCache.exist(sample)) {
        dbgTrace(D_WAAP_ASSET_STATE) << "WaapAssetState::getSampleType() sample: '" << sample <<
            "' type is unknown (cache)";
        types.insert("unknown");
        return types;
    }

    for (const auto &type_re : m_Signatures->params_type_re)
    {
        dbgTrace(D_WAAP_ASSET_STATE) << "WaapAssetState::getSampleType checking: " << sample <<
            " against " << type_re.first;
        std::vector<RegexMatch> matches;
        type_re.second->findAllMatches(sample, matches);
        dbgTrace(D_WAAP_ASSET_STATE) << "number of matched keywords: " << matches.size();
        if (matches.empty())
        {
            continue;
        }
        types.insert(type_re.first);
    }

    // Binary data detection (NUL bytes or binary format signature)
    if (isBinarySampleType(sample)) {
        dbgTrace(D_WAAP_ASSET_STATE) << "reporting binary_input sample type";
        types.insert("binary_input");
    }

    if (types.empty())
    {
        types.insert("unknown");
        // Lookups are skipped for oversized samples, so storing them would only waste cache space
        if (shouldCache) {
            m_sampleTypeCache.insert(sample);
        }
    }
    return types;
}
// Registers the given keywords for parameter key `param` with the indicators filters manager
// (m_filtersMngr->registerKeywords), in the context of pTransaction.
// `keywords` is passed by non-const reference to the manager.
void WaapAssetState::logIndicatorsInFilters(const std::string &param, Waap::Keywords::KeywordsSet& keywords,
IWaf2Transaction* pTransaction)
{
m_filtersMngr->registerKeywords(param, keywords, pTransaction);
}
// Records that a parameter was seen, without attaching any keywords to it.
// The filter key is built from the scan result's location and parameter name (plus the transaction),
// and is then registered with the filters manager together with an empty keyword set.
void WaapAssetState::logParamHit(Waf2ScanResult& res, IWaf2Transaction* pTransaction)
{
    const std::string filterKey =
        IndicatorsFiltersManager::generateKey(res.location, res.param_name, pTransaction);

    Waap::Keywords::KeywordsSet noKeywords;
    m_filtersMngr->registerKeywords(filterKey, noKeywords, pTransaction);
}
// Passes the keywords found for parameter key `param` through the indicators filters manager
// (m_filtersMngr->filterKeywords).
// Both `keywords` and `filteredKeywords` are handed over by non-const reference.
// NOTE(review): presumably keywords rejected by the filters are removed from `keywords` and
// appended to `filteredKeywords` - confirm in IndicatorsFiltersManager.
void WaapAssetState::filterKeywords(
const std::string &param,
Waap::Keywords::KeywordsSet& keywords,
std::vector<std::string>& filteredKeywords)
{
dbgTrace(D_WAAP_ASSET_STATE) << "filter keywords";
m_filtersMngr->filterKeywords(param, keywords, filteredKeywords);
}
// Empties m_filtered_keywords_verbose, the map exposed by getFilterVerbose().
void WaapAssetState::clearFilterVerbose()
{
m_filtered_keywords_verbose.clear();
}
// Forwards to m_filtersMngr->filterVerbose(), handing it the parameter key, the filtered keywords
// and the member map m_filtered_keywords_verbose.
// NOTE(review): presumably the manager records in that map which filter removed which keyword -
// confirm in IndicatorsFiltersManager.
void WaapAssetState::filterVerbose(const std::string &param,
std::vector<std::string>& filteredKeywords)
{
m_filtersMngr->filterVerbose(param, filteredKeywords, m_filtered_keywords_verbose);
}
// Removes from `keywords_set` every keyword that the signatures list as ignorable for the given
// parameter name (m_Signatures->filter_parameters: parameter name -> keywords to drop).
// When the parameter name has no entry, `keywords_set` is left untouched.
void WaapAssetState::filterKeywordsByParameters(
    const std::string &parameter_name, Waap::Keywords::KeywordsSet &keywords_set)
{
    dbgTrace(D_WAAP_ASSET_STATE) << "filter keywords based on parameter name: " << parameter_name;

    const auto filter_parameters_itr = m_Signatures->filter_parameters.find(parameter_name);
    if (filter_parameters_itr == m_Signatures->filter_parameters.end())
    {
        dbgTrace(D_WAAP_ASSET_STATE) << "No keywords need to be filtered for this parameter";
        return;
    }

    dbgTrace(D_WAAP_ASSET_STATE) << "Found keywords to filter based on parameter name";
    // Iterate by reference: the list holds strings and none of them needs to be copied
    for (const auto &keyword_to_filter : filter_parameters_itr->second)
    {
        const auto keywords_set_itr = keywords_set.find(keyword_to_filter);
        if (keywords_set_itr == keywords_set.end())
        {
            continue;
        }
        dbgTrace(D_WAAP_ASSET_STATE) << "Filtering keyword: " << keyword_to_filter;
        // Erase through the iterator found above instead of searching for the key a second time
        keywords_set.erase(keywords_set_itr);
    }
}
// Drops from `keywords_set` every keyword listed in m_Signatures->remove_keywords_always.
// Keywords from that list which are not present in the set are simply skipped.
void WaapAssetState::removeKeywords(Waap::Keywords::KeywordsSet &keywords_set)
{
    for (const auto &unwanted : m_Signatures->remove_keywords_always)
    {
        const auto found = keywords_set.find(unwanted);
        if (found == keywords_set.end())
        {
            continue;
        }
        dbgTrace(D_WAAP_ASSET_STATE) << "Removing keyword: " << unwanted << " from keyword set";
        keywords_set.erase(found);
    }
}
// Moves every keyword that matches the m_Signatures->wbxml_data_kw_filter regex out of
// `keywords_set` and appends it to `filtered_keywords`. Non-matching keywords stay in the set.
void WaapAssetState::removeWBXMLKeywords(Waap::Keywords::KeywordsSet &keywords_set,
    std::vector<std::string> &filtered_keywords)
{
    auto cursor = keywords_set.begin();
    while (cursor != keywords_set.end()) {
        if (!NGEN::Regex::regexMatch(__FILE__, __LINE__, *cursor, m_Signatures->wbxml_data_kw_filter)) {
            ++cursor;
            continue;
        }
        dbgTrace(D_WAAP_ASSET_STATE) << "Filtering keyword due to wbxml: '" << *cursor << "'";
        filtered_keywords.push_back(*cursor);
        // erase() returns the iterator following the removed element, so iteration stays valid
        cursor = keywords_set.erase(cursor);
    }
}
// Replaces m_rateLimitingState with a newly created Waap::RateLimiting::State constructed from
// rateLimitingPolicy. This object's reference to any previous state is dropped.
void WaapAssetState::createRateLimitingState(const std::shared_ptr<Waap::RateLimiting::Policy> &rateLimitingPolicy)
{
m_rateLimitingState = std::make_shared<Waap::RateLimiting::State>(rateLimitingPolicy);
}
// Replaces m_errorLimitingState with a newly created state constructed from errorLimitingPolicy.
// Error limiting uses the same Waap::RateLimiting::Policy / Waap::RateLimiting::State types as
// rate limiting. This object's reference to any previous state is dropped.
void WaapAssetState::createErrorLimitingState(const std::shared_ptr<Waap::RateLimiting::Policy> &errorLimitingPolicy)
{
m_errorLimitingState = std::make_shared<Waap::RateLimiting::State>(errorLimitingPolicy);
}
// Replaces m_securityHeadersState with a newly created Waap::SecurityHeaders::State constructed
// from securityHeadersPolicy. This object's reference to any previous state is dropped.
void WaapAssetState::createSecurityHeadersState(
const std::shared_ptr<Waap::SecurityHeaders::Policy> &securityHeadersPolicy)
{
m_securityHeadersState = std::make_shared<Waap::SecurityHeaders::State>(securityHeadersPolicy);
}
// Returns a mutable reference to the m_rateLimitingState shared pointer.
// The pointer is empty after clearRateLimitingState(), so callers must check it before use.
std::shared_ptr<Waap::RateLimiting::State>& WaapAssetState::getRateLimitingState()
{
return m_rateLimitingState;
}
// Returns a mutable reference to the m_errorLimitingState shared pointer.
// The pointer is empty after clearErrorLimitingState(), so callers must check it before use.
std::shared_ptr<Waap::RateLimiting::State>& WaapAssetState::getErrorLimitingState()
{
return m_errorLimitingState;
}
// Returns a mutable reference to the m_securityHeadersState shared pointer.
// The pointer is empty after clearSecurityHeadersState(), so callers must check it before use.
std::shared_ptr<Waap::SecurityHeaders::State>& WaapAssetState::getSecurityHeadersState()
{
return m_securityHeadersState;
}
// Resets m_rateLimitingState to an empty pointer, dropping this object's reference to the state.
void WaapAssetState::clearRateLimitingState()
{
m_rateLimitingState.reset();
}
// Resets m_errorLimitingState to an empty pointer, dropping this object's reference to the state.
void WaapAssetState::clearErrorLimitingState()
{
m_errorLimitingState.reset();
}
// Resets m_securityHeadersState to an empty pointer, dropping this object's reference to the state.
void WaapAssetState::clearSecurityHeadersState()
{
m_securityHeadersState.reset();
}