Mar 2nd 2023 update

This commit is contained in:
Ned Wright
2023-03-02 17:08:49 +00:00
parent fef95b12b3
commit 2a7ddf0666
14 changed files with 170 additions and 11 deletions

View File

@@ -102,6 +102,8 @@ static const boost::regex utf_evasion_for_dot_regex(utf_evasion_for_dot_helper);
// Pattern text: a double quote, optional whitespace, a comma, optional whitespace, a double quote.
static const std::string sqli_comma_evasion_regex_helper = "\"\\s*,\\s*\"";
// Compiled form of sqli_comma_evasion_regex_helper.
static const boost::regex sqli_comma_evasion_regex(sqli_comma_evasion_regex_helper);
// Matches a run of two or more consecutive whitespace characters.
// trimSpaces() uses it as a pre-check before rewriting the string.
static const boost::regex space_evasion_regex("[[:space:]]{2,}");
WaapAssetState::WaapAssetState(const std::shared_ptr<WaapAssetState>& pWaapAssetState,
const std::string& waapDataFileName,
const std::string& id) :
@@ -267,6 +269,76 @@ WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
}
#endif
// Collapses runs of whitespace in `text`, in place.
//
// For every maximal run of whitespace characters the output keeps at most:
//   - a single blank (the first of ' ', '\t', '\f', '\v' seen), or
//   - a single line-break character ('\r' or '\n'), or
//   - one two-character pair "\r\n" or "\n\r".
// A line break that follows a kept blank replaces that blank, and any further
// whitespace after the kept character(s) of the run is dropped.
// Non-whitespace characters are copied through unchanged.
//
// If `text` is empty or contains no run of two or more whitespace characters,
// it is left untouched.
void trimSpaces(std::string & text) {
    if (text.empty()) {
        return;
    }

    // Fast path: the loop below only ever changes the string when two
    // whitespace characters are adjacent. The search is done on the
    // std::string itself (not on text.c_str()) so that an embedded '\0' byte
    // cannot hide a whitespace run located after it.
    if (!boost::regex_search(text, space_evasion_regex)) {
        return;
    }

    dbgTrace(D_WAAP) << "Boost regex passed";

    size_t result_position = 0;     // next write index of the compacted output
    space_stage state = NO_SPACES;  // what the current whitespace run has produced so far

    for (size_t position = 0; position < text.size(); position++) {
        const char code = text[position];
        switch (code) {
            case '\t':
            case ' ':
            case '\f':
            case '\v':
                // Only the first blank of a run is kept; blanks that follow any
                // other whitespace are dropped.
                if (state == NO_SPACES) {
                    state = SPACE_SYNBOL;
                    text[result_position++] = code;
                }
                break;
            case '\r':
                switch (state) {
                    case (SPACE_SYNBOL):
                        // SPACE_SYNBOL implies one blank was written, so
                        // result_position >= 1; the line break replaces it.
                        text[result_position - 1] = code;
                        state = BR_SYMBOL;
                        break;
                    case (NO_SPACES):
                        text[result_position++] = code;
                        state = BR_SYMBOL;
                        break;
                    case (BN_SYMBOL):
                        // Completes a "\n\r" pair.
                        text[result_position++] = code;
                        state = BNR_SEQUENCE;
                        break;
                    default:
                        // Repeated '\r' or a pair already emitted: drop.
                        break;
                }
                break;
            case '\n':
                switch (state) {
                    case (SPACE_SYNBOL):
                        // The line break replaces the previously kept blank.
                        text[result_position - 1] = code;
                        state = BN_SYMBOL;
                        break;
                    case (NO_SPACES):
                        text[result_position++] = code;
                        state = BN_SYMBOL;
                        break;
                    case (BR_SYMBOL):
                        // Completes a "\r\n" pair.
                        text[result_position++] = code;
                        state = BRN_SEQUENCE;
                        break;
                    default:
                        // Repeated '\n' or a pair already emitted: drop.
                        break;
                }
                break;
            default:
                // Any non-whitespace character ends the current run.
                text[result_position++] = code;
                state = NO_SPACES;
                break;
        }
    }

    // Drop the tail left over after compaction.
    text.erase(result_position);
}
// Python equivalent: text = re.sub(r'[^\x00-\x7F]+',' ', text)
void replaceUnicodeSequence(std::string & text, const char repl) {
std::string::iterator it = text.begin();
@@ -432,6 +504,8 @@ WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (11) '" << text << "'";
trimSpaces(text);
// 12. finally, apply tolower() to all characters of a string
// std::for_each(text.begin(), text.end(), [](char &c) { c = tolower(c); });
for (std::string::iterator pC = text.begin(); pC != text.end(); ++pC) {

View File

@@ -34,6 +34,8 @@
#include "ScanResult.h"
#include "WaapSampleValue.h"
// Scanner states used by trimSpaces(): each value records what the current
// whitespace run has contributed to the output so far.
enum space_stage {
    SPACE_SYNBOL,   // one blank (' ', '\t', '\f' or '\v') has been kept
    BR_SYMBOL,      // a single '\r' has been kept
    BN_SYMBOL,      // a single '\n' has been kept
    BRN_SEQUENCE,   // the pair "\r\n" has been kept
    BNR_SEQUENCE,   // the pair "\n\r" has been kept
    NO_SPACES       // initial state, or the last character was not whitespace
};
class IWaf2Transaction;
class WaapAssetState : public boost::noncopyable, public I_WaapAssetState
@@ -155,6 +157,7 @@ inline std::size_t hash_value(WaapAssetState::CacheKey const &cacheKey)
}
// Modifies `text` in place.
// NOTE(review): definition not visible here; presumably filters Unicode content - confirm.
void filterUnicode(std::string & text);
// Collapses runs of whitespace in `text` in place (see the definition for the exact rules).
void trimSpaces(std::string & text);
// Modifies `text` in place. The comment on the definition gives the Python equivalent
// re.sub(r'[^\x00-\x7F]+',' ', text); presumably `repl` is the replacement character - confirm.
void replaceUnicodeSequence(std::string & text, const char repl);
// Takes `s` by const reference and returns the result as a new string.
// NOTE(review): the full body is not visible here - confirm the exact processing steps.
std::string unescape(const std::string & s);