sync code

This commit is contained in:
Ned Wright
2025-02-27 16:03:28 +00:00
parent bb35eaf657
commit cd020a7ddd
28 changed files with 625 additions and 192 deletions

View File

@@ -113,6 +113,9 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
<< parser_depth
<< " v_len = "
<< v_len;
dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
// Decide whether to push/pop the value in the keystack.
bool shouldUpdateKeyStack = (flags & BUFFERED_RECEIVER_F_UNNAMED) == 0;
@@ -275,13 +278,23 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
// Detect and decode potential base64 chunks in the value before further processing
bool base64ParamFound = false;
size_t base64_offset = 0;
Waap::Util::BinaryFileType base64BinaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
if (m_depth == 1 && flags == BUFFERED_RECEIVER_F_MIDDLE && m_key.depth() == 1 && m_key.first() != "#base64"){
dbgTrace(D_WAAP_DEEP_PARSER) << " === will not check base64 since prev data block was not b64-encoded ===";
} else {
dbgTrace(D_WAAP_DEEP_PARSER) << " ===Processing potential base64===";
if (isUrlPayload && m_depth == 1 && cur_val[0] == '/') {
dbgTrace(D_WAAP_DEEP_PARSER) << "removing leading '/' from URL param value";
base64_offset = 1;
}
std::string decoded_val, decoded_key;
base64_variants base64_status = Waap::Util::b64Test(cur_val, decoded_key, decoded_val, base64BinaryFileType);
base64_variants base64_status = Waap::Util::b64Test(
cur_val,
decoded_key,
decoded_val,
base64BinaryFileType,
base64_offset);
dbgTrace(D_WAAP_DEEP_PARSER)
<< " status = "
@@ -289,16 +302,50 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
<< " key = "
<< decoded_key
<< " value = "
<< decoded_val;
<< decoded_val
<< "m_depth = "
<< m_depth;
switch (base64_status) {
case SINGLE_B64_CHUNK_CONVERT:
cur_val = decoded_val;
if (base64_offset) {
cur_val = "/" + decoded_val;
} else {
cur_val = decoded_val;
}
base64ParamFound = true;
break;
case CONTINUE_DUAL_SCAN:
if (decoded_val.size() > 0) {
decoded_key = "#base64";
base64ParamFound = false;
if (base64_offset) {
decoded_val = "/" + decoded_val;
}
dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
rc = onKv(
decoded_key.c_str(),
decoded_key.size(),
decoded_val.data(),
decoded_val.size(),
flags,
parser_depth
);
dbgTrace(D_WAAP_DEEP_PARSER) << "After call to onKv with suspected value rc = " << rc;
dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
break;
} else {
dbgTrace(D_WAAP) << "base64 decode suspected and empty value. Skipping.";
base64ParamFound = false;
break;
}
break;
case KEY_VALUE_B64_PAIR:
// going deep with new pair in case value is not empty
if (decoded_val.size() > 0) {
if (base64_offset) {
decoded_key = "/" + decoded_key;
}
cur_val = decoded_val;
base64ParamFound = true;
rc = onKv(
@@ -309,9 +356,13 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
flags,
parser_depth
);
dbgTrace(D_WAAP_DEEP_PARSER) << " rc = " << rc;
dbgTrace(D_WAAP_DEEP_PARSER) << "After call to onKv with suspected value rc = " << rc;
dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
if (rc != CONTINUE_PARSING) {
if (shouldUpdateKeyStack) {
m_key.pop("deep parser key");
}
m_depth--;
return rc;
}
}
@@ -323,7 +374,7 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
}
if (base64ParamFound) {
dbgTrace(D_WAAP_DEEP_PARSER) << "DeepParser::onKv(): pushing #base64 prefix to the key.";
dbgTrace(D_WAAP_DEEP_PARSER) << "pushing #base64 prefix to the key.";
m_key.push("#base64", 7, false);
}
}
@@ -437,7 +488,6 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
if (shouldUpdateKeyStack) {
m_key.pop("deep parser key");
}
m_depth--;
return rc;
}
@@ -587,7 +637,6 @@ DeepParser::parseBuffer(
if (shouldUpdateKeyStack) {
m_key.pop("deep parser key");
}
m_depth--;
return DONE_PARSING;
}
@@ -909,7 +958,6 @@ DeepParser::parseAfterMisleadingMultipartBoundaryCleaned(
return rc;
}
}
return rc;
}
@@ -1081,7 +1129,7 @@ DeepParser::createInternalParser(
<< " isBodyPayload = "
<< isBodyPayload;
//Detect sensor_data format in body and just use dedicated filter for it
if (m_depth == 1
if ((m_depth == 1)
&& isBodyPayload
&& Waap::Util::detectKnownSource(cur_val) == Waap::Util::SOURCE_TYPE_SENSOR_DATA) {
m_parsersDeque.push_back(

View File

@@ -37,14 +37,24 @@ void KeyStack::push(const char* subkey, size_t subkeySize, bool countDepth) {
m_nameDepth++;
}
dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::push(): '" << std::string(subkey, subkeySize) <<
"' => full_key='" << std::string(m_key.data(), m_key.size()) << "'";
dbgTrace(D_WAAP)
<< "KeyStack("
<< m_name
<< ")::push(): '"
<< std::string(subkey, subkeySize)
<< "' => full_key='"
<< std::string(m_key.data(), m_key.size())
<< "'";
}
void KeyStack::pop(const char* log, bool countDepth) {
// Keep depth balanced even if m_key[] buffer is full
if (m_key.empty() || m_stack.empty()) {
dbgDebug(D_WAAP) << "KeyStack(" << m_name << ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! " << log;
dbgDebug(D_WAAP)
<< "KeyStack("
<< m_name
<< ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! "
<< log;
return;
}
@@ -55,6 +65,22 @@ void KeyStack::pop(const char* log, bool countDepth) {
// Remove last subkey.
m_key.erase(m_stack.back());
m_stack.pop_back();
dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::pop(): full_key='" <<
std::string(m_key.data(), (int)m_key.size()) << "': pop_key=" << log << "'";
dbgTrace(D_WAAP)
<< "KeyStack("
<< m_name
<< ")::pop(): full_key='"
<< std::string(m_key.data(), (int)m_key.size())
<< "': pop_key="
<< log
<< "'";
}
// Writes a one-line description of this key stack (its name and the
// currently accumulated full key) to the supplied output stream.
void KeyStack::print(std::ostream &os) const
{
    // Materialize the key buffer as a string first, then emit everything in one chain.
    const std::string fullKey(m_key.data(), (int)m_key.size());
    os << "KeyStack(" << m_name << ")::show(): full_key='" << fullKey << "'";
}

View File

@@ -28,6 +28,7 @@ public:
void pop(const char* log, bool countDepth=true);
bool empty() const { return m_key.empty(); }
void clear() { m_key.clear(); m_stack.clear(); }
void print(std::ostream &os) const;
size_t depth() const { return m_nameDepth; }
size_t size() const {
return str().size();

View File

@@ -111,8 +111,7 @@ int BufferedReceiver::onKvDone()
// This must be called even if m_value is empty in order to signal the BUFFERED_RECEIVER_F_LAST flag to the
// receiver!
dbgTrace(D_WAAP_PARSER)
<< " Call onKv on the remainder of the buffer not yet pushed to the receiver "
<< "calling onKv()";
<< " Call onKv on the remainder of the buffer not yet pushed to the receiver calling onKv()";
int rc = onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags, m_parser_depth);
// Reset the object's state to allow reuse for other parsers

View File

@@ -21,6 +21,7 @@ USE_DEBUG_FLAG(D_WAAP);
const std::string ParserPDF::m_parserName = "ParserPDF";
const char* PDF_TAIL = "%%EOF";
const size_t PDF_TAIL_LEN = 5;
ParserPDF::ParserPDF(
IParserStreamReceiver &receiver,
@@ -44,16 +45,21 @@ ParserPDF::push(const char *buf, size_t len)
<< "' len="
<< len;
const char *c;
if (m_state == s_error) {
return 0;
}
if (len == 0)
{
dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): end of stream. m_state=" << m_state;
if (m_state == s_end) {
if (len == 0) {
dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): end of stream. m_state=" << m_state;
if (m_state == s_body && m_tailOffset >= PDF_TAIL_LEN) {
if (m_receiver.onKey("PDF", 3) != 0) {
m_state = s_error;
return 0;
}
if (m_receiver.onValue("", 0) != 0) {
m_state = s_error;
return 0;
}
m_receiver.onKvDone();
} else {
m_state = s_error;
@@ -61,38 +67,43 @@ ParserPDF::push(const char *buf, size_t len)
return 0;
}
size_t start = (len > MAX_PDF_TAIL_LOOKUP) ? len - MAX_PDF_TAIL_LOOKUP : 0;
switch (m_state) {
case s_start:
m_state = s_body;
CP_FALL_THROUGH;
case s_body:
{
size_t tail_lookup_offset = (len > MAX_PDF_TAIL_LOOKUP) ? len - MAX_PDF_TAIL_LOOKUP : 0;
c = strstr(buf + tail_lookup_offset, PDF_TAIL);
for (size_t i = start; i < len; i++) {
dbgTrace(D_WAAP_PARSER_PDF)
<< "string to search: " << std::string(buf + tail_lookup_offset)
<< " c=" << c;
if (c) {
m_state = s_end;
CP_FALL_THROUGH;
<< "ParserPDF::push(): m_tailOffset="
<< m_tailOffset
<< " buf[i]="
<< buf[i];
if (m_tailOffset <= PDF_TAIL_LEN - 1) {
if (buf[i] == PDF_TAIL[m_tailOffset]) {
m_tailOffset++;
} else {
m_tailOffset = 0;
}
} else {
break;
if (buf[i] == '\r' || buf[i] == '\n' || buf[i] == ' ' || buf[i] == 0) {
m_tailOffset++;
} else {
m_tailOffset = 0;
i--;
}
}
}
case s_end:
if (m_receiver.onKey("PDF", 3) != 0) {
m_state = s_error;
return 0;
}
if (m_receiver.onValue("", 0) != 0) {
m_state = s_error;
return 0;
}
dbgTrace(D_WAAP_PARSER_PDF)
<< "ParserPDF::push()->s_body: m_tailOffset="
<< m_tailOffset;
break;
case s_error:
break;
default:
dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): unknown state: " << m_state;
dbgTrace(D_WAAP_PARSER_PDF)
<< "ParserPDF::push(): unknown state: "
<< m_state;
m_state = s_error;
return 0;
}

View File

@@ -34,7 +34,6 @@ private:
enum state {
s_start,
s_body,
s_end,
s_error
};
@@ -42,6 +41,7 @@ private:
enum state m_state;
static const std::string m_parserName;
size_t m_parser_depth;
size_t m_tailOffset = 0;
};
#endif // __PARSER_PDF_H__

View File

@@ -617,6 +617,17 @@ void SerializeToLocalAndRemoteSyncBase::setInterval(ch::seconds newInterval)
bool SerializeToLocalAndRemoteSyncBase::localSyncAndProcess()
{
bool isBackupSyncEnabled = getProfileAgentSettingWithDefault<bool>(
true,
"appsecLearningSettings.backupLocalSync");
if (!isBackupSyncEnabled) {
dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Local sync is disabled";
processData();
saveData();
return true;
}
RemoteFilesList rawDataFiles;
dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Getting files of all agents";

View File

@@ -97,7 +97,9 @@ calcIndividualKeywords(
std::sort(keywords.begin(), keywords.end());
for (auto pKeyword = keywords.begin(); pKeyword != keywords.end(); ++pKeyword) {
addKeywordScore(scoreBuilder, poolName, *pKeyword, 2.0f, 0.3f, scoresArray, coefArray);
addKeywordScore(
scoreBuilder, poolName, *pKeyword, DEFAULT_KEYWORD_SCORE, DEFAULT_KEYWORD_COEF, scoresArray, coefArray
);
}
}
@@ -112,8 +114,6 @@ calcCombinations(
std::vector<std::string>& keyword_combinations)
{
keyword_combinations.clear();
static const double max_combi_score = 1.0f;
double default_coef = 0.8f;
for (size_t i = 0; i < keyword_matches.size(); ++i) {
std::vector<std::string> combinations;
@@ -137,8 +137,10 @@ calcCombinations(
default_score += scoreBuilder.getSnapshotKeywordScore(*it, 0.0f, poolName);
}
// set default combination score to be the sum of its keywords, bounded by 1
default_score = std::min(default_score, max_combi_score);
addKeywordScore(scoreBuilder, poolName, combination, default_score, default_coef, scoresArray, coefArray);
default_score = std::min(default_score, DEFAULT_COMBI_SCORE);
addKeywordScore(
scoreBuilder, poolName, combination, default_score, DEFAULT_COMBI_COEF, scoresArray, coefArray
);
keyword_combinations.push_back(combination);
}
}
@@ -155,7 +157,7 @@ calcArrayScore(std::vector<double>& scoreArray)
// *pScore is always positive and there's a +10 offset
score = 10.0f - left * 10.0f / divisor;
}
dbgTrace(D_WAAP_SCORE_BUILDER) << "calculated score: " << score;
dbgDebug(D_WAAP_SCORE_BUILDER) << "calculated score: " << score;
return score;
}
@@ -171,7 +173,9 @@ calcLogisticRegressionScore(std::vector<double> &coefArray, double intercept, do
}
// Apply the expit function to the log-odds to obtain the probability,
// and multiply by 10 to obtain a 'score' in the range [0, 10]
return 1.0f / (1.0f + exp(-log_odds)) * 10.0f;
double score = 1.0f / (1.0f + exp(-log_odds)) * 10.0f;
dbgDebug(D_WAAP_SCORE_BUILDER) << "calculated score (log_odds): " << score << " (" << log_odds << ")";
return score;
}
}

View File

@@ -32,6 +32,11 @@ struct ModelLoggingSettings {
bool logToStream;
};
static const double DEFAULT_KEYWORD_COEF = 0.3f;
static const double DEFAULT_KEYWORD_SCORE = 2.0f;
static const double DEFAULT_COMBI_COEF = 0.8f;
static const double DEFAULT_COMBI_SCORE = 1.0f;
std::string getScorePoolNameByLocation(const std::string &location);
std::string getOtherScorePoolName();
ModelLoggingSettings getModelLoggingSettings();

View File

@@ -40,6 +40,7 @@
#include "WaapOpenRedirectPolicy.h"
#include "WaapErrorDisclosurePolicy.h"
#include <boost/algorithm/string.hpp>
#include <boost/regex.hpp>
#include "generic_rulebase/parameters_config.h"
#include <iostream>
#include "ParserDelimiter.h"
@@ -1390,6 +1391,20 @@ Waf2Transaction::findHtmlTagToInject(const char* data, int data_len, int& pos)
size_t tagHistPosCheck = m_tagHistPos;
for (size_t i=0; i < tagSize; ++i) {
if (tag[i] != ::tolower(m_tagHist[tagHistPosCheck])) {
if (i == tagSize - 1 && m_tagHist[tagHistPosCheck] == ' ') {
// match regex on head element with attributes
string dataStr = Waap::Util::charToString(data + pos, data_len - pos);
dataStr = dataStr.substr(0, dataStr.find('>')+1);
tagMatches = NGEN::Regex::regexMatch(
__FILE__,
__LINE__,
dataStr,
boost::regex("(?:\\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\\s*=\\s*(\"[^\"]*\"|'[^']*'|[^\\s\"'>]*))?)*\\s*>")
);
pos += dataStr.length() - 1;
dbgTrace(D_WAAP_BOT_PROTECTION) << "matching head element with attributes: " << dataStr << ". match: " << tagMatches;
break;
}
tagMatches = false;
break;
}
@@ -1403,12 +1418,8 @@ Waf2Transaction::findHtmlTagToInject(const char* data, int data_len, int& pos)
}
}
if(!headFound)
{
return false;
}
return true;
dbgTrace(D_WAAP_BOT_PROTECTION) << "head element tag found: " << headFound;
return headFound;
}
void
@@ -1577,6 +1588,8 @@ Waf2Transaction::decideFinal(
dbgTrace(D_WAAP) << "Waf2Transaction::decideFinal(): got relevant API configuration from the I/S";
sitePolicy = &ngenAPIConfig;
m_overrideState = getOverrideState(sitePolicy);
// User limits
shouldBlock = (getUserLimitVerdict() == ngx_http_cp_verdict_e::TRAFFIC_VERDICT_DROP);
}
else if (WaapConfigApplication::getWaapSiteConfig(ngenSiteConfig)) {
@@ -2322,10 +2335,11 @@ bool Waf2Transaction::decideResponse()
bool
Waf2Transaction::reportScanResult(const Waf2ScanResult &res) {
if (get_ignoreScore() || (res.score >= SCORE_THRESHOLD &&
(m_scanResult == nullptr || res.score > m_scanResult->score)))
if ((get_ignoreScore() || res.score >= SCORE_THRESHOLD) &&
(m_scanResult == nullptr || res.score > m_scanResult->score))
{
// Forget any previous scan result and replace with new
dbgTrace(D_WAAP) << "Setting scan result. New score: " << res.score;
// Forget any previous scan result and replace with new
delete m_scanResult;
m_scanResult = new Waf2ScanResult(res);
return true;

View File

@@ -952,6 +952,145 @@ string filterUTF7(const string& text) {
return result;
}
// Decides the status of a Base64 decoded string based on various parameters.
// @param decoded The decoded string. May be cleared by this function (see below).
// @param entropy The entropy of the original encoded string.
// @param decoded_entropy The entropy of the decoded string.
// @param spacer_count The number of spacer characters in the decoded string.
// @param nonPrintableCharsCount The count of non-printable characters in the decoded string.
// @param clear_on_error Flag indicating whether to clear the decoded string on error.
// @param terminatorCharsSeen The number of terminator characters seen.
// @param called_with_prefix Flag indicating if the function was called with a prefix.
// @return The status of the Base64 decoding process.
//
// Idea:
// Decide whether the input chunk should be replaced by its decoded form (B64_DECODE_OK), scanned both
// encoded and decoded (B64_DECODE_SUSPECTED), dropped as binary data (B64_DECODE_INCOMPLETE),
// or treated as not base64 encoded at all (B64_DECODE_INVALID).
// - if the decoded size is less than 5: clear the decoded data and return invalid
// - entropy check: the chunk is "suspected" when
//   entropy - decoded_entropy + terminatorCharsSeen < BASE64_ENTROPY_THRESHOLD_DELTA.
//   This check can only raise suspicion; it cannot clear it.
// - non-printables check: the decoded chunk is a binary-data candidate when at least 10% of it is
//   non-printable (all spacers together are counted as a single non-printable character)
// - fewer than 10% non-printables:
//     - not suspected: return OK
//     - suspected: run the empiric test; if its condition holds, clear and return invalid,
//       otherwise return suspected
// - at least 10% non-printables and suspected:
//     - chunk shorter than 16 with more than 25% non-printables: clear and return invalid
//     - otherwise run the empiric test (which here also requires terminator characters):
//       if its condition does not hold return suspected; if it holds the status is reset to OK.
//       In both cases the decoded data is cleared when clear_on_error is set.
// - at least 10% non-printables and not suspected:
//     - chunk shorter than 16 with more than 50% non-printables and no terminators:
//       clear and return invalid
//     - otherwise return incomplete (decoded data is cleared when clear_on_error is set)
base64_decode_status decideStatusBase64Decoded(
    string& decoded,
    double entropy,
    double decoded_entropy,
    size_t spacer_count,
    size_t nonPrintableCharsCount,
    bool clear_on_error,
    double terminatorCharsSeen,
    bool called_with_prefix
)
{
    base64_decode_status tmp_status = B64_DECODE_OK;
    if (entropy - decoded_entropy + terminatorCharsSeen < BASE64_ENTROPY_THRESHOLD_DELTA) {
        dbgTrace(D_WAAP_BASE64)
            << "The chunk is under suspect to be base64,"
            << "use dual processing because entropy delta is too low";
        tmp_status = B64_DECODE_SUSPECTED;
    }

    bool empiric_condition = false;
    if (decoded.size() >= 5) {
        // All spacers together are counted as a single non-printable character.
        if (spacer_count > 1) {
            nonPrintableCharsCount = nonPrintableCharsCount - spacer_count + 1;
        }
        dbgTrace(D_WAAP_BASE64)
            << "(before test for unprintables): decoded.size="
            << decoded.size()
            << ", nonPrintableCharsCount="
            << nonPrintableCharsCount
            << ", clear_on_error="
            << clear_on_error
            << ", called_with_prefix="
            << called_with_prefix;
        if (nonPrintableCharsCount * 10 < decoded.size()) {
            dbgTrace(D_WAAP_BASE64)
                << "(decode/replace due to small amount of nonprintables): will decide based on entropy values";
        } else { // at least 10% of non-printable characters
            dbgTrace(D_WAAP_BASE64) << "large amount of nonporintables";
            if (tmp_status == B64_DECODE_SUSPECTED) {
                // entropy - decoded_entropy + terminatorCharsSeen < 0.25
                if (decoded.size() < 16 && nonPrintableCharsCount * 4 > decoded.size()) {
                    decoded.clear();
                    return B64_DECODE_INVALID;
                }
                dbgTrace(D_WAAP_BASE64)
                    << "(large amount of nonporintables + entropy suspect), check emprirics because decoded."
                    << " terminatorCharsSeen="
                    << terminatorCharsSeen;
                // empiric test based on investigation of real payloads
                empiric_condition = entropy < decoded_entropy
                    && entropy > BASE64_ENTROPY_BASE_THRESHOLD
                    && decoded_entropy > BASE64_ENTROPY_DECODED_THRESHOLD
                    && !called_with_prefix
                    && decoded.size() > BASE64_MIN_SIZE_LIMIT
                    && decoded.size() < BASE64_MAX_SIZE_LIMIT
                    && terminatorCharsSeen != 0;
                if (!empiric_condition) {
                    if (clear_on_error) decoded.clear();
                    return B64_DECODE_SUSPECTED;
                } else {
                    if (clear_on_error) decoded.clear();
                    tmp_status = B64_DECODE_OK;
                }
            } else { // entropy - decoded_entropy + terminatorCharsSeen >= 0.25
                // one more empiric based on uT and real payloads
                if (decoded.size() < 16
                    && nonPrintableCharsCount * 2 > decoded.size()
                    && terminatorCharsSeen == 0) {
                    decoded.clear();
                    return B64_DECODE_INVALID;
                }
                dbgTrace(D_WAAP_BASE64)
                    << "(delete as binary content) because decoded. Return B64_DECODE_INCOMPLETE";
                if (clear_on_error) decoded.clear();
                return B64_DECODE_INCOMPLETE;
            }
        }
        dbgTrace(D_WAAP_BASE64)
            << "After handling unprintables checking status";
        if (tmp_status == B64_DECODE_OK) {
            // Successfully decoded: the caller receives the data in "decoded".
            dbgTrace(D_WAAP_BASE64) << "replacing with decoded data, return B64_DECODE_OK";
            return B64_DECODE_OK;
        }
        // tmp_status == B64_DECODE_SUSPECTED, entropy - decoded_entropy + terminatorCharsSeen < 0.25
        dbgTrace(D_WAAP_BASE64) << "Suspected due to entropy, making empiric test";
        // and one more empiric test based on investigation of real payloads
        empiric_condition = entropy < decoded_entropy
            && entropy > BASE64_ENTROPY_BASE_THRESHOLD
            && decoded_entropy > BASE64_ENTROPY_DECODED_THRESHOLD
            && !called_with_prefix
            && decoded.size() > BASE64_MIN_SIZE_LIMIT
            && decoded.size() < BASE64_MAX_SIZE_LIMIT;
        if (empiric_condition) {
            dbgTrace(D_WAAP_BASE64) << "Empiric test failed, non-base64 chunk, return B64_DECODE_INVALID";
            decoded.clear();
            return B64_DECODE_INVALID;
        }
        dbgTrace(D_WAAP_BASE64) << "Empiric test passed, return B64_DECODE_SUSPECTED";
        return B64_DECODE_SUSPECTED;
    }

    // If decoded size is too small - leave the encoded value.
    // Trace the size BEFORE discarding the data, otherwise the trace would always report 0.
    dbgTrace(D_WAAP_BASE64)
        << "(leave as-is) because decoded too small. decoded.size="
        << decoded.size();
    decoded.clear(); // discard partial data
    return B64_DECODE_INVALID;
}
// Attempts to validate and decode base64-encoded chunk.
// Value is the full value inside which potential base64-encoded chunk was found,
// it and end point to start and end of that chunk.
@@ -980,18 +1119,28 @@ base64_decode_status decodeBase64Chunk(
uint32_t spacer_count = 0;
uint32_t length = end - it;
dbgTrace(D_WAAP) << "decodeBase64Chunk: value='" << value << "' match='" << string(it, end) << "'";
dbgTrace(D_WAAP)
<< "value='"
<< value
<< "' match='"
<< string(it, end)
<< "' clear_on_error='"
<< clear_on_error
<< "' called_with_prefix='"
<< called_with_prefix
<< "'";
string::const_iterator begin = it;
// The encoded data length (without the "base64," prefix) can never be of the form 4*x + 1:
// a single leftover base64 character cannot encode a whole byte, i.e. such input is not base64
if ((end - it) % 4 != 0) {
dbgTrace(D_WAAP_BASE64) <<
"b64DecodeChunk: (leave as-is) because encoded data length should be exactly divisible by 4.";
if ((end - it) % 4 == 1) {
dbgTrace(D_WAAP_BASE64)
<< "(leave as-is) because encoded data length should not be <4*x + 1>.";
return B64_DECODE_INVALID;
}
std::unordered_map<char, double> frequency;
std::unordered_map<char, double> original_occurences_counter;
std::unordered_map<char, double> decoded_occurences_counter;
while (it != end) {
unsigned char c = *it;
@@ -999,9 +1148,8 @@ base64_decode_status decodeBase64Chunk(
if (terminatorCharsSeen) {
// terminator characters must all be '=', until end of match.
if (c != '=') {
dbgTrace(D_WAAP_BASE64) <<
"decodeBase64Chunk: (leave as-is) because terminator characters must all be '='," <<
"until end of match.";
dbgTrace(D_WAAP_BASE64)
<< "(leave as-is) because terminator characters must all be '=' until end of match.";
return B64_DECODE_INVALID;
}
@@ -1009,13 +1157,13 @@ base64_decode_status decodeBase64Chunk(
terminatorCharsSeen++;
if (terminatorCharsSeen > 2) {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because terminatorCharsSeen > 2";
dbgTrace(D_WAAP_BASE64) << "(leave as-is) because terminatorCharsSeen > 2";
return B64_DECODE_INVALID;
}
// allow for more terminator characters
it++;
frequency[c]++;
original_occurences_counter[c]++;
continue;
}
@@ -1040,12 +1188,18 @@ base64_decode_status decodeBase64Chunk(
// Start tracking terminator characters
terminatorCharsSeen++;
it++;
frequency[c]++;
original_occurences_counter[c]++;
continue;
}
else {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because of non-base64 character ('" <<
c << "', ASCII " << (unsigned int)c << ", offset " << (it-begin) << ")";
dbgTrace(D_WAAP_BASE64)
<< "(leave as-is) because of non-base64 character ('"
<< c
<< "', ASCII "
<< (unsigned int)c
<< ", offset "
<< (it-begin)
<< ")";
return B64_DECODE_INVALID; // non-base64 character
}
@@ -1068,18 +1222,19 @@ base64_decode_status decodeBase64Chunk(
}
decoded += (char)code;
decoded_occurences_counter[(char)code]++;
}
it++;
frequency[c]++;
original_occurences_counter[c]++;
}
// end of encoded sequence decoded.
dbgTrace(D_WAAP_BASE64)
<< "decodeBase64Chunk: decoded.size="
<< "decoding done: decoded.size="
<< decoded.size()
<< ", nonPrintableCharsCount="
<< ", uncorrected nonPrintableCharsCount="
<< nonPrintableCharsCount
<< ", spacer_count = "
<< spacer_count
@@ -1088,56 +1243,42 @@ base64_decode_status decodeBase64Chunk(
<< "; decoded='"
<< decoded << "'";
// Check if entropy is correlates with b64 threshold (initially > 4.5)
if (!called_with_prefix) {
double entropy = 0;
double p = 0;
for (const auto& pair : frequency) {
p = pair.second / length;
entropy -= p * std::log2(p);
}
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: base entropy = " << entropy << "length = " << length;
// Add short payload factor
if (length < 16)
entropy = entropy * 16 / length;
// Enforce tailoring '=' characters
entropy+=terminatorCharsSeen;
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: corrected entropy = " << entropy << "length = " << length;
if (entropy <= base64_entropy_threshold) {
return B64_DECODE_INVALID;
}
double entropy = 0;
double p = 0;
double decoded_entropy = 0;
for (const auto& pair : original_occurences_counter) {
p = pair.second / length;
entropy -= p * std::log2(p);
}
// Return success only if decoded.size>=5 and there are less than 10% of non-printable
// characters in output.
if (decoded.size() >= 5) {
if (spacer_count > 1) {
nonPrintableCharsCount = nonPrintableCharsCount - spacer_count + 1;
}
if (nonPrintableCharsCount * 10 < decoded.size()) {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (decode/replace) decoded.size=" << decoded.size() <<
", nonPrintableCharsCount=" << nonPrintableCharsCount << ": replacing with decoded data";
}
else {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (delete) because decoded.size=" << decoded.size() <<
", nonPrintableCharsCount=" << nonPrintableCharsCount <<
", clear_on_error=" << clear_on_error;
if (clear_on_error) decoded.clear();
return B64_DECODE_INCOMPLETE;
}
dbgTrace(D_WAAP_BASE64) << "returning true: successfully decoded."
<< " Returns decoded data in \"decoded\" parameter";
return B64_DECODE_OK; // successfully decoded. Returns decoded data in "decoded" parameter
for (const auto &pair : decoded_occurences_counter) {
p = pair.second / decoded.size();
decoded_entropy -= p * std::log2(p);
}
dbgTrace(D_WAAP_BASE64)
<< "Base entropy = "
<< entropy
<< " Decoded_entropy = "
<< decoded_entropy
<< "length = "
<< length;
base64_decode_status return_status = decideStatusBase64Decoded(
decoded,
entropy,
decoded_entropy,
spacer_count,
nonPrintableCharsCount,
clear_on_error,
terminatorCharsSeen,
called_with_prefix
);
dbgTrace(D_WAAP_BASE64)
<< "After decideStatusBase64Decoded return_status="
<< return_status;
return return_status;
// If decoded size is too small - leave the encoded value (return false)
decoded.clear(); // discard partial data
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because decoded too small. decoded.size=" <<
decoded.size() <<
", nonPrintableCharsCount=" << nonPrintableCharsCount <<
", clear_on_error=" << clear_on_error;
return B64_DECODE_INVALID;
}
// Attempts to detect and validate base64 chunk.
@@ -1180,8 +1321,9 @@ b64DecodeChunk(
return false;
}
}
return decodeBase64Chunk(value, it, end, decoded) != B64_DECODE_INVALID;
base64_decode_status status = decodeBase64Chunk(value, it, end, decoded);
dbgTrace(D_WAAP_BASE64) << "b64DecodeChunk: status = " << status;
return status != B64_DECODE_INVALID;
}
vector<string> split(const string& s, char delim) {
@@ -1281,6 +1423,7 @@ static void b64TestChunk(const string &s,
int &deletedCount,
string &outStr)
{
dbgTrace(D_WAAP_BASE64) << " ===b64TestChunk===: starting with = '" << s << "'";
size_t chunkLen = (chunkEnd - chunkStart);
if ((chunkEnd - chunkStart) > static_cast<int>(b64_prefix.size()) &&
@@ -1289,11 +1432,9 @@ static void b64TestChunk(const string &s,
chunkLen -= b64_prefix.size();
}
size_t chunkRem = chunkLen % 4;
// Only match chunk whose length is divisible by 4
string repl;
if (chunkRem == 0 && cb(s, chunkStart, chunkEnd, repl)) {
dbgTrace(D_WAAP_BASE64) << " ===b64TestChunk===: chunkLen = " << chunkLen;
if (cb(s, chunkStart, chunkEnd, repl)) {
// Successfully matched b64 chunk
if (!repl.empty()) {
outStr += repl;
@@ -1340,9 +1481,7 @@ bool detectBase64Chunk(
dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===: isB64AlphaChar = true, '" << *it << "'";
start = it;
end = s.end();
if ((end - start) % 4 == 0) {
return true;
}
return true;
}
// non base64 before supposed chunk - will not process
return false;
@@ -1381,17 +1520,31 @@ bool isBase64PrefixProcessingOK (
if (detectBase64Chunk(s, start, end)) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected";
if ((start != s.end()) && (end == s.end())) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected but not complete";
retVal = processDecodedChunk(s, start, end, value, binaryFileType, true);
dbgTrace(D_WAAP_BASE64)
<< " ===isBase64PrefixProcessingOK===: after processDecodedChunk retVal = "
<< retVal
<< " binaryFileType = "
<< binaryFileType;
}
} else if (start != s.end()) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk not detected."
" searching for known file header only";
dbgTrace(D_WAAP_BASE64)
<< " ===isBase64PrefixProcessingOK===: chunk not detected. searching for known file header only";
end = (start + MAX_HEADER_LOOKUP < s.end()) ? start + MAX_HEADER_LOOKUP : s.end();
processDecodedChunk(s, start, end, value, binaryFileType);
value.clear();
dbgTrace(D_WAAP_BASE64)
<< " ===isBase64PrefixProcessingOK===: after processDecodedChunk binaryFileType = "
<< binaryFileType;
return binaryFileType != Waap::Util::BinaryFileType::FILE_TYPE_NONE;
}
}
dbgTrace(D_WAAP_BASE64)
<< " ===isBase64PrefixProcessingOK===: retVal = "
<< retVal
<< " binaryFileType = "
<< binaryFileType;
return retVal != B64_DECODE_INVALID;
}
@@ -1399,23 +1552,31 @@ base64_variants b64Test (
const string &s,
string &key,
string &value,
BinaryFileType &binaryFileType)
BinaryFileType &binaryFileType,
const size_t offset)
{
key.clear();
bool retVal;
binaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
auto begin = s.begin() + offset;
dbgTrace(D_WAAP_BASE64)
<< " ===b64Test===: string = "
<< s
<< " key = "
<< key
<< " value = "
<< value
<< " offset = "
<< offset;
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: string = " << s
<< " key = " << key << " value = " << value;
// Minimal length
if (s.size() < 8) {
if (s.size() < 8 + offset) {
return CONTINUE_AS_IS;
}
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: minimal lenght test passed";
std::string prefix_decoded_val;
string::const_iterator it = s.begin();
auto it = begin;
// 1st check if we have key candidate
if (base64_key_value_detector_re.hasMatch(s)) {
@@ -1433,7 +1594,7 @@ base64_variants b64Test (
break;
case EQUAL:
if (*it == '=') {
it = s.begin();
it = begin;
state=MISDETECT;
continue;
}
@@ -1455,7 +1616,7 @@ base64_variants b64Test (
if (it == s.end() || state == MISDETECT) {
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: detected *it = s.end()" << *it;
if (key.size() > 0) {
it = s.begin();
it = begin;
key.clear();
}
} else {
@@ -1479,7 +1640,7 @@ base64_variants b64Test (
}
}
string::const_iterator start = s.end();
auto start = s.end();
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: B64 itself = " << *it << " =======";
bool isB64AlphaChar = Waap::Util::isAlphaAsciiFast(*it) || isdigit(*it) || *it=='/' || *it=='+';
if (isB64AlphaChar) {
@@ -1487,11 +1648,6 @@ base64_variants b64Test (
dbgTrace(D_WAAP_BASE64) <<
" ===b64Test===: Start tracking potential b64 chunk = " << *it << " =======";
start = it;
if ((s.end() - start) % 4 != 0) {
key.clear();
value.clear();
return CONTINUE_AS_IS;
}
}
else {
dbgTrace(D_WAAP_BASE64) <<
@@ -1512,17 +1668,37 @@ base64_variants b64Test (
key.pop_back();
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: FINAL key = '" << key << "'";
}
retVal = decodeBase64Chunk(s, start, s.end(), value) != B64_DECODE_INVALID;
base64_decode_status decode_chunk_status = decodeBase64Chunk(s, start, s.end(), value);
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After testing and conversion value = "
<< value << "retVal = '" << retVal <<"'";
if (!retVal) {
dbgTrace(D_WAAP_BASE64)
<< " ===b64Test===: After testing and conversion value = "
<< value
<< "decode_chunk_status = '"
<< decode_chunk_status
<<"'";
if (decode_chunk_status == B64_DECODE_INVALID) {
key.clear();
value.clear();
return CONTINUE_AS_IS;
}
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After tpassed retVal check = "
<< value << "retVal = '" << retVal <<"'" << "key = '" << key << "'";
if (decode_chunk_status == B64_DECODE_INCOMPLETE) {
value.clear();
}
if (decode_chunk_status == B64_DECODE_SUSPECTED) {
return CONTINUE_DUAL_SCAN;
}
dbgTrace(D_WAAP_BASE64)
<< " ===b64Test===: After tpassed retVal check = "
<< value
<< "decode_chunk_status = '"
<< decode_chunk_status
<<"'"
<< "key = '"
<< key
<< "'";
if (key.empty()) {
return SINGLE_B64_CHUNK_CONVERT;
} else {
@@ -1548,7 +1724,7 @@ void b64Decode(
deletedCount = 0;
outStr = "";
int offsetFix = 0;
dbgTrace(D_WAAP_BASE64) << " ===b64Decode===: starting with = '" << s << "'";
string::const_iterator it = s.begin();
// Minimal length
@@ -1596,6 +1772,11 @@ void b64Decode(
}
// Decode and add chunk
dbgTrace(D_WAAP_BASE64)
<< " ===b64Decode===: chunkStart = "
<< *chunkStart
<< " it = "
<< *it;
b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);
// stop tracking b64 chunk
@@ -1607,6 +1788,7 @@ void b64Decode(
}
if (chunkStart != s.end()) {
dbgTrace(D_WAAP_BASE64) << " ===b64Decode===: chunkStart = " << *chunkStart;
b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);
}
}

View File

@@ -32,9 +32,15 @@
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS};
enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS, CONTINUE_DUAL_SCAN};
enum base64_stage {BEFORE_EQUAL, EQUAL, DONE, MISDETECT};
enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE};
enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE, B64_DECODE_SUSPECTED};
#define BASE64_ENTROPY_BASE_THRESHOLD 5.0
#define BASE64_ENTROPY_DECODED_THRESHOLD 5.4
#define BASE64_ENTROPY_THRESHOLD_DELTA 0.25
#define BASE64_MIN_SIZE_LIMIT 16
#define BASE64_MAX_SIZE_LIMIT 1024
// This is portable version of stricmp(), which is non-standard function (not even in C).
// Contrary to stricmp(), for a slight optimization, s2 is ASSUMED to be already in lowercase.
@@ -865,6 +871,17 @@ void unescapeUnicode(std::string &text);
// Try to find and decode UTF7 chunks
std::string filterUTF7(const std::string &text);
base64_decode_status
decideStatusBase64Decoded(
std::string& decoded,
double entropy,
double decoded_entropy,
size_t spacer_count,
size_t nonPrintableCharsCount,
bool clear_on_error,
double terminatorCharsSeen,
bool called_with_prefix);
base64_decode_status
decodeBase64Chunk(
const std::string &value,
@@ -926,7 +943,8 @@ namespace Util {
const std::string &s,
std::string &key,
std::string &value,
BinaryFileType &binaryFileType);
BinaryFileType &binaryFileType,
size_t offset = 0);
// The original stdlib implementation of isalpha() supports locale settings which we do not really need.
// It is also proven to contribute to slow performance in some of the algorithms using it.

View File

@@ -19,6 +19,7 @@
#include "table_opaque.h"
#include "i_transaction.h"
#include "waap_clib/DeepAnalyzer.h"
#include "waap_clib/WaapModelResultLogger.h"
#include "waap_clib/WaapAssetState.h"
#include "waap_clib/WaapAssetStatesManager.h"
#include "reputation_features_agg.h"