sync code

2025-11-16 09:21:54 +03:00 · 2025-02-27 16:03:28 +00:00
parent bb35eaf657
commit cd020a7ddd
28 changed files with 625 additions and 192 deletions
--- a/components/security_apps/waap/waap_clib/DeepParser.cc
+++ b/components/security_apps/waap/waap_clib/DeepParser.cc
@@ -113,6 +113,9 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
        << parser_depth
        << " v_len = "
        << v_len;
+
+    dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
+
    // Decide whether to push/pop the value in the keystack.
    bool shouldUpdateKeyStack = (flags & BUFFERED_RECEIVER_F_UNNAMED) == 0;

@@ -275,13 +278,23 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
    // Detect and decode potential base64 chunks in the value before further processing

    bool base64ParamFound = false;
+    size_t base64_offset = 0;
    Waap::Util::BinaryFileType base64BinaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
    if (m_depth == 1 && flags == BUFFERED_RECEIVER_F_MIDDLE && m_key.depth() == 1 && m_key.first() != "#base64"){
        dbgTrace(D_WAAP_DEEP_PARSER) << " === will not check base64 since prev data block was not b64-encoded ===";
    } else {
        dbgTrace(D_WAAP_DEEP_PARSER) << " ===Processing potential base64===";
+        if (isUrlPayload && m_depth == 1 && cur_val[0] == '/') {
+            dbgTrace(D_WAAP_DEEP_PARSER) << "removing leading '/' from URL param value";
+            base64_offset = 1;
+        }
        std::string decoded_val, decoded_key;
-        base64_variants base64_status = Waap::Util::b64Test(cur_val, decoded_key, decoded_val, base64BinaryFileType);
+        base64_variants base64_status = Waap::Util::b64Test(
+            cur_val,
+            decoded_key,
+            decoded_val,
+            base64BinaryFileType,
+            base64_offset);

        dbgTrace(D_WAAP_DEEP_PARSER)
            << " status = "
@@ -289,16 +302,50 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
            << " key = "
            << decoded_key
            << " value = "
-            << decoded_val;
+            << decoded_val
+            << "m_depth = "
+            << m_depth;

        switch (base64_status) {
            case SINGLE_B64_CHUNK_CONVERT:
-                cur_val = decoded_val;
+                if (base64_offset) {
+                    cur_val = "/" + decoded_val;
+                } else {
+                    cur_val = decoded_val;
+                }
                base64ParamFound = true;
                break;
+            case CONTINUE_DUAL_SCAN:
+                if (decoded_val.size() > 0) {
+                    decoded_key = "#base64";
+                    base64ParamFound = false;
+                    if (base64_offset) {
+                        decoded_val = "/" + decoded_val;
+                    }
+                    dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
+                    rc = onKv(
+                        decoded_key.c_str(),
+                        decoded_key.size(),
+                        decoded_val.data(),
+                        decoded_val.size(),
+                        flags,
+                        parser_depth
+                    );
+                    dbgTrace(D_WAAP_DEEP_PARSER) << "After call to onKv with suspected value rc = " << rc;
+                    dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
+                    break;
+                } else {
+                    dbgTrace(D_WAAP) << "base64 decode suspected and empty value. Skipping.";
+                    base64ParamFound = false;
+                    break;
+                }
+                break;
            case KEY_VALUE_B64_PAIR:
                // going deep with new pair in case value is not empty
                if (decoded_val.size() > 0) {
+                    if (base64_offset) {
+                        decoded_key = "/" + decoded_key;
+                    }
                    cur_val = decoded_val;
                    base64ParamFound = true;
                    rc = onKv(
@@ -309,9 +356,13 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
                        flags,
                        parser_depth
                    );
-
-                    dbgTrace(D_WAAP_DEEP_PARSER) << " rc = " << rc;
+                    dbgTrace(D_WAAP_DEEP_PARSER) << "After call to onKv with suspected value rc = " << rc;
+                    dbgTrace(D_WAAP_DEEP_PARSER) << m_key;
                    if (rc != CONTINUE_PARSING) {
+                        if (shouldUpdateKeyStack) {
+                            m_key.pop("deep parser key");
+                        }
+                        m_depth--;
                        return rc;
                    }
                }
@@ -323,7 +374,7 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
        }

        if (base64ParamFound) {
-            dbgTrace(D_WAAP_DEEP_PARSER) << "DeepParser::onKv(): pushing #base64 prefix to the key.";
+            dbgTrace(D_WAAP_DEEP_PARSER) << "pushing #base64 prefix to the key.";
            m_key.push("#base64", 7, false);
        }
    }
@@ -437,7 +488,6 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
            if (shouldUpdateKeyStack) {
                m_key.pop("deep parser key");
            }
-
            m_depth--;
            return rc;
        }
@@ -587,7 +637,6 @@ DeepParser::parseBuffer(
        if (shouldUpdateKeyStack) {
            m_key.pop("deep parser key");
        }
-
        m_depth--;
        return DONE_PARSING;
    }
@@ -909,7 +958,6 @@ DeepParser::parseAfterMisleadingMultipartBoundaryCleaned(
            return rc;
        }
    }
-
    return rc;
 }

@@ -1081,7 +1129,7 @@ DeepParser::createInternalParser(
        << " isBodyPayload = "
        << isBodyPayload;
    //Detect sensor_data format in body and just use dedicated filter for it
-    if (m_depth == 1
+    if ((m_depth == 1)
        && isBodyPayload
        && Waap::Util::detectKnownSource(cur_val) ==  Waap::Util::SOURCE_TYPE_SENSOR_DATA) {
        m_parsersDeque.push_back(
--- a/components/security_apps/waap/waap_clib/KeyStack.cc
+++ b/components/security_apps/waap/waap_clib/KeyStack.cc
@@ -37,14 +37,24 @@ void KeyStack::push(const char* subkey, size_t subkeySize, bool countDepth) {
        m_nameDepth++;
    }

-    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::push(): '" << std::string(subkey, subkeySize) <<
-        "' => full_key='" << std::string(m_key.data(), m_key.size()) << "'";
+    dbgTrace(D_WAAP)
+        << "KeyStack("
+        << m_name
+        << ")::push(): '"
+        << std::string(subkey, subkeySize)
+        << "' => full_key='"
+        << std::string(m_key.data(), m_key.size())
+        << "'";
 }

 void KeyStack::pop(const char* log, bool countDepth) {
    // Keep depth balanced even if m_key[] buffer is full
    if (m_key.empty() || m_stack.empty()) {
-        dbgDebug(D_WAAP) << "KeyStack(" << m_name << ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! " << log;
+        dbgDebug(D_WAAP)
+            << "KeyStack("
+            << m_name
+            << ")::pop(): [ERROR] ATTEMPT TO POP FROM EMPTY KEY STACK! "
+            << log;
        return;
    }

@@ -55,6 +65,22 @@ void KeyStack::pop(const char* log, bool countDepth) {
    // Remove last subkey.
    m_key.erase(m_stack.back());
    m_stack.pop_back();
-    dbgTrace(D_WAAP) << "KeyStack(" << m_name << ")::pop(): full_key='" <<
-        std::string(m_key.data(), (int)m_key.size()) << "': pop_key=" << log << "'";
+    dbgTrace(D_WAAP)
+        << "KeyStack("
+        << m_name
+        << ")::pop(): full_key='"
+        << std::string(m_key.data(), (int)m_key.size())
+        << "': pop_key="
+        << log
+        << "'";
+}
+
+// Writes a one-line description of the stack (its name and the current full key) to os.
+void KeyStack::print(std::ostream &os) const
+{
+    os  << "KeyStack("
+        << m_name
+        << ")::print(): full_key='"
+        << std::string(m_key.data(), m_key.size())
+        << "'";
+}
--- a/components/security_apps/waap/waap_clib/KeyStack.h
+++ b/components/security_apps/waap/waap_clib/KeyStack.h
@@ -28,6 +28,7 @@ public:
    void pop(const char* log, bool countDepth=true);
    bool empty() const { return m_key.empty(); }
    void clear() { m_key.clear(); m_stack.clear(); }
+    void print(std::ostream &os) const;
    size_t depth() const { return m_nameDepth; }
    size_t size() const {
        return str().size();
--- a/components/security_apps/waap/waap_clib/ParserBase.cc
+++ b/components/security_apps/waap/waap_clib/ParserBase.cc
@@ -111,8 +111,7 @@ int BufferedReceiver::onKvDone()
    // This must be called even if m_value is empty in order to signal the BUFFERED_RECEIVER_F_LAST flag to the
    // receiver!
    dbgTrace(D_WAAP_PARSER)
-        << " Call onKv on the remainder of the buffer not yet pushed to the receiver "
-        << "calling onKv()";
+        << " Call onKv on the remainder of the buffer not yet pushed to the receiver calling onKv()";
    int rc = onKv(m_key.data(), m_key.size(), m_value.data(), m_value.size(), m_flags, m_parser_depth);

    // Reset the object's state to allow reuse for other parsers
--- a/components/security_apps/waap/waap_clib/ParserPDF.cc
+++ b/components/security_apps/waap/waap_clib/ParserPDF.cc
@@ -21,6 +21,7 @@ USE_DEBUG_FLAG(D_WAAP);

 const std::string ParserPDF::m_parserName = "ParserPDF";
 const char* PDF_TAIL = "%%EOF";
+const size_t PDF_TAIL_LEN = 5;

 ParserPDF::ParserPDF(
    IParserStreamReceiver &receiver,
@@ -44,16 +45,21 @@ ParserPDF::push(const char *buf, size_t len)
        << "' len="
        << len;

-    const char *c;
-
    if (m_state == s_error) {
        return 0;
    }
-    if (len == 0)
-    {
-        dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): end of stream. m_state=" << m_state;

-        if (m_state == s_end) {
+    if (len == 0) {
+        dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): end of stream. m_state=" << m_state;
+        if (m_state == s_body && m_tailOffset >= PDF_TAIL_LEN) {
+            if (m_receiver.onKey("PDF", 3) != 0) {
+                m_state = s_error;
+                return 0;
+            }
+            if (m_receiver.onValue("", 0) != 0) {
+                m_state = s_error;
+                return 0;
+            }
            m_receiver.onKvDone();
        } else {
            m_state = s_error;
@@ -61,38 +67,43 @@ ParserPDF::push(const char *buf, size_t len)
        return 0;
    }

+    size_t start = (len > MAX_PDF_TAIL_LOOKUP) ? len - MAX_PDF_TAIL_LOOKUP : 0;
    switch (m_state) {
            case s_start:
                m_state = s_body;
                CP_FALL_THROUGH;
            case s_body:
-                {
-                    size_t tail_lookup_offset = (len > MAX_PDF_TAIL_LOOKUP) ? len - MAX_PDF_TAIL_LOOKUP : 0;
-                    c = strstr(buf + tail_lookup_offset, PDF_TAIL);
+                for (size_t i = start; i < len; i++) {
                    dbgTrace(D_WAAP_PARSER_PDF)
-                        << "string to search: " << std::string(buf + tail_lookup_offset)
-                        << " c=" << c;
-                    if (c) {
-                        m_state = s_end;
-                        CP_FALL_THROUGH;
+                        << "ParserPDF::push(): m_tailOffset="
+                        << m_tailOffset
+                        << " buf[i]="
+                        << buf[i];
+                    if (m_tailOffset  <= PDF_TAIL_LEN - 1) {
+                        if (buf[i] == PDF_TAIL[m_tailOffset]) {
+                            m_tailOffset++;
+                        } else {
+                            m_tailOffset = 0;
+                        }
                    } else {
-                        break;
+                        if (buf[i] == '\r' || buf[i] == '\n' || buf[i] == ' ' || buf[i] == 0) {
+                            m_tailOffset++;
+                        } else {
+                            m_tailOffset = 0;
+                            i--;
+                        }
                    }
                }
-            case s_end:
-                if (m_receiver.onKey("PDF", 3) != 0) {
-                    m_state = s_error;
-                    return 0;
-                }
-                if (m_receiver.onValue("", 0) != 0) {
-                    m_state = s_error;
-                    return 0;
-                }
+                dbgTrace(D_WAAP_PARSER_PDF)
+                    << "ParserPDF::push()->s_body: m_tailOffset="
+                    << m_tailOffset;
                break;
            case s_error:
                break;
            default:
-                dbgTrace(D_WAAP_PARSER_PDF) << "ParserPDF::push(): unknown state: " << m_state;
+                dbgTrace(D_WAAP_PARSER_PDF)
+                    << "ParserPDF::push(): unknown state: "
+                    << m_state;
                m_state = s_error;
                return 0;
    }
--- a/components/security_apps/waap/waap_clib/ParserPDF.h
+++ b/components/security_apps/waap/waap_clib/ParserPDF.h
@@ -34,7 +34,6 @@ private:
    enum state {
        s_start,
        s_body,
-        s_end,
        s_error
    };

@@ -42,6 +41,7 @@ private:
    enum state m_state;
    static const std::string m_parserName;
    size_t m_parser_depth;
+    size_t m_tailOffset = 0;
 };

 #endif // __PARSER_PDF_H__
--- a/components/security_apps/waap/waap_clib/Serializator.cc
+++ b/components/security_apps/waap/waap_clib/Serializator.cc
@@ -617,6 +617,17 @@ void SerializeToLocalAndRemoteSyncBase::setInterval(ch::seconds newInterval)

 bool SerializeToLocalAndRemoteSyncBase::localSyncAndProcess()
 {
+    bool isBackupSyncEnabled = getProfileAgentSettingWithDefault<bool>(
+        true,
+        "appsecLearningSettings.backupLocalSync");
+
+    if (!isBackupSyncEnabled) {
+        dbgInfo(D_WAAP_CONFIDENCE_CALCULATOR) << "Local sync is disabled";
+        processData();
+        saveData();
+        return true;
+    }
+
    RemoteFilesList rawDataFiles;

    dbgTrace(D_WAAP_CONFIDENCE_CALCULATOR) << "Getting files of all agents";
--- a/components/security_apps/waap/waap_clib/WaapScores.cc
+++ b/components/security_apps/waap/waap_clib/WaapScores.cc
@@ -97,7 +97,9 @@ calcIndividualKeywords(
    std::sort(keywords.begin(), keywords.end());

    for (auto pKeyword = keywords.begin(); pKeyword != keywords.end(); ++pKeyword) {
-        addKeywordScore(scoreBuilder, poolName, *pKeyword, 2.0f, 0.3f, scoresArray, coefArray);
+        addKeywordScore(
+            scoreBuilder, poolName, *pKeyword, DEFAULT_KEYWORD_SCORE, DEFAULT_KEYWORD_COEF, scoresArray, coefArray
+        );
    }
 }

@@ -112,8 +114,6 @@ calcCombinations(
    std::vector<std::string>& keyword_combinations)
 {
    keyword_combinations.clear();
-    static const double max_combi_score = 1.0f;
-    double default_coef = 0.8f;

    for (size_t i = 0; i < keyword_matches.size(); ++i) {
        std::vector<std::string> combinations;
@@ -137,8 +137,10 @@ calcCombinations(
                default_score += scoreBuilder.getSnapshotKeywordScore(*it, 0.0f, poolName);
            }
            // set default combination score to be the sum of its keywords, bounded by 1
-            default_score = std::min(default_score, max_combi_score);
-            addKeywordScore(scoreBuilder, poolName, combination, default_score, default_coef, scoresArray, coefArray);
+            default_score = std::min(default_score, DEFAULT_COMBI_SCORE);
+            addKeywordScore(
+                scoreBuilder, poolName, combination, default_score, DEFAULT_COMBI_COEF, scoresArray, coefArray
+            );
            keyword_combinations.push_back(combination);
        }
    }
@@ -155,7 +157,7 @@ calcArrayScore(std::vector<double>& scoreArray)
                                                    // *pScore is always positive and there's a +10 offset
        score = 10.0f - left * 10.0f / divisor;
    }
-    dbgTrace(D_WAAP_SCORE_BUILDER) << "calculated score: " << score;
+    dbgDebug(D_WAAP_SCORE_BUILDER) << "calculated score: " << score;
    return score;
 }

@@ -171,7 +173,9 @@ calcLogisticRegressionScore(std::vector<double> &coefArray, double intercept, do
    }
    // Apply the expit function to the log-odds to obtain the probability,
    // and multiply by 10 to obtain a 'score' in the range [0, 10]
-    return 1.0f / (1.0f + exp(-log_odds)) * 10.0f;
+    double score = 1.0f / (1.0f + exp(-log_odds)) * 10.0f;
+    dbgDebug(D_WAAP_SCORE_BUILDER) << "calculated score (log_odds): " << score << " (" << log_odds << ")";
+    return score;
 }

 }
--- a/components/security_apps/waap/waap_clib/WaapScores.h
+++ b/components/security_apps/waap/waap_clib/WaapScores.h
@@ -32,6 +32,11 @@ struct ModelLoggingSettings {
    bool logToStream;
 };

+static const double DEFAULT_KEYWORD_COEF = 0.3f;  // default coef given to addKeywordScore() for a single keyword
+static const double DEFAULT_KEYWORD_SCORE = 2.0f;  // default score given to addKeywordScore() for a single keyword
+static const double DEFAULT_COMBI_COEF = 0.8f;  // default coef given to addKeywordScore() for a keyword combination
+static const double DEFAULT_COMBI_SCORE = 1.0f;  // upper bound applied to a combination's summed default score
+
 std::string getScorePoolNameByLocation(const std::string &location);
 std::string getOtherScorePoolName();
 ModelLoggingSettings getModelLoggingSettings();
--- a/components/security_apps/waap/waap_clib/Waf2Engine.cc
+++ b/components/security_apps/waap/waap_clib/Waf2Engine.cc
@@ -40,6 +40,7 @@
 #include "WaapOpenRedirectPolicy.h"
 #include "WaapErrorDisclosurePolicy.h"
 #include <boost/algorithm/string.hpp>
+#include <boost/regex.hpp>
 #include "generic_rulebase/parameters_config.h"
 #include <iostream>
 #include "ParserDelimiter.h"
@@ -1390,6 +1391,20 @@ Waf2Transaction::findHtmlTagToInject(const char* data, int data_len, int& pos)
        size_t tagHistPosCheck = m_tagHistPos;
        for (size_t i=0; i < tagSize; ++i) {
            if (tag[i] != ::tolower(m_tagHist[tagHistPosCheck])) {
+                if (i == tagSize - 1 && m_tagHist[tagHistPosCheck] == ' ') {
+                    // match regex on head element with attributes
+                    string dataStr = Waap::Util::charToString(data + pos, data_len - pos);
+                    dataStr = dataStr.substr(0, dataStr.find('>')+1);
+                    tagMatches = NGEN::Regex::regexMatch(
+                        __FILE__,
+                        __LINE__,
+                        dataStr,
+                        boost::regex("(?:\\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\\s*=\\s*(\"[^\"]*\"|'[^']*'|[^\\s\"'>]*))?)*\\s*>")
+                        );
+                        pos += dataStr.length() - 1;
+                        dbgTrace(D_WAAP_BOT_PROTECTION) << "matching head element with attributes: " << dataStr << ". match: " << tagMatches;
+                    break;
+                }
                tagMatches = false;
                break;
            }
@@ -1403,12 +1418,8 @@ Waf2Transaction::findHtmlTagToInject(const char* data, int data_len, int& pos)
        }
    }

-    if(!headFound)
-    {
-        return false;
-    }
-
-    return true;
+    dbgTrace(D_WAAP_BOT_PROTECTION) << "head element tag found: " << headFound;
+    return headFound;
 }

 void
@@ -1577,6 +1588,8 @@ Waf2Transaction::decideFinal(
        dbgTrace(D_WAAP) << "Waf2Transaction::decideFinal(): got relevant API configuration from the I/S";
        sitePolicy = &ngenAPIConfig;
        m_overrideState = getOverrideState(sitePolicy);
+
+        // User limits
        shouldBlock = (getUserLimitVerdict() == ngx_http_cp_verdict_e::TRAFFIC_VERDICT_DROP);
    }
    else if (WaapConfigApplication::getWaapSiteConfig(ngenSiteConfig)) {
@@ -2322,10 +2335,11 @@ bool Waf2Transaction::decideResponse()

 bool
 Waf2Transaction::reportScanResult(const Waf2ScanResult &res) {
-    if (get_ignoreScore() || (res.score >= SCORE_THRESHOLD &&
-        (m_scanResult == nullptr || res.score > m_scanResult->score)))
+    if ((get_ignoreScore() || res.score >= SCORE_THRESHOLD) &&
+        (m_scanResult == nullptr || res.score > m_scanResult->score))
    {
-        // Forget any previous scan result and replace with new
+        dbgTrace(D_WAAP) << "Setting scan result. New score: " << res.score;
+        // Forget any previous scan result and replace with new
        delete m_scanResult;
        m_scanResult = new Waf2ScanResult(res);
        return true;
--- a/components/security_apps/waap/waap_clib/Waf2Util.cc
+++ b/components/security_apps/waap/waap_clib/Waf2Util.cc
@@ -952,6 +952,145 @@ string filterUTF7(const string& text) {
    return result;
 }

+//  Decides the status of a Base64 decoded string based on various parameters.
+//  @param decoded The decoded string.
+//  @param entropy The entropy of the original encoded string.
+//  @param decoded_entropy The entropy of the decoded string.
+//  @param spacer_count The number of spacer characters in the decoded string.
+//  @param nonPrintableCharsCount The count of non-printable characters in the decoded string.
+//  @param clear_on_error Flag indicating whether to clear the decoded string on error.
+//  @param terminatorCharsSeen The number of terminator characters seen.
+//  @param called_with_prefix Flag indicating if the function was called with a prefix.
+//  @return The status of the Base64 decoding process.
+//
+//  Idea:
+//  Decide whether the input chunk should be replaced by its decoded form, scanned both as encoded and as
+//  decoded (suspected), or cleared as binary data. Additional case - declare it not base64-encoded.
+//  - if the decoded size is less than 5 - return invalid
+//  - check the entropy delta: base64-encoded data has higher entropy than decoded data, usually delta = 0.25
+//  - a low delta should raise suspicion, but the reverse does not hold
+//  - check if the decoded chunk has more than 10% non-printable characters - this hints at encoded binary data
+//   - if binary data is not suspected but entropy is suspicious, check empiric conditions to decide whether
+//     this is binary data or an invalid decoding
+//   - if binary data is suspected, first check whether entropy is suspicious as well
+//     - if entropy is suspicious, the chunk is short and more than 25% of it is non-printable, return invalid
+//       since this is not base64-encoded data
+//     - if entropy is not suspicious, the chunk is short and more than 50% of it is non-printable, return invalid
+//       since this is not base64-encoded data
+//     - if entropy is suspicious and the chunk size is between 64-1024, perform an additional empiric test.
+//       This test decides whether the return value should be treated as suspected or as binary data (cleared)
+
+base64_decode_status decideStatusBase64Decoded(
+    string& decoded,
+    double entropy,
+    double decoded_entropy,
+    size_t spacer_count,
+    size_t nonPrintableCharsCount,
+    bool clear_on_error,
+    double terminatorCharsSeen,
+    bool called_with_prefix
+)
+{
+    base64_decode_status tmp_status = B64_DECODE_OK;
+    if (entropy - decoded_entropy + terminatorCharsSeen < BASE64_ENTROPY_THRESHOLD_DELTA) {
+        dbgTrace(D_WAAP_BASE64)
+            << "The chunk is under suspect to be base64,"
+            << "use dual processing because entropy delta is too low";
+        tmp_status = B64_DECODE_SUSPECTED;
+    }
+
+    bool empiric_condition = false;
+    if (decoded.size() >= 5) {
+        if (spacer_count > 1) { // NOTE(review): wraps if spacer_count > nonPrintableCharsCount + 1 - confirm
+            nonPrintableCharsCount = nonPrintableCharsCount - spacer_count + 1;
+        }
+        dbgTrace(D_WAAP_BASE64)
+                << "(before test for unprintables):  decoded.size="
+                << decoded.size()
+                << ", nonPrintableCharsCount="
+                << nonPrintableCharsCount
+                << ", clear_on_error="
+                << clear_on_error
+                << ", called_with_prefix="
+                << called_with_prefix;
+        if (nonPrintableCharsCount * 10 < decoded.size()) {
+            dbgTrace(D_WAAP_BASE64)
+                << "(decode/replace due  to small amount of nonprintables): will decide based on entropy values";
+        } else { // more than 10% of non-printable characters
+            dbgTrace(D_WAAP_BASE64) << "large amount of nonporintables";
+            if (tmp_status == B64_DECODE_SUSPECTED) {
+                // entropy - decoded_entropy + terminatorCharsSeen < 0.25
+                if (decoded.size() < 16 &&  nonPrintableCharsCount * 4 > decoded.size())  {
+                    decoded.clear();
+                    return B64_DECODE_INVALID;
+                }
+                dbgTrace(D_WAAP_BASE64)
+                    << "(large amount of nonporintables + entropy suspect), check emprirics because decoded."
+                    << " terminatorCharsSeen="
+                    << terminatorCharsSeen;
+                // empiric test based on investigation of real payloads
+                empiric_condition = entropy < decoded_entropy
+                    && entropy > BASE64_ENTROPY_BASE_THRESHOLD
+                    && decoded_entropy > BASE64_ENTROPY_DECODED_THRESHOLD
+                    && !called_with_prefix
+                    && decoded.size() > BASE64_MIN_SIZE_LIMIT
+                    && decoded.size() < BASE64_MAX_SIZE_LIMIT
+                    && terminatorCharsSeen != 0;
+                if (!empiric_condition) {
+                    if (clear_on_error) decoded.clear();
+                    return B64_DECODE_SUSPECTED;
+                } else {
+                    // looks like binary data: optionally clear it, then report OK via the status check below
+                    if (clear_on_error) decoded.clear();
+                    tmp_status = B64_DECODE_OK;
+                }
+            } else { // entropy - decoded_entropy + terminatorCharsSeen >= 0.25
+                // one more empiric based on uT and real payloads
+                if (decoded.size() < 16
+                        && nonPrintableCharsCount * 2 > decoded.size()
+                        && terminatorCharsSeen == 0) {
+                    decoded.clear();
+                    return B64_DECODE_INVALID;
+                }
+                dbgTrace(D_WAAP_BASE64)
+                    << "(delete as binary content) because decoded. Return B64_DECODE_INCOMPLETE";
+                if (clear_on_error) decoded.clear();
+                return B64_DECODE_INCOMPLETE;
+            }
+        } // end of the non-printable characters ratio check
+        dbgTrace(D_WAAP_BASE64)
+            << "After handling unprintables checking status";
+        if (tmp_status == B64_DECODE_OK) {
+            dbgTrace(D_WAAP_BASE64) <<  "replacing with decoded data, return B64_DECODE_OK";
+            return B64_DECODE_OK;
+        } else { // tmp_status == B64_DECODE_SUSPECTED, entropy - decoded_entropy + terminatorCharsSeen < 0.25
+            dbgTrace(D_WAAP_BASE64) << "Suspected due to entropy, making empiric test";
+            // and one more empiric test based on investigation of real payloads
+            empiric_condition = entropy < decoded_entropy
+                && entropy > BASE64_ENTROPY_BASE_THRESHOLD
+                && decoded_entropy > BASE64_ENTROPY_DECODED_THRESHOLD
+                && !called_with_prefix
+                && decoded.size() > BASE64_MIN_SIZE_LIMIT
+                && decoded.size() < BASE64_MAX_SIZE_LIMIT;
+            if (empiric_condition) {
+                dbgTrace(D_WAAP_BASE64) << "Empiric test failed, non-base64 chunk, return B64_DECODE_INVALID";
+                decoded.clear();
+                return B64_DECODE_INVALID;
+            }
+            dbgTrace(D_WAAP_BASE64) << "Empiric test passed, return B64_DECODE_SUSPECTED";
+            return B64_DECODE_SUSPECTED;
+        }
+    }
+
+    // Decoded size is too small - leave the encoded value (return B64_DECODE_INVALID).
+    // Log the size before clearing, otherwise the trace would always report 0.
+    dbgTrace(D_WAAP_BASE64)
+        << "(leave as-is) because decoded too small. decoded.size="
+        << decoded.size();
+    decoded.clear(); // discard partial data
+    return B64_DECODE_INVALID;
+}
+
+
 // Attempts to validate and decode base64-encoded chunk.
 // Value is the full value inside which potential base64-encoded chunk was found,
 // it and end point to start and end of that chunk.
@@ -980,18 +1119,28 @@ base64_decode_status decodeBase64Chunk(
    uint32_t spacer_count = 0;
    uint32_t length = end - it;

-    dbgTrace(D_WAAP) << "decodeBase64Chunk: value='" << value << "' match='" << string(it, end) << "'";
+    dbgTrace(D_WAAP)
+        << "value='"
+        << value
+        << "' match='"
+        << string(it, end)
+        << "' clear_on_error='"
+        << clear_on_error
+        << "' called_with_prefix='"
+        << called_with_prefix
+        << "'";
    string::const_iterator begin = it;

    // The encoded data length (without the "base64," prefix) should be exactly divisible by 4
    // len % 4 is not 0 i.e. this is not base64
-        if ((end - it) % 4 != 0) {
-            dbgTrace(D_WAAP_BASE64) <<
-                "b64DecodeChunk: (leave as-is) because encoded data length should be exactly divisible by 4.";
+        if ((end - it) % 4 == 1) {
+            dbgTrace(D_WAAP_BASE64)
+                << "(leave as-is) because encoded data length should not be <4*x + 1>.";
            return B64_DECODE_INVALID;
        }

-        std::unordered_map<char, double> frequency;
+        std::unordered_map<char, double> original_occurences_counter;
+        std::unordered_map<char, double> decoded_occurences_counter;

        while (it != end) {
            unsigned char c = *it;
@@ -999,9 +1148,8 @@ base64_decode_status decodeBase64Chunk(
            if (terminatorCharsSeen) {
                // terminator characters must all be '=', until end of match.
                if (c != '=') {
-                    dbgTrace(D_WAAP_BASE64) <<
-                        "decodeBase64Chunk: (leave as-is) because terminator characters must all be '='," <<
-                        "until end of match.";
+                    dbgTrace(D_WAAP_BASE64)
+                    << "(leave as-is) because terminator characters must all be '=' until end of match.";
                    return B64_DECODE_INVALID;
                }

@@ -1009,13 +1157,13 @@ base64_decode_status decodeBase64Chunk(
                terminatorCharsSeen++;

                if (terminatorCharsSeen > 2) {
-                    dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because terminatorCharsSeen > 2";
+                    dbgTrace(D_WAAP_BASE64) << "(leave as-is) because terminatorCharsSeen > 2";
                    return B64_DECODE_INVALID;
                }

                // allow for more terminator characters
                it++;
-                frequency[c]++;
+                original_occurences_counter[c]++;
                continue;
            }

@@ -1040,12 +1188,18 @@ base64_decode_status decodeBase64Chunk(
                // Start tracking terminator characters
                terminatorCharsSeen++;
                it++;
-                frequency[c]++;
+                original_occurences_counter[c]++;
                continue;
            }
            else {
-                dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because of non-base64 character ('" <<
-                        c << "', ASCII " << (unsigned int)c << ", offset " << (it-begin) << ")";
+                dbgTrace(D_WAAP_BASE64)
+                    << "(leave as-is) because of non-base64 character ('"
+                    << c
+                    << "', ASCII "
+                    << (unsigned int)c
+                    << ", offset "
+                    << (it-begin)
+                    << ")";
                return B64_DECODE_INVALID; // non-base64 character
            }

@@ -1068,18 +1222,19 @@ base64_decode_status decodeBase64Chunk(
                }

                decoded += (char)code;
+                decoded_occurences_counter[(char)code]++;
            }

            it++;
-            frequency[c]++;
+            original_occurences_counter[c]++;
        }

        // end of encoded sequence decoded.

        dbgTrace(D_WAAP_BASE64)
-            << "decodeBase64Chunk: decoded.size="
+            << "decoding done: decoded.size="
            << decoded.size()
-            << ", nonPrintableCharsCount="
+            << ", uncorrected nonPrintableCharsCount="
            << nonPrintableCharsCount
            << ", spacer_count = "
            << spacer_count
@@ -1088,56 +1243,42 @@ base64_decode_status decodeBase64Chunk(
            << "; decoded='"
            << decoded << "'";

-        // Check if entropy is correlates with b64 threshold (initially > 4.5)
-        if (!called_with_prefix) {
-            double entropy = 0;
-            double p = 0;
-            for (const auto& pair : frequency) {
-                p = pair.second / length;
-                entropy -= p * std::log2(p);
-            }
-            dbgTrace(D_WAAP_BASE64) << " ===b64Test===:  base entropy = " << entropy << "length = " << length;
-            // Add short payload factor
-            if (length < 16)
-                entropy = entropy * 16 / length;
-            // Enforce tailoring '=' characters
-            entropy+=terminatorCharsSeen;
-
-            dbgTrace(D_WAAP_BASE64) << " ===b64Test===:  corrected entropy = " << entropy << "length = " << length;
-            if (entropy <= base64_entropy_threshold) {
-                return B64_DECODE_INVALID;
-            }
+        double entropy = 0;
+        double p = 0;
+        double decoded_entropy = 0;
+        for (const auto& pair : original_occurences_counter) {
+            p = pair.second / length;
+            entropy -= p * std::log2(p);
        }
-
-        // Return success only if decoded.size>=5 and there are less than 10% of non-printable
-        // characters in output.
-        if (decoded.size() >= 5) {
-            if (spacer_count > 1) {
-                nonPrintableCharsCount = nonPrintableCharsCount - spacer_count + 1;
-            }
-            if (nonPrintableCharsCount * 10 < decoded.size()) {
-                dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (decode/replace) decoded.size=" << decoded.size() <<
-                        ", nonPrintableCharsCount=" << nonPrintableCharsCount << ": replacing with decoded data";
-            }
-            else {
-                dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (delete) because decoded.size=" << decoded.size() <<
-                        ", nonPrintableCharsCount=" << nonPrintableCharsCount <<
-                        ", clear_on_error=" << clear_on_error;
-                if (clear_on_error) decoded.clear();
-                return B64_DECODE_INCOMPLETE;
-            }
-            dbgTrace(D_WAAP_BASE64) << "returning true: successfully decoded."
-                << " Returns decoded data in \"decoded\" parameter";
-            return B64_DECODE_OK; // successfully decoded. Returns decoded data in "decoded" parameter
+        for (const auto &pair : decoded_occurences_counter) {
+            p = pair.second / decoded.size();
+            decoded_entropy -= p * std::log2(p);
        }
+        dbgTrace(D_WAAP_BASE64)
+            << "Base entropy = "
+            << entropy
+            << " Decoded_entropy = "
+            << decoded_entropy
+            << " length = " // leading space keeps this label from fusing with the number before it
+            << length;
+
+        base64_decode_status return_status = decideStatusBase64Decoded(
+            decoded,
+            entropy,
+            decoded_entropy,
+            spacer_count,
+            nonPrintableCharsCount,
+            clear_on_error,
+            terminatorCharsSeen,
+            called_with_prefix
+        );
+
+        dbgTrace(D_WAAP_BASE64)
+            << "After decideStatusBase64Decoded return_status="
+            << return_status;
+
+        return return_status;

-        // If decoded size is too small - leave the encoded value (return false)
-        decoded.clear(); // discard partial data
-        dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because decoded too small. decoded.size=" <<
-                decoded.size() <<
-                ", nonPrintableCharsCount=" << nonPrintableCharsCount <<
-                ", clear_on_error=" << clear_on_error;
-        return B64_DECODE_INVALID;
 }

 // Attempts to detect and validate base64 chunk.
@@ -1180,8 +1321,9 @@ b64DecodeChunk(
            return false;
        }
    }
-
-    return decodeBase64Chunk(value, it, end, decoded) != B64_DECODE_INVALID;
+    base64_decode_status status = decodeBase64Chunk(value, it, end, decoded);
+    dbgTrace(D_WAAP_BASE64) << "b64DecodeChunk: status = " << status;
+    return status != B64_DECODE_INVALID;
 }

 vector<string> split(const string& s, char delim) {
@@ -1281,6 +1423,7 @@ static void b64TestChunk(const string &s,
        int &deletedCount,
        string &outStr)
 {
+    dbgTrace(D_WAAP_BASE64) << " ===b64TestChunk===:  starting with = '" << s << "'";
    size_t chunkLen = (chunkEnd - chunkStart);

    if ((chunkEnd - chunkStart) > static_cast<int>(b64_prefix.size()) &&
@@ -1289,11 +1432,9 @@ static void b64TestChunk(const string &s,
        chunkLen -= b64_prefix.size();
    }

-    size_t chunkRem = chunkLen % 4;
-
-    // Only match chunk whose length is divisible by 4
    string repl;
-    if (chunkRem == 0 && cb(s, chunkStart, chunkEnd, repl)) {
+    dbgTrace(D_WAAP_BASE64) << " ===b64TestChunk===:  chunkLen = " << chunkLen;
+    if (cb(s, chunkStart, chunkEnd, repl)) {
        // Succesfully matched b64 chunk
        if (!repl.empty()) {
            outStr += repl;
@@ -1340,9 +1481,7 @@ bool detectBase64Chunk(
        dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===:  isB64AlphaChar = true, '" << *it << "'";
        start = it;
        end = s.end();
-        if ((end - start) % 4 == 0) {
-            return true;
-        }
+        return true;
    }
    // non base64 before supposed chunk - will not process
    return false;
@@ -1381,17 +1520,31 @@ bool isBase64PrefixProcessingOK (
        if (detectBase64Chunk(s, start, end)) {
            dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected";
            if ((start != s.end()) && (end == s.end())) {
+                dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected but not complete";
                retVal = processDecodedChunk(s, start, end, value, binaryFileType, true);
+                dbgTrace(D_WAAP_BASE64)
+                    << " ===isBase64PrefixProcessingOK===: after processDecodedChunk retVal = "
+                    << retVal
+                    << " binaryFileType = "
+                    << binaryFileType;
            }
        } else if (start != s.end()) {
-            dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk not detected."
-                                        " searching for known file header only";
+            dbgTrace(D_WAAP_BASE64)
+                << " ===isBase64PrefixProcessingOK===: chunk not detected. searching for known file header only";
            end = (start + MAX_HEADER_LOOKUP < s.end()) ? start + MAX_HEADER_LOOKUP : s.end();
            processDecodedChunk(s, start, end, value, binaryFileType);
            value.clear();
+            dbgTrace(D_WAAP_BASE64)
+                << " ===isBase64PrefixProcessingOK===: after processDecodedChunk binaryFileType = "
+                << binaryFileType;
            return binaryFileType != Waap::Util::BinaryFileType::FILE_TYPE_NONE;
        }
    }
+    dbgTrace(D_WAAP_BASE64)
+        << " ===isBase64PrefixProcessingOK===: retVal = "
+        << retVal
+        << " binaryFileType = "
+        << binaryFileType;
    return retVal != B64_DECODE_INVALID;
 }

@@ -1399,23 +1552,31 @@ base64_variants b64Test (
        const string &s,
        string &key,
        string &value,
-        BinaryFileType &binaryFileType)
+        BinaryFileType &binaryFileType,
+        const size_t offset)
 {

    key.clear();
-    bool retVal;
    binaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
+    dbgTrace(D_WAAP_BASE64)
+        << " ===b64Test===: string =  "
+        << s
+        << " key = "
+        << key
+        << " value = "
+        << value
+        << " offset = "
+        << offset;

-    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: string =  " << s
-            << " key = " << key << " value = " << value;
    // Minimal length
-    if (s.size() < 8) {
+    if (offset > s.size() || s.size() - offset < 8) {
        return CONTINUE_AS_IS;
    }
    dbgTrace(D_WAAP_BASE64) << " ===b64Test===: minimal lenght test passed";

    std::string prefix_decoded_val;
-    string::const_iterator it = s.begin();
+    auto begin = s.begin() + offset; // in range: the length check above guarantees offset <= s.size()
+    auto it = begin;

    // 1st check if we have key candidate
    if (base64_key_value_detector_re.hasMatch(s)) {
@@ -1433,7 +1594,7 @@ base64_variants b64Test (
                break;
            case EQUAL:
                if (*it == '=') {
-                    it = s.begin();
+                    it = begin;
                    state=MISDETECT;
                    continue;
                }
@@ -1455,7 +1616,7 @@ base64_variants b64Test (
        if (it == s.end() || state == MISDETECT) {
            dbgTrace(D_WAAP_BASE64) << " ===b64Test===: detected  *it = s.end()" << *it;
            if (key.size() > 0) {
-                it = s.begin();
+                it = begin;
                key.clear();
            }
        } else {
@@ -1479,7 +1640,7 @@ base64_variants b64Test (
        }
    }

-    string::const_iterator start = s.end();
+    auto start = s.end();
    dbgTrace(D_WAAP_BASE64) << " ===b64Test===:  B64 itself = " << *it << " =======";
    bool isB64AlphaChar = Waap::Util::isAlphaAsciiFast(*it) || isdigit(*it) || *it=='/' || *it=='+';
    if (isB64AlphaChar) {
@@ -1487,11 +1648,6 @@ base64_variants b64Test (
        dbgTrace(D_WAAP_BASE64) <<
            " ===b64Test===:  Start tracking potential b64 chunk = " << *it << " =======";
        start = it;
-        if ((s.end() - start) % 4 != 0) {
-            key.clear();
-            value.clear();
-            return CONTINUE_AS_IS;
-        }
    }
    else {
        dbgTrace(D_WAAP_BASE64) <<
@@ -1512,17 +1668,37 @@ base64_variants b64Test (
            key.pop_back();
            dbgTrace(D_WAAP_BASE64) << " ===b64Test===: FINAL key = '" << key << "'";
        }
-        retVal = decodeBase64Chunk(s, start, s.end(), value) != B64_DECODE_INVALID;
+        base64_decode_status decode_chunk_status = decodeBase64Chunk(s, start, s.end(), value);

-        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After testing and conversion value = "
-                << value << "retVal = '" << retVal <<"'";
-        if (!retVal) {
+        dbgTrace(D_WAAP_BASE64)
+            << " ===b64Test===: After testing and conversion value = "
+            << value
+            << " decode_chunk_status = '"
+            << decode_chunk_status
+            << "'";
+        if (decode_chunk_status == B64_DECODE_INVALID) {
            key.clear();
            value.clear();
            return CONTINUE_AS_IS;
        }
-        dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After tpassed retVal check = "
-            << value << "retVal = '" << retVal <<"'" << "key = '" << key << "'";
+
+        if (decode_chunk_status == B64_DECODE_INCOMPLETE) {
+            value.clear(); // drop the decoded output, then fall through to the normal return below
+        }
+
+        if (decode_chunk_status == B64_DECODE_SUSPECTED) {
+            return CONTINUE_DUAL_SCAN; // NOTE(review): key/value are not cleared on this path - confirm intended
+        }
+
+        dbgTrace(D_WAAP_BASE64)
+            << " ===b64Test===: After tpassed retVal check = "
+            << value
+            << " decode_chunk_status = '"
+            << decode_chunk_status
+            << "'"
+            << " key = '"
+            << key
+            << "'";
        if (key.empty()) {
            return SINGLE_B64_CHUNK_CONVERT;
        } else {
@@ -1548,7 +1724,7 @@ void b64Decode(
    deletedCount = 0;
    outStr = "";
    int offsetFix = 0;
-
+    dbgTrace(D_WAAP_BASE64) << " ===b64Decode===:  starting with = '" << s << "'";
    string::const_iterator it = s.begin();

    // Minimal length
@@ -1596,6 +1772,11 @@ void b64Decode(
                }

                // Decode and add chunk
+                dbgTrace(D_WAAP_BASE64)
+                    << " ===b64Decode===:  chunkStart = "
+                    << *chunkStart
+                    << " it = "
+                    << *it;
                b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);

                // stop tracking b64 chunk
@@ -1607,6 +1788,7 @@ void b64Decode(
    }

    if (chunkStart != s.end()) {
+        dbgTrace(D_WAAP_BASE64) << " ===b64Decode===:  chunkStart = " << *chunkStart;
        b64TestChunk(s, chunkStart, it, cb, decodedCount, deletedCount, outStr);
    }
 }
--- a/components/security_apps/waap/waap_clib/Waf2Util.h
+++ b/components/security_apps/waap/waap_clib/Waf2Util.h
@@ -32,9 +32,15 @@

 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)

-enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS};
+enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS, CONTINUE_DUAL_SCAN};
 enum base64_stage {BEFORE_EQUAL, EQUAL, DONE, MISDETECT};
-enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE};
+enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE, B64_DECODE_SUSPECTED};
+
+#define BASE64_ENTROPY_BASE_THRESHOLD 5.0
+#define BASE64_ENTROPY_DECODED_THRESHOLD 5.4
+#define BASE64_ENTROPY_THRESHOLD_DELTA 0.25
+#define BASE64_MIN_SIZE_LIMIT 16
+#define BASE64_MAX_SIZE_LIMIT 1024

 // This is portable version of stricmp(), which is non-standard function (not even in C).
 // Contrary to stricmp(), for a slight optimization, s2 is ASSUMED to be already in lowercase.
@@ -865,6 +871,17 @@ void unescapeUnicode(std::string &text);
 // Try to find and decode UTF7 chunks
 std::string filterUTF7(const std::string &text);

+base64_decode_status
+decideStatusBase64Decoded( // decodeBase64Chunk calls this after decoding and returns its result as-is
+    std::string& decoded, // decoded output accumulated by decodeBase64Chunk
+    double entropy, // sum of -p*log2(p) over the encoded input characters
+    double decoded_entropy, // same measure, computed over the decoded characters
+    size_t spacer_count,
+    size_t nonPrintableCharsCount,
+    bool clear_on_error,
+    double terminatorCharsSeen, // NOTE(review): incremented as a counter by the caller, declared double - confirm
+    bool called_with_prefix);
+
 base64_decode_status
 decodeBase64Chunk(
    const std::string &value,
@@ -926,7 +943,8 @@ namespace Util {
            const std::string &s,
            std::string &key,
            std::string &value,
-            BinaryFileType &binaryFileType);
+            BinaryFileType &binaryFileType,
+            size_t offset = 0);

    // The original stdlib implementation of isalpha() supports locale settings which we do not really need.
    // It is also proven to contribute to slow performance in some of the algorithms using it.
--- a/components/security_apps/waap/waap_component_impl.h
+++ b/components/security_apps/waap/waap_component_impl.h
@@ -19,6 +19,7 @@
 #include "table_opaque.h"
 #include "i_transaction.h"
 #include "waap_clib/DeepAnalyzer.h"
+#include "waap_clib/WaapModelResultLogger.h"
 #include "waap_clib/WaapAssetState.h"
 #include "waap_clib/WaapAssetStatesManager.h"
 #include "reputation_features_agg.h"