Aug 08 2025 dev (#336)

* sync code * sync code * sync code --------- Co-authored-by: Ned Wright <nedwright@proton.me>
2025-11-16 09:21:54 +03:00 · 2025-08-10 13:21:52 +03:00
parent dd19bf6158
commit 6bbc89712a
153 changed files with 4864 additions and 1018 deletions
--- a/components/security_apps/waap/waap_clib/WaapAssetState.cc
+++ b/components/security_apps/waap/waap_clib/WaapAssetState.cc
@@ -446,13 +446,14 @@ WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
        // update orig_size and orig_capacity after string copy
        orig_size = text.size();
        orig_capacity = text.capacity();
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (1) (after filterUTF7) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (1) (after filterUTF7) '" << text <<
+            "' size: " << text.size();

        // 2. Replace %xx sequences by their single-character equivalents.
        // Also replaces '+' symbol by space character.
        // Python equivalent: text = urllib.unquote_plus(text)
        text.erase(unquote_plus(text.begin(), text.end()), text.end());
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (2) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (2) '" << text << "' size: " << text.size();

        fixBreakingSpace(text);

@@ -460,38 +461,38 @@ WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
        // remove all characters whose ASCII code is >=128.
        // Python equivalent: text.encode('ascii',errors='ignore')
        filterUnicode(text);
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (3) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (3) '" << text << "' size: " << text.size();

        // 4. oh shi?... should I handle unicode html entities (python's htmlentitydefs module)???
        // Python equivalent: text = HTMLParser.HTMLParser().unescape(text)
        text.erase(escape_html(text.begin(), text.end()), text.end());
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (4) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (4) '" << text << "' size: " << text.size();

        // 5. Apply backslash escaping (like in C)
        // Python equivalent: text = text.decode('string_escape')
        text.erase(escape_backslashes(text.begin(), text.end()), text.end());
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (5) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (5) '" << text << "' size: " << text.size();

        // 6. remove all unicode characters from string. Basically,
        // remove all characters whose ASCII code is >=128.
        // Python equivalent: text.encode('ascii',errors='ignore')
        filterUnicode(text);
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (6) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (6) '" << text << "' size: " << text.size();

        // 7. Replace %xx sequences by their single-character equivalents.
        // Also replaces '+' symbol by space character.
        // Python equivalent: text = urllib.unquote_plus(text)
        text.erase(unquote_plus(text.begin(), text.end()), text.end());
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (7) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (7) '" << text << "' size: " << text.size();

        unescapeUnicode(text);
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "after unescapeUnicode '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "after unescapeUnicode '" << text << "' size: " << text.size();

        // 8. remove all unicode characters from string. Basically,
        // remove all characters whose ASCII code is >=128.
        // Python equivalent: text.encode('ascii',errors='ignore')
        filterUnicode(text);
-        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (8) '" << text << "'";
+        dbgTrace(D_WAAP_SAMPLE_PREPROCESS) << "unescape: (8) '" << text << "' size: " << text.size();

        // 9. ???
        //
@@ -530,7 +531,7 @@ WaapAssetState::WaapAssetState(std::shared_ptr<Signatures> signatures,
            << AlertInfo(AlertTeam::CORE, "WAAP sample processing")
            << "unescape: original size=" << orig_size << " capacity=" << orig_capacity
            << " new size=" << text.size() << " capacity=" << text.capacity()
-            << " text='" << text << "'";
+            << " text='" << text << "'" << " orig text='" << s << "'";

        return text;
    }
@@ -1098,6 +1099,9 @@ WaapAssetState::apply(
    // Detect long text spans, and also any-length spans that end with file extensions such as ".jpg"
    bool longTextFound = m_Signatures->longtext_re.hasMatch(res.unescaped_line);

+    // When this flag remains false until the last evasion handling, a second unescape is performed
+    bool evasion_detected = false;
+
    if (longTextFound) {
        dbgTrace(D_WAAP_SAMPLE_SCAN) << "longtext found";
    }
@@ -1297,6 +1301,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1326,6 +1331,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1409,6 +1415,42 @@ WaapAssetState::apply(
        dbgTrace(D_WAAP_EVASIONS) << "status after evasion checking " << nicePrint(res);
    }

+    bool tab_evasion = (res.unescaped_line.find('\t') != std::string::npos);
+
+    if (tab_evasion) {
+        dbgTrace(D_WAAP_EVASIONS) << "Tab character evasion detected";
+
+        // Create normalized version with tabs removed
+        std::string unescaped = res.unescaped_line;
+
+        // Remove all tab characters to normalize the string
+        std::string::iterator end_pos = std::remove(unescaped.begin(), unescaped.end(), '\t');
+        unescaped.erase(end_pos, unescaped.end());
+
+        size_t kwCount = res.keyword_matches.size();
+
+        if (res.unescaped_line != unescaped) {
+            SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
+            checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
+                res.found_patterns, longTextFound, binaryDataFound);
+            checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
+                longTextFound, binaryDataFound);
+            checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
+                longTextFound, binaryDataFound);
+        }
+
+        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            // If new keywords were found, add a specific indication
+            res.keyword_matches.push_back("tab_character_evasion");
+
+            // Recalculate repetition and/or probing indicators
+            unsigned int newWordsCount = 0;
+            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
+                newWordsCount);
+            // Take minimal words count because empirically it means evasion was probably successfully decoded
+            wordsCount = std::min(wordsCount, newWordsCount);
+        }
+    }

    bool quoutes_space_evasion = Waap::Util::find_in_map_of_stringlists_keys(
        "quotes_space_ev_fast_reg",
@@ -1471,6 +1513,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1499,7 +1542,7 @@ WaapAssetState::apply(
                    longTextFound, binaryDataFound);
        }

-
+        evasion_detected = true;
        // Recalculate repetition and/or probing indicators
        unsigned int newWordsCount = 0;
        calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1613,6 +1656,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1644,6 +1688,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1675,6 +1720,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1706,6 +1752,7 @@ WaapAssetState::apply(
        }

        if (kwCount != res.keyword_matches.size() && !binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1715,6 +1762,74 @@ WaapAssetState::apply(
        }
    }

+    // Detect evasion for command injection vulnerability using encoded characters (such as ^).
+    // example: "c^m^d /c d^i^r" -> "cmd /c dir"
+    // if '^' is found in the line, remove it and rescan
+    // search in scan result for any keyword with "os", "cmd", "exec" "command" or "shell" in it
+    // if not found, we don't want this evasion
+    // if found, we want to use its results
+    // Note: this is not a perfect solution, but it is better than nothing
+    if (line.find('^') != std::string::npos) {
+        dbgTrace(D_WAAP_EVASIONS) << "Windows command injection evasion suspected";
+
+        std::string unescaped = line;
+        bool is_win_cmd_evasion = false;
+        // remove all occurances of '^' character
+        unescaped.erase(std::remove(unescaped.begin(), unescaped.end(), '^'), unescaped.end());
+        dbgTrace(D_WAAP_EVASIONS) << "unescaped == '" << unescaped << "'";
+        if (!unescaped.empty()) {
+            std::vector<std::string> evKeywordMatches(res.keyword_matches);
+            std::vector<std::string> evRegexMatches(res.regex_matches);
+            Waap::Util::map_of_stringlists_t evFoundPatterns(res.found_patterns);
+            bool evLongTextFound = longTextFound;
+            bool evBinaryDataFound = binaryDataFound;
+
+            if (line != unescaped) {
+                SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
+                checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, evKeywordMatches,
+                    res.found_patterns, evLongTextFound, evBinaryDataFound);
+                checkRegex(unescapedSample, m_Signatures->words_regex, evKeywordMatches, evFoundPatterns,
+                    evLongTextFound, evBinaryDataFound);
+                checkRegex(unescapedSample, m_Signatures->pattern_regex, evRegexMatches, evFoundPatterns,
+                    evLongTextFound, evBinaryDataFound);
+            }
+
+            if (evKeywordMatches.size() != res.keyword_matches.size()) {
+                if (Waap::Util::find_in_map_of_stringlists_keys("os_commands", evFoundPatterns)) {
+                    is_win_cmd_evasion = true;
+                } else {
+                    for (const auto &kw : evKeywordMatches) {
+                        if (kw.size() < 2 ||
+                                str_contains(kw, "cmd") ||
+                                str_contains(kw, "command") ||
+                                str_contains(kw, "os_") ||
+                                str_contains(kw, "exec")) {
+                            is_win_cmd_evasion = true;
+                            break;
+                        }
+                    }
+                }
+            }
+
+            if (is_win_cmd_evasion) {
+                dbgTrace(D_WAAP_EVASIONS) << "Evasion and relevant matches found, updating results";
+                // found relevant keywords after unescaping, set to new matches
+                res.keyword_matches = evKeywordMatches;
+                res.regex_matches = evRegexMatches;
+                res.found_patterns = evFoundPatterns;
+                longTextFound = evLongTextFound;
+                binaryDataFound = evBinaryDataFound;
+                // Recalculate repetition and/or probing indicators
+                unsigned int newWordsCount = 0;
+                calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
+                    newWordsCount);
+                // Take minimal words count because empirically it means evasion was probably successfully decoded
+                wordsCount = std::min(wordsCount, newWordsCount);
+            } else {
+                dbgTrace(D_WAAP_EVASIONS) << "Evasion not relevant, will not apply to results";
+            }
+        }
+    }

    // python: escape ='hi_acur_fast_reg_evasion' in found_patterns
    bool escape = Waap::Util::find_in_map_of_stringlists_keys("evasion", res.found_patterns);
@@ -1757,6 +1872,7 @@ WaapAssetState::apply(
            escape = false;
        }
        else if (!binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1833,6 +1949,7 @@ WaapAssetState::apply(
            escape = false;
        }
        else if (!binaryDataFound) {
+            evasion_detected = true;
            // Recalculate repetition and/or probing indicators
            unsigned int newWordsCount = 0;
            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
@@ -1841,6 +1958,42 @@ WaapAssetState::apply(
            wordsCount = std::min(wordsCount, newWordsCount);
        }
    }
+
+    // Handle second URL encoding evasion
+    if (!evasion_detected &&
+            !binaryDataFound &&
+            !longTextFound &&
+            Waap::Util::containsPercentEncoding(res.unescaped_line)) {
+        dbgTrace(D_WAAP_EVASIONS) << "Second URL encoding evasion detected";
+
+        std::string unescaped = unescape(res.unescaped_line);
+        size_t kwCount = res.keyword_matches.size();
+
+        if (res.unescaped_line != unescaped) {
+            SampleValue unescapedSample(unescaped, m_Signatures->m_regexPreconditions);
+            checkRegex(unescapedSample, m_Signatures->specific_acuracy_keywords_regex, res.keyword_matches,
+                res.found_patterns, longTextFound, binaryDataFound);
+            checkRegex(unescapedSample, m_Signatures->words_regex, res.keyword_matches, res.found_patterns,
+                longTextFound, binaryDataFound);
+            checkRegex(unescapedSample, m_Signatures->pattern_regex, res.regex_matches, res.found_patterns,
+                longTextFound, binaryDataFound);
+        }
+
+        if (kwCount == res.keyword_matches.size()) {
+            // Remove the evasion keyword if no real evasion found
+            keywordsToRemove.push_back("evasion");
+        }
+        else if (!binaryDataFound) {
+            evasion_detected = true;
+            // Recalculate repetition and/or probing indicators
+            unsigned int newWordsCount = 0;
+            calcRepetitionAndProbing(res, ignored_keywords, unescaped, detectedRepetition, detectedProbing,
+                newWordsCount);
+            // Take minimal words count because empirically it means evasion was probably succesfully decoded
+            wordsCount = std::min(wordsCount, newWordsCount);
+        }
+    }
+
    dbgTrace(D_WAAP_SAMPLE_SCAN) << "after evasions..." << nicePrint(res);
    // Remove evasion keywords that should not be reported because there's no real evasion found
    if (!keywordsToRemove.empty()) {