mirror of
https://github.com/openappsec/openappsec.git
synced 2026-01-17 16:00:26 +03:00
Jan 06 2026 dev (#387)
* sync code * update code to support brotli * update code to support brotli * update code to support brotli * sync code * fix findBrotli * sync code * sync code * sync code * sync code --------- Co-authored-by: Ned Wright <nedwright@proton.me> Co-authored-by: Daniel Eisenberg <danielei@checkpoint.com>
This commit is contained in:
@@ -12,18 +12,24 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "Signatures.h"
|
||||
#include "AssertionRegexes.h"
|
||||
#include "agent_core_utilities.h"
|
||||
#include "debug.h"
|
||||
#include "waap.h"
|
||||
#include <cstdlib> // for getenv
|
||||
#include <cstring> // for strcmp
|
||||
#include <fstream>
|
||||
|
||||
USE_DEBUG_FLAG(D_WAAP);
|
||||
USE_DEBUG_FLAG(D_WAAP_SAMPLE_SCAN);
|
||||
USE_DEBUG_FLAG(D_WAAP_HYPERSCAN);
|
||||
|
||||
typedef picojson::value::object JsObj;
|
||||
typedef picojson::value JsVal;
|
||||
typedef picojson::value::array JsArr;
|
||||
typedef std::map<std::string, std::vector<std::string>> filtered_parameters_t;
|
||||
|
||||
|
||||
static std::vector<std::string> to_strvec(const picojson::value::array& jsV)
|
||||
static std::vector<std::string> to_strvec(const picojson::value::array &jsV)
|
||||
{
|
||||
std::vector<std::string> r;
|
||||
|
||||
@@ -34,7 +40,7 @@ static std::vector<std::string> to_strvec(const picojson::value::array& jsV)
|
||||
return r;
|
||||
}
|
||||
|
||||
static std::set<std::string> to_strset(const picojson::value::array& jsA)
|
||||
static std::set<std::string> to_strset(const picojson::value::array &jsA)
|
||||
{
|
||||
std::set<std::string> r;
|
||||
|
||||
@@ -45,18 +51,18 @@ static std::set<std::string> to_strset(const picojson::value::array& jsA)
|
||||
return r;
|
||||
}
|
||||
|
||||
static std::map<std::string, Regex*> to_regexmap(const picojson::value::object& jsO, bool& error)
|
||||
static std::map<std::string, Regex *> to_regexmap(const picojson::value::object &jsO, bool &error)
|
||||
{
|
||||
std::map<std::string, Regex*> r;
|
||||
std::map<std::string, Regex *> r;
|
||||
|
||||
for (auto it = jsO.begin(); it != jsO.end(); ++it) {
|
||||
const std::string& n = it->first;
|
||||
const std::string &n = it->first;
|
||||
// convert name to lowercase now (so we don't need to do it at runtime every time).
|
||||
std::string n_lower;
|
||||
for (std::string::const_iterator pCh = n.begin(); pCh != n.end(); ++pCh) {
|
||||
n_lower += std::tolower(*pCh);
|
||||
}
|
||||
const picojson::value& v = it->second;
|
||||
const picojson::value &v = it->second;
|
||||
|
||||
if (error) {
|
||||
// stop loading regexes if there's previous error...
|
||||
@@ -73,13 +79,12 @@ static std::map<std::string, Regex*> to_regexmap(const picojson::value::object&
|
||||
return r;
|
||||
}
|
||||
|
||||
static filtered_parameters_t to_filtermap(const picojson::value::object& JsObj)
|
||||
static filtered_parameters_t to_filtermap(const picojson::value::object &JsObj)
|
||||
{
|
||||
filtered_parameters_t result;
|
||||
for (auto it = JsObj.begin(); it != JsObj.end(); ++it)
|
||||
{
|
||||
for (auto it = JsObj.begin(); it != JsObj.end(); ++it) {
|
||||
const std::string parameter = it->first;
|
||||
const picojson::value::array& arr = it->second.get<picojson::value::array>();
|
||||
const picojson::value::array &arr = it->second.get<picojson::value::array>();
|
||||
result[parameter] = to_strvec(arr);
|
||||
}
|
||||
return result;
|
||||
@@ -215,9 +220,14 @@ Signatures::Signatures(const std::string& filepath) :
|
||||
to_strset(sigsSource["remove_keywords_always"].get<JsArr>())),
|
||||
user_agent_prefix_re(sigsSource["user_agent_prefix_re"].get<std::string>()),
|
||||
binary_data_kw_filter(sigsSource["binary_data_kw_filter"].get<std::string>()),
|
||||
wbxml_data_kw_filter(sigsSource["wbxml_data_kw_filter"].get<std::string>())
|
||||
wbxml_data_kw_filter(sigsSource["wbxml_data_kw_filter"].get<std::string>()),
|
||||
m_hyperscanInitialized(false)
|
||||
{
|
||||
|
||||
// Only preprocess hyperscan patterns if hyperscan is enabled
|
||||
bool should_use_hyperscan = Signatures::shouldUseHyperscan();
|
||||
if (should_use_hyperscan) {
|
||||
preprocessHyperscanPatterns();
|
||||
}
|
||||
}
|
||||
|
||||
Signatures::~Signatures()
|
||||
@@ -229,21 +239,299 @@ bool Signatures::fail()
|
||||
return error;
|
||||
}
|
||||
|
||||
picojson::value::object Signatures::loadSource(const std::string& waapDataFileName)
|
||||
// Static helper to process assertion flags for a pattern (for testing and internal use)
|
||||
std::string
|
||||
Signatures::processAssertions(const std::string &groupName, const std::string &pattern, AssertionFlags &flags)
|
||||
{
|
||||
std::string processed = pattern;
|
||||
|
||||
// Use regexes from AssertionRegexes namespace to detect assertions at start/end of the pattern string
|
||||
using namespace Waap::AssertionRegexes;
|
||||
boost::smatch match;
|
||||
|
||||
// Start assertions - only a single '(' can precede
|
||||
if (boost::regex_search(processed, match, reStartNonWordBehind) && match.position() >= 0) {
|
||||
flags.setFlag(AssertionFlag::START_NON_WORD_BEHIND);
|
||||
processed = boost::regex_replace(processed, reStartNonWordBehind, std::string(""));
|
||||
}
|
||||
|
||||
// Path traversal start assertion
|
||||
if (boost::regex_search(processed, match, rePathTraversalStart) && match.position() >= 0) {
|
||||
flags.setFlag(AssertionFlag::PATH_TRAVERSAL_START);
|
||||
processed = boost::regex_replace(processed, rePathTraversalStart, std::string(""));
|
||||
}
|
||||
|
||||
// End assertions - only a single ')' can follow
|
||||
if (boost::regex_search(processed, match, reEndNonWordAhead) && match.position() >= 0) {
|
||||
flags.setFlag(AssertionFlag::END_NON_WORD_AHEAD);
|
||||
processed = boost::regex_replace(processed, reEndNonWordAhead, std::string(""));
|
||||
} else if (boost::regex_search(processed, match, reEndNonWordSpecial) && match.position() >= 0) {
|
||||
flags.setFlag(AssertionFlag::END_NON_WORD_SPECIAL);
|
||||
processed = boost::regex_replace(processed, reEndNonWordSpecial, std::string(""));
|
||||
}
|
||||
|
||||
// Path traversal end assertion
|
||||
if (boost::regex_search(processed, match, rePathTraversalEnd) && match.position() >= 0) {
|
||||
flags.setFlag(AssertionFlag::PATH_TRAVERSAL_END);
|
||||
processed = boost::regex_replace(processed, rePathTraversalEnd, std::string(""));
|
||||
}
|
||||
|
||||
// wildcard evasion regex group name starts with evasion_wildcard_regex
|
||||
if (groupName.find("evasion_wildcard_regex") == 0) {
|
||||
flags.setFlag(AssertionFlag::WILDCARD_EVASION);
|
||||
}
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
// Extracts the group name from a regex pattern string (e.g., (?P<groupName>...))
|
||||
std::string Signatures::extractGroupName(const std::string &pattern) {
|
||||
boost::regex namedGroupRegex(R"(\(\?P<([^>]+)>)");
|
||||
boost::smatch match;
|
||||
if (boost::regex_search(pattern, match, namedGroupRegex)) {
|
||||
return match[1].str();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
void Signatures::preprocessHyperscanPatterns()
|
||||
{
|
||||
std::map<std::string, size_t> categoryCount;
|
||||
|
||||
// Helper function to check if a pattern is hyperscan compatible
|
||||
auto isHyperscanCompatible = [&categoryCount](const std::string &pattern) -> bool {
|
||||
// Hyperscan doesn't support certain regex features that we can't easily convert
|
||||
static const std::vector<std::string> incompatibleFeatures = {
|
||||
R"((?!\w)", R"((?<!\w)", R"((?=\w)", R"((?<=\w)", // Lookahead/lookbehind assertions for \w
|
||||
R"((?!)", R"((?<!)", R"((?=)", R"((?<=)", // Lookahead/lookbehind assertions
|
||||
R"((?>)", R"((?&)", R"((?|)", R"((?P<)", // Atomic groups, named groups, and branching
|
||||
R"((?R)" // Recursion
|
||||
};
|
||||
|
||||
for (const auto &feature : incompatibleFeatures) {
|
||||
if (pattern.find(feature) != std::string::npos) {
|
||||
dbgInfo(D_WAAP_HYPERSCAN) << "Incompatible feature found: " << feature << " in pattern: " << pattern;
|
||||
categoryCount[feature]++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
boost::regex backrefRegex(R"(\(\\\d+\))");
|
||||
if (boost::regex_search(pattern, backrefRegex)) {
|
||||
dbgInfo(D_WAAP_HYPERSCAN) << "Incompatible backreference found: " << pattern;
|
||||
categoryCount["backreference"]++;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
// Helper function to convert regex pattern to hyperscan-compatible format
|
||||
auto convertToHyperscanPattern = [](const std::string &originalPattern) -> std::string {
|
||||
std::string converted = originalPattern;
|
||||
|
||||
// Remove named group syntax - convert (?P<name>...) to ...
|
||||
boost::regex namedGroupRegex(R"(\(\?P<[^>]+>)");
|
||||
if (boost::regex_search(converted, namedGroupRegex)) {
|
||||
std::string end_str = ")";
|
||||
if (converted.back() == ')') {
|
||||
converted.pop_back(); // Remove the trailing ')'
|
||||
end_str = "";
|
||||
}
|
||||
converted = boost::regex_replace(converted, namedGroupRegex, end_str);
|
||||
}
|
||||
|
||||
// Handle atomic groups first (before removing word boundaries)
|
||||
// Hyperscan doesn't support atomic groups, so we need to convert them
|
||||
|
||||
// Convert (?>\b) to nothing (remove word boundary atomic groups)
|
||||
converted = boost::regex_replace(converted, boost::regex(R"(\(\?\>\\b\))"), std::string(""));
|
||||
// Convert (?>\B) to nothing (remove non-word boundary atomic groups)
|
||||
converted = boost::regex_replace(converted, boost::regex(R"(\(\?\>\\B\))"), std::string(""));
|
||||
// Convert empty atomic groups (?>) to nothing
|
||||
converted = boost::regex_replace(converted, boost::regex(R"(\(\?\>\))"), std::string(""));
|
||||
|
||||
// // Now remove remaining word boundaries (not supported by Hyperscan)
|
||||
// // At this point, any \b or \B that was inside atomic groups has been handled above
|
||||
// converted = boost::regex_replace(converted, boost::regex(R"(\\b)"), std::string(""));
|
||||
// converted = boost::regex_replace(converted, boost::regex(R"(\\B)"), std::string(""));
|
||||
|
||||
return converted;
|
||||
};
|
||||
|
||||
// Helper function to get patterns from sigsSource for each category
|
||||
auto getCommonPatternsForCategory = [this](const std::string &category,
|
||||
const std::string ®exSource) -> std::vector<std::string> {
|
||||
std::vector<std::string> patterns;
|
||||
|
||||
// Map regexSource/category to the JSON key in sigsSource
|
||||
std::string key;
|
||||
if (regexSource == "specific_acuracy_keywords_regex" || category == "specific_accuracy") {
|
||||
key = "specific_acuracy_keywords_regex_list";
|
||||
} else if (regexSource == "words_regex" || category == "keywords") {
|
||||
key = "words_regex_list";
|
||||
} else if (regexSource == "pattern_regex" || category == "patterns") {
|
||||
key = "pattern_regex_list";
|
||||
} else {
|
||||
// Fallback: allow passing the exact key name
|
||||
key = regexSource;
|
||||
dbgDebug(D_WAAP_HYPERSCAN) << "Unknown category/regexSource: " << category << "/" << regexSource
|
||||
<< ". Using regexSource as key.";
|
||||
}
|
||||
|
||||
// Fetch patterns directly from sigsSource if available
|
||||
auto it = sigsSource.find(key);
|
||||
if (it != sigsSource.end()) {
|
||||
try {
|
||||
patterns = to_strvec(it->second.get<JsArr>());
|
||||
} catch (...) {
|
||||
// If the type is unexpected, return empty and continue gracefully
|
||||
patterns.clear();
|
||||
dbgWarning(D_WAAP_HYPERSCAN) << "Unexpected type for key: " << key;
|
||||
}
|
||||
}
|
||||
|
||||
return patterns;
|
||||
};
|
||||
|
||||
// Process specific_acuracy_keywords_regex patterns
|
||||
std::vector<std::string> incompatiblePatterns;
|
||||
{
|
||||
auto patterns = getCommonPatternsForCategory("specific_accuracy", "specific_acuracy_keywords_regex");
|
||||
for (const auto &pattern : patterns) {
|
||||
AssertionFlags flags;
|
||||
std::string groupName = extractGroupName(pattern);
|
||||
std::string processedPattern = convertToHyperscanPattern(pattern);
|
||||
std::string hyperscanPattern = processAssertions(groupName, processedPattern, flags);
|
||||
|
||||
if (hyperscanPattern != pattern) {
|
||||
dbgTrace(D_WAAP_HYPERSCAN) << pattern << " -> " << hyperscanPattern;
|
||||
}
|
||||
|
||||
if (isHyperscanCompatible(hyperscanPattern)) {
|
||||
HyperscanPattern hsPattern;
|
||||
hsPattern.originalPattern = pattern;
|
||||
hsPattern.hyperscanPattern = hyperscanPattern;
|
||||
hsPattern.category = "specific_accuracy";
|
||||
hsPattern.regexSource = "specific_acuracy_keywords_regex";
|
||||
hsPattern.groupName = groupName;
|
||||
if (hsPattern.groupName.empty()) {
|
||||
hsPattern.groupName = "specific_accuracy_match";
|
||||
}
|
||||
hsPattern.isFastReg = (hsPattern.groupName.find("fast_reg") != std::string::npos);
|
||||
hsPattern.isEvasion = (hsPattern.groupName.find("evasion") != std::string::npos);
|
||||
|
||||
m_keywordHyperscanPatterns.push_back(hsPattern);
|
||||
m_keywordAssertionFlags.push_back(flags);
|
||||
} else {
|
||||
incompatiblePatterns.push_back(pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process words_regex patterns
|
||||
{
|
||||
auto patterns = getCommonPatternsForCategory("keywords", "words_regex");
|
||||
for (const auto &pattern : patterns) {
|
||||
AssertionFlags flags;
|
||||
std::string groupName = extractGroupName(pattern);
|
||||
std::string processedPattern = convertToHyperscanPattern(pattern);
|
||||
std::string hyperscanPattern = processAssertions(groupName, processedPattern, flags);
|
||||
|
||||
if (hyperscanPattern != pattern) {
|
||||
dbgTrace(D_WAAP_HYPERSCAN) << pattern << " -> " << hyperscanPattern;
|
||||
}
|
||||
|
||||
if (isHyperscanCompatible(hyperscanPattern)) {
|
||||
HyperscanPattern hsPattern;
|
||||
hsPattern.originalPattern = pattern;
|
||||
hsPattern.hyperscanPattern = hyperscanPattern;
|
||||
hsPattern.category = "keywords";
|
||||
hsPattern.regexSource = "words_regex";
|
||||
hsPattern.groupName = groupName;
|
||||
if (hsPattern.groupName.empty()) {
|
||||
hsPattern.groupName = "keywords_match";
|
||||
}
|
||||
hsPattern.isFastReg = (hsPattern.groupName.find("fast_reg") != std::string::npos);
|
||||
hsPattern.isEvasion = (hsPattern.groupName.find("evasion") != std::string::npos);
|
||||
|
||||
m_keywordHyperscanPatterns.push_back(hsPattern);
|
||||
m_keywordAssertionFlags.push_back(flags);
|
||||
} else {
|
||||
incompatiblePatterns.push_back(pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process pattern_regex patterns
|
||||
{
|
||||
auto patterns = getCommonPatternsForCategory("patterns", "pattern_regex");
|
||||
for (const auto &pattern : patterns) {
|
||||
AssertionFlags flags;
|
||||
std::string groupName = extractGroupName(pattern);
|
||||
std::string processedPattern = convertToHyperscanPattern(pattern);
|
||||
std::string hyperscanPattern = processAssertions(groupName, processedPattern, flags);
|
||||
|
||||
if (hyperscanPattern != pattern) {
|
||||
dbgTrace(D_WAAP_HYPERSCAN) << pattern << " -> " << hyperscanPattern;
|
||||
}
|
||||
|
||||
if (isHyperscanCompatible(hyperscanPattern)) {
|
||||
HyperscanPattern hsPattern;
|
||||
hsPattern.originalPattern = pattern;
|
||||
hsPattern.hyperscanPattern = hyperscanPattern;
|
||||
hsPattern.category = "patterns";
|
||||
hsPattern.regexSource = "pattern_regex";
|
||||
hsPattern.groupName = groupName;
|
||||
if (hsPattern.groupName.empty()) {
|
||||
hsPattern.groupName = "patterns_match";
|
||||
}
|
||||
hsPattern.isFastReg = (hsPattern.groupName.find("fast_reg") != std::string::npos);
|
||||
hsPattern.isEvasion = (hsPattern.groupName.find("evasion") != std::string::npos);
|
||||
|
||||
m_patternHyperscanPatterns.push_back(hsPattern);
|
||||
m_patternAssertionFlags.push_back(flags);
|
||||
} else {
|
||||
incompatiblePatterns.push_back(pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dbgInfo(D_WAAP_HYPERSCAN) << "Preprocessed Hyperscan patterns: "
|
||||
<< "keywords=" << m_keywordHyperscanPatterns.size()
|
||||
<< ", patterns=" << m_patternHyperscanPatterns.size()
|
||||
<< ", incompatible=" << incompatiblePatterns.size();
|
||||
for (const auto &it : categoryCount) {
|
||||
dbgInfo(D_WAAP_HYPERSCAN) << "Feature: " << it.first << ", Count: " << it.second;
|
||||
}
|
||||
|
||||
// Convert incompatible patterns to PmWordSet for traditional regex processing
|
||||
if (m_regexPreconditions && !incompatiblePatterns.empty()) {
|
||||
for (const auto &pattern : incompatiblePatterns) {
|
||||
Waap::RegexPreconditions::WordIndex wordIndex = m_regexPreconditions->getWordByRegex(pattern);
|
||||
if (wordIndex != Waap::RegexPreconditions::emptyWordIndex) {
|
||||
m_incompatiblePatternsPmWordSet.insert(wordIndex);
|
||||
}
|
||||
}
|
||||
dbgInfo(D_WAAP_HYPERSCAN) << "Created PmWordSet for " << m_incompatiblePatternsPmWordSet.size()
|
||||
<< " incompatible patterns (from " << incompatiblePatterns.size() << " total)";
|
||||
}
|
||||
}
|
||||
|
||||
picojson::value::object Signatures::loadSource(const std::string &waapDataFileName)
|
||||
{
|
||||
picojson::value doc;
|
||||
std::ifstream f(waapDataFileName);
|
||||
|
||||
if (f.fail()) {
|
||||
dbgError(D_WAAP) << "Failed to open json data file '" << waapDataFileName << "'!";
|
||||
error = true; // flag an error
|
||||
error = true; // flag an error
|
||||
return picojson::value::object();
|
||||
}
|
||||
|
||||
int length;
|
||||
f.seekg(0, std::ios::end); // go to the end
|
||||
length = f.tellg(); // report location (this is the length)
|
||||
char* buffer = new char[length]; // allocate memory for a buffer of appropriate dimension
|
||||
char *buffer = new char[length]; // allocate memory for a buffer of appropriate dimension
|
||||
f.seekg(0, std::ios::beg); // go back to the beginning
|
||||
f.read(buffer, length); // read the whole file into the buffer
|
||||
f.close();
|
||||
@@ -258,11 +546,153 @@ picojson::value::object Signatures::loadSource(const std::string& waapDataFileNa
|
||||
ss >> doc;
|
||||
|
||||
if (!picojson::get_last_error().empty()) {
|
||||
dbgError(D_WAAP) << "WaapAssetState::loadSource('" << waapDataFileName << "') failed (parse error: '" <<
|
||||
picojson::get_last_error() << "').";
|
||||
error = true; // flag an error
|
||||
dbgError(D_WAAP) << "WaapAssetState::loadSource('" << waapDataFileName << "') failed (parse error: '"
|
||||
<< picojson::get_last_error() << "').";
|
||||
error = true; // flag an error
|
||||
return picojson::value::object();
|
||||
}
|
||||
|
||||
return doc.get<picojson::value::object>()["waap_signatures"].get<picojson::value::object>();
|
||||
}
|
||||
|
||||
const std::vector<Signatures::HyperscanPattern> &Signatures::getKeywordHyperscanPatterns() const
|
||||
{
|
||||
return m_keywordHyperscanPatterns;
|
||||
}
|
||||
|
||||
const std::vector<Signatures::HyperscanPattern> &Signatures::getPatternHyperscanPatterns() const
|
||||
{
|
||||
return m_patternHyperscanPatterns;
|
||||
}
|
||||
|
||||
const std::vector<Signatures::AssertionFlags> &Signatures::getKeywordAssertionFlags() const
|
||||
{
|
||||
return m_keywordAssertionFlags;
|
||||
}
|
||||
|
||||
const std::vector<Signatures::AssertionFlags> &Signatures::getPatternAssertionFlags() const
|
||||
{
|
||||
return m_patternAssertionFlags;
|
||||
}
|
||||
|
||||
// Read-only access to the word indexes gathered for the patterns that were rejected
// as Hyperscan-incompatible during preprocessing.
const Waap::RegexPreconditions::PmWordSet &
Signatures::getIncompatiblePatternsPmWordSet() const
{
    return this->m_incompatiblePatternsPmWordSet;
}
|
||||
|
||||
void Signatures::processRegexMatch(const std::string &groupName, const std::string &groupValue, std::string &word,
|
||||
std::vector<std::string> &keyword_matches,
|
||||
Waap::Util::map_of_stringlists_t &found_patterns, bool longTextFound,
|
||||
bool binaryDataFound) const
|
||||
{
|
||||
std::string group = groupName;
|
||||
|
||||
if (group == "") {
|
||||
return; // skip unnamed group
|
||||
}
|
||||
|
||||
const std::string &value = groupValue;
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: group name='" << group << "' value='" << value << "', word='" << word
|
||||
<< "':";
|
||||
|
||||
if (group.find("fast_reg") != std::string::npos) {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: found '*fast_reg*' in group name";
|
||||
if (group.find("evasion") != std::string::npos) {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: found both 'fast_reg' and 'evasion' in group name.";
|
||||
word = "encoded_" + repr_uniq(value);
|
||||
if (word == "encoded_") {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN)
|
||||
<< "checkRegex: empty word after repr_uniq: resetting word to 'character_encoding'"
|
||||
" and group to 'evasion'.";
|
||||
word = "character_encoding";
|
||||
} else if (Waap::Util::str_isalnum(word)) {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN)
|
||||
<< "checkRegex: isalnum word after repr_uniq: resetting group to 'evasion'.";
|
||||
// If the found match is alphanumeric (we've seen strings like "640x480" match)
|
||||
// we still should assume evasion but it doesn't need to include "fast_reg",
|
||||
// which would cause unconditional report to stage2 and hit performance...
|
||||
// This is why we remove the word "fast_reg" from the group name.
|
||||
group = "evasion";
|
||||
}
|
||||
|
||||
if (longTextFound) {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "checkRegex: longTextFound so resetting group name to 'longtext'";
|
||||
group = "longtext";
|
||||
}
|
||||
} else {
|
||||
word = group;
|
||||
}
|
||||
}
|
||||
// In sequences detected as "longTextFound" or "longBinaryFound", do not add words in the
|
||||
// "keyword_matches" list that:
|
||||
// - starts with "encoded_"
|
||||
// - or startswith("\")
|
||||
// - or equal to "character_encoding"
|
||||
if ((longTextFound || binaryDataFound) &&
|
||||
(word == "character_encoding" || word.substr(0, 1) == "\\" || word.substr(0, 8) == "encoded_")) {
|
||||
// For now, do not skip
|
||||
// TODO - check if skipping improves detection
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "longText/binaryData found with character_encoding";
|
||||
} else if (binaryDataFound && (isShortWord(word) || isShortHtmlTag(word) ||
|
||||
NGEN::Regex::regexMatch(__FILE__, __LINE__, group, binary_data_kw_filter))) {
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "Not adding group='" << group << "', word='" << word
|
||||
<< "' - due to binary data";
|
||||
return;
|
||||
} else if ((std::find(keyword_matches.begin(), keyword_matches.end(), word) == keyword_matches.end())) {
|
||||
// python: if (word not in current_matches): current_matches.append(word)
|
||||
keyword_matches.push_back(word);
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "added keyword match for group='" << group << "', value='" << value
|
||||
<< "', word='" << word << "'";
|
||||
}
|
||||
|
||||
// python:
|
||||
// if group not in found_patterns:
|
||||
// found_patterns[group]=[]
|
||||
if (found_patterns.find(group) == found_patterns.end()) {
|
||||
found_patterns[group] = std::vector<std::string>();
|
||||
}
|
||||
|
||||
// python:
|
||||
// if value not in found_patterns[group]:
|
||||
// found_patterns[group].append(value)
|
||||
if (std::find(found_patterns[group].begin(), found_patterns[group].end(), value) == found_patterns[group].end()) {
|
||||
found_patterns[group].push_back(value);
|
||||
dbgTrace(D_WAAP_SAMPLE_SCAN) << "added pattern match for group='" << group << "', value='" << value
|
||||
<< "', word='" << word << "'";
|
||||
}
|
||||
}
|
||||
|
||||
bool Signatures::isHyperscanInitialized() const
|
||||
{
|
||||
return m_hyperscanInitialized;
|
||||
}
|
||||
|
||||
void Signatures::setHyperscanInitialized(bool initialized)
|
||||
{
|
||||
m_hyperscanInitialized = initialized;
|
||||
}
|
||||
|
||||
// Tells whether Hyperscan-based scanning is enabled.
//
// When built with USE_HYPERSCAN, the WAAP_USE_HYPERSCAN environment variable is read on the
// first call (and again whenever `force` is true): the values "1" and "true" (any letter case)
// enable Hyperscan, anything else - or an unset variable - disables it. The result is cached
// in a function-local static. Without USE_HYPERSCAN the function always returns false.
//
// force - re-read the environment even if a cached answer exists.
bool Signatures::shouldUseHyperscan(bool force)
{
    // This can be controlled by environment variable or configuration
    static bool useHyperscan = false;

#ifdef USE_HYPERSCAN
    static bool checked = false;
    const bool mustEvaluate = force || !checked;
    if (mustEvaluate) {
        // Check environment variable first
        const char *envValue = getenv("WAAP_USE_HYPERSCAN");
        if (envValue == nullptr) {
            // Default to false to maintain backward compatibility - Hyperscan is opt-in
            useHyperscan = false;
            dbgDebug(D_WAAP_SAMPLE_SCAN) << "Hyperscan usage default (disabled): " << useHyperscan;
        } else {
            useHyperscan = (strcmp(envValue, "1") == 0 || strcasecmp(envValue, "true") == 0);
            dbgDebug(D_WAAP_SAMPLE_SCAN) << "Hyperscan usage set by environment: " << useHyperscan;
        }
        checked = true;
    }
#endif // USE_HYPERSCAN

    return useHyperscan;
}
|
||||
|
||||
Reference in New Issue
Block a user