// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef __SIGNATURES_H__ #define __SIGNATURES_H__ #include "Waf2Regex.h" #include "picojson.h" #include "flags.h" #include class Signatures { public: // Enum for zero-length assertion flags enum class AssertionFlag { START_WORD_BEHIND = 0, // (?<=\w) START_NON_WORD_BEHIND, // (?:=]|$) PATH_TRAVERSAL_START, // (?; static std::string extractGroupName(const std::string &pattern); static std::string processAssertions(const std::string &groupName, const std::string &pattern, AssertionFlags &flags); Signatures(const std::string& filepath); ~Signatures(); bool fail(); picojson::value::object sigsSource; bool error; std::shared_ptr m_regexPreconditions; // Regexes loaded from compiled signatures const Regex words_regex; const Regex specific_acuracy_keywords_regex; const Regex pattern_regex; const Regex un_escape_pattern; const Regex quotes_ev_pattern; const Regex comment_ev_pattern; const Regex quotes_space_ev_pattern; const Regex allowed_text_re; const Regex pipe_split_re; const Regex semicolon_split_re; const Regex longtext_re; const Regex nospaces_long_value_re; const Regex good_header_name_re; const Regex good_header_value_re; const std::set ignored_for_nospace_long_value; const std::set global_ignored_keywords; const std::set global_ignored_patterns; const std::set url_ignored_keywords; const std::set url_ignored_patterns; const Regex url_ignored_re; const std::set header_ignored_keywords; const std::set header_ignored_patterns; const Regex header_ignored_re; const std::map> filter_parameters; const std::map> m_attack_types; const Regex php_serialize_identifier; const Regex html_regex; const Regex uri_parser_regex; const boost::regex confluence_macro_re; #if 0 // Removed by Pavel's request. Leaving here in case he'll want to add this back... const std::set cookie_ignored_keywords; const std::set cookie_ignored_patterns; const Regex cookie_ignored_re; #endif std::map headers_re; const Regex format_magic_binary_re; std::map params_type_re; // Signatures for responses const Regex resp_hdr_pattern_regex_list; const Regex resp_hdr_words_regex_list; const Regex resp_body_pattern_regex_list; const Regex resp_body_words_regex_list; const std::set remove_keywords_always; const boost::regex user_agent_prefix_re; const boost::regex binary_data_kw_filter; const boost::regex wbxml_data_kw_filter; // Pre-compiled Hyperscan patterns and metadata for performance optimization struct HyperscanPattern { std::string originalPattern; std::string hyperscanPattern; std::string groupName; std::string category; std::string regexSource; bool isFastReg; bool isEvasion; HyperscanPattern() : isFastReg(false), isEvasion(false) {} }; // Pre-processed hyperscan patterns for each regex category std::vector m_keywordHyperscanPatterns; std::vector m_patternHyperscanPatterns; // Assertion flags corresponding to each pattern (same indices as above vectors) std::vector m_keywordAssertionFlags; std::vector m_patternAssertionFlags; // Getter methods for precompiled patterns const std::vector& getKeywordHyperscanPatterns() const; const std::vector& getPatternHyperscanPatterns() const; // Getter methods for assertion flags const std::vector& getKeywordAssertionFlags() const; const std::vector& getPatternAssertionFlags() const; // PmWordSet for incompatible patterns that need to use traditional regex scanning Waap::RegexPreconditions::PmWordSet m_incompatiblePatternsPmWordSet; // Getter method for incompatible patterns PmWordSet const Waap::RegexPreconditions::PmWordSet& getIncompatiblePatternsPmWordSet() const; // Hyperscan initialization state management bool isHyperscanInitialized() const; void setHyperscanInitialized(bool initialized); // Check if Hyperscan should be used (based on configuration) static bool shouldUseHyperscan(bool force = false); void processRegexMatch( const std::string &groupName, const std::string &groupValue, std::string &word, std::vector &keyword_matches, Waap::Util::map_of_stringlists_t &found_patterns, bool longTextFound, bool binaryDataFound ) const; private: picojson::value::object loadSource(const std::string& waapDataFileName); void preprocessHyperscanPatterns(); bool m_hyperscanInitialized; }; inline std::string repr_uniq(const std::string & value) { std::string result; char hist[256]; memset(&hist, 0, sizeof(hist)); for (std::string::const_iterator pC = value.begin(); pC != value.end(); ++pC) { unsigned char ch = (unsigned char)(*pC); // Only take ASCII characters that are not alphanumeric, and each character only once if (ch <= 127 && !isalnum(ch) && hist[ch] == 0) { // Convert low ASCII characters to their C/C++ printable equivalent // (used for easier viewing. Also, binary data causes issues with ElasticSearch) switch (ch) { case 0x07: result += "\\a"; break; case 0x08: result += "\\b"; break; case 0x09: result += "\\t"; break; case 0x0A: result += "\\n"; break; case 0x0B: result += "\\v"; break; case 0x0C: result += "\\f"; break; case 0x0D: result += "\\r"; break; case 0x5C: result += "\\\\"; break; case 0x27: result += "\\\'"; break; case 0x22: result += "\\\""; break; case 0x3F: result += "\\\?"; break; default: { if (ch >= 32) { result += (char)ch; } else { char buf[16]; sprintf(buf, "\\" "x%02X", ch); result += buf; } } } hist[ch] = 1; } } return result; } inline bool isShortWord(const std::string& word) { return word.size() <= 2; } inline bool isShortHtmlTag(const std::string& word) { return !word.empty() && word.size() <= 4 && word[0] == '<' && word[word.size() - 1] == '>'; } #endif