// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef __WAF2_UTIL_H__148aa7e4
#define __WAF2_UTIL_H__148aa7e4

#include "WaapValueStatsAnalyzer.h"
#include "log_generator.h"

// NOTE(review): the names of the system headers were lost from this file (only bare "#include"
// tokens survived). The list below is what the code in this header visibly uses - confirm it
// against the original file.
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include "WaapEnums.h"
#include "yajl/yajl_gen.h"

#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)

enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS, CONTINUE_DUAL_SCAN};
enum base64_stage {BEFORE_EQUAL, EQUAL, DONE, MISDETECT};
enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE, B64_DECODE_SUSPECTED};

#define BASE64_ENTROPY_BASE_THRESHOLD 5.0
#define BASE64_ENTROPY_DECODED_THRESHOLD 5.4
#define BASE64_ENTROPY_THRESHOLD_DELTA 0.25
#define BASE64_MIN_SIZE_LIMIT 16
#define BASE64_MAX_SIZE_LIMIT 1024

// This is portable version of stricmp(), which is non-standard function (not even in C).
// Contrary to stricmp(), for a slight optimization, s2 is ASSUMED to be already in lowercase.
// s1 can be in mixed case and is converted using tolower() before comparing to s2.
// The function returns true if s1 (with all characters lowered case) matches s2, false if not.
// Case-insensitive equality test of s1 against s2, where s2 is ASSUMED to be lowercase already.
// Returns true only if both strings end at the same position after matching.
inline bool my_stricmp(const char *s1, const char *s2) {
    assert(s1 != NULL);
    assert(s2 != NULL);

    // Bytes are compared as unsigned char so that tolower() never receives a negative value.
    while (*s1 && *s2 && tolower(static_cast<unsigned char>(*s1)) == static_cast<unsigned char>(*s2)) {
        s1++;
        s2++;
    }

    // true if s1 (after applying tolower()) exactly matches s2
    return (*s1 == '\0' && *s2 == '\0');
}

// Same as my_stricmp(), but s1 has a known size (s1_size) and is not required to be null-terminated.
inline bool my_strincmp(const char *s1, const char *s2, size_t s1_size) {
    assert(s1 != NULL);
    assert(s2 != NULL);

    // Consume at most s1_size characters of s1
    while (s1_size > 0 && *s2 &&
            tolower(static_cast<unsigned char>(*s1)) == static_cast<unsigned char>(*s2)) {
        s1++;
        s2++;
        s1_size--;
    }

    // true if all s1_size characters matched and s2 is exhausted too
    return (s1_size == 0 && *s2 == '\0');
}

// Returns true if s1 (lowered with tolower()) starts with s2. s2 is assumed to be lowercase.
inline bool my_stristarts_with(const char *s1, const char *s2) {
    assert(s1 != NULL);
    assert(s2 != NULL);

    while (*s1 && *s2 && tolower(static_cast<unsigned char>(*s1)) == static_cast<unsigned char>(*s2)) {
        s1++;
        s2++;
    }

    // All of s2 was examined and matched the start of s1
    return (*s2 == '\0');
}

// Convert one hex digit character to its numeric value (0..15).
// 'valid' is set to false (and 0 is returned) when ch is not a hex digit.
inline unsigned char from_hex(unsigned char ch, bool &valid) {
    valid = true;

    if (ch <= '9' && ch >= '0') {
        ch -= '0';
    } else if (ch <= 'f' && ch >= 'a') {
        ch -= 'a' - 10;
    } else if (ch <= 'F' && ch >= 'A') {
        ch -= 'A' - 10;
    } else {
        valid = false;
        ch = 0;
    }

    return ch;
}

// Returns true if 'value' begins with 'prefix'.
inline bool str_starts_with(const std::string& value, const std::string& prefix) {
    if (prefix.size() > value.size()) {
        return false;
    }
    return value.compare(0, prefix.size(), prefix) == 0;
}

// Returns true if 'value' ends with 'ending'.
inline bool str_ends_with(const std::string& value, const std::string& ending) {
    if (ending.size() > value.size()) {
        return false;
    }
    return value.compare(value.size() - ending.size(), ending.size(), ending) == 0;
}

// In-place decoder of %XX sequences (when decodeUrl is set) and of '+' into ' ' (when decodePlus is set).
// The decoded text is written over [first, last) starting at 'first'; the returned iterator points
// one past the last character written. Malformed sequences are copied to the output unchanged.
template <typename _IT>
_IT unquote_plus(_IT first, _IT last, bool decodeUrl=true, bool decodePlus=true) {
    _IT result = first;
    enum { STATE_COPY, STATE_FIRST_DIGIT, STATE_SECOND_DIGIT } state = STATE_COPY;
    unsigned char accVal = 0; // accumulated character (from hex digits)
    char lastCh = 0;

    for (; first != last; ++first) {
        switch (state) {
            case STATE_COPY: {
                if (*first == '+' && decodePlus) {
                    *result++ = ' ';
                } else if (decodeUrl && *first == '%') {
                    state = STATE_FIRST_DIGIT;
                } else {
                    *result++ = *first;
                }
                break;
            }
            case STATE_FIRST_DIGIT: {
                bool valid;
                lastCh = *first; // remembered solely for the case where the 2nd character is invalid hex
                accVal = from_hex(*first, valid);

                if (valid) {
                    state = STATE_SECOND_DIGIT;
                    break;
                }

                // put the pending percent symbol to the output stream
                *result++ = '%';

                if (*first != '%') {
                    // put the "invalid" symbol to the output stream and continue copying
                    *result++ = *first;
                    state = STATE_COPY;
                }
                // else: we found the '%%' sequence. The first '%' was flushed and we stay in the same
                // state (as if we've just seen the first '%'). This supports the case of %%xx.
                break;
            }
            case STATE_SECOND_DIGIT: {
                bool valid;
                accVal = (accVal << 4) | from_hex(*first, valid);

                if (valid) {
                    // Both hex digits decoded - put the computed character to the output
                    *result++ = accVal;
                    state = STATE_COPY;
                    break;
                }

                // Second character is invalid: return the original '%' and the first (valid hex)
                // character to the output.
                *result++ = '%';
                *result++ = lastCh;

                if (*first == '%') {
                    // The terminator itself starts a new escape sequence
                    state = STATE_FIRST_DIGIT;
                } else {
                    // put the second, "invalid" character to the output stream too
                    *result++ = *first;
                    state = STATE_COPY;
                }
                break;
            }
        }
    }

    // Flush an escape sequence that was cut short by the end of input
    if (state == STATE_FIRST_DIGIT) {
        *result++ = '%';
    } else if (state == STATE_SECOND_DIGIT) {
        *result++ = '%';
        *result++ = lastCh;
    }

    return result;
}

// Returns true for 0-9, a-f and A-F.
inline bool isHexDigit(const char ch) {
    // Cast keeps isdigit() defined for bytes >= 0x80 on platforms where char is signed
    return isdigit(static_cast<unsigned char>(ch)) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
}

// In-place decoder of C-style backslash escapes: \a \b \t \n \v \f \r \\ \" , octal \N, \NN, \NNN
// and hex \xHH (exactly two digits). Unrecognized or incomplete sequences are copied unchanged.
// Output is written over [first, last); the returned iterator points one past the last character written.
// The iterator type must support operator< (mark < first is used for back-tracking).
template <typename _IT>
_IT escape_backslashes(_IT first, _IT last) {
    _IT result = first;
    enum { STATE_COPY, STATE_ESCAPE, STATE_OCTAL, STATE_HEX } state = STATE_COPY;
    unsigned char accVal = 0;
    unsigned char digitsCount = 0;
    _IT mark = first; // position of the backslash that started the current escape sequence

    for (; first != last; ++first) {
        switch (state) {
            case STATE_COPY: {
                if (*first == '\\') {
                    mark = first;
                    state = STATE_ESCAPE;
                } else {
                    *result++ = *first;
                }
                break;
            }
            case STATE_ESCAPE: {
                if (*first >= '0' && *first <= '7') {
                    accVal = *first - '0';
                    digitsCount = 1;
                    state = STATE_OCTAL;
                    break;
                }

                if (*first == 'x') {
                    accVal = 0;
                    digitsCount = 0;
                    state = STATE_HEX;
                    break;
                }

                switch (*first) {
                    case 'a': *result++ = 7; break;     // BELL
                    case 'b': *result++ = 8; break;     // BACKSPACE
                    case 't': *result++ = 9; break;     // HORIZONTAL TAB
                    case 'n': *result++ = 10; break;    // LINEFEED
                    case 'v': *result++ = 11; break;    // VERTICAL TAB
                    case 'f': *result++ = 12; break;    // FORMFEED
                    case 'r': *result++ = 13; break;    // CARRIAGE RETURN
                    case '\\': *result++ = '\\'; break; // upon seeing double backslash - output only one
                    case '\"': *result++ = '"'; break;  // backslash followed by '"' - output only '"'
                    default:
                        // invalid escape sequence - do not replace it (return original characters)
                        // Copy from back-track, not including current character
                        while (mark < first) {
                            *result++ = *mark++;
                        }
                        // Copy the current character. It cannot be a backslash here because
                        // the double backslash is handled by the case above.
                        *result++ = *first;
                }

                state = STATE_COPY;
                break;
            }
            case STATE_OCTAL: {
                if (*first >= '0' && *first <= '7') {
                    accVal = (accVal << 3) | (*first - '0');
                    digitsCount++;

                    // Up to 3 octal digits imposed by C standard, so after 3 digits accumulation stops.
                    if (digitsCount == 3) {
                        *result++ = accVal; // output character corresponding to the accumulated value
                        digitsCount = 0;
                        state = STATE_COPY;
                    }
                    break;
                }

                // invalid octal digit stops the accumulation
                *result++ = accVal; // output character corresponding to the accumulated value
                digitsCount = 0;

                if (*first != '\\') {
                    // If terminating character is not backslash output the terminating character
                    *result++ = *first;
                    state = STATE_COPY;
                } else {
                    // Terminating backslash starts the next escape sequence. The back-track mark must
                    // move to it: otherwise a following invalid escape would re-copy the octal sequence
                    // (whose bytes were already overwritten) and could write past the end of the input.
                    mark = first;
                    state = STATE_ESCAPE;
                }
                break;
            }
            case STATE_HEX: {
                if (!isHexDigit(*first)) {
                    // Copy from back-track, not including current character, and continue
                    while (mark < first) {
                        *result++ = *mark++;
                    }

                    if (*first != '\\') {
                        // If terminating character is not backslash output the terminating character
                        *result++ = *first;
                        state = STATE_COPY;
                    } else {
                        // If terminating character is backslash start next escape sequence
                        // (mark already equals first after the copy loop above)
                        state = STATE_ESCAPE;
                    }
                    break;
                }

                accVal = accVal << 4;
                if (isdigit(static_cast<unsigned char>(*first))) {
                    accVal += *first - '0';
                } else if (*first >= 'a' && *first <= 'f') {
                    accVal += *first - 'a' + 10;
                } else if (*first >= 'A' && *first <= 'F') {
                    accVal += *first - 'A' + 10;
                }
                digitsCount++;

                // exactly 2 hex digits are anticipated, so after 2 digits accumulation stops.
                if (digitsCount == 2) {
                    *result++ = accVal; // output character corresponding to the accumulated value
                    digitsCount = 0;
                    state = STATE_COPY;
                }
                break;
            }
        }
    }

    // Handle state at end of input
    bool copyBackTrack = true;

    switch (state) {
        case STATE_HEX:
            // Fewer than two hex digits were seen before the input ended ('\x' or '\xH').
            // The sequence is considered invalid and is copied verbatim (copyBackTrack=true)
            break;
        case STATE_OCTAL:
            // less than 3 octal digits were found at the value end, like '\1' or '\12'
            *result++ = accVal; // output character corresponding to the accumulated value
            copyBackTrack = false;
            break;
        case STATE_COPY:
            copyBackTrack = false;
            break;
        case STATE_ESCAPE:
            break;
    }

    if (copyBackTrack) {
        // invalid escape sequence - do not replace it (return original characters)
        while (mark < first) {
            *result++ = *mark++;
        }
    }

    return result;
}

// Returns true if 'needle' occurs anywhere inside 'haystack'.
inline bool str_contains(const std::string &haystack, const std::string &needle)
{
    return haystack.find(needle) != std::string::npos;
}

// One entry of the named character reference table: entity name and its code point.
struct HtmlEntity {
    const char *name;
    unsigned short value;
};

extern const struct HtmlEntity g_htmlEntities[];
extern const size_t g_htmlEntitiesCount;

// Helper of escape_html(): write 'code' (a 16-bit code point) as 1-3 UTF-8 bytes to 'out'
// and return the advanced output iterator.
template <typename _IT>
inline _IT escape_html_put_utf8(_IT out, unsigned short code) {
    if (code < 0x80) {
        *out++ = code;
    } else if (code < 0x800) {
        *out++ = (code >> 6) | 0xC0;
        *out++ = (code & 0x3F) | 0x80;
    } else { // (code <= 0xFFFF : always true because the type is 16-bit)
        *out++ = (code >> 12) | 0xE0;
        *out++ = ((code >> 6) & 0x3F) | 0x80;
        *out++ = (code & 0x3F) | 0x80;
    }
    return out;
}

// In-place decoder of HTML character references: named ones (looked up in g_htmlEntities),
// decimal (&#NNN) and hex (&#xHHH). Decoded code points are emitted as UTF-8. Sequences that
// cannot be decoded are copied unchanged. Output is written over [first, last); the returned
// iterator points one past the last character written.
template <typename _IT>
_IT escape_html(_IT first, _IT last) {
    _IT result = first;
    enum {
        STATE_COPY,
        STATE_ESCAPE,
        STATE_NAMED_CHARACTER_REFERENCE,
        STATE_NUMERIC_START,
        STATE_NUMERIC,
        STATE_HEX
    } state = STATE_COPY;
    unsigned short accVal = 0; // should be unsigned short to hold unicode character code (16-bits)
    bool digitsSeen = false;
    std::list<size_t> potentialMatchIndices;
    size_t matchLength = 0;
    size_t lastKnownMatchIndex = -1;
    _IT mark = first;

    for (; first != last; ++first) {
        switch (state) {
            case STATE_COPY:
                if (*first == '&') {
                    mark = first;
                    state = STATE_ESCAPE;
                } else {
                    *result++ = *first;
                }
                break;

            case STATE_ESCAPE:
                if (isalpha(static_cast<unsigned char>(*first))) {
                    // initialize potential matches list
                    potentialMatchIndices.clear();

                    for (size_t index = 0; index < g_htmlEntitiesCount; ++index) {
                        if (*first == g_htmlEntities[index].name[0]) {
                            potentialMatchIndices.push_back(index);
                            lastKnownMatchIndex = index;
                        }
                    }

                    // No potential matches - send ampersand and current character to output
                    if (potentialMatchIndices.empty()) {
                        *result++ = '&';
                        *result++ = *first;
                        state = STATE_COPY;
                        break;
                    }

                    // 1st character already matched, so matchLength already starts from 1
                    matchLength = 1;
                    state = STATE_NAMED_CHARACTER_REFERENCE;
                } else if (*first == '#') {
                    digitsSeen = false;
                    accVal = 0;
                    state = STATE_NUMERIC_START;
                } else {
                    // not isalpha and not '#' - this is invalid character reference - do not replace it
                    // (return original characters)
                    *result++ = '&';
                    *result++ = *first;
                    state = STATE_COPY;
                }
                break;

            case STATE_NAMED_CHARACTER_REFERENCE:
                // Find and remove all potential matches that do not match anymore
                {
                    int increaseMatchLength = 0;

                    for (std::list<size_t>::iterator pPotentialMatchIndex = potentialMatchIndices.begin();
                            pPotentialMatchIndex != potentialMatchIndices.end();) {
                        // Remember every examined candidate: if all of them die, any one of them still
                        // provides the matchLength characters consumed so far.
                        lastKnownMatchIndex = *pPotentialMatchIndex;
                        const char *matchName = g_htmlEntities[lastKnownMatchIndex].name;

                        // If there are no more characters in the potential match name,
                        // or the next tested character doesn't match - kill the match
                        if ((matchName[matchLength] == '\0') || (matchName[matchLength] != *first)) {
                            pPotentialMatchIndex = potentialMatchIndices.erase(pPotentialMatchIndex);
                        } else {
                            increaseMatchLength = 1;
                            ++pPotentialMatchIndex;
                        }
                    }

                    matchLength += increaseMatchLength;

                    // When candidates survive, point at a surviving one. Without this the index could
                    // refer to a candidate that was erased in this very pass, and the wrong entity
                    // value would be emitted below.
                    if (!potentialMatchIndices.empty()) {
                        lastKnownMatchIndex = potentialMatchIndices.back();
                    }
                }

                // No more potential matches: unsuccessful match -> flush consumed characters to output
                if (potentialMatchIndices.empty()) {
                    // Send consumed ampersand to the output
                    *result++ = '&';

                    // Send those matched characters (these are the same that we consumed) - to the output
                    for (size_t i = 0; i < matchLength; i++) {
                        *result++ = g_htmlEntities[lastKnownMatchIndex].name[i];
                    }

                    // Send the character that terminated our search for possible matches
                    *result++ = *first;

                    // Continue copying text verbatim
                    state = STATE_COPY;
                    break; // leaves the switch; the for() loop continues with the next character
                }

                // There are still potential matches and ';' is hit
                if (*first == ';') {
                    // longest match found for the named character reference.
                    // translate it into output character(s) and we're done.
                    result = escape_html_put_utf8(result, g_htmlEntities[lastKnownMatchIndex].value);

                    // Continue copying text verbatim
                    state = STATE_COPY;
                }
                break;

            case STATE_NUMERIC_START:
                digitsSeen = false;
                accVal = 0;
                if (*first == 'x' || *first == 'X') {
                    state = STATE_HEX;
                } else if (isdigit(static_cast<unsigned char>(*first))) {
                    digitsSeen = true;
                    accVal = *first - '0';
                    state = STATE_NUMERIC;
                } else {
                    // Sequence started with these two characters: '&#', and here is the third,
                    // non-digit character.
                    // Copy from back-track, not including current character, and continue
                    while (mark < first) {
                        *result++ = *mark++;
                    }

                    if (*first == '&') {
                        // Terminator is also start of next escape sequence
                        mark = first;
                        state = STATE_ESCAPE;
                        break;
                    }

                    // Copy the terminating character too
                    *result++ = *first;
                    state = STATE_COPY;
                }
                break;

            case STATE_NUMERIC:
                if (!isdigit(static_cast<unsigned char>(*first))) {
                    if (digitsSeen) {
                        result = escape_html_put_utf8(result, accVal);
                    } else {
                        // Copy from back-track, not including current character, and continue
                        while (mark < first) {
                            *result++ = *mark++;
                        }
                    }

                    if (*first == '&') {
                        // Terminator is also start of next escape sequence
                        mark = first;
                        state = STATE_ESCAPE;
                        break;
                    } else if (!digitsSeen || *first != ';') {
                        // Do not copy the ';' but do copy any other terminator
                        // Note: the ';' should remain in the output if there were no digits seen.
                        *result++ = *first;
                    }
                    state = STATE_COPY;
                } else {
                    digitsSeen = true;
                    // NOTE: the value is truncated to 16 bits, so long digit runs wrap around.
                    accVal = accVal * 10 + *first - '0';
                }
                break;

            case STATE_HEX:
                if (!isHexDigit(*first)) {
                    if (digitsSeen) {
                        result = escape_html_put_utf8(result, accVal);
                    } else {
                        // Copy from back-track, not including current character, and continue
                        while (mark < first) {
                            *result++ = *mark++;
                        }
                    }

                    if (*first == '&') {
                        // Terminator is also start of next escape sequence
                        mark = first;
                        state = STATE_ESCAPE;
                        break;
                    } else if (!digitsSeen || *first != ';') {
                        // Do not copy the ';' but do copy any other terminator
                        // Note: the ';' should remain in the output if there were no digits seen.
                        *result++ = *first;
                    }
                    state = STATE_COPY;
                } else {
                    digitsSeen = true;
                    accVal = accVal << 4;
                    if (isdigit(static_cast<unsigned char>(*first))) {
                        accVal += *first - '0';
                    } else if (*first >= 'a' && *first <= 'f') {
                        accVal += *first - 'a' + 10;
                    } else if (*first >= 'A' && *first <= 'F') {
                        accVal += *first - 'A' + 10;
                    }
                }
                break;
        }
    }

    // Handle a reference that was cut short by the end of input
    if (state == STATE_ESCAPE) {
        *result++ = '&';
    } else if (state == STATE_NAMED_CHARACTER_REFERENCE && potentialMatchIndices.size() > 0) {
        // Send consumed ampersand to the output
        *result++ = '&';

        // Send those matched characters (these are the same that we consumed) - to the output
        for (size_t i = 0; i < matchLength; i++) {
            // Even if there are multiple potential matches, all of them start with the same
            // matchLength characters that we consumed!
            *result++ = g_htmlEntities[lastKnownMatchIndex].name[i];
        }
    }

    if (state == STATE_HEX && !digitsSeen) { // Special case of "&#x"
        // Copy from back-track (there is no current character at end of input)
        while (mark < first) {
            *result++ = *mark++;
        }
        state = STATE_COPY;
    } else if (state == STATE_HEX || state == STATE_NUMERIC || state == STATE_NUMERIC_START) {
        if (digitsSeen) {
            result = escape_html_put_utf8(result, accVal);
        } else {
            // Copy from back-track (there is no current character at end of input)
            while (mark < first) {
                *result++ = *mark++;
            }
            state = STATE_COPY;
        }
    }

    return result;
}

// Compare two buffers, case insensitive.
// Return true if they are equal (case-insensitive)
// Buffers of different length are never equal. Neither buffer needs to be null-terminated.
inline bool memcaseinsensitivecmp(const char *buf1, size_t buf1_len, const char *buf2, size_t buf2_len) {
    if (buf1_len != buf2_len) {
        return false;
    }

    for (; buf1_len > 0; --buf1_len) {
        // Cast through unsigned char so tolower() never receives a negative value
        if (tolower(static_cast<unsigned char>(*buf1++)) != tolower(static_cast<unsigned char>(*buf2++))) {
            return false; // different
        }
    }

    return true; // equal
}

// Replace, in place, every occurrence of 'from' in 'str' with 'to'. Does nothing when 'from' is empty.
inline void replaceAll(std::string& str, const std::string& from, const std::string& to) {
    if (from.empty()) {
        return;
    }

    size_t start_pos = 0;

    while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
        str.replace(start_pos, from.length(), to);
        start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx'
    }
}

// Same as replaceAll(), but works on (and returns) a copy of the input string.
inline std::string replaceAllCopy(std::string str, const std::string& from, const std::string& to) {
    replaceAll(str, from, to);
    return str;
}

// Pad a chunk with '=' up to a multiple of 4 characters.
// A chunk whose length modulo 4 is 0 or 1 is left untouched.
inline void alignBase64Chunk(std::string &chunk)
{
    size_t len = chunk.length() % 4;
    if (len >= 2) {
        chunk.append(4 - len, '=');
    }
}

// Count items in v that are not in ignored_set
inline size_t countNotInSet(const std::vector<std::string> &v, const std::set<std::string> &ignored_set) {
    size_t count = 0;

    for (const std::string &word : v) {
        if (ignored_set.count(word) == 0) {
            count++;
        }
    }

    return count;
}

// Remove from v every item that is contained (as a substring) within the 'match' string.
// Note that an empty item is a substring of anything and is therefore removed.
inline void removeItemsMatchingSubstringOf(std::vector<std::string> &v, const std::string& match) {
    // erase/remove_if compacts the vector in a single pass instead of shifting the tail on every erase
    v.erase(
        std::remove_if(
            v.begin(),
            v.end(),
            [&match] (const std::string &item) { return match.find(item) != std::string::npos; }
        ),
        v.end()
    );
}

// Detect whether unicode code is in the "Halfwidth and Fullwidth Forms" set convertable to ASCII.
inline bool isUnicodeHalfAndFullWidthRange(uint32_t code) {
    return (code >= 0xFF01 && code <= 0xFF5E);
}

// Convert unicode code from the "Halfwidth and Fullwidth Forms" set to ASCII.
// The code must satisfy isUnicodeHalfAndFullWidthRange() (asserted).
inline char convertFromUnicodeHalfAndFullWidthRange(uint32_t code) {
    assert(isUnicodeHalfAndFullWidthRange(code));
    // Support set of unicode characters from the "Halfwidth and Fullwidth Forms" that are converted to ASCII
    static const char *xlat =
        "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
    return xlat[code - 0xFF01];
}

// Returns true for code points that convertSpecialUnicode() knows how to map to an ASCII character.
inline bool isSpecialUnicode(uint32_t code) {
    return isUnicodeHalfAndFullWidthRange(code)
        || 0x2028 == code || 0x2029 == code
        || 0x2215 == code || 0x2216 == code
        || 0xEFC8 == code || 0xF025 == code;
}

// Map a "special" code point to ASCII: fullwidth forms to their ASCII counterpart,
// 0x2216/0xEFC8/0xF025 to backslash, 0x2215 to slash, anything else to newline.
inline char convertSpecialUnicode(uint32_t code) {
    if (isUnicodeHalfAndFullWidthRange(code)) {
        return convertFromUnicodeHalfAndFullWidthRange(code);
    }

    if (0x2216 == code || 0xEFC8 == code || 0xF025 == code) {
        return '\\';
    }

    if (0x2215 == code) {
        return '/';
    }

    // assuming 0x2028 == code || 0x2029 == code
    return '\n';
}

// Remove, in place, every character for which isspace() is true.
inline void stripSpaces(std::string &text) {
    text.erase(
        std::remove_if(
            text.begin(),
            text.end(),
            // the parameter type converts each char to unsigned char before isspace() sees it
            [] (unsigned char ch) { return isspace(ch) != 0; }
        ),
        text.end()
    );
}

// Count non-overlapping occurrences of subStr within str.
inline size_t countSubstrings(const std::string &str, const std::string &subStr) {
    if (subStr.empty()) {
        return str.size() + 1; // to conform to python's "str.count(subStr)" behavior when substr is empty string...
    }

    size_t count = 0;

    for (size_t pos = str.find(subStr); pos != std::string::npos; pos = str.find(subStr, pos + subStr.size())) {
        count++;
    }

    return count;
}

// Test whether text starts one of the known HTML tag names
bool startsWithHtmlTagName(const char *text);

// Normalizing URL means replacing any pure-numeric URL parts with the word "_num"
// The parameters part of the given uri is also stripped (the '?' character and anything after it).
std::string normalize_uri(const std::string &uri); std::string normalize_param(const std::string& param); // Analogous to python's text.decode('unicode_escape'), with the distinction that // this function simply throws out the \uXXXX sequences instead of converting them to binary unicode sequences. // This function performs in-place decoding, updating text string in progress. void unescapeUnicode(std::string &text); // Try to find and decode UTF7 chunks std::string filterUTF7(const std::string &text); base64_decode_status decideStatusBase64Decoded( std::string& decoded, double entropy, double decoded_entropy, size_t spacer_count, size_t nonPrintableCharsCount, bool clear_on_error, double terminatorCharsSeen, bool called_with_prefix); base64_decode_status decodeBase64Chunk( const std::string &value, std::string::const_iterator it, std::string::const_iterator end, std::string &decoded, bool clear_on_error = true, bool called_with_prefix = false); bool b64DecodeChunk( const std::string &value, std::string::const_iterator it, std::string::const_iterator end, std::string &decoded); std::vector split(const std::string& s, char delim); namespace Waap { namespace Util { typedef bool (*RegexSubCallback_f)( const std::string &value, std::string::const_iterator b, std::string::const_iterator e, std::string &repl); bool isValidJson(const std::string &input); enum KnownSourceType { SOURCE_TYPE_UNKNOWN = 0, SOURCE_TYPE_SENSOR_DATA = 1 }; KnownSourceType detectKnownSource(const std::string &input); bool isScreenedJson(const std::string &input); int definePrefixedJson(const std::string &input); bool detectJSONasParameter(const std::string &s, std::string &key, std::string &value); enum BinaryFileType { FILE_TYPE_NONE, FILE_TYPE_PNG, FILE_TYPE_JPEG, FILE_TYPE_PDF }; void b64Decode( const std::string &s, RegexSubCallback_f cb, int &decodedCount, int &deletedCount, std::string &outStr); base64_variants b64Test ( const std::string &s, std::string &key, std::string &value, 
BinaryFileType &binaryFileType, size_t offset = 0); // The original stdlib implementation of isalpha() supports locale settings which we do not really need. // It is also proven to contribute to slow performance in some of the algorithms using it. // This function has reduced functionality compared to stdlib isalpha(), but is much faster. inline bool isAlphaAsciiFast(unsigned char ch) { return ((unsigned int)ch | 32) - 'a' < 26; } // Compare two objects referenced by pointer - comparison is done by value (comparing objects themselves) // This is different from comparing object pointers. template bool compareObjects(_T &first, _T &second) { // If both are the same object (or both are equal to nullptr - then they are equivalent) if (first == second) { return true; } // If pointers are different and at least one of them is nullptr, then the other is not nullptr - so they are // not equivalent if (first == nullptr || second == nullptr) { return false; } // At this point, both pointers are for sure not nullptr, so we can dereference and compare objects pointed by return *first == *second; } inline bool str_isalnum(const std::string & value) { for (std::string::const_iterator pC = value.begin(); pC != value.end(); ++pC) { if (!std::isalnum(*pC)) { return false; // at least one non alphanumeric character detected } } return true; } inline bool isAllDigits(const std::string & value) { for (char ch : value) { if (!isdigit(ch)) { return false; // at least one non digit character detected } } return true; } typedef std::map > map_of_stringlists_t; // Yajl generator (C++ RAII edition :) struct Yajl { yajl_gen g; Yajl() :g(yajl_gen_alloc(NULL)) {} ~Yajl() { yajl_gen_free(g); } struct Map { yajl_gen& g; explicit Map(Yajl& y) : g(y.g) { yajl_gen_map_open(g); } ~Map() { yajl_gen_map_close(g); } void gen_null(const std::string& k) { yajl_gen_string(g, (unsigned char*)k.data(), k.size()); yajl_gen_null(g); } void gen_str(const std::string& k, const std::string& v) { 
yajl_gen_string(g, (unsigned char*)k.data(), k.size()); yajl_gen_string(g, (unsigned char*)v.data(), v.size()); } void gen_bool(const std::string& k, bool v) { yajl_gen_string(g, (unsigned char*)k.data(), k.size()); yajl_gen_bool(g, v); } void gen_integer(const std::string& k, long long int v) { yajl_gen_string(g, (unsigned char*)k.data(), k.size()); yajl_gen_integer(g, v); } void gen_double(const std::string& k, double v) { yajl_gen_string(g, (unsigned char*)k.data(), k.size()); yajl_gen_double(g, v); } void gen_key(const std::string& k) { yajl_gen_string(g, (unsigned char*)k.data(), k.size()); } }; struct Array { yajl_gen& g; explicit Array(Yajl& y) :g(y.g) { yajl_gen_array_open(g); } ~Array() { yajl_gen_array_close(g); } void gen_null() { yajl_gen_null(g); } void gen_str(const std::string& v) { yajl_gen_string(g, (unsigned char*)v.data(), v.size()); } void gen_bool(bool v) { yajl_gen_bool(g, v); } void gen_integer(long long int v) { yajl_gen_integer(g, v); } void gen_double(double v) { yajl_gen_double(g, v); } }; std::string get_json_str() const { const unsigned char* buf; size_t len; yajl_gen_get_buf(g, &buf, &len); return std::string((char*)buf, len); } }; enum ContentType { CONTENT_TYPE_UNKNOWN, CONTENT_TYPE_XML, CONTENT_TYPE_JSON, CONTENT_TYPE_GQL, CONTENT_TYPE_HTML, CONTENT_TYPE_MULTIPART_FORM, CONTENT_TYPE_URLENCODED, CONTENT_TYPE_WBXML, CONTENT_TYPES_COUNT }; // LCOV_EXCL_START Reason: coverage upgrade inline const char* getContentTypeStr(enum ContentType contentType) { static const char* contentTypeStr[] = { "UNKNOWN", "XML", "JSON", "HTML", "MULTIPART_FORM", "URLENCODED", "WBXML" }; if (contentType >= CONTENT_TYPES_COUNT) { contentType = CONTENT_TYPE_UNKNOWN; } return contentTypeStr[contentType]; }; // LCOV_EXCL_STOP static const std::string s_EncryptionKey = "KSO+hOFs1q5SkEnx8bvp67Om2zyHDD6ZJF4NHAa3R94=";; static const std::string s_EncryptionIV = "sxJNyEO7i6YfA1p9CTglHw=="; // trim from start static inline std::string <rim(std::string &s) { 
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [] (char c) { return !std::isspace(c); })); return s; } // trim from end static inline std::string &rtrim(std::string &s) { s.erase(std::find_if(s.rbegin(), s.rend(), [] (char c) { return !std::isspace(c); }).base(), s.end()); return s; } // trim from both ends static inline std::string &trim(std::string &s) { return ltrim(rtrim(s)); } // Find whether some word (what) exists wihin keys of the map. // The search done by *searching* for "what" string within each key string, // not by *comparing* "what" with each key string. bool find_in_map_of_stringlists_keys(const std::string & what, const map_of_stringlists_t & where); void remove_in_map_of_stringlists_keys(const std::string & what, map_of_stringlists_t & where); void remove_startswith(std::vector &vec, const std::string &prefix); std::string AES128Decrypt(std::string& key, std::string& iv, std::string& message); std::string base64Encode(const std::string &input); std::string base64Decode(const std::string &input); std::string obfuscateXor(const std::string& toEncrypt); std::string obfuscateXorBase64(const std::string& toEncrypt); bool containsInvalidUtf8(const std::string &payload); // based on invalid utf-8 evasion from here: https://www.cgisecurity.com/lib/URLEmbeddedAttacks.html std::string unescapeInvalidUtf8(const std::string &text); Maybe containsBrokenUtf8(const std::string &payload, const std::string &unquoted_payload); std::string unescapeBrokenUtf8(const std::string &text); bool containsCspReportPolicy(const std::string &payload); bool testUrlBareUtf8Evasion(const std::string &line); bool testUrlBadUtf8Evasion(const std::string &line); std::string urlDecode(std::string src); std::string injectSpacesToString(const std::string& std); std::string charToString(const char* s, int slen); std::string vecToString(const std::vector& vec, char delim = ','); template std::string setToString(const std::set& set, bool addParenthesis=true) { std::ostringstream vts; 
if (addParenthesis) { vts << "["; } if (!set.empty()) { for (auto itr = set.begin(); itr != set.end(); itr++) { vts << *itr << ", "; } } else { return std::string(); } std::string res = vts.str(); res.pop_back(); res.pop_back(); if (addParenthesis) { res += "]"; } return res; } template void mergeFromVectorWithoutDuplicates( const std::vector& first_vector, std::vector& second_vector) { for (const V& element : first_vector) { if(find(second_vector.begin(), second_vector.end(), element) == second_vector.end()) { second_vector.push_back(element); } } } template void mergeFromMapOfVectorsWithoutDuplicates( const std::map>& first_map, std::map>& second_map) { for (auto itr = first_map.begin(); itr != first_map.end(); itr++) { if (second_map.find(itr->first) != second_map.end()) { const std::vector& first_vector = first_map.at(itr->first); mergeFromVectorWithoutDuplicates(first_vector, second_map[itr->first]); } else { const std::vector& first_vector = itr->second; second_map[itr->first] = first_vector; } } } template void mergeSets(const std::set& first_set, const std::set& second_set, std::set& merged_set) { std::set_union( first_set.begin(), first_set.end(), second_set.begin(), second_set.end(), std::inserter(merged_set, merged_set.begin()) ); } ReportIS::Severity computeSeverityFromThreatLevel(ThreatLevel threatLevel); ReportIS::Priority computePriorityFromThreatLevel(ThreatLevel threatLevel); std::string computeConfidenceFromThreatLevel(ThreatLevel threatLevel); void decodePercentEncoding(std::string &text, bool decodePlus=false); void decodeUtf16Value(const ValueStatsAnalyzer &valueStats, std::string &cur_val); std::string stripOptionalPort(const std::string::const_iterator &first, const std::string::const_iterator &last); std::string extractKeyValueFromCookie(const std::string &cookie, const std::string &key); bool isIpAddress(const std::string &ip_address); bool isUuid(const std::string& str); bool vectorStringContain(const std::vector& vec, const std::string& 
str); bool isIpTrusted(const std::string &ip, const std::vector &trusted_ips); ContentType detectContentType(const char* hdr_value); std::string convertParamTypeToStr(ParamType type); ParamType convertTypeStrToEnum(const std::string& typeStr); } } #endif // __WAF2_UTIL_H__148aa7e4