Feb_06_2024-Dev

This commit is contained in:
Ned Wright
2024-02-06 16:41:40 +00:00
parent 9f8535c0f7
commit 623951a2f0
59 changed files with 2207 additions and 1168 deletions

View File

@@ -85,6 +85,7 @@ add_library(waap_clib
ParserGql.cc
ParserPercentEncode.cc
ParserPairs.cc
Waf2Util2.cc
)
add_definitions("-Wno-unused-function")

View File

@@ -864,10 +864,6 @@ DeepParser::parseAfterMisleadingMultipartBoundaryCleaned(
return rc;
}
static bool err = false;
static const SingleRegex json_detector_re("\\A[{\\[][^;\",}\\]]*[,:\"].+[\\s\\S]", err, "json_detector");
static const SingleRegex json_quoteless_detector_re("^[{\\[][[,0-9nul\\]]+", err, "json_quoteless_detector");
//intended to keep and process all types of leftovers detected as separate cases for parsing
int
DeepParser::createUrlParserForJson(
@@ -1103,11 +1099,7 @@ DeepParser::createInternalParser(
} else {
dbgTrace(D_WAAP_DEEP_PARSER) << "attempt to find JSON by '{' or '['";
bool percent_encoded_doublequote_detected = cur_val.find("%22") != std::string::npos;
if (json_detector_re.hasMatch(cur_val)
&& (valueStats.hasDoubleQuote
|| json_quoteless_detector_re.hasMatch(cur_val)
|| percent_encoded_doublequote_detected)) {
// JSON value detected
if (Waap::Util::isValidJson(cur_val)) {
if (percent_encoded_doublequote_detected && !valueStats.hasDoubleQuote) {
// We have JSON but it is %-encoded, so start by percent-decoding it first. Very narrow case
dbgTrace(D_WAAP_DEEP_PARSER) << "Starting to parse a JSON file from percent decoding";

View File

@@ -19,7 +19,8 @@ USE_DEBUG_FLAG(D_WAAP_PARSER);
// Max size for key and value that can be stored in memory (per thread)
#define MAX_KEY_SIZE 64*1024
#define MAX_VALUE_SIZE 64*1024
#define MAX_VALUE_SIZE 16*1024
#define MAX_PROCESSING_BUFFER_SIZE 64*1024
BufferedReceiver::BufferedReceiver(IParserReceiver &receiver, size_t parser_depth) :
m_receiver(receiver),
@@ -69,14 +70,14 @@ int BufferedReceiver::onValue(const char *v, size_t v_len)
<< m_parser_depth;
while (v_len > 0) {
// Move data from buffer v to accumulated m_value string in an attempt to fill m_value to its max size
size_t bytesToFill = std::min(v_len, MAX_VALUE_SIZE - m_value.size());
size_t bytesToFill = std::min(v_len, MAX_PROCESSING_BUFFER_SIZE - m_value.size());
m_value += std::string(v, bytesToFill);
// Update v and v_len (input buffer) to reflect that we already consumed part (or all) of it
v += bytesToFill;
v_len -= bytesToFill;
// Only push full buffers to the m_receiver
if (m_value.size() == MAX_VALUE_SIZE) {
if (m_value.size() >= MAX_VALUE_SIZE) {
// The first full-size buffer will be pushed with BUFFERED_RECEIVER_F_FIRST flag
dbgTrace(D_WAAP_PARSER)
<< " The first full-size buffer will be pushed with BUFFERED_RECEIVER_F_FIRST flag"

View File

@@ -22,8 +22,9 @@
#include "yajl/yajl_parse.h"
#include "singleton.h"
#include "i_oa_schema_updater.h"
#define FIRST_JSON_BUFFER_SIZE 4 // must buffer at least 4 first bytes to allow unicode autodetection (BOM).
// must be at least 4 first bytes to allow unicode autodetection (BOM).
// BUT... reduced to 1 in order to allow better work of schema validation and API discovery
#define FIRST_JSON_BUFFER_SIZE 1
typedef size_t yajl_size_t;

View File

@@ -312,6 +312,7 @@ Waf2Transaction::Waf2Transaction() :
m_responseInjectReasons(),
m_index(-1),
m_triggerLog(),
is_schema_validation(false),
m_waf2TransactionFlags()
{}
@@ -343,6 +344,7 @@ Waf2Transaction::Waf2Transaction(std::shared_ptr<WaapAssetState> pWaapAssetState
m_responseInjectReasons(),
m_index(-1),
m_triggerLog(),
is_schema_validation(false),
m_waf2TransactionFlags()
{}
@@ -515,7 +517,6 @@ bool Waf2Transaction::checkIsScanningRequired()
m_siteConfig = &m_ngenAPIConfig;
auto rateLimitingPolicy = m_siteConfig ? m_siteConfig->get_RateLimitingPolicy() : NULL;
result |= m_siteConfig->get_WebAttackMitigation();
if(rateLimitingPolicy) {
result |= m_siteConfig->get_RateLimitingPolicy()->getRateLimitingEnforcementStatus();
}

View File

@@ -345,6 +345,7 @@ private:
// Cached pointer to const triggerLog (hence mutable)
mutable std::shared_ptr<Waap::Trigger::Log> m_triggerLog;
bool is_schema_validation = false;
Waf2TransactionFlags m_waf2TransactionFlags;
};

View File

@@ -2055,31 +2055,18 @@ string extractForwardedIp(const string &x_forwarded_hdr_val)
return forward_ip;
}
bool isUuid(const string& str) {
    // The pattern below describes 8-4-4-4-12 hex digits joined by hyphens, i.e. 36 characters.
    // Any other length is rejected up front, before the matcher is constructed or run; combined
    // with the fixed-width pattern this also means a match has to span the whole string.
    const bool has_uuid_length = (str.length() == 36);
    if (!has_uuid_length) {
        return false;
    }

    static bool err;
    static const SingleRegex uuid_detector_re(
        "[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}",
        err,
        "uuid_detector"
    );
    return uuid_detector_re.hasMatch(str);
}
bool isIpAddress(const string &ip_address)

View File

@@ -876,6 +876,8 @@ namespace Util {
std::string::const_iterator e,
std::string &repl);
bool isValidJson(const std::string &input);
bool detectJSONasParameter(const std::string &s,
std::string &key,
std::string &value);

View File

@@ -0,0 +1,632 @@
#include "Waf2Util.h"
#include <string>
namespace Waap {
namespace Util {
using namespace std;
// Literal tokens recognised by the JSON detector, each paired with its length
// (terminating NUL excluded). The lengths are derived from the arrays themselves.
static const char trueString[] = "true";
static const size_t trueStringLen = sizeof(trueString) - 1;
static const char falseString[] = "false";
static const size_t falseStringLen = sizeof(falseString) - 1;
static const char nullString[] = "null";
static const size_t nullStringLen = sizeof(nullString) - 1;
// Percent-encoded form of the double-quote character.
static const char quoteString[] = "%22";
static const size_t quoteStringLen = sizeof(quoteString) - 1;
// Compares `sample` with the beginning of `buffer` over the shorter of the two lengths.
// Returns the number of characters that were compared when they agree (so a buffer that
// ends part-way through the sample still counts as a match), or -1 when they differ.
int
isAlignedPrefix(
    const char *sample,
    const size_t sample_len,
    const char *buffer,
    const size_t buffer_len)
{
    const size_t overlap = (sample_len <= buffer_len) ? sample_len : buffer_len;
    if (strncmp(sample, buffer, overlap) != 0) {
        return -1;
    }
    return static_cast<int>(overlap);
}
// Checks whether `buffer` starts with one of the bare literals "true", "false" or "null"
// (despite the name, "null" is accepted as well). The literals are tried in that order and
// the first hit wins. Returns what isAlignedPrefix() returned for the hit, or -1 if none hit.
int
isBoolean(
    const char *buffer,
    const size_t buffer_len)
{
    const struct {
        const char *text;
        size_t length;
    } literals[] = {
        {trueString, trueStringLen},
        {falseString, falseStringLen},
        {nullString, nullStringLen},
    };

    for (const auto &literal : literals) {
        const int matched = isAlignedPrefix(literal.text, literal.length, buffer, buffer_len);
        if (matched >= 0) {
            return matched;
        }
    }
    return -1;
}
// Validates what follows the exponent marker of a number.
//
// buffer, buffer_len - the text being scanned and its length.
// i                  - in/out: on entry the index of the 'e'/'E'; on return it has been
//                      moved onto the character that was consumed after the marker (the
//                      sign, or the first exponent digit), if any.
//
// Returns true when
//   * the marker is the last character of the buffer (input treated as truncated), or
//   * the marker is followed by a digit, or
//   * the marker is followed by '+'/'-' and then a digit.
// Returns false otherwise, including when a sign is the last character of the buffer.
//
// Nothing at or beyond buffer[buffer_len] is read, and characters are converted to
// unsigned char before being classified because isdigit() is undefined for negative values.
bool
isValidExponent(
    const char * buffer,
    const size_t buffer_len,
    size_t *i)
{
    const size_t next = *i + 1;
    if (next == buffer_len) {
        return true; // e or E is the last char in buffer
    }
    if (next > buffer_len) {
        return false; // index already outside the buffer
    }

    const unsigned char follower = static_cast<unsigned char>(buffer[next]);
    if (isdigit(follower)) {
        // A single exponent digit is enough ("1e5"); any further digits are left for the
        // caller's number handling.
        *i = next;
        return true;
    }
    if (follower != '+' && follower != '-') {
        return false;
    }

    // Step onto the sign; the character after it must exist and be a digit.
    *i = next;
    const size_t after_sign = next + 1;
    if (after_sign >= buffer_len) {
        return false;
    }
    return isdigit(static_cast<unsigned char>(buffer[after_sign])) != 0;
}
// Reports whether `c` is '{'; when it is, *object_count is incremented.
bool
isObjectStart(const char c, int *object_count)
{
    const bool opens_object = (c == '{');
    if (opens_object) {
        ++(*object_count);
    }
    return opens_object;
}
// Reports whether `c` is '}'; when it is, *object_count is decremented
// (the counter is allowed to go below zero here).
bool
isObjectEnd(const char c, int *object_count)
{
    const bool closes_object = (c == '}');
    if (closes_object) {
        --(*object_count);
    }
    return closes_object;
}
// Reports whether `c` is '['; when it is, *array_count is incremented.
bool
isArrayStart(const char c, int *array_count)
{
    const bool opens_array = (c == '[');
    if (opens_array) {
        ++(*array_count);
    }
    return opens_array;
}
// Reports whether `c` is ']'; when it is, *array_count is decremented
// (the counter is allowed to go below zero here).
bool
isArrayEnd(const char c, int *array_count)
{
    const bool closes_array = (c == ']');
    if (closes_array) {
        --(*array_count);
    }
    return closes_array;
}
// Cheap structural check that `input` starts like a JSON document.
//
// Only the leading MAX_JSON_INSPECT_SIZE characters are walked through a small state
// machine; the remainder of the input is never examined, so a `true` result means
// "the inspected prefix is plausible JSON", not "the whole value parses".
//
// Deliberate leniencies:
//   * NUL bytes are skipped (UTF16 to UTF8 support).
//   * A value that is cut off by the end of the input (a partial literal, a lone '-',
//     a trailing '.', 'e' or partial "%22") is accepted.
//   * A percent-encoded double quote ("%22") may open a string or key; once one has
//     been seen, "%22" is also accepted as a closing quote.
//   * Several top-level containers in a row are accepted (nJSON).
//
// Returns false for inputs shorter than two characters, on the first character that
// does not fit the current state, and as soon as more containers have been closed than
// were opened.
//
// NOTE(review): a '"' is treated as escaped only when preceded by exactly one backslash
// (the two preceding characters are inspected); longer backslash runs are not counted.
bool
isValidJson(const std::string &input)
{
    static const size_t MAX_JSON_INSPECT_SIZE = 16;

    enum state
    {
        S_START,          // 0
        S_OBJECT_START,   // 1
        S_OBJECT_END,     // 2
        S_ARRAY_START,    // 3
        S_ARRAY_END,      // 4
        S_NUMBER,         // 5
        S_NUMBER_END,     // 6
        S_STRING_START,   // 7
        S_STRING_BODY,    // 8
        S_STRING_END,     // 9
        S_VARIABLE_START, // 10
        S_VARIABLE_BODY,  // 11
        S_VARIABLE_END,   // 12
        S_COMMA,          // 13
        S_COLON,          // 14
        S_BOOLEAN,        // 15
        S_ERROR,          // 16
        S_END             // 17
    };

    const char *buf = input.c_str();
    const size_t len = input.length();
    if (len < 2) {
        return false;
    }

    state m_state = S_START;
    bool encoded = false;   // set once a "%22" quote has been consumed
    size_t i = 0;
    char c = 0;
    int array_count = 0;
    int object_count = 0;

    // The <cctype> classifiers are undefined for negative values, and `c` comes from
    // untrusted bytes that may be >= 0x80, so always go through unsigned char.
    auto is_space = [](char ch) { return isspace(static_cast<unsigned char>(ch)) != 0; };
    auto is_digit = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };

    // If a (possibly truncated) "%22" starts at position i, consume it: `i` is left on
    // its last character and `encoded` is raised. Leaves everything untouched otherwise.
    auto consume_encoded_quote = [&]() -> bool {
        const int matched = isAlignedPrefix(quoteString, quoteStringLen, buf + i, len - i);
        if (matched < 0) {
            return false;
        }
        encoded = true;
        i += matched - 1;
        return true;
    };

    // Shared rule for '-' and '.': either the input ends here (accepted as truncated)
    // or a digit must follow, in which case that digit is consumed as well.
    auto digit_must_follow = [&]() -> state {
        if (i + 1 == len) { // End of buffer case
            return S_NUMBER;
        }
        if (is_digit(buf[i + 1])) {
            i++;
            return S_NUMBER;
        }
        return S_ERROR;
    };

    // '{' or '[': adjust the nesting counter and enter the matching state.
    auto on_container_open = [&]() -> bool {
        if (isObjectStart(c, &object_count)) {
            m_state = S_OBJECT_START;
            return true;
        }
        if (isArrayStart(c, &array_count)) {
            m_state = S_ARRAY_START;
            return true;
        }
        return false;
    };

    // ',' ']' or '}' following a complete value.
    auto on_value_terminator = [&]() -> bool {
        if (c == ',') {
            m_state = S_COMMA;
            return true;
        }
        if (isArrayEnd(c, &array_count)) {
            m_state = S_ARRAY_END;
            return true;
        }
        if (isObjectEnd(c, &object_count)) {
            m_state = S_OBJECT_END;
            return true;
        }
        return false;
    };

    // A position where a value may begin (after '[', ',' or ':'). Whitespace keeps the
    // current state. `allow_array_end` is false only after ',' where ']' is an error.
    auto on_value_start = [&](bool allow_array_end) {
        if (on_container_open()) {
            return;
        }
        if (allow_array_end && isArrayEnd(c, &array_count)) {
            m_state = S_ARRAY_END;
            return;
        }
        if (is_digit(c)) {
            m_state = S_NUMBER;
            return;
        }
        if (c == '-') {
            m_state = digit_must_follow();
            return;
        }
        if (is_space(c)) {
            return;
        }
        if (c == '\"' || consume_encoded_quote()) {
            m_state = S_STRING_START;
            return;
        }
        const int literal_len = isBoolean(buf + i, len - i);
        if (literal_len >= 0) {
            m_state = S_BOOLEAN;
            i += literal_len - 1;
            return;
        }
        m_state = S_ERROR;
    };

    // A character inside a quoted string or key. `at_opening` is true for the first
    // character after the opening quote, where no escape can precede a '"'.
    // Body states are only reached after at least three characters, so the look-behind
    // below stays inside the buffer.
    auto on_quoted_char = [&](bool at_opening, state body_state, state end_state) {
        if (c == '\"') {
            const bool escaped = !at_opening && buf[i - 1] == '\\' && buf[i - 2] != '\\';
            m_state = escaped ? body_state : end_state;
            return;
        }
        if (encoded && consume_encoded_quote()) { // url_encoded quote
            m_state = end_state;
            return;
        }
        m_state = body_state;
    };

    while (i < len && i < MAX_JSON_INSPECT_SIZE) {
        c = buf[i];
        if (c == 0x0) { // UTF16 to UTF8 support
            i++;
            continue;
        }

        switch (m_state) {
            case S_START:
                if (!on_container_open()) {
                    m_state = S_ERROR;
                }
                break;

            case S_OBJECT_START:
                if (isObjectEnd(c, &object_count)) {
                    m_state = S_OBJECT_END;
                } else if (c == '\"') {
                    m_state = S_VARIABLE_START;
                } else if (is_space(c)) {
                    // keep state unchanged
                } else if (consume_encoded_quote()) {
                    m_state = S_VARIABLE_START;
                } else {
                    m_state = S_ERROR;
                }
                break;

            case S_ARRAY_START:
            case S_COLON:
                on_value_start(true);
                break;

            case S_COMMA:
                on_value_start(false);
                break;

            case S_OBJECT_END:
            case S_ARRAY_END:
                if (is_space(c)) {
                    break;
                }
                if (on_value_terminator()) {
                    break;
                }
                // nJSON support but contradicts to definition of json.org
                if (!on_container_open()) {
                    m_state = S_ERROR;
                }
                break;

            case S_NUMBER:
                if (is_digit(c)) {
                    break;
                }
                if (c == '.') {
                    m_state = digit_must_follow();
                    break;
                }
                if (c == 'e' || c == 'E') {
                    m_state = isValidExponent(buf, len, &i) ? S_NUMBER : S_ERROR;
                    break;
                }
                if (is_space(c)) {
                    m_state = S_NUMBER_END;
                    break;
                }
                if (!on_value_terminator()) {
                    m_state = S_ERROR;
                }
                break;

            case S_NUMBER_END:
            case S_BOOLEAN:
                if (is_space(c)) {
                    break;
                }
                if (!on_value_terminator()) {
                    m_state = S_ERROR;
                }
                break;

            case S_STRING_START:
                on_quoted_char(true, S_STRING_BODY, S_STRING_END);
                break;

            case S_STRING_BODY:
                on_quoted_char(false, S_STRING_BODY, S_STRING_END);
                break;

            case S_STRING_END:
                if (is_space(c)) {
                    break;
                }
                if (on_value_terminator()) {
                    break;
                }
                // A string followed by ':' was an object key
                m_state = (c == ':') ? S_COLON : S_ERROR;
                break;

            case S_VARIABLE_START:
                on_quoted_char(true, S_VARIABLE_BODY, S_VARIABLE_END);
                break;

            case S_VARIABLE_BODY:
                on_quoted_char(false, S_VARIABLE_BODY, S_VARIABLE_END);
                break;

            case S_VARIABLE_END:
                if (is_space(c)) {
                    break;
                }
                m_state = (c == ':') ? S_COLON : S_ERROR;
                break;

            case S_ERROR:
            case S_END:
                break;
        }

        // Fail on the first bad character, and also as soon as a closer has no matching
        // opener: a later opener must not be able to "repair" the imbalance.
        if (m_state == S_ERROR || array_count < 0 || object_count < 0) {
            return false;
        }
        i++;
    }

    return m_state != S_ERROR && array_count >= 0 && object_count >= 0;
}
} // namespace Util
} // namespace Waap