May 27 update

This commit is contained in:
Ned Wright
2024-05-27 08:45:25 +00:00
parent 253ca70de6
commit fdc148aa9b
39 changed files with 1140 additions and 359 deletions

View File

@@ -87,6 +87,7 @@ add_library(waap_clib
ParserPairs.cc
Waf2Util2.cc
ParserPDF.cc
ParserBinaryFile.cc
)
add_definitions("-Wno-unused-function")

View File

@@ -27,6 +27,7 @@
#include "ParserPairs.h"
#include "ParserDelimiter.h"
#include "ParserPDF.h"
#include "ParserBinaryFile.h"
#include "WaapAssetState.h"
#include "Waf2Regex.h"
#include "Waf2Util.h"
@@ -274,7 +275,8 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
bool base64ParamFound = false;
dbgTrace(D_WAAP_DEEP_PARSER) << " ===Processing potential base64===";
std::string decoded_val, decoded_key;
base64_variants base64_status = Waap::Util::b64Test(cur_val, decoded_key, decoded_val);
Waap::Util::BinaryFileType base64BinaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
base64_variants base64_status = Waap::Util::b64Test(cur_val, decoded_key, decoded_val, base64BinaryFileType);
dbgTrace(D_WAAP_DEEP_PARSER)
<< " status = "
@@ -355,7 +357,8 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
isUrlPayload,
isUrlParamPayload,
flags,
parser_depth
parser_depth,
base64BinaryFileType
);
} else {
offset = 0;
@@ -425,7 +428,8 @@ DeepParser::onKv(const char *k, size_t k_len, const char *v, size_t v_len, int f
isUrlParamPayload,
flags,
parser_depth,
base64ParamFound
base64ParamFound,
base64BinaryFileType
);
if (rc != CONTINUE_PARSING) {
return rc;
@@ -798,7 +802,8 @@ DeepParser::parseAfterMisleadingMultipartBoundaryCleaned(
bool isUrlParamPayload,
int flags,
size_t parser_depth,
bool base64ParamFound)
bool base64ParamFound,
Waap::Util::BinaryFileType b64FileType)
{
int offset = -1;
int rc = 0;
@@ -815,7 +820,8 @@ DeepParser::parseAfterMisleadingMultipartBoundaryCleaned(
isUrlPayload,
isUrlParamPayload,
flags,
parser_depth
parser_depth,
b64FileType
);
} else {
offset = 0;
@@ -919,7 +925,8 @@ DeepParser::createInternalParser(
bool isUrlPayload,
bool isUrlParamPayload,
int flags,
size_t parser_depth
size_t parser_depth,
Waap::Util::BinaryFileType b64FileType
)
{
dbgTrace(D_WAAP_DEEP_PARSER)
@@ -1152,10 +1159,25 @@ DeepParser::createInternalParser(
m_parsersDeque.push_back(std::make_shared<BufferedParser<ParserPDF>>(*this, parser_depth + 1));
offset = 0;
} else {
dbgTrace(D_WAAP_DEEP_PARSER) << "Starting to parse a binary file";
m_parsersDeque.push_back(std::make_shared<BufferedParser<ParserBinary>>(*this, parser_depth + 1));
offset = 0;
Waap::Util::BinaryFileType fileType = ParserBinaryFile::detectBinaryFileHeader(cur_val);
if (fileType != Waap::Util::BinaryFileType::FILE_TYPE_NONE) {
dbgTrace(D_WAAP_DEEP_PARSER) << "Starting to parse a known binary file (type=" << fileType << ")";
m_parsersDeque.push_back(
std::make_shared<BufferedParser<ParserBinaryFile>>(*this, parser_depth + 1, false, fileType)
);
offset = 0;
} else {
dbgTrace(D_WAAP_DEEP_PARSER) << "Starting to parse a binary file";
m_parsersDeque.push_back(std::make_shared<BufferedParser<ParserBinary>>(*this, parser_depth + 1));
offset = 0;
}
}
} else if (b64FileType != Waap::Util::BinaryFileType::FILE_TYPE_NONE) {
dbgTrace(D_WAAP_DEEP_PARSER) << "Starting to parse a known binary file, base64 encoded";
m_parsersDeque.push_back(
std::make_shared<BufferedParser<ParserBinaryFile>>(*this, parser_depth + 1, true, b64FileType)
);
offset = 0;
}
}
if (offset < 0) {

View File

@@ -18,6 +18,7 @@
#include "KeyStack.h"
#include "WaapAssetState.h"
#include "Waf2Regex.h"
#include "Waf2Util.h"
#include "maybe_res.h"
#include <deque>
@@ -129,7 +130,8 @@ private:
bool isUrlPayload,
bool isUrlParamPayload,
int flags,
size_t parser_depth
size_t parser_depth,
Waap::Util::BinaryFileType b64FileType
);
int createUrlParserForJson(
@@ -160,7 +162,8 @@ private:
bool isUrlParamPayload,
int flags,
size_t parser_depth,
bool base64ParamFound
bool base64ParamFound,
Waap::Util::BinaryFileType b64FileType
);
int pushValueToTopParser(std::string &cur_val, int flags, bool base64ParamFound, int offset, size_t parser_depth);
int parseBuffer(

View File

@@ -0,0 +1,199 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ParserBinaryFile.h"
#include "Waf2Util.h"
#include "debug.h"
#include <string.h>
#include <map>
#include <tuple>
using namespace std;
using Waap::Util::BinaryFileType;
USE_DEBUG_FLAG(D_WAAP_PARSER_BINARY_FILE);
USE_DEBUG_FLAG(D_WAAP);
const string ParserBinaryFile::m_parserName = "ParserBinaryFile";
// Signature table for the binary file types this parser recognizes.
// Each entry maps a file type to a (header, trailer) pair:
//   - the header is what detectBinaryFileHeader() searches for,
//   - the trailer is what push() looks for to decide that the file has ended.
static const map<BinaryFileType, pair<string, string>> m_head_tail_map = {
    {
        BinaryFileType::FILE_TYPE_PNG,
        {
            "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A",  // PNG signature
            "\x49\x45\x4e\x44\xae\x42\x60\x82"   // IEND chunk
        }
    },
    {
        BinaryFileType::FILE_TYPE_JPEG,
        {
            "\xff\xd8\xff",
            "\xff\xd9"
        }
    },
    {
        BinaryFileType::FILE_TYPE_PDF,
        {
            "%PDF-",
            "%%EOF"
        }
    }
};
// Builds a parser in its initial state (s_start).
//   receiver     - sink that gets the onKey/onValue/onKvDone notifications emitted by push()
//   parser_depth - depth value supplied by the caller; stored as-is
//   is_base64    - true when the pushed data is base64 text rather than raw binary
//   file_type    - file type whose trailer signature push() should look for
ParserBinaryFile::ParserBinaryFile(
    IParserStreamReceiver &receiver,
    size_t parser_depth,
    bool is_base64,
    BinaryFileType file_type)
        :
    m_receiver(receiver),
    m_state(s_start),
    m_parser_depth(parser_depth),
    m_is_base64(is_base64),
    m_file_type(file_type)
{
}
// Nothing to release: the receiver is held by reference and is not owned by this parser.
ParserBinaryFile::~ParserBinaryFile() = default;
// Looks for one of the known file headers inside the first MAX_HEADER_LOOKUP bytes of buf.
//
// Returns the matching file type, or FILE_TYPE_NONE when:
//   - buf holds fewer than MIN_HEADER_LOOKUP bytes,
//   - no known header is present in the inspected prefix, or
//   - the first header that matches has fewer than MIN_HEADER_LOOKUP bytes between its position
//     and the end of buf (the remaining table entries are not tried in that case).
BinaryFileType
ParserBinaryFile::detectBinaryFileHeader(const string &buf)
{
    const size_t totalSize = buf.size();
    if (totalSize < MIN_HEADER_LOOKUP) {
        dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "Buffer size too small (" << buf.size() << ")";
        return BinaryFileType::FILE_TYPE_NONE;
    }

    // Only the leading part of the buffer is inspected
    const string lookupWindow = buf.substr(0, MAX_HEADER_LOOKUP);

    for (auto candidate = m_head_tail_map.begin(); candidate != m_head_tail_map.end(); ++candidate) {
        const size_t headerPos = lookupWindow.find(candidate->second.first);
        if (headerPos == string::npos) {
            continue;
        }

        if (totalSize - headerPos < MIN_HEADER_LOOKUP) {
            dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "Remaining size after header is too small";
            return BinaryFileType::FILE_TYPE_NONE;
        }

        dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "Found. type=" << candidate->first;
        return candidate->first;
    }

    return BinaryFileType::FILE_TYPE_NONE;
}
size_t
ParserBinaryFile::push(const char *buf, size_t len)
{
dbgTrace(D_WAAP_PARSER_BINARY_FILE)
<< "buf="
<< buf
<< "len="
<< len;
const char *c;
if (m_state == s_error) {
return 0;
}
if (len == 0) {
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "end of stream. m_state=" << m_state;
if (m_state == s_end) {
m_receiver.onKvDone();
} else if (m_is_base64) {
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "finished parsing";
if (m_receiver.onKey("BinaryFileSkip", 14) != 0) {
m_state = s_error;
return 0;
}
if (m_receiver.onValue("", 0) != 0) {
m_state = s_error;
return 0;
}
m_receiver.onKvDone();
} else {
m_state = s_error;
}
return 0;
}
if (m_head_tail_map.find(m_file_type) == m_head_tail_map.end()) {
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "unknown file type: " << m_file_type;
m_state = s_error;
return 0;
}
const string tail = m_head_tail_map.at(m_file_type).second;
switch (m_state) {
case s_start:
m_state = s_body;
CP_FALL_THROUGH;
case s_body:
if (m_is_base64) {
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "parsing base64";
bool keepParsing = true;
for (size_t i = 0; i < len; i++) {
bool isB64AlphaChar =
Waap::Util::isAlphaAsciiFast(buf[i]) || isdigit(buf[i]) || buf[i] == '/' || buf[i] == '+';
if (buf[i] == '=') {
dbgTrace(D_WAAP_PARSER_BINARY_FILE)
<< "base64 padding found (offset=" << i << "). end of stream.";
m_state = s_end;
keepParsing = false;
break;
} else if (!isB64AlphaChar) {
dbgTrace(D_WAAP_PARSER_BINARY_FILE)
<< "non-base64 char found (c=" << buf[i] << ",offset=" << i << "). return error";
m_state = s_error;
return 0;
}
}
if (keepParsing) { // keep "parsing" on next call to push()
break;
}
} else {
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "parsing binary. Searching for tail: " << tail;
c = strstr(buf + len - tail.size(), tail.c_str());
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "search result: c=" << c;
if (c) {
m_state = s_end;
} else { // keep "parsing" on next call to push()
break;
}
}
CP_FALL_THROUGH;
case s_end:
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "finished parsing";
if (m_receiver.onKey("BinaryFileSkip", 14) != 0) {
m_state = s_error;
return 0;
}
if (m_receiver.onValue("", 0) != 0) {
m_state = s_error;
return 0;
}
break;
case s_error:
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "error detected";
break;
default:
dbgTrace(D_WAAP_PARSER_BINARY_FILE) << "unknown state: " << m_state;
m_state = s_error;
return 0;
}
return len;
}
// Signals end of stream: a zero-length push() is what makes the parser emit its final
// notifications (or move to the error state).
void
ParserBinaryFile::finish()
{
    push(nullptr, 0);
}
// Returns the fixed parser name ("ParserBinaryFile"), shared by all instances.
const string &
ParserBinaryFile::name() const
{
    return m_parserName;
}
bool ParserBinaryFile::error() const
{
return m_state == s_error;
}

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PARSER_BINARY_FILE_H__
#define __PARSER_BINARY_FILE_H__
#include "ParserBase.h"
#include "Waf2Util.h"
#include <string.h>
// Minimal number of bytes required by detectBinaryFileHeader(): both for the whole buffer and for the
// part of the buffer that starts at the detected header.
#define MIN_HEADER_LOOKUP 16
// Only this many leading bytes of a buffer are searched for a known file header.
#define MAX_HEADER_LOOKUP 64
// NOTE(review): not referenced by the ParserBinaryFile implementation; presumably intended to bound the
// trailer search - confirm or remove.
#define MAX_TAIL_LOOKUP 5
// Parser for payloads that were recognized as a known binary file (see Waap::Util::BinaryFileType),
// given either as raw bytes or as base64 text.
// The file content itself is not forwarded to the receiver: once the end of the file is detected the
// parser reports a single "BinaryFileSkip" key with an empty value.
class ParserBinaryFile : public ParserBase {
public:
// Searches the beginning of buf (up to MAX_HEADER_LOOKUP bytes) for a known file header.
// Returns the detected type, or FILE_TYPE_NONE when nothing is detected or the buffer is too short.
static Waap::Util::BinaryFileType detectBinaryFileHeader(const std::string &buf);
// receiver     - sink for the onKey/onValue/onKvDone notifications
// parser_depth - depth value supplied by the creator of the parser
// is_base64    - true when the pushed data is base64 text, false for raw binary
// file_type    - type of the file, selects the trailer signature that marks the end of the file
ParserBinaryFile(
IParserStreamReceiver &receiver,
size_t parser_depth,
bool is_base64,
Waap::Util::BinaryFileType file_type);
virtual ~ParserBinaryFile();
// Consumes a chunk; len == 0 means end of stream. Returns len on success, 0 on end of stream or error.
virtual size_t push(const char *buf, size_t len);
// Signals end of stream (implemented as a zero-length push()).
virtual void finish();
virtual const std::string &name() const;
// True when the parser is in the error state.
virtual bool error() const;
// Always reports 1; the stored m_parser_depth is not used here.
virtual size_t depth() { return 1; }
private:
// Parsing states
enum state {
s_start, // nothing consumed yet
s_body,  // consuming data, end of file not detected yet
s_end,   // end of file detected (base64 padding or trailer signature)
s_error  // invalid input or receiver failure; all further input is rejected
};
IParserStreamReceiver &m_receiver;
enum state m_state;
static const std::string m_parserName;
size_t m_parser_depth;
bool m_is_base64;
Waap::Util::BinaryFileType m_file_type;
};
#endif // __PARSER_BINARY_FILE_H__

View File

@@ -304,6 +304,7 @@ ParserJson::ParserJson(
m_key.push("json", 4);
}
ParserJson::~ParserJson()
{
// Cleanup JSON

View File

@@ -34,6 +34,7 @@
#include "generic_rulebase/rulebase_config.h"
#include "user_identifiers_config.h"
#include "Waf2Regex.h"
#include "ParserBinaryFile.h"
using boost::algorithm::to_lower_copy;
using namespace std;
@@ -960,11 +961,12 @@ string filterUTF7(const string& text) {
// 4. percent of non-printable characters (!isprint())
// in decoded data is less than 10% (statistical garbage detection).
// Returns false above checks fail.
bool decodeBase64Chunk(
base64_decode_status decodeBase64Chunk(
const string& value,
string::const_iterator it,
string::const_iterator end,
string& decoded)
string& decoded,
bool clear_on_error)
{
decoded.clear();
uint32_t acc = 0;
@@ -974,13 +976,14 @@ bool decodeBase64Chunk(
uint32_t spacer_count = 0;
dbgTrace(D_WAAP) << "decodeBase64Chunk: value='" << value << "' match='" << string(it, end) << "'";
string::const_iterator begin = it;
// The encoded data length (without the "base64," prefix) should be exactly divisible by 4
// len % 4 is not 0 i.e. this is not base64
if ((end - it) % 4 != 0) {
dbgTrace(D_WAAP_BASE64) <<
"b64DecodeChunk: (leave as-is) because encoded data length should be exactly divisible by 4.";
return false;
return B64_DECODE_INVALID;
}
while (it != end) {
@@ -992,7 +995,7 @@ bool decodeBase64Chunk(
dbgTrace(D_WAAP_BASE64) <<
"decodeBase64Chunk: (leave as-is) because terminator characters must all be '='," <<
"until end of match.";
return false;
return B64_DECODE_INVALID;
}
// We should see 0, 1 or 2 (no more) terminator characters
@@ -1000,7 +1003,7 @@ bool decodeBase64Chunk(
if (terminatorCharsSeen > 2) {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because terminatorCharsSeen > 2";
return false;
return B64_DECODE_INVALID;
}
// allow for more terminator characters
@@ -1033,8 +1036,8 @@ bool decodeBase64Chunk(
}
else {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because of non-base64 character ('" <<
c << "', ASCII " << (unsigned int)c << ")";
return false; // non-base64 character
c << "', ASCII " << (unsigned int)c << ", offset " << (it-begin) << ")";
return B64_DECODE_INVALID; // non-base64 character
}
acc = (acc << 6) | val;
@@ -1087,20 +1090,23 @@ bool decodeBase64Chunk(
}
else {
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (delete) because decoded.size=" << decoded.size() <<
", nonPrintableCharsCount=" << nonPrintableCharsCount;
decoded.clear();
", nonPrintableCharsCount=" << nonPrintableCharsCount <<
", clear_on_error=" << clear_on_error;
if (clear_on_error) decoded.clear();
return B64_DECODE_INCOMPLETE;
}
dbgTrace(D_WAAP_BASE64) << "returning true: successfully decoded."
<< " Returns decoded data in \"decoded\" parameter";
return true; // successfully decoded. Returns decoded data in "decoded" parameter
return B64_DECODE_OK; // successfully decoded. Returns decoded data in "decoded" parameter
}
// If decoded size is too small - leave the encoded value (return false)
decoded.clear(); // discard partial data
dbgTrace(D_WAAP_BASE64) << "decodeBase64Chunk: (leave as-is) because decoded too small. decoded.size=" <<
decoded.size() <<
", nonPrintableCharsCount=" << nonPrintableCharsCount;
return false;
", nonPrintableCharsCount=" << nonPrintableCharsCount <<
", clear_on_error=" << clear_on_error;
return B64_DECODE_INVALID;
}
// Attempts to detect and validate base64 chunk.
@@ -1144,7 +1150,7 @@ b64DecodeChunk(
}
}
return decodeBase64Chunk(value, it, end, decoded);
return decodeBase64Chunk(value, it, end, decoded) != B64_DECODE_INVALID;
}
vector<string> split(const string& s, char delim) {
@@ -1281,6 +1287,8 @@ bool detectBase64Chunk(
{
dbgTrace(D_WAAP_BASE64) << " ===detectBase64Chunk===: starting with = '" << s << "'";
string::const_iterator it = s.begin();
start = s.end();
end = s.end();
//detect "base64," prefix to start search after this
for (; it != s.end()-7; it++) {
@@ -1309,33 +1317,62 @@ bool detectBase64Chunk(
return false;
}
// Decodes the base64 range [start, end) of s into value and, when anything was decoded,
// checks whether the decoded bytes begin with a known binary file header.
//
//   binaryFileType - updated with the detection result only when decoding was not rejected
//                    and produced data; otherwise left untouched.
//   value          - decoded data; emptied again when the decoder reports B64_DECODE_INCOMPLETE.
//
// Returns the status reported by decodeBase64Chunk().
base64_decode_status
processDecodedChunk(
    const string &s,
    string::const_iterator start,
    string::const_iterator end,
    string &value,
    BinaryFileType &binaryFileType
)
{
    // clear_on_error is disabled so that data of an "incomplete" decode is still available
    // for the header detection below
    const base64_decode_status status = decodeBase64Chunk(s, start, end, value, false);
    dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: after decode. retVal=" << status
        << " value.size()=" << value.size();

    if (status == B64_DECODE_INVALID || value.empty()) {
        return status;
    }

    binaryFileType = ParserBinaryFile::detectBinaryFileHeader(value);

    if (status == B64_DECODE_INCOMPLETE) {
        value.clear();
    }

    return status;
}
bool isBase64PrefixProcessingOK (
const string &s,
string &value)
string &value,
BinaryFileType &binaryFileType)
{
string::const_iterator start, end;
bool retVal = false;
base64_decode_status retVal = B64_DECODE_INVALID;
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: before regex for prefix for string '" << s << "'";
if (base64_prefix_detector_re.hasMatch(s)) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: prefix detected on string '" << s << "'";
if (detectBase64Chunk(s, start, end)) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk detected";
if ((start != s.end()) && (end == s.end())) {
retVal = decodeBase64Chunk(s, start, end, value);
retVal = processDecodedChunk(s, start, end, value, binaryFileType);
}
} else if (start != s.end()) {
dbgTrace(D_WAAP_BASE64) << " ===isBase64PrefixProcessingOK===: chunk not detected."
" searching for known file header only";
end = (start + MAX_HEADER_LOOKUP < s.end()) ? start + MAX_HEADER_LOOKUP : s.end();
processDecodedChunk(s, start, end, value, binaryFileType);
value.clear();
return binaryFileType != Waap::Util::BinaryFileType::FILE_TYPE_NONE;
}
}
return retVal;
return retVal != B64_DECODE_INVALID;
}
base64_variants b64Test (
const string &s,
string &key,
string &value)
string &value,
BinaryFileType &binaryFileType)
{
key.clear();
bool retVal;
binaryFileType = Waap::Util::BinaryFileType::FILE_TYPE_NONE;
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: string = " << s
<< " key = " << key << " value = " << value;
@@ -1397,7 +1434,7 @@ base64_variants b64Test (
}
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: after processing key = '" << key << "'";
bool found = isBase64PrefixProcessingOK(s, prefix_decoded_val);
bool found = isBase64PrefixProcessingOK(s, prefix_decoded_val, binaryFileType);
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: after prefix test found = "
<< found << " new value is '" << prefix_decoded_val << "' - done";
if (found) {
@@ -1421,7 +1458,7 @@ base64_variants b64Test (
if ((s.end() - start) % 4 != 0) {
key.clear();
value.clear();
return CONTINUE_AS_IS;;
return CONTINUE_AS_IS;
}
}
else {
@@ -1443,7 +1480,7 @@ base64_variants b64Test (
key.pop_back();
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: FINAL key = '" << key << "'";
}
retVal = decodeBase64Chunk(s, start, s.end(), value);
retVal = decodeBase64Chunk(s, start, s.end(), value) != B64_DECODE_INVALID;
dbgTrace(D_WAAP_BASE64) << " ===b64Test===: After testing and conversion value = "
<< value << "retVal = '" << retVal <<"'";

View File

@@ -34,6 +34,7 @@
enum base64_variants {SINGLE_B64_CHUNK_CONVERT, KEY_VALUE_B64_PAIR, CONTINUE_AS_IS};
enum base64_stage {BEFORE_EQUAL, EQUAL, DONE, MISDETECT};
// Result of decodeBase64Chunk():
//   B64_DECODE_INVALID    - input rejected (e.g. length not divisible by 4, bad terminators,
//                           non-base64 character, decoded data too small)
//   B64_DECODE_OK         - successfully decoded
//   B64_DECODE_INCOMPLETE - data was decoded but failed the decoder's final check on the decoded content
//                           (NOTE(review): presumably the non-printable characters ratio - confirm);
//                           callers that only need a yes/no answer treat it like B64_DECODE_OK
enum base64_decode_status {B64_DECODE_INVALID, B64_DECODE_OK, B64_DECODE_INCOMPLETE};
// This is portable version of stricmp(), which is non-standard function (not even in C).
// Contrary to stricmp(), for a slight optimization, s2 is ASSUMED to be already in lowercase.
@@ -858,12 +859,13 @@ void unescapeUnicode(std::string &text);
// Try to find and decode UTF7 chunks
std::string filterUTF7(const std::string &text);
bool
base64_decode_status
decodeBase64Chunk(
const std::string &value,
std::string::const_iterator it,
std::string::const_iterator end,
std::string &decoded);
std::string &decoded,
bool clear_on_error = true);
bool
b64DecodeChunk(
@@ -889,6 +891,13 @@ namespace Util {
std::string &key,
std::string &value);
// Binary file types that can be recognized by their header signature
// (see ParserBinaryFile::detectBinaryFileHeader()).
enum BinaryFileType {
FILE_TYPE_NONE, // no known file type detected
FILE_TYPE_PNG,
FILE_TYPE_JPEG,
FILE_TYPE_PDF
};
void b64Decode(
const std::string &s,
RegexSubCallback_f cb,
@@ -899,7 +908,8 @@ namespace Util {
base64_variants b64Test (
const std::string &s,
std::string &key,
std::string &value);
std::string &value,
BinaryFileType &binaryFileType);
// The original stdlib implementation of isalpha() supports locale settings which we do not really need.
// It is also proven to contribute to slow performance in some of the algorithms using it.

View File

@@ -14,7 +14,8 @@ enum SchemaKeyType
StartObjectKeyName,
StartArrayKeyName,
EndArrayKeyName,
OtherKey
OtherKey,
JsonFailure
};
#endif // __OA_SCHEMA_UPDATER_KEYS_H__