Jan 06 2026 dev (#387)

* sync code

* update code to support brotli

* update code to support brotli

* update code to support brotli

* sync code

* fix findBrotli

* sync code

* sync code

* sync code

* sync code

---------

Co-authored-by: Ned Wright <nedwright@proton.me>
Co-authored-by: Daniel Eisenberg <danielei@checkpoint.com>
This commit is contained in:
Daniel-Eisenberg
2026-01-13 17:17:52 +02:00
committed by GitHub
parent c1058db57d
commit e7b6e51b31
216 changed files with 12601 additions and 2825 deletions

View File

@@ -48,6 +48,49 @@ USE_DEBUG_FLAG(D_WAAP_JSON);
#define MIN_HEX_LENGTH 6
#define charToDigit(c) (c - '0')
// Base64 decoding lookup table constants
#define BASE64_INVALID -1
#define BASE64_PADDING -2
// Base64 lookup table for optimized decoding
static int base64_table[256];
static bool base64_table_initialized = false;
// Initialize the base64 lookup table for optimized decoding
static void initialize_base64_table()
{
if (base64_table_initialized) {
return;
}
// Initialize all entries to invalid
for (int i = 0; i < 256; i++) {
base64_table[i] = BASE64_INVALID;
}
// Set valid base64 characters (A-Z = 0-25)
for (int i = 0; i < 26; i++) {
base64_table['A' + i] = i;
}
// Set valid base64 characters (a-z = 26-51)
for (int i = 0; i < 26; i++) {
base64_table['a' + i] = 26 + i;
}
// Set valid base64 characters (0-9 = 52-61)
for (int i = 0; i < 10; i++) {
base64_table['0' + i] = 52 + i;
}
// Set special base64 characters
base64_table['+'] = 62;
base64_table['/'] = 63;
base64_table['='] = BASE64_PADDING;
base64_table_initialized = true;
}
// See https://dev.w3.org/html5/html-author/charref
const struct HtmlEntity g_htmlEntities[] =
{
@@ -1086,6 +1129,19 @@ base64_decode_status decideStatusBase64Decoded(
return B64_DECODE_INVALID;
}
//
// Base64 decoding with direct array lookup
// Performance optimization notes:
// - Replaced unordered_map lookups with direct array indexing
// - Improved cache locality with a simple 256-element array
// - Eliminated hash computation and collision handling overhead
// - One-time initialization of the lookup table
//
// The table maps each ASCII character to its base64 value:
// - For valid base64 characters (A-Z, a-z, 0-9, +, /): returns 0-63 value
// - For padding character ('='): returns BASE64_PADDING (-2)
// - For invalid characters: returns BASE64_INVALID (-1)
//
// Attempts to validate and decode base64-encoded chunk.
// Value is the full value inside which potential base64-encoded chunk was found,
@@ -1107,6 +1163,9 @@ base64_decode_status decodeBase64Chunk(
bool clear_on_error,
bool called_with_prefix)
{
// Initialize the base64 lookup table on first call
initialize_base64_table();
decoded.clear();
uint32_t acc = 0;
int acc_bits = 0; // how many bits are filled in acc
@@ -1135,8 +1194,9 @@ base64_decode_status decodeBase64Chunk(
return B64_DECODE_INVALID;
}
std::unordered_map<char, double> original_occurences_counter;
std::unordered_map<char, double> decoded_occurences_counter;
// Fixed arrays for character counting - Proposal 1 optimization
uint32_t original_counts[256] = {0}; // Count for each ASCII character in original
uint32_t decoded_counts[256] = {0}; // Count for each byte value in decoded
while (it != end) {
unsigned char c = *it;
@@ -1159,35 +1219,21 @@ base64_decode_status decodeBase64Chunk(
// allow for more terminator characters
it++;
original_occurences_counter[c]++;
original_counts[static_cast<unsigned char>(c)]++;
continue;
}
unsigned char val = 0;
// Use lookup table for faster decoding
int lookup_value = base64_table[static_cast<unsigned char>(c)];
if (c >= 'A' && c <= 'Z') {
val = c - 'A';
}
else if (c >= 'a' && c <= 'z') {
val = c - 'a' + 26;
}
else if (isdigit(c)) {
val = c - '0' + 52;
}
else if (c == '+') {
val = 62;
}
else if (c == '/') {
val = 63;
}
else if (c == '=') {
// Start tracking terminator characters
if (lookup_value == BASE64_PADDING) {
// Start tracking terminator characters ('=')
terminatorCharsSeen++;
it++;
original_occurences_counter[c]++;
original_counts[static_cast<unsigned char>(c)]++;
continue;
}
else {
else if (lookup_value == BASE64_INVALID) {
dbgTrace(D_WAAP_BASE64)
<< "(leave as-is) because of non-base64 character ('"
<< c
@@ -1199,6 +1245,8 @@ base64_decode_status decodeBase64Chunk(
return B64_DECODE_INVALID; // non-base64 character
}
unsigned char val = static_cast<unsigned char>(lookup_value);
acc = (acc << 6) | val;
acc_bits += 6;
@@ -1218,11 +1266,11 @@ base64_decode_status decodeBase64Chunk(
}
decoded += (char)code;
decoded_occurences_counter[(char)code]++;
decoded_counts[static_cast<unsigned char>(code)]++;
}
it++;
original_occurences_counter[c]++;
original_counts[static_cast<unsigned char>(c)]++;
}
// end of encoded sequence decoded.
@@ -1242,13 +1290,21 @@ base64_decode_status decodeBase64Chunk(
double entropy = 0;
double p = 0;
double decoded_entropy = 0;
for (const auto& pair : original_occurences_counter) {
p = pair.second / length;
entropy -= p * std::log2(p);
// Calculate entropy from original character counts
for (int i = 0; i < 256; i++) {
if (original_counts[i] > 0) {
p = static_cast<double>(original_counts[i]) / length;
entropy -= p * std::log2(p);
}
}
for (const auto &pair : decoded_occurences_counter) {
p = pair.second / decoded.size();
decoded_entropy -= p * std::log2(p);
// Calculate entropy from decoded character counts
for (int i = 0; i < 256; i++) {
if (decoded_counts[i] > 0) {
p = static_cast<double>(decoded_counts[i]) / decoded.size();
decoded_entropy -= p * std::log2(p);
}
}
dbgTrace(D_WAAP_BASE64)
<< "Base entropy = "
@@ -1917,9 +1973,6 @@ isGzipped(const string &stream)
{
if (stream.size() < 2) return false;
auto unsinged_stream = reinterpret_cast<const u_char *>(stream.data());
dbgTrace(D_WAAP) << "isGzipped: first two bytes: "
<< std::hex << static_cast<int>(unsinged_stream[0]) << " "
<< std::hex << static_cast<int>(unsinged_stream[1]);
return unsinged_stream[0] == 0x1f && unsinged_stream[1] == 0x8b;
}
@@ -2310,7 +2363,7 @@ string extractForwardedIp(const string &x_forwarded_hdr_val)
vector<string> trusted_ips;
string forward_ip;
auto identify_config = getConfiguration<UsersAllIdentifiersConfig>(
auto identify_config = getConfigurationWithCache<UsersAllIdentifiersConfig>(
"rulebase",
"usersIdentifiers"
);