From 041a46339065e3dd9e33dccf2626b59560c5316d Mon Sep 17 00:00:00 2001 From: Ned Wright Date: Sun, 4 Jan 2026 11:39:41 +0000 Subject: [PATCH] update code to support brotli --- CMakeLists.txt | 12 + cmake/FindBrotli.cmake | 224 +++++ components/include/hyperscan_hook.h | 40 + .../prometheus/prometheus_ut/prometheus_ut.cc | 212 +++-- .../waap/include/buffered_compressed_stream.h | 127 +++ .../waap/waap_clib/AssertionRegexes.h | 35 + .../waap_clib/UnifiedIndicatorsContainer.cc | 339 ++++++++ .../waap_clib/UnifiedIndicatorsContainer.h | 220 +++++ .../waap/waap_clib/WaapHyperscanEngine.cc | 767 ++++++++++++++++++ .../waap/waap_clib/WaapHyperscanEngine.h | 54 ++ .../waap_clib/buffered_compressed_stream.cc | 481 +++++++++++ components/utils/pm/hyperscan_hook.cc | 135 +++ .../nginx_conf_collector/CMakeLists.txt | 1 + core/compression/CMakeLists.txt | 4 + core/config/config_cache_stats.cc | 13 + .../attachments/nano_attachment_common.h | 672 +++++++++++++++ .../general/intell_registration_event.h | 28 + .../CMakeLists.txt | 1 + nodes/http_transaction_handler/CMakeLists.txt | 1 + nodes/orchestration/CMakeLists.txt | 1 + nodes/prometheus/CMakeLists.txt | 4 +- unit_test.cmake | 2 +- 22 files changed, 3292 insertions(+), 81 deletions(-) create mode 100644 cmake/FindBrotli.cmake create mode 100644 components/include/hyperscan_hook.h create mode 100644 components/security_apps/waap/include/buffered_compressed_stream.h create mode 100644 components/security_apps/waap/waap_clib/AssertionRegexes.h create mode 100644 components/security_apps/waap/waap_clib/UnifiedIndicatorsContainer.cc create mode 100644 components/security_apps/waap/waap_clib/UnifiedIndicatorsContainer.h create mode 100644 components/security_apps/waap/waap_clib/WaapHyperscanEngine.cc create mode 100644 components/security_apps/waap/waap_clib/WaapHyperscanEngine.h create mode 100644 components/security_apps/waap/waap_clib/buffered_compressed_stream.cc create mode 100644 components/utils/pm/hyperscan_hook.cc create mode 100644 core/config/config_cache_stats.cc create mode 100644 core/include/attachments/nano_attachment_common.h create mode 100644 core/include/general/intell_registration_event.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d46122..ac59d99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,9 +8,21 @@ if(NOT IS_ALPINE EQUAL "0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dalpine") endif() +list(APPEND CMAKE_MODULE_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" +) + find_package(Boost REQUIRED) find_package(ZLIB REQUIRED) find_package(GTest REQUIRED) +find_package(PkgConfig REQUIRED) +find_package(Brotli REQUIRED MODULE) + +message(STATUS "Brotli_INCLUDE_DIR='${Brotli_INCLUDE_DIR}'") +message(STATUS "Brotli_LIBRARY='${Brotli_LIBRARY}'") +message(STATUS "Brotli_LIBRARIES='${Brotli_LIBRARIES}'") +message(STATUS "Brotli_FOUND='${Brotli_FOUND}'") +message(STATUS "Brotli root='${BROTLI_ROOT_DIR}'") include(cppcheck.cmake) diff --git a/cmake/FindBrotli.cmake b/cmake/FindBrotli.cmake new file mode 100644 index 0000000..9c1de6f --- /dev/null +++ b/cmake/FindBrotli.cmake @@ -0,0 +1,224 @@ +# FindBrotli.cmake +# +# Supports COMPONENTS: +# decoder, encoder, common +# +# Exports targets: +# Brotli::decoder +# Brotli::encoder +# Brotli::common +# +# Optional variables: +# BROTLI_ROOT_DIR +# BROTLI_USE_STATIC_LIBS + +# ------------------------------------------------------------ +# Version handling (not supported) +# ------------------------------------------------------------ +if(Brotli_FIND_VERSION) + set(_brotli_version_error_msg "FindBrotli.cmake does not support version checking.") + if(Brotli_FIND_REQUIRED) + message(FATAL_ERROR "${_brotli_version_error_msg}") + elseif(NOT Brotli_FIND_QUIETLY) + message(WARNING "${_brotli_version_error_msg}") + endif() +endif() + +# ------------------------------------------------------------ +# Component dependencies +# ------------------------------------------------------------ +if(Brotli_FIND_REQUIRED_decoder OR Brotli_FIND_REQUIRED_encoder) + set(Brotli_FIND_REQUIRED_common TRUE) +endif() + +# ------------------------------------------------------------ +# Static library preference +# ------------------------------------------------------------ +if(BROTLI_USE_STATIC_LIBS) + set(_brotli_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a) + endif() +endif() + +# ------------------------------------------------------------ +# Optional pkg-config +# ------------------------------------------------------------ +find_package(PkgConfig QUIET) + +# ------------------------------------------------------------ +# Includes +# ------------------------------------------------------------ +find_path(Brotli_INCLUDE_DIR + NAMES + brotli/decode.h + brotli/encode.h + HINTS + ${BROTLI_ROOT_DIR} + PATH_SUFFIXES + include + includes +) +mark_as_advanced(Brotli_INCLUDE_DIR) + +# ------------------------------------------------------------ +# Internal state +# ------------------------------------------------------------ +set(_brotli_req_vars "") + +# For figuring out the real (non-ALIAS) targets when using pkg-config +set(_brotli_decoder_real_target "") +set(_brotli_encoder_real_target "") +set(_brotli_common_real_target "") + +if(BROTLI_USE_STATIC_LIBS) + set(_brotli_stat_str "_STATIC") +else() + set(_brotli_stat_str "") +endif() + +# ------------------------------------------------------------ +# Components loop +# ------------------------------------------------------------ +foreach(_listvar "common;common" "decoder;dec" "encoder;enc") + list(GET _listvar 0 _component) + list(GET _listvar 1 _libname) + + # ---- pkg-config path ---- + if(PKG_CONFIG_FOUND) + if(BROTLI_USE_STATIC_LIBS) + pkg_check_modules( + Brotli_${_component}_STATIC + QUIET + GLOBAL + IMPORTED_TARGET + libbrotli${_libname} + ) + else() + pkg_check_modules( + Brotli_${_component} + QUIET + GLOBAL + IMPORTED_TARGET + libbrotli${_libname} + ) + endif() + endif() + + # If pkg-config created an imported target, make our alias to it. + if(TARGET PkgConfig::Brotli_${_component}${_brotli_stat_str}) + add_library( + Brotli::${_component} + ALIAS + PkgConfig::Brotli_${_component}${_brotli_stat_str} + ) + + # Save the underlying real target name for later linkage fixes + set(_brotli_${_component}_real_target "PkgConfig::Brotli_${_component}${_brotli_stat_str}") + + set(Brotli_${_component}_FOUND TRUE) + + if(Brotli_FIND_REQUIRED_${_component}) + # For FindPackageHandleStandardArgs: ensure libraries are actually present + if(BROTLI_USE_STATIC_LIBS) + list(APPEND _brotli_req_vars Brotli_${_component}_STATIC_LIBRARIES) + else() + list(APPEND _brotli_req_vars Brotli_${_component}_LINK_LIBRARIES) + endif() + endif() + + continue() + endif() + + # ---- find_library path ---- + if(Brotli_FIND_REQUIRED_${_component}) + list(APPEND _brotli_req_vars Brotli_${_component}) + endif() + + if(BROTLI_USE_STATIC_LIBS) + set(_brotli_names + brotli${_libname}-static + libbrotli${_libname}-static + ) + else() + set(_brotli_names + brotli${_libname} + libbrotli${_libname} + ) + endif() + + find_library(Brotli_${_component} + NAMES ${_brotli_names} + HINTS ${BROTLI_ROOT_DIR} + PATH_SUFFIXES + lib + lib64 + libs + libs64 + lib/x86_64-linux-gnu + ) + mark_as_advanced(Brotli_${_component}) + + if(Brotli_${_component}) + set(Brotli_${_component}_FOUND TRUE) + + add_library(Brotli::${_component} UNKNOWN IMPORTED) + set_target_properties(Brotli::${_component} PROPERTIES + IMPORTED_LOCATION "${Brotli_${_component}}" + INTERFACE_INCLUDE_DIRECTORIES "${Brotli_INCLUDE_DIR}" + ) + + # In this branch, our target is real (not ALIAS), so it can be linked later. + set(_brotli_${_component}_real_target "Brotli::${_component}") + else() + set(Brotli_${_component}_FOUND FALSE) + endif() +endforeach() + +# ------------------------------------------------------------ +# Link decoder/encoder → common (but never on ALIAS targets) +# ------------------------------------------------------------ +if(_brotli_common_real_target) + foreach(_comp decoder encoder) + if(_brotli_${_comp}_real_target) + # Only link if the target is NOT an ALIAS + get_target_property(_aliased ${_brotli_${_comp}_real_target} ALIASED_TARGET) + if(NOT _aliased) + target_link_libraries(${_brotli_${_comp}_real_target} INTERFACE ${_brotli_common_real_target}) + endif() + endif() + endforeach() +endif() + +# ------------------------------------------------------------ +# Aggregate convenience variables +# ------------------------------------------------------------ +set(Brotli_LIBRARIES "") +foreach(_comp decoder encoder common) + if(TARGET Brotli::${_comp}) + list(APPEND Brotli_LIBRARIES Brotli::${_comp}) + endif() +endforeach() + +# ------------------------------------------------------------ +# Final package check (FIXED: use _brotli_req_vars) +# ------------------------------------------------------------ +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Brotli + FOUND_VAR + Brotli_FOUND + REQUIRED_VARS + Brotli_INCLUDE_DIR + ${_brotli_req_vars} + HANDLE_COMPONENTS +) + +# ------------------------------------------------------------ +# Restore suffixes +# ------------------------------------------------------------ +if(BROTLI_USE_STATIC_LIBS) + set(CMAKE_FIND_LIBRARY_SUFFIXES ${_brotli_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) +endif() + diff --git a/components/include/hyperscan_hook.h b/components/include/hyperscan_hook.h new file mode 100644 index 0000000..955e8fc --- /dev/null +++ b/components/include/hyperscan_hook.h @@ -0,0 +1,40 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __HYPERSCAN_HOOK_H__ +#define __HYPERSCAN_HOOK_H__ + +#ifdef USE_HYPERSCAN +#include +#include + +#include +#include "hs.h" +#include "i_pm_scan.h" + +class HyperscanHook : public I_PMScan { +public: + HyperscanHook(); + ~HyperscanHook(); + Maybe prepare(const std::set &patterns); + std::set scanBuf(const Buffer &buf) const override; + std::set> scanBufWithOffset(const Buffer &buf) const override; + void scanBufWithOffsetLambda(const Buffer &buf, I_PMScan::CBFunction cb) const override; + bool ok() const { return m_hsReady; } +private: + hs_database_t *m_hsDatabase; + hs_scratch_t *m_hsScratch; + std::vector m_hsPatterns; + std::vector m_idToPattern; + bool m_hsReady; +}; +#endif // USE_HYPERSCAN + +#endif // __HYPERSCAN_HOOK_H__ diff --git a/components/security_apps/prometheus/prometheus_ut/prometheus_ut.cc b/components/security_apps/prometheus/prometheus_ut/prometheus_ut.cc index f021450..49329d0 100755 --- a/components/security_apps/prometheus/prometheus_ut/prometheus_ut.cc +++ b/components/security_apps/prometheus/prometheus_ut/prometheus_ut.cc @@ -1,79 +1,133 @@ -#include "prometheus_comp.h" - -#include -#include -#include - -#include "cmock.h" -#include "cptest.h" -#include "maybe_res.h" -#include "debug.h" -#include "config.h" -#include "environment.h" -#include "config_component.h" -#include "agent_details.h" -#include "time_proxy.h" -#include "mock/mock_mainloop.h" -#include "mock/mock_rest_api.h" -#include "mock/mock_messaging.h" - -using namespace std; -using namespace testing; - -USE_DEBUG_FLAG(D_PROMETHEUS); - -class PrometheusCompTest : public Test -{ -public: - PrometheusCompTest() - { - EXPECT_CALL(mock_rest, mockRestCall(_, "declare-boolean-variable", _)).WillOnce(Return(false)); - env.preload(); - config.preload(); - env.init(); - - EXPECT_CALL( - mock_rest, - addGetCall("metrics", _) - ).WillOnce(DoAll(SaveArg<1>(&get_metrics_func), Return(true))); - - prometheus_comp.init(); - } - - ::Environment env; - ConfigComponent config; - PrometheusComp prometheus_comp; - StrictMock mock_rest; - StrictMock mock_ml; - NiceMock mock_messaging; - unique_ptr agent_uninstall; - function get_metrics_func; - CPTestTempfile status_file; - string registered_services_file_path; - -}; - -TEST_F(PrometheusCompTest, checkAddingMetric) -{ - registered_services_file_path = cptestFnameInSrcDir(string("registered_services.json")); - setConfiguration(registered_services_file_path, "orchestration", "Orchestration registered services"); - string metric_body = "{\n" - " \"metrics\": [\n" - " {\n" - " \"metric_name\": \"watchdogProcessStartupEventsSum\",\n" - " \"metric_type\": \"counter\",\n" - " \"metric_description\": \"\",\n" - " \"labels\": \"{method=\\\"post\\\",code=\\\"200\\\"}\",\n" - " \"value\": \"1534\"\n" - " }\n" - " ]\n" - "}"; - - string message_body; - EXPECT_CALL(mock_messaging, sendSyncMessage(_, "/service-metrics", _, _, _)) - .Times(2).WillRepeatedly(Return(HTTPResponse(HTTPStatusCode::HTTP_OK, metric_body))); - - string metric_str = "# TYPE nano_service_restarts_counter counter\n" - "nano_service_restarts_counter{method=\"post\",code=\"200\"} 1534\n\n"; - EXPECT_EQ(metric_str, get_metrics_func()); -} +#include "prometheus_comp.h" + +#include +#include +#include + +#include "cmock.h" +#include "cptest.h" +#include "maybe_res.h" +#include "debug.h" +#include "config.h" +#include "environment.h" +#include "config_component.h" +#include "agent_details.h" +#include "time_proxy.h" +#include "mock/mock_mainloop.h" +#include "mock/mock_rest_api.h" +#include "mock/mock_messaging.h" + +using namespace std; +using namespace testing; + +USE_DEBUG_FLAG(D_PROMETHEUS); + +class PrometheusCompTest : public Test +{ +public: + PrometheusCompTest() + { + EXPECT_CALL(mock_rest, mockRestCall(_, "declare-boolean-variable", _)).WillOnce(Return(false)); + env.preload(); + config.preload(); + env.init(); + + EXPECT_CALL( + mock_rest, + addGetCall("metrics", _) + ).WillOnce(DoAll(SaveArg<1>(&get_metrics_func), Return(true))); + + prometheus_comp.init(); + } + + ::Environment env; + ConfigComponent config; + PrometheusComp prometheus_comp; + StrictMock mock_rest; + StrictMock mock_ml; + NiceMock mock_messaging; + unique_ptr agent_uninstall; + function get_metrics_func; + CPTestTempfile status_file; + string registered_services_file_path; + +}; + +TEST_F(PrometheusCompTest, checkAddingMetricWithEmptyUniqueName) +{ + registered_services_file_path = cptestFnameInSrcDir(string("registered_services.json")); + setConfiguration(registered_services_file_path, "orchestration", "Orchestration registered services"); + string metric_body = "{\n" + " \"metrics\": [\n" + " {\n" + " \"metric_name\": \"watchdogProcessStartupEventsSum\",\n" + " \"unique_name\": \"\",\n" + " \"metric_type\": \"counter\",\n" + " \"metric_description\": \"\",\n" + " \"labels\": \"{method=\\\"post\\\",code=\\\"200\\\"}\",\n" + " \"value\": \"1534\"\n" + " }\n" + " ]\n" + "}"; + + string message_body; + EXPECT_CALL(mock_messaging, sendSyncMessage(_, "/service-metrics", _, _, _)) + .Times(2).WillRepeatedly(Return(HTTPResponse(HTTPStatusCode::HTTP_OK, metric_body))); + + string metric_str = "# TYPE nano_service_restarts_counter counter\n" + "nano_service_restarts_counter{method=\"post\",code=\"200\"} 1534\n\n"; + EXPECT_EQ(metric_str, get_metrics_func()); +} + +TEST_F(PrometheusCompTest, checkAddingMetricWithoutUniqueName) +{ + registered_services_file_path = cptestFnameInSrcDir(string("registered_services.json")); + setConfiguration(registered_services_file_path, "orchestration", "Orchestration registered services"); + string metric_body = "{\n" + " \"metrics\": [\n" + " {\n" + " \"metric_name\": \"watchdogProcessStartupEventsSum\",\n" + " \"unique_name\": \"watchdogProcessStartupEventsSum_Bla bla\",\n" + " \"metric_type\": \"counter\",\n" + " \"metric_description\": \"\",\n" + " \"labels\": \"{method=\\\"post\\\",code=\\\"200\\\"}\",\n" + " \"value\": \"1534\"\n" + " }\n" + " ]\n" + "}"; + + string message_body; + EXPECT_CALL(mock_messaging, sendSyncMessage(_, "/service-metrics", _, _, _)) + .Times(2).WillRepeatedly(Return(HTTPResponse(HTTPStatusCode::HTTP_OK, metric_body))); + + string metric_str = "# TYPE nano_service_restarts_counter counter\n" + "nano_service_restarts_counter{method=\"post\",code=\"200\"} 1534\n\n"; + EXPECT_EQ(metric_str, get_metrics_func()); +} + +TEST_F(PrometheusCompTest, checkAddingMetricWithUniqueName) +{ + registered_services_file_path = cptestFnameInSrcDir(string("registered_services.json")); + setConfiguration(registered_services_file_path, "orchestration", "Orchestration registered services"); + string metric_body = "{\n" + " \"metrics\": [\n" + " {\n" + " \"metric_name\": \"reservedNgenA\",\n" + " \"unique_name\": \"reservedNgenA_WAAP telemetry\",\n" + " \"metric_type\": \"counter\",\n" + " \"metric_description\": \"\",\n" + " \"labels\": \"{method=\\\"post\\\",code=\\\"200\\\"}\",\n" + " \"value\": \"1534\"\n" + " }\n" + " ]\n" + "}"; + + string message_body; + EXPECT_CALL(mock_messaging, sendSyncMessage(_, "/service-metrics", _, _, _)) + .Times(2).WillRepeatedly(Return(HTTPResponse(HTTPStatusCode::HTTP_OK, metric_body))); + + string metric_str = "# TYPE total_requests_counter counter\n" + "total_requests_counter{method=\"post\",code=\"200\"} 1534\n\n"; + EXPECT_EQ(metric_str, get_metrics_func()); +} + diff --git a/components/security_apps/waap/include/buffered_compressed_stream.h b/components/security_apps/waap/include/buffered_compressed_stream.h new file mode 100644 index 0000000..8cf1b2a --- /dev/null +++ b/components/security_apps/waap/include/buffered_compressed_stream.h @@ -0,0 +1,127 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include "compression_utils.h" +#include "debug.h" + +USE_DEBUG_FLAG(D_WAAP_SERIALIZE); + +// Forward declarations +class WaapComponent; + +void yieldIfPossible(const std::string &func, int line); + +#define YIELD_IF_POSSIBLE() yieldIfPossible(__FUNCTION__, __LINE__) + +// +// Buffered output stream that compresses and encrypts data when flushing +// +// Usage example: +// std::stringstream ss; +// BufferedCompressedOutputStream compressed_stream(ss); +// compressed_stream << "Hello, World!"; +// compressed_stream.flush(); // Data is compressed, encrypted, and written to ss +class BufferedCompressedOutputStream : public std::ostream +{ +public: + explicit BufferedCompressedOutputStream(std::ostream &underlying_stream); + ~BufferedCompressedOutputStream(); + + // Manual flush to compress, encrypt and write data + void flush(); + void close(); + +private: + class CompressedBuffer : public std::streambuf, Singleton::Consume + { + public: + explicit CompressedBuffer(std::ostream &underlying_stream); + ~CompressedBuffer(); + + // Public method to flush the buffer + void flushAndClose(); + void flushBuffer(); + + protected: + virtual int overflow(int c) override; + virtual std::streamsize xsputn(const char* s, std::streamsize n) override; + virtual int sync() override; + + private: + // Compress and encrypt buffer; is_last indicates final chunk + bool compressAndEncryptBuffer(bool is_last); + std::ostream &m_underlying_stream; + std::vector m_buffer; + static const size_t BUFFER_SIZE = 16 * 1024; // 16KiB + CompressionStream* m_compression_stream; + bool m_closed; + }; + + std::unique_ptr m_buffer; +}; + + +// Buffered input stream that decrypts and decompresses data when reading +// +// Usage example: +// std::stringstream ss("encrypted compressed data"); +// BufferedCompressedInputStream decompressed_stream(ss); +// std::string line; +// std::getline(decompressed_stream, line); // Data is decrypted and decompressed + +class BufferedCompressedInputStream : public std::istream +{ +public: + explicit BufferedCompressedInputStream(std::istream &underlying_stream); + ~BufferedCompressedInputStream(); + +private: + class DecompressedBuffer : public std::streambuf + { + public: + explicit DecompressedBuffer(std::istream &underlying_stream); + ~DecompressedBuffer(); + + protected: + virtual int underflow() override; + virtual std::streamsize xsgetn(char* s, std::streamsize n) override; + + private: + bool fillBuffer(); + bool processNextChunk(); + bool decryptChunk(const std::vector &encrypted_chunk, std::vector &decrypted_chunk); + bool decompressChunk(const std::vector &compressed_chunk, std::vector &decompressed_chunk); + + std::istream &m_underlying_stream; + std::vector m_buffer; // Output buffer for decompressed data + std::vector m_encrypted_buffer; // Buffer for encrypted data from stream + std::vector m_compressed_buffer; // Buffer for decrypted but still compressed data + std::vector m_decompressed_buffer; // Buffer for decompressed data chunks + size_t m_decompressed_pos; // Current position in decompressed buffer + + static const size_t OUTPUT_BUFFER_SIZE = 64 * 1024; // 64KiB output buffer + static const size_t CHUNK_SIZE = 16 * 1024; // 16KiB chunks for processing + + CompressionStream* m_compression_stream; + bool m_eof_reached; + bool m_stream_finished; // Whether we've finished processing the entire stream + }; + + std::unique_ptr m_buffer; +}; diff --git a/components/security_apps/waap/waap_clib/AssertionRegexes.h b/components/security_apps/waap/waap_clib/AssertionRegexes.h new file mode 100644 index 0000000..000aa45 --- /dev/null +++ b/components/security_apps/waap/waap_clib/AssertionRegexes.h @@ -0,0 +1,35 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __ASSERTION_REGEXES_H__ +#define __ASSERTION_REGEXES_H__ + +#include + +namespace Waap { +namespace AssertionRegexes { + +// Static const boost regexes used in processAssertions() function +// These regexes detect various assertion patterns in regex strings +// The patterns are in a separate file to avoid this codestyle checker issue: +// "error T009: comma should be followed by whitespace" +static const boost::regex reStartNonWordBehind(R"(\(\?:=\]\|\$\))"); // (?=[^\w?<>:=]|$) +static const boost::regex rePathTraversalStart(R"(\(\? +#include +#include +#include + +using std::string; +using std::unordered_map; +using std::unordered_set; +using std::ostream; +using std::istream; + +// ------------------------------- +// Interning helpers +// ------------------------------- +const std::string* +UnifiedIndicatorsContainer::internValue(const std::string &value) +{ + auto it = valuePool.find(value); + if (it == valuePool.end()) it = valuePool.insert(value).first; + return &(*it); +} + +const std::string* +UnifiedIndicatorsContainer::internSource(const std::string &source) +{ + auto it = sourcesPool.find(source); + if (it == sourcesPool.end()) it = sourcesPool.insert(source).first; + return &(*it); +} + +// ------------------------------- +// Public API +// ------------------------------- +void +UnifiedIndicatorsContainer::addIndicator( + const std::string &key, + const std::string &value, + IndicatorType type, + const std::string &source) +{ + auto &filters = filtersDataPerKey[key]; + + const std::string *valPtr = internValue(value); + const std::string *srcPtr = internSource(source); + + FilterData &bucket = (type == IndicatorType::KEYWORD) + ? filters.getIndicators() + : filters.getTypes(); + + auto &srcSet = bucket[const_cast(valPtr)]; + srcSet.insert(const_cast(srcPtr)); + + // Update per-key total sources union + filters.getTotalSources().insert(const_cast(srcPtr)); +} + +void UnifiedIndicatorsContainer::addEntry(const Entry &entry) +{ + const std::string *srcPtr = internSource(entry.sourceId); + if (entry.isTrusted && srcPtr) { + trustedSources.insert(srcPtr); + } + for (const auto &val : entry.indicators) { + addIndicator(entry.key, val, IndicatorType::KEYWORD, entry.sourceId); + } + for (const auto &val : entry.types) { + addIndicator(entry.key, val, IndicatorType::TYPE, entry.sourceId); + } +} + +bool +UnifiedIndicatorsContainer::hasIndicator( + const std::string &key, + const std::string &value, + IndicatorType type) const +{ + auto keyIt = filtersDataPerKey.find(key); + if (keyIt == filtersDataPerKey.end()) return false; + + const Filters &filters = keyIt->second; + const FilterData &bucket = (type == IndicatorType::KEYWORD) + ? filters.getIndicators() + : filters.getTypes(); + + auto valIt = valuePool.find(value); + if (valIt == valuePool.end()) return false; + + auto it = bucket.find(const_cast(&(*valIt))); + return it != bucket.end(); +} + +std::unordered_set +UnifiedIndicatorsContainer::getSources( + const std::string &key, + const std::string &value, + IndicatorType type) const +{ + std::unordered_set out; + + auto keyIt = filtersDataPerKey.find(key); + if (keyIt == filtersDataPerKey.end()) return out; + + const Filters &filters = keyIt->second; + const FilterData &bucket = (type == IndicatorType::KEYWORD) + ? filters.getIndicators() + : filters.getTypes(); + + auto valIt = valuePool.find(value); + if (valIt == valuePool.end()) return out; + + auto it = bucket.find(const_cast(&(*valIt))); + if (it == bucket.end()) return out; + + for (auto p : it->second) if (p) out.insert(*p); + return out; +} + +size_t +UnifiedIndicatorsContainer::getIndicatorCount() const +{ + size_t count = 0; + for (const auto &k : filtersDataPerKey) { + count += k.second.getIndicators().size(); + count += k.second.getTypes().size(); + } + return count; +} + +size_t +UnifiedIndicatorsContainer::getKeyCount() const +{ + return filtersDataPerKey.size(); +} + +size_t +UnifiedIndicatorsContainer::getValuePoolSize() const +{ + return valuePool.size(); +} + +void +UnifiedIndicatorsContainer::clear() +{ + filtersDataPerKey.clear(); + valuePool.clear(); + sourcesPool.clear(); + trustedSources.clear(); +} + +// ------------------------------- +// Serialization +// ------------------------------- +void +UnifiedIndicatorsContainer::serialize(std::ostream &stream) const +{ + cereal::JSONOutputArchive ar(stream); + + // Write trustedSources as a named array under the root object (global trusted only) + ar.setNextName("trustedSources"); + ar.startNode(); + cereal::size_type n_trusted = static_cast(trustedSources.size()); + ar(cereal::make_size_tag(n_trusted)); + for (auto p : trustedSources) ar(p ? *p : std::string()); + ar.finishNode(); + + // logger: object of keys -> { totalSources: [...], indicators: {...}, types: {...} } + ar.setNextName("logger"); + ar.startNode(); + for (const auto &k : filtersDataPerKey) { + ar.setNextName(k.first.c_str()); + ar.startNode(); + + // totalSources section (union per key) + ar.setNextName("totalSources"); + ar.startNode(); + const auto &ts = k.second.getTotalSources(); + cereal::size_type ts_sz = static_cast(ts.size()); + ar(cereal::make_size_tag(ts_sz)); + for (auto p : ts) ar(p ? *p : std::string()); + ar.finishNode(); + + // indicators section + ar.setNextName("indicators"); + ar.startNode(); + for (const auto &kv : k.second.getIndicators()) { + const std::string *val = kv.first; + ar.setNextName(val ? val->c_str() : ""); + ar.startNode(); + cereal::size_type sz = static_cast(kv.second.size()); + ar(cereal::make_size_tag(sz)); + for (auto p : kv.second) ar(p ? *p : std::string()); + ar.finishNode(); // end value array + } + ar.finishNode(); // end indicators + + // types section + ar.setNextName("types"); + ar.startNode(); + for (const auto &kv : k.second.getTypes()) { + const std::string *val = kv.first; + ar.setNextName(val ? val->c_str() : ""); + ar.startNode(); + cereal::size_type sz = static_cast(kv.second.size()); + ar(cereal::make_size_tag(sz)); + for (auto p : kv.second) ar(p ? *p : std::string()); + ar.finishNode(); // end value array + } + ar.finishNode(); // end types + + ar.finishNode(); // end key object + } + ar.finishNode(); // end logger +} + +void +UnifiedIndicatorsContainer::deserialize(std::istream &stream) +{ + cereal::JSONInputArchive ar(stream); + clear(); + + // trustedSources (optional) as a named array + try { + ar.setNextName("trustedSources"); + ar.startNode(); + cereal::size_type n = 0; + ar(cereal::make_size_tag(n)); + for (cereal::size_type i = 0; i < n; ++i) { + std::string s; ar(s); + const std::string *p = internSource(s); + trustedSources.insert(p); + } + ar.finishNode(); + } catch (...) { + // Field may be absent + } + + // logger + try { + ar.setNextName("logger"); + ar.startNode(); + while (true) { + const auto node_name = ar.getNodeName(); + if (!node_name) break; + + std::string key = node_name; + ar.startNode(); // enter key object + + // totalSources (optional) + try { + ar.setNextName("totalSources"); + ar.startNode(); + cereal::size_type ts_sz = 0; + ar(cereal::make_size_tag(ts_sz)); + auto &ts = filtersDataPerKey[key].getTotalSources(); + for (cereal::size_type i = 0; i < ts_sz; ++i) { + std::string s; ar(s); + const std::string *p = internSource(s); + ts.insert(const_cast(p)); + } + ar.finishNode(); + } catch (...) { + // no totalSources + } + + // indicators + try { + ar.setNextName("indicators"); + ar.startNode(); + while (true) { + const auto val_name = ar.getNodeName(); + if (!val_name) break; + std::string value = val_name; + ar.startNode(); + cereal::size_type sz = 0; + ar(cereal::make_size_tag(sz)); + for (cereal::size_type i = 0; i < sz; ++i) { + std::string src; ar(src); + addIndicator(key, value, IndicatorType::KEYWORD, src); + } + ar.finishNode(); // end value array + } + ar.finishNode(); + } catch (...) { + // no indicators + } + + // types + try { + ar.setNextName("types"); + ar.startNode(); + while (true) { + const auto val_name = ar.getNodeName(); + if (!val_name) break; + std::string value = val_name; + ar.startNode(); + cereal::size_type sz = 0; + ar(cereal::make_size_tag(sz)); + for (cereal::size_type i = 0; i < sz; ++i) { + std::string src; ar(src); + addIndicator(key, value, IndicatorType::TYPE, src); + } + ar.finishNode(); // end value array + } + ar.finishNode(); + } catch (...) { + // no types + } + + ar.finishNode(); // finish key object + } + ar.finishNode(); // finish logger + } catch (...) { + // Field may be absent + } +} + +bool UnifiedIndicatorsContainer::isTrustedSource(const std::string &source) const { + // Linear check via interning: attempt to find an interned pointer matching source + // We maintain sourcesPool mapping actual std::string storage, so compare by value. + for (const auto &p : trustedSources) { + if (p && *p == source) return true; + } + return false; +} diff --git a/components/security_apps/waap/waap_clib/UnifiedIndicatorsContainer.h b/components/security_apps/waap/waap_clib/UnifiedIndicatorsContainer.h new file mode 100644 index 0000000..cef7535 --- /dev/null +++ b/components/security_apps/waap/waap_clib/UnifiedIndicatorsContainer.h @@ -0,0 +1,220 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "i_serialize.h" +// #include "custom_serialization.h" + +// Indicator type enumeration for type safety and compactness +enum class IndicatorType : uint8_t { + KEYWORD = 0, + TYPE = 1 +}; + +typedef std::unordered_set SourcesSet; +typedef std::unordered_map FilterData; + +// Proposed name for `Filters`: KeyLog (represents the per-key section under "logger") +// Keeping class name as Filters to minimize changes; can be renamed in a follow-up. +class Filters { +public: + Filters() = default; + ~Filters() = default; + + // Const overload for cereal serialization + template + void serialize(Archive& ar) const { + std::vector totalSourcesVec; + std::unordered_map> indicatorsMap, typesMap; + + for (auto p : totalSources) { + if (p) totalSourcesVec.push_back(*p); + } + for (const auto& kv : indicators) { + std::string key = kv.first ? *kv.first : std::string(); + std::vector sources; + for (auto p : kv.second) { + if (p) sources.push_back(*p); + } + indicatorsMap[key] = sources; + } + for (const auto& kv : types) { + std::string key = kv.first ? *kv.first : std::string(); + std::vector sources; + for (auto p : kv.second) { + if (p) sources.push_back(*p); + } + typesMap[key] = sources; + } + + ar( + cereal::make_nvp("totalSources", totalSourcesVec), + cereal::make_nvp("indicators", indicatorsMap), + cereal::make_nvp("types", typesMap) + ); + } + + // Accessors for container implementation + FilterData & getIndicators() { return indicators; } + FilterData & getTypes() { return types; } + const FilterData & getIndicators() const { return indicators; } + const FilterData & getTypes() const { return types; } + + // Per-key total sources (union of sources from indicators and types) + SourcesSet & getTotalSources() { return totalSources; } + const SourcesSet & getTotalSources() const { return totalSources; } + +private: + FilterData indicators; + FilterData types; + SourcesSet totalSources; +}; + +// Unified indicators container with string interning and memory optimization +class UnifiedIndicatorsContainer { +public: + // Batch entry input + struct Entry { + std::string key; + std::string sourceId; + bool isTrusted = false; + std::vector indicators; // values treated as KEYWORD + std::vector types; // values treated as TYPE + }; + void addEntry(const Entry& entry); + + // Check if an indicator exists + bool hasIndicator(const std::string& key, const std::string& value, IndicatorType type) const; + + // Get all sources for a specific indicator + std::unordered_set getSources(const std::string& key, + const std::string& value, + IndicatorType type) const; + + // Statistics and metrics + size_t getIndicatorCount() const; + size_t getKeyCount() const; + size_t getValuePoolSize() const; + // Returns true if the given source string is marked as trusted (appears in the global trustedSources set) + bool isTrustedSource(const std::string &source) const; + + // Container management + void clear(); + + // Serialization for cross-agent compatibility + // void serialize(std::ostream& stream) const; + template + void serialize(Archive& ar) const { + // trustedSources as array + std::vector trusted_srcs; + for (auto p : trustedSources) { + if (p) trusted_srcs.push_back(*p); + } + ar.setNextName("trustedSources"); + ar.startNode(); + cereal::size_type n_trusted = static_cast(trusted_srcs.size()); + ar(cereal::make_size_tag(n_trusted)); + for (const auto &s : trusted_srcs) ar(s); + ar.finishNode(); + + // logger: object of keys -> { totalSources: [...], indicators: {...}, types: {...} } + ar.setNextName("logger"); + ar.startNode(); + for (const auto &k : filtersDataPerKey) { + ar.setNextName(k.first.c_str()); + ar.startNode(); + + // totalSources section (union per key) + ar.setNextName("totalSources"); + ar.startNode(); + const auto &ts = k.second.getTotalSources(); + cereal::size_type ts_sz = static_cast(ts.size()); + ar(cereal::make_size_tag(ts_sz)); + for (auto p : ts) ar(p ? *p : std::string()); + ar.finishNode(); + + // indicators section + ar.setNextName("indicators"); + ar.startNode(); + for (const auto &kv : k.second.getIndicators()) { + const std::string *val = kv.first; + ar.setNextName(val ? val->c_str() : ""); + ar.startNode(); + cereal::size_type sz = static_cast(kv.second.size()); + ar(cereal::make_size_tag(sz)); + for (auto p : kv.second) ar(p ? *p : std::string()); + ar.finishNode(); // end value array + } + ar.finishNode(); // end indicators + + // types section + ar.setNextName("types"); + ar.startNode(); + for (const auto &kv : k.second.getTypes()) { + const std::string *val = kv.first; + ar.setNextName(val ? val->c_str() : ""); + ar.startNode(); + cereal::size_type sz = static_cast(kv.second.size()); + ar(cereal::make_size_tag(sz)); + for (auto p : kv.second) ar(p ? *p : std::string()); + ar.finishNode(); // end value array + } + ar.finishNode(); // end types + + ar.finishNode(); // end key object + } + ar.finishNode(); // end logger + } + void serialize(std::ostream &stream) const; + void deserialize(std::istream& stream); + +private: + // Single indicator add + void addIndicator(const std::string& key, const std::string& value, + IndicatorType type, const std::string& source); + + // String interning pool for values + std::unordered_set valuePool; + + // String interning pool for sources + std::unordered_set sourcesPool; + + // Main storage: key -> Filters + std::unordered_map filtersDataPerKey; + + // Global set of trusted sources + std::unordered_set trustedSources; + + // Helper methods + const std::string* internValue(const std::string& value); + const std::string* internSource(const std::string& source); +}; +// UnifiedIndicatorsLogPost for REST, compatible with cereal and messaging +class UnifiedIndicatorsLogPost : public RestGetFile { +public: + UnifiedIndicatorsLogPost(std::shared_ptr container_ptr) + { + unifiedIndicators = std::move(*container_ptr); + } + +private: + C2S_PARAM(UnifiedIndicatorsContainer, unifiedIndicators); +}; diff --git a/components/security_apps/waap/waap_clib/WaapHyperscanEngine.cc b/components/security_apps/waap/waap_clib/WaapHyperscanEngine.cc new file mode 100644 index 0000000..b0b191b --- /dev/null +++ b/components/security_apps/waap/waap_clib/WaapHyperscanEngine.cc @@ -0,0 +1,767 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "WaapHyperscanEngine.h" +#include "Signatures.h" +#include "ScanResult.h" +#include "WaapSampleValue.h" +#include "Waf2Regex.h" +#include "Waf2Util.h" +#include "debug.h" +#include +#include +#include +#include +#include + +#ifdef USE_HYPERSCAN +#include "hs.h" +#endif + +USE_DEBUG_FLAG(D_WAAP_SAMPLE_SCAN); +USE_DEBUG_FLAG(D_WAAP_HYPERSCAN); + +#ifdef USE_HYPERSCAN +static const unsigned int HS_STANDARD_FLAGS = HS_FLAG_CASELESS | HS_FLAG_SOM_LEFTMOST; +#endif // USE_HYPERSCAN +static const bool matchOriginalPattern = true; +static const size_t maxRegexValidationMatches = 10; + +class WaapHyperscanEngine::Impl { +public: + struct PatternInfo { + std::string originalPattern; + std::string hyperscanPattern; + std::string groupName; + std::string category; // "keywords", "specific_accuracy", "patterns" + bool isFastReg; + bool isEvasion; + std::string regexSource; // "specific_acuracy_keywords_regex", "words_regex", "pattern_regex" + Signatures::AssertionFlags assertionFlags; // Zero-length assertion flags + std::unique_ptr originalRegex; // Precompiled original pattern for validation + + PatternInfo() : isFastReg(false), isEvasion(false) {} + }; + + struct MatchContext { + const WaapHyperscanEngine::Impl* engine; + const std::string* sampleText; + std::vector* keyword_matches; + std::vector* regex_matches; + Waap::Util::map_of_stringlists_t* found_patterns; + bool longTextFound; + bool binaryDataFound; + bool includePatternRegex; + bool includeKeywordRegex; + + // Per-signature tracking of last match end (pattern id => last end offset) + std::unordered_map lastMatchEndPerSignature; + }; + + Impl(); + ~Impl(); + + bool initialize(const std::shared_ptr& signatures); + void scanSample(const SampleValue& sample, + Waf2ScanResult& res, + bool longTextFound, + bool binaryDataFound, + bool includeKeywordRegex, + bool includePatternRegex) const; + bool isInitialized() const { return m_isInitialized; } + size_t getPatternCount() const { return m_patternInfos.size(); } + size_t getCompiledPatternCount() const { return m_compiledPatternCount; } + size_t getFailedPatternCount() const { return m_failedPatternCount; } + +private: +#ifdef USE_HYPERSCAN + hs_database_t* m_keywordDatabase; + hs_database_t* m_patternDatabase; + hs_scratch_t* m_keywordScratch; + hs_scratch_t* m_patternScratch; +#endif + + std::shared_ptr m_Signatures; + std::vector m_patternInfos; + bool m_isInitialized; + size_t m_compiledPatternCount; + size_t m_failedPatternCount; + + // Helper methods + bool compileHyperscanDatabases(const std::shared_ptr& signatures); + void loadPrecompiledPatterns(const std::shared_ptr& signatures); + + // use an ordered set to keep PCRE2-validated matches sorted and unique in input order + // LCOV_EXCL_START Reason: Trivial + struct Match { + size_t from; + size_t to; + Match(size_t from, size_t to) : from(from), to(to) {} + bool operator<(const Match& other) const { + return (from < other.from) || (from == other.from && to < other.to); + } + }; + + // LCOV_EXCL_STOP + + // Assertion validation helpers + bool validateAssertions(const std::string& sampleText, + size_t matchStart, + size_t matchEnd, + const PatternInfo& patternInfo, + std::set &foundMatches, + size_t maxMatches) const; + static bool isWordChar(char c); + static bool isNonWordSpecialChar(char c); + +#ifdef USE_HYPERSCAN + // Hyperscan callback function + static int onMatch(unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void* context); + + void processMatch(unsigned int id, + unsigned long long from, + unsigned long long to, + MatchContext* context) const; + + void identifyFailingPatterns(const std::vector& patterns, + const std::vector& hsPatterns, + const std::string& logPrefix) { + for (size_t i = 0; i < patterns.size(); ++i) { + const char *single_pattern = patterns[i].c_str(); + unsigned int single_flag = HS_STANDARD_FLAGS; + unsigned int single_id = static_cast(i); + hs_database_t *test_db = nullptr; + hs_compile_error_t *single_err = nullptr; + hs_error_t single_result = hs_compile_multi(&single_pattern, + &single_flag, + &single_id, + 1, + HS_MODE_BLOCK, + nullptr, + &test_db, + &single_err); + if (single_result != HS_SUCCESS) { + std::string additional_info = ""; + if (i < hsPatterns.size()) { + const auto &hsPattern = hsPatterns[i]; + additional_info = " | Category: " + hsPattern.category + + " | Group: " + hsPattern.groupName + + " | Source: " + hsPattern.regexSource; + if (!hsPattern.originalPattern.empty() && + hsPattern.originalPattern != hsPattern.hyperscanPattern) { + additional_info += " | Original: '" + hsPattern.originalPattern + "'"; + } + } + dbgWarning(D_WAAP_HYPERSCAN) + << logPrefix << " [" << i << "]: '" << patterns[i] + << "' - Error: " << (single_err ? single_err->message : "unknown") << additional_info; + if (single_err) { + hs_free_compile_error(single_err); + single_err = nullptr; + } + } else { + if (test_db) { + hs_free_database(test_db); + test_db = nullptr; + } + } + if (single_err) { + hs_free_compile_error(single_err); + single_err = nullptr; + } + } + } +#endif // USE_HYPERSCAN +}; + +WaapHyperscanEngine::Impl::Impl() + : +#ifdef USE_HYPERSCAN + m_keywordDatabase(nullptr), m_patternDatabase(nullptr), m_keywordScratch(nullptr), m_patternScratch(nullptr), +#endif // USE_HYPERSCAN + m_isInitialized(false), m_compiledPatternCount(0), m_failedPatternCount(0) +{ +} + +WaapHyperscanEngine::Impl::~Impl() +{ +#ifdef USE_HYPERSCAN + if (m_keywordScratch) hs_free_scratch(m_keywordScratch); + if (m_patternScratch) hs_free_scratch(m_patternScratch); + if (m_keywordDatabase) hs_free_database(m_keywordDatabase); + if (m_patternDatabase) hs_free_database(m_patternDatabase); +#endif +} + +bool WaapHyperscanEngine::Impl::initialize(const std::shared_ptr &signatures) +{ + if (!signatures) { + dbgWarning(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine::initialize: null signatures"; + return false; + } + m_Signatures = signatures; + +#ifdef USE_HYPERSCAN + m_isInitialized = compileHyperscanDatabases(signatures); + if (m_isInitialized) { + dbgInfo(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine initialized successfully. " + << "Compiled: " << m_compiledPatternCount << ", Failed: " << m_failedPatternCount; + } else { + dbgWarning(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine initialization failed"; + } + return m_isInitialized; +#else + dbgInfo(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine: Hyperscan not available on this platform"; + return false; +#endif +} + +bool WaapHyperscanEngine::Impl::compileHyperscanDatabases(const std::shared_ptr &signatures) +{ +#ifdef USE_HYPERSCAN + // Load precompiled patterns from signatures instead of extracting at runtime + loadPrecompiledPatterns(signatures); + + std::vector keywordPatterns; + std::vector patternRegexPatterns; + + // Collect keyword patterns (from specific_accuracy and keywords categories) + auto keywordAssertionFlags = signatures->getKeywordAssertionFlags(); + for (size_t i = 0; i < signatures->getKeywordHyperscanPatterns().size(); ++i) { + const auto &hsPattern = signatures->getKeywordHyperscanPatterns()[i]; + keywordPatterns.push_back(hsPattern.hyperscanPattern); + + PatternInfo info; + info.originalPattern = hsPattern.originalPattern; + info.hyperscanPattern = hsPattern.hyperscanPattern; + info.category = hsPattern.category; + info.regexSource = hsPattern.regexSource; + info.groupName = hsPattern.groupName; + info.isFastReg = hsPattern.isFastReg; + info.isEvasion = hsPattern.isEvasion; + + // Set assertion flags if available + if (i < keywordAssertionFlags.size()) { + info.assertionFlags = keywordAssertionFlags[i]; + } + + // Compile original regex pattern for validation only when matchOriginal flag is set + if (!info.originalPattern.empty() && matchOriginalPattern) { + bool regexError = false; + info.originalRegex = std::make_unique( + info.originalPattern, regexError, "ValidationRegex_" + info.groupName + "_" + std::to_string(i)); + if (regexError) { + dbgWarning(D_WAAP_HYPERSCAN) + << "Failed to compile original regex for pattern: " << info.originalPattern + << " (group: " << info.groupName << ")"; + info.originalRegex.reset(); // Clear failed regex + } + } + + m_patternInfos.push_back(std::move(info)); + } + + // Collect pattern regex patterns (from patterns category) + auto patternAssertionFlags = signatures->getPatternAssertionFlags(); + for (size_t i = 0; i < signatures->getPatternHyperscanPatterns().size(); ++i) { + const auto &hsPattern = signatures->getPatternHyperscanPatterns()[i]; + patternRegexPatterns.push_back(hsPattern.hyperscanPattern); + + PatternInfo info; + info.originalPattern = hsPattern.originalPattern; + info.hyperscanPattern = hsPattern.hyperscanPattern; + info.category = hsPattern.category; + info.regexSource = hsPattern.regexSource; + info.groupName = hsPattern.groupName; + info.isFastReg = hsPattern.isFastReg; + info.isEvasion = hsPattern.isEvasion; + + // Set assertion flags if available + if (i < patternAssertionFlags.size()) { + info.assertionFlags = patternAssertionFlags[i]; + } + + // Compile original regex pattern for validation only when matchOriginal flag is set + if (!info.originalPattern.empty() && matchOriginalPattern) { + bool regexError = false; + size_t patternIndex = keywordPatterns.size() + i; // Offset by keyword patterns count + info.originalRegex = std::make_unique(info.originalPattern, regexError, + "ValidationRegex_" + info.groupName + "_" + std::to_string(patternIndex)); + if (regexError) { + dbgWarning(D_WAAP_HYPERSCAN) + << "Failed to compile original regex for pattern: " << info.originalPattern + << " (group: " << info.groupName << ")"; + info.originalRegex.reset(); // Clear failed regex + } + } + + m_patternInfos.push_back(std::move(info)); + } + + dbgInfo(D_WAAP_HYPERSCAN) << "Using precompiled patterns: " + << "keywords=" << keywordPatterns.size() + << ", patterns=" << patternRegexPatterns.size(); + + // Compile keyword database (specific_acuracy_keywords_regex + words_regex) + size_t total_ids = 0; + if (!keywordPatterns.empty()) { + std::vector c_patterns; + std::vector flags; + std::vector ids; + + for (size_t i = 0; i < keywordPatterns.size(); ++i) { + c_patterns.push_back(keywordPatterns[i].c_str()); + flags.push_back(HS_STANDARD_FLAGS); + ids.push_back(static_cast(total_ids++)); + } + + // Defensive checks before calling hs_compile_multi + if (c_patterns.size() != flags.size() || c_patterns.size() != ids.size()) { + dbgWarning(D_WAAP_HYPERSCAN) << "Pattern, flag, and id arrays are not the same size!"; + return false; + } + if (c_patterns.empty()) { + dbgWarning(D_WAAP_HYPERSCAN) << "No patterns to compile!"; + return false; + } + dbgInfo(D_WAAP_HYPERSCAN) << "Compiling " << c_patterns.size() + << " keyword patterns with hs_compile_multi. First pattern: '" + << keywordPatterns[0] << "'"; + + hs_compile_error_t *compile_err = nullptr; + hs_error_t result = + hs_compile_multi(c_patterns.data(), + flags.data(), + ids.data(), + static_cast(c_patterns.size()), + HS_MODE_BLOCK, + nullptr, + &m_keywordDatabase, + &compile_err); + + if (result != HS_SUCCESS) { + std::string error_msg = compile_err ? compile_err->message : "unknown error"; + dbgWarning(D_WAAP_HYPERSCAN) << "Failed to compile keyword database: " << error_msg; + + // Try to identify the specific failing pattern(s) + if (compile_err) { + dbgWarning(D_WAAP_HYPERSCAN) << "Attempting to identify failing keyword pattern(s)..."; + auto keywordHsPatterns = signatures->getKeywordHyperscanPatterns(); + std::vector keywordPatternInfos; + keywordPatternInfos.reserve(keywordHsPatterns.size()); + for (const auto& hsPattern : keywordHsPatterns) { + keywordPatternInfos.emplace_back(); + PatternInfo& info = keywordPatternInfos.back(); + info.originalPattern = hsPattern.originalPattern; + info.hyperscanPattern = hsPattern.hyperscanPattern; + info.category = hsPattern.category; + info.regexSource = hsPattern.regexSource; + info.groupName = hsPattern.groupName; + info.isFastReg = hsPattern.isFastReg; + info.isEvasion = hsPattern.isEvasion; + } + identifyFailingPatterns(keywordPatterns, keywordPatternInfos, "Failing keyword pattern"); + } + if (compile_err) { + hs_free_compile_error(compile_err); + compile_err = nullptr; + } + return false; + } + + if (hs_alloc_scratch(m_keywordDatabase, &m_keywordScratch) != HS_SUCCESS) { + dbgWarning(D_WAAP_HYPERSCAN) << "Failed to allocate keyword scratch space"; + return false; + } + + m_compiledPatternCount += keywordPatterns.size(); + } + + // Compile pattern database (pattern_regex) + if (!patternRegexPatterns.empty()) { + std::vector c_patterns; + std::vector flags; + std::vector ids; + + for (size_t i = 0; i < patternRegexPatterns.size(); ++i) { + c_patterns.push_back(patternRegexPatterns[i].c_str()); + flags.push_back(HS_STANDARD_FLAGS); + ids.push_back(static_cast(total_ids++)); + } + + // Defensive checks before calling hs_compile_multi + if (c_patterns.size() != flags.size() || c_patterns.size() != ids.size()) { + dbgWarning(D_WAAP_HYPERSCAN) + << "Pattern, flag, and id arrays are not the same size! (patternRegexPatterns)"; + return false; + } + if (c_patterns.empty()) { + dbgWarning(D_WAAP_HYPERSCAN) << "No pattern regex patterns to compile!"; + return false; + } + dbgInfo(D_WAAP_HYPERSCAN) << "Compiling " << c_patterns.size() + << " pattern regex patterns with hs_compile_multi. First pattern: '" + << patternRegexPatterns[0] << "'"; + + hs_compile_error_t *compile_err = nullptr; + hs_error_t result = + hs_compile_multi(c_patterns.data(), + flags.data(), + ids.data(), + static_cast(c_patterns.size()), + HS_MODE_BLOCK, + nullptr, + &m_patternDatabase, + &compile_err); + + if (result != HS_SUCCESS) { + std::string error_msg = compile_err ? compile_err->message : "unknown error"; + dbgWarning(D_WAAP_HYPERSCAN) << "Failed to compile pattern database: " << error_msg; + + // Try to identify the specific failing pattern(s) + if (compile_err) { + dbgWarning(D_WAAP_HYPERSCAN) << "Attempting to identify failing pattern regex pattern(s)..."; + auto patternHsPatterns = signatures->getPatternHyperscanPatterns(); + std::vector patternPatternInfos; + patternPatternInfos.reserve(patternHsPatterns.size()); + for (const auto& hsPattern : patternHsPatterns) { + patternPatternInfos.emplace_back(); + PatternInfo& info = patternPatternInfos.back(); + info.originalPattern = hsPattern.originalPattern; + info.hyperscanPattern = hsPattern.hyperscanPattern; + info.category = hsPattern.category; + info.regexSource = hsPattern.regexSource; + info.groupName = hsPattern.groupName; + info.isFastReg = hsPattern.isFastReg; + info.isEvasion = hsPattern.isEvasion; + } + identifyFailingPatterns(patternRegexPatterns, patternPatternInfos, "Failing pattern regex"); + } + if (compile_err) { + hs_free_compile_error(compile_err); + compile_err = nullptr; + } + return false; + } + + if (hs_alloc_scratch(m_patternDatabase, &m_patternScratch) != HS_SUCCESS) { + dbgWarning(D_WAAP_HYPERSCAN) << "Failed to allocate pattern scratch space"; + return false; + } + + m_compiledPatternCount += patternRegexPatterns.size(); + } + + return true; +#else // USE_HYPERSCAN + return false; +#endif // USE_HYPERSCAN +} + +void WaapHyperscanEngine::Impl::loadPrecompiledPatterns(const std::shared_ptr &signatures) +{ + // This method is called to initialize any additional pattern processing if needed + // For now, the patterns are directly accessed from the signatures object + dbgTrace(D_WAAP_HYPERSCAN) << "Loading precompiled patterns from Signatures"; + m_Signatures = signatures; +} + +#ifdef USE_HYPERSCAN +int WaapHyperscanEngine::Impl::onMatch(unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void *context) +{ + MatchContext *ctx = static_cast(context); + ctx->engine->processMatch(id, from, to, ctx); + return 0; // Continue scanning +} + +void WaapHyperscanEngine::Impl::processMatch(unsigned int id, + unsigned long long from, + unsigned long long to, + MatchContext *context) const +{ + if (id >= m_patternInfos.size()) { + dbgWarning(D_WAAP_HYPERSCAN) << "Invalid pattern ID: " << id; + return; + } + + const PatternInfo &info = m_patternInfos[id]; + const std::string &sampleText = *context->sampleText; + size_t start = static_cast(from); + size_t end = static_cast(to); + + if (end > sampleText.length()) end = sampleText.length(); + if (start >= end) return; + + // skip overlaps for this pattern + size_t &lastEnd = context->lastMatchEndPerSignature[id]; + if (start < lastEnd) { + dbgTrace(D_WAAP_HYPERSCAN) << "Skipping overlapping match for pattern id=" << id << " start=" << start + << " lastEnd=" << lastEnd << ", match: '" << sampleText.substr(start, end - start) + << "'"; + return; + } + + std::set foundMatches; + if (!validateAssertions(sampleText, start, end, info, foundMatches, maxRegexValidationMatches)) return; + + for (const auto &match : foundMatches) { + std::string matchedText = sampleText.substr(match.from, match.to - match.from); + std::string word = matchedText; + + dbgTrace(D_WAAP_HYPERSCAN) << " match='" << word << "' id='" << id << "' group='" << info.groupName + << "' category=" << info.category; + + if (context->binaryDataFound && word.size() <= 2) { + dbgTrace(D_WAAP_HYPERSCAN) + << "Will not add a short keyword '" << word << "' because binaryData was found"; + continue; + } + + if (context->includeKeywordRegex && (info.category == "keywords" || info.category == "specific_accuracy")) { + m_Signatures->processRegexMatch(info.groupName, matchedText, word, *context->keyword_matches, + *context->found_patterns, context->longTextFound, + context->binaryDataFound); + } else if (context->includePatternRegex && info.category == "patterns") { + m_Signatures->processRegexMatch(info.groupName, matchedText, word, *context->regex_matches, + *context->found_patterns, context->longTextFound, + context->binaryDataFound); + } + lastEnd = std::max(lastEnd, match.to); + } +} +#endif // USE_HYPERSCAN + +void WaapHyperscanEngine::Impl::scanSample(const SampleValue &sample, Waf2ScanResult &res, bool longTextFound, + bool binaryDataFound, bool includeKeywordRegex, bool includePatternRegex) const +{ +#ifdef USE_HYPERSCAN + if (!m_isInitialized) { + dbgTrace(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine: not initialized, skipping scan"; + return; + } + + const std::string &sampleText = sample.getSampleString(); + + MatchContext context; + context.engine = this; + context.sampleText = &sampleText; + context.keyword_matches = &res.keyword_matches; + context.regex_matches = &res.regex_matches; + context.found_patterns = &res.found_patterns; + context.longTextFound = longTextFound; + context.binaryDataFound = binaryDataFound; + context.includePatternRegex = includePatternRegex; + context.includeKeywordRegex = includeKeywordRegex; + + context.lastMatchEndPerSignature.clear(); + dbgTrace(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine::scanSample: scanning '" << sampleText + << "' longTextFound=" << longTextFound << " binaryDataFound=" << binaryDataFound + << " includeKeywordRegex=" << includeKeywordRegex + << " includePatternRegex=" << includePatternRegex; + + if (includeKeywordRegex && m_keywordDatabase && m_keywordScratch) { + hs_error_t result = + hs_scan(m_keywordDatabase, sampleText.c_str(), static_cast(sampleText.length()), 0, + m_keywordScratch, onMatch, &context); + + if (result != HS_SUCCESS) { + dbgWarning(D_WAAP_HYPERSCAN) << "Keyword database scan failed: " << result; + } + } + + if (includePatternRegex && m_patternDatabase && m_patternScratch) { + hs_error_t result = + hs_scan(m_patternDatabase, sampleText.c_str(), static_cast(sampleText.length()), 0, + m_patternScratch, onMatch, &context); + + if (result != HS_SUCCESS) { + dbgWarning(D_WAAP_HYPERSCAN) << "Pattern database scan failed: " << result; + } + } + + dbgTrace(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine::scanSample: found " << res.keyword_matches.size() + << " keyword matches, " << res.regex_matches.size() << " regex matches"; +#else + dbgWarning(D_WAAP_HYPERSCAN) << "WaapHyperscanEngine::scanSample called but Hyperscan not available"; +#endif +} + +bool WaapHyperscanEngine::Impl::validateAssertions(const std::string &sampleText, size_t matchStart, size_t matchEnd, + const PatternInfo &patternInfo, std::set &foundMatches, + size_t maxMatches) const +{ + foundMatches.clear(); + + // If we don't have an original regex compiled, fall back to the assertion flags validation + if (!patternInfo.originalRegex) { + dbgTrace(D_WAAP_HYPERSCAN) << "No original regex available for validation, " + << "falling back to assertion flags check"; + foundMatches.emplace(matchStart, matchEnd); + // If no assertion flags are set, the match is valid + if (patternInfo.assertionFlags.empty()) { + return true; + } + + if ( + patternInfo.assertionFlags.isSet(Signatures::AssertionFlag::END_NON_WORD_AHEAD) && + matchEnd < sampleText.length() && + isWordChar(sampleText[matchEnd])) { + // (?!\w) - requires NO word character after the match + return false; + } + + if (patternInfo.assertionFlags.isSet(Signatures::AssertionFlag::START_NON_WORD_BEHIND) && matchStart > 0 && + isWordChar(sampleText[matchStart - 1])) { + // (?= sampleText.length() || !isWordChar(sampleText[matchEnd]))) { + // (?=\w) - requires a word character after the match + return false; + } + + if (patternInfo.assertionFlags.isSet(Signatures::AssertionFlag::END_NON_WORD_SPECIAL)) { + // (?=[^\w?<>:=]|$) - requires a non-word character (excluding ?<>:=) or end of string after the match + if (matchEnd < sampleText.length()) { + char nextChar = sampleText[matchEnd]; + if (isWordChar(nextChar) || nextChar == '?' || nextChar == '<' || nextChar == '>' || nextChar == ':' || + nextChar == '=') { + return false; + } + } + // If we're at the end of string, this condition is satisfied + } + + return true; + } + + if (patternInfo.assertionFlags.isSet(Signatures::AssertionFlag::WILDCARD_EVASION)) { + // skip if the match does not contain either type of slash, and not a question mark + bool hasSlash = false; + bool hasQuestionMark = false; + + for (size_t i = matchStart; i < matchEnd && !(hasSlash && hasQuestionMark); ++i) { + if (sampleText[i] == '\\' || sampleText[i] == '/') { + hasSlash = true; + } + if (sampleText[i] == '?') { + hasQuestionMark = true; + } + } + dbgTrace(D_WAAP_HYPERSCAN) << "Testing for wildcard evasion: '" + << " hasSlash=" << hasSlash << " hasQuestionMark=" << hasQuestionMark; + if (!hasSlash || !hasQuestionMark) { + return false; + } + } + + // Use the original compiled regex to find matches within the specified range + std::vector matchRanges; + + // look behind to cover possible assertions, look ahead much further to cover lazy hyperscan match end + static const size_t lookbehind_range = 4, lookahead_range = 32; + size_t searchStart = (matchStart > lookbehind_range) ? (matchStart - lookbehind_range) : 0UL; + size_t searchEnd = ((matchEnd + lookahead_range) < matchEnd || (matchEnd + lookahead_range) > sampleText.length()) + ? sampleText.length() // overflow + : (matchEnd + lookahead_range); // within bounds + + std::vector regex_matches; + patternInfo.originalRegex->findMatchRanges(sampleText, regex_matches, maxMatches, searchStart, searchEnd); + + for (const auto &match : regex_matches) { + foundMatches.emplace(match.start, match.end); + if (isDebugRequired(TRACE, D_WAAP_HYPERSCAN)) { + dbgTrace(D_WAAP_HYPERSCAN) << "Match for: '" << patternInfo.originalPattern << "' matched in range [" + << match.start << "," << match.end << "] " + << "matched text: '" + << sampleText.substr(match.start, match.end - match.start) + << "'"; + } + } + + if (foundMatches.empty()) { + if (isDebugRequired(TRACE, D_WAAP_HYPERSCAN)) { + dbgTrace(D_WAAP_HYPERSCAN) << "No match for: '" << patternInfo.originalPattern + << "' did not match in range [" << matchStart << "," << matchEnd << "] " + << "matched text: '" << sampleText.substr(matchStart, matchEnd - matchStart) + << "'"; + } + return false; + } + return true; +} + +// LCOV_EXCL_START Reason: Not in use currently, but kept for future reference +bool WaapHyperscanEngine::Impl::isWordChar(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +bool WaapHyperscanEngine::Impl::isNonWordSpecialChar(char c) +{ + return c == '?' || c == '<' || c == '>' || c == ':' || c == '='; +} +// LCOV_EXCL_STOP + +// WaapHyperscanEngine public interface - delegates to Impl +WaapHyperscanEngine::WaapHyperscanEngine() : pimpl(std::make_unique()) +{ +} + +WaapHyperscanEngine::~WaapHyperscanEngine() = default; + +bool WaapHyperscanEngine::initialize(const std::shared_ptr& signatures) +{ + return pimpl->initialize(signatures); +} + +void WaapHyperscanEngine::scanSample(const SampleValue& sample, Waf2ScanResult& res, bool longTextFound, + bool binaryDataFound, bool includeKeywordRegex, bool includePatternRegex) const +{ + pimpl->scanSample(sample, res, longTextFound, binaryDataFound, includeKeywordRegex, includePatternRegex); +} + +bool WaapHyperscanEngine::isInitialized() const +{ + return pimpl->isInitialized(); +} + +size_t WaapHyperscanEngine::getPatternCount() const +{ + return pimpl->getPatternCount(); +} + +size_t WaapHyperscanEngine::getCompiledPatternCount() const +{ + return pimpl->getCompiledPatternCount(); +} + +size_t WaapHyperscanEngine::getFailedPatternCount() const +{ + return pimpl->getFailedPatternCount(); +} diff --git a/components/security_apps/waap/waap_clib/WaapHyperscanEngine.h b/components/security_apps/waap/waap_clib/WaapHyperscanEngine.h new file mode 100644 index 0000000..7979a23 --- /dev/null +++ b/components/security_apps/waap/waap_clib/WaapHyperscanEngine.h @@ -0,0 +1,54 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __WAAP_HYPERSCAN_ENGINE_H__ +#define __WAAP_HYPERSCAN_ENGINE_H__ + +#include +#include +#include +#include +#include + +class Signatures; +class SampleValue; +struct Waf2ScanResult; + +class WaapHyperscanEngine { +public: + + WaapHyperscanEngine(); + ~WaapHyperscanEngine(); + + // Initialize with patterns from Signatures + bool initialize(const std::shared_ptr& signatures); + + // Main scanning function - same interface as performStandardRegexChecks + void scanSample(const SampleValue& sample, + Waf2ScanResult& res, + bool longTextFound, + bool binaryDataFound, + bool includeKeywordRegex, + bool includePatternRegex) const; + + // Check if the engine is ready to use + bool isInitialized() const; + + // Get statistics + size_t getPatternCount() const; + size_t getCompiledPatternCount() const; + size_t getFailedPatternCount() const; + +private: + class Impl; + std::unique_ptr pimpl; +}; + +#endif // __WAAP_HYPERSCAN_ENGINE_H__ diff --git a/components/security_apps/waap/waap_clib/buffered_compressed_stream.cc b/components/security_apps/waap/waap_clib/buffered_compressed_stream.cc new file mode 100644 index 0000000..1f43e2c --- /dev/null +++ b/components/security_apps/waap/waap_clib/buffered_compressed_stream.cc @@ -0,0 +1,481 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "buffered_compressed_stream.h" +#include "waap.h" +#include "compression_utils.h" +#include + +USE_DEBUG_FLAG(D_WAAP_SERIALIZE); + +using namespace std; + +void yieldIfPossible(const string &func, int line) +{ + // check mainloop exists and current routine is not the main routine + if (Singleton::exists() && + Singleton::Consume::by()->getCurrentRoutineId().ok()) + { + dbgDebug(D_WAAP_SERIALIZE) << "Yielding to main loop from: " << func << ":" << line; + Singleton::Consume::by()->yield(false); + } +} + +// Static member definitions +const size_t BufferedCompressedOutputStream::CompressedBuffer::BUFFER_SIZE; +const size_t BufferedCompressedInputStream::DecompressedBuffer::OUTPUT_BUFFER_SIZE; +const size_t BufferedCompressedInputStream::DecompressedBuffer::CHUNK_SIZE; + +BufferedCompressedOutputStream::BufferedCompressedOutputStream(ostream &underlying_stream) + : + ostream(nullptr), + m_buffer(make_unique(underlying_stream)) +{ + rdbuf(m_buffer.get()); +} + +BufferedCompressedOutputStream::~BufferedCompressedOutputStream() +{ + try { + close(); + } catch (exception &e) { + // Destructor should not throw + dbgWarning(D_WAAP_SERIALIZE) << "Exception in BufferedCompressedOutputStream destructor: " << e.what(); + } +} + +void BufferedCompressedOutputStream::flush() +{ + if (m_buffer) { + dbgTrace(D_WAAP_SERIALIZE) << "Flushing internal buffer..."; + m_buffer->flushBuffer(); // This will compress and encrypt the current buffer with is_last=false + // and flush the underlying stream. + } + // Do NOT call ostream::flush() here, as it would call sync() on our m_buffer, + // which calls compressAndEncryptBuffer(true) and finalizes the GZIP stream prematurely. + // The m_underlying_stream within m_buffer is flushed by compressAndEncryptBuffer itself. +} + +void BufferedCompressedOutputStream::close() +{ + if (m_buffer) { + dbgTrace(D_WAAP_SERIALIZE) << "Closing stream and flushing buffer..."; + m_buffer->flushAndClose(); + } +} + +BufferedCompressedOutputStream::CompressedBuffer::CompressedBuffer(ostream &underlying_stream) + : + m_underlying_stream(underlying_stream), + m_buffer(), + m_compression_stream(nullptr), + m_closed(false) +{ + m_buffer.reserve(BUFFER_SIZE); + m_compression_stream = initCompressionStream(); + +} + +BufferedCompressedOutputStream::CompressedBuffer::~CompressedBuffer() +{ + try { + if (!m_closed) { + sync(); + } + if (m_compression_stream) { + finiCompressionStream(m_compression_stream); + m_compression_stream = nullptr; + } + } catch (exception &e) { + // Destructor should not throw + dbgWarning(D_WAAP_SERIALIZE) << "Exception in CompressedBuffer destructor: " << e.what(); + } +} + +void BufferedCompressedOutputStream::CompressedBuffer::flushAndClose() +{ + sync(); +} + +int BufferedCompressedOutputStream::CompressedBuffer::overflow(int c) +{ + if (m_closed) { + dbgTrace(D_WAAP_SERIALIZE) << "Stream is closed, returning EOF"; + return traits_type::eof(); + } + + if (c != traits_type::eof()) { + m_buffer.push_back(static_cast(c)); + dbgTrace(D_WAAP_SERIALIZE) << "Added char, buffer size now: " << m_buffer.size(); + } + + if (m_buffer.size() >= BUFFER_SIZE) { + dbgTrace(D_WAAP_SERIALIZE) << "Buffer full, flushing..."; + compressAndEncryptBuffer(false); + } + + return c; +} + +streamsize BufferedCompressedOutputStream::CompressedBuffer::xsputn(const char* s, streamsize n) +{ + if (m_closed) { + dbgDebug(D_WAAP_SERIALIZE) << "Stream is closed, returning 0"; + return 0; + } + + dbgTrace(D_WAAP_SERIALIZE) << "Writing " << n << " bytes"; + streamsize written = 0; + while (written < n) { + size_t space_available = BUFFER_SIZE - m_buffer.size(); + size_t to_write = min(static_cast(n - written), space_available); + + m_buffer.insert(m_buffer.end(), s + written, s + written + to_write); + written += to_write; + + dbgTrace(D_WAAP_SERIALIZE) << "Wrote " << to_write << " bytes, total written: " << written + << ", buffer size: " << m_buffer.size(); + + if (m_buffer.size() >= BUFFER_SIZE) { + dbgTrace(D_WAAP_SERIALIZE) << "Buffer full, flushing..."; + compressAndEncryptBuffer(false); + } + } + + dbgTrace(D_WAAP_SERIALIZE) << "Completed, total written: " << written; + return written; +} + +int BufferedCompressedOutputStream::CompressedBuffer::sync() +{ + dbgTrace(D_WAAP_SERIALIZE) << "Called, closed=" << m_closed << ", buffer size=" << m_buffer.size(); + if (!m_closed) { + bool success = compressAndEncryptBuffer(true); // Attempt final compression/encryption + // Mark as closed REGARDLESS of the success of the attempt to ensure finalization logic + // for this context isn't re-attempted if this call failed. + m_closed = true; + if (!success) { + dbgWarning(D_WAAP_SERIALIZE) << "Final compression/encryption failed"; + return -1; + } + dbgTrace(D_WAAP_SERIALIZE) << "Stream closed successfully"; + } else { + dbgDebug(D_WAAP_SERIALIZE) << "Stream already closed, skipping"; + } + return 0; +} + +void BufferedCompressedOutputStream::CompressedBuffer::flushBuffer() +{ + if (m_buffer.empty() || m_closed) { + return; + } + + dbgTrace(D_WAAP_SERIALIZE) << "Flushing buffer with " << m_buffer.size() << " bytes"; + compressAndEncryptBuffer(false); +} + +bool BufferedCompressedOutputStream::CompressedBuffer::compressAndEncryptBuffer(bool is_last) +{ + // If the stream is already marked as closed at this buffer's level, + // it means sync() has run, and everything, including encryption, has been finalized. + if (m_closed) { + dbgTrace(D_WAAP_SERIALIZE) << "Stream is already closed, skipping."; + return true; + } + + // Skip if there's nothing to compress and this is not the final flush + if (m_buffer.empty() && !is_last) { + dbgTrace(D_WAAP_SERIALIZE) << "Buffer empty and not last call, skipping."; + return true; + } + + dbgTrace(D_WAAP_SERIALIZE) << "Compressing and encrypting " << m_buffer.size() << " bytes, is_last: " << is_last; + + // Compress the buffer + CompressionResult result = compressData( + m_compression_stream, + CompressionType::GZIP, + static_cast(m_buffer.size()), + reinterpret_cast(m_buffer.data()), + is_last ? 1 : 0 + ); + + if (!result.ok) { + dbgWarning(D_WAAP_SERIALIZE) << "Failed to compress data"; + return false; + } + + string compressed_data; + if (result.output && result.num_output_bytes > 0) { + compressed_data = string(reinterpret_cast(result.output), result.num_output_bytes); + free(result.output); + } + + dbgDebug(D_WAAP_SERIALIZE) << "Compression complete: " << m_buffer.size() + << " bytes -> " << compressed_data.size() << " bytes"; + + // Yield after compression to allow other routines to run + YIELD_IF_POSSIBLE(); + + string final_data = compressed_data; + + // Write to underlying stream only if we have data to write + if (!final_data.empty()) { + m_underlying_stream.write(final_data.c_str(), final_data.size()); + m_underlying_stream.flush(); + } + + m_buffer.clear(); + + // Yield after writing chunk to allow other routines to run + YIELD_IF_POSSIBLE(); + + return true; +} + +BufferedCompressedInputStream::BufferedCompressedInputStream(istream &underlying_stream) + : + istream(nullptr), + m_buffer(make_unique(underlying_stream)) +{ + rdbuf(m_buffer.get()); +} + +BufferedCompressedInputStream::~BufferedCompressedInputStream() +{ + // DecompressedBuffer destructor will handle cleanup +} + +BufferedCompressedInputStream::DecompressedBuffer::DecompressedBuffer(istream &underlying_stream) + : +m_underlying_stream(underlying_stream), + m_buffer(), + m_encrypted_buffer(), + m_compressed_buffer(), + m_decompressed_buffer(), + m_decompressed_pos(0), + m_compression_stream(nullptr), + m_eof_reached(false), + m_stream_finished(false) +{ + m_buffer.resize(OUTPUT_BUFFER_SIZE); + m_encrypted_buffer.reserve(CHUNK_SIZE); + m_compressed_buffer.reserve(CHUNK_SIZE); + m_decompressed_buffer.reserve(OUTPUT_BUFFER_SIZE); + m_compression_stream = initCompressionStream(); + + + // Set buffer pointers to indicate empty buffer + setg(m_buffer.data(), m_buffer.data(), m_buffer.data()); +} + +BufferedCompressedInputStream::DecompressedBuffer::~DecompressedBuffer() +{ + try { + if (m_compression_stream) { + finiCompressionStream(m_compression_stream); + m_compression_stream = nullptr; + } + } catch (exception &e) { + // Destructor should not throw + dbgWarning(D_WAAP_SERIALIZE) << "Exception in DecompressedBuffer destructor: " << e.what(); + } +} + +int BufferedCompressedInputStream::DecompressedBuffer::underflow() +{ + if (gptr() < egptr()) { + return traits_type::to_int_type(*gptr()); + } + + if (m_eof_reached) { + return traits_type::eof(); + } + + if (!fillBuffer()) { + m_eof_reached = true; + return traits_type::eof(); + } + + return traits_type::to_int_type(*gptr()); +} + +streamsize BufferedCompressedInputStream::DecompressedBuffer::xsgetn(char* s, streamsize n) +{ + streamsize total_read = 0; + + while (total_read < n) { + if (gptr() >= egptr()) { + if (!fillBuffer()) { + m_eof_reached = true; + break; + } + } + + streamsize available = egptr() - gptr(); + streamsize to_copy = min(n - total_read, available); + + memcpy(s + total_read, gptr(), to_copy); + gbump(static_cast(to_copy)); + total_read += to_copy; + } + + return total_read; +} + +bool BufferedCompressedInputStream::DecompressedBuffer::fillBuffer() +{ + if (m_eof_reached) { + return false; + } + + // If we have remaining data in the decompressed buffer, use it first + if (m_decompressed_pos < m_decompressed_buffer.size()) { + size_t remaining = m_decompressed_buffer.size() - m_decompressed_pos; + size_t to_copy = min(remaining, OUTPUT_BUFFER_SIZE); + + memcpy(m_buffer.data(), m_decompressed_buffer.data() + m_decompressed_pos, to_copy); + m_decompressed_pos += to_copy; + + // Set up the buffer pointers for streambuf: + // eback() = m_buffer.data() (start of buffer) + // gptr() = m_buffer.data() (current position) + // egptr() = m_buffer.data() + to_copy (end of valid data) + setg(m_buffer.data(), m_buffer.data(), m_buffer.data() + to_copy); + + dbgTrace(D_WAAP_SERIALIZE) << "Serving " << to_copy << " bytes from existing decompressed buffer"; + + // Yield after serving data from buffer to allow other routines to run + YIELD_IF_POSSIBLE(); + return true; + } + + // Need to process the next chunk + if (!processNextChunk()) { + m_eof_reached = true; + return false; + } + + // Now try again with the new data + return fillBuffer(); +} + +bool BufferedCompressedInputStream::DecompressedBuffer::processNextChunk() +{ + while (true) { + if (m_stream_finished) { + return false; + } + + // Read a chunk of encrypted data from the underlying stream + if (m_encrypted_buffer.size() < CHUNK_SIZE) { + m_encrypted_buffer.resize(CHUNK_SIZE); + } + m_underlying_stream.read(m_encrypted_buffer.data(), CHUNK_SIZE); + streamsize bytes_read = m_underlying_stream.gcount(); + + if (bytes_read <= 0) { + m_stream_finished = true; + + // End of stream - no more data to process + dbgTrace(D_WAAP_SERIALIZE) << "Reached end of input stream"; + return false; + } + + m_encrypted_buffer.resize(bytes_read); + + dbgTrace(D_WAAP_SERIALIZE) << "Read " << bytes_read << " encrypted bytes from stream"; + + // Decrypt the chunk + std::vector decrypted_chunk; + if (!decryptChunk(m_encrypted_buffer, decrypted_chunk)) { + dbgWarning(D_WAAP_SERIALIZE) << "Failed to decrypt chunk"; + break; + } + + // Decompress the chunk + std::vector decompressed_chunk; + if (!decompressChunk(decrypted_chunk, decompressed_chunk)) { + dbgWarning(D_WAAP_SERIALIZE) << "Failed to decompress chunk"; + break; + } + + if (decompressed_chunk.empty()) { + dbgTrace(D_WAAP_SERIALIZE) << "Decompressed chunk is empty, skipping"; + continue; // Nothing to add to the buffer + } + // Replace the decompressed buffer with new data using swap to avoid unnecessary allocations + m_decompressed_buffer.swap(decompressed_chunk); + m_decompressed_pos = 0; + + dbgTrace(D_WAAP_SERIALIZE) << "Processed chunk: " << bytes_read + << " encrypted -> " << decrypted_chunk.size() + << " compressed -> " << m_decompressed_buffer.size() << " decompressed"; + + // Yield after processing chunk to allow other routines to run + YIELD_IF_POSSIBLE(); + return true; + } + return false; +} + +bool BufferedCompressedInputStream::DecompressedBuffer::decryptChunk( + const std::vector &encrypted_chunk, + std::vector &decrypted_chunk) +{ + + // No encryption - just copy the data + decrypted_chunk = encrypted_chunk; + return true; +} + +bool BufferedCompressedInputStream::DecompressedBuffer::decompressChunk( + const std::vector &compressed_chunk, + std::vector &decompressed_chunk) +{ + if (compressed_chunk.empty()) { + return true; // Nothing to decompress + } + + // Use the streaming decompression + DecompressionResult result = decompressData( + m_compression_stream, + compressed_chunk.size(), + reinterpret_cast(compressed_chunk.data()) + ); + + if (!result.ok) { + dbgWarning(D_WAAP_SERIALIZE) << "Failed to decompress chunk"; + return false; + } + + if (result.output && result.num_output_bytes > 0) { + decompressed_chunk.assign( + reinterpret_cast(result.output), + reinterpret_cast(result.output) + result.num_output_bytes + ); + free(result.output); + + dbgTrace(D_WAAP_SERIALIZE) << "Decompressed chunk: " << compressed_chunk.size() + << " -> " << decompressed_chunk.size() << " bytes"; + + // Yield after decompression to allow other routines to run + YIELD_IF_POSSIBLE(); + return true; + } + + // No output data yet (might need more input for compression algorithm) + decompressed_chunk.clear(); + return true; +} diff --git a/components/utils/pm/hyperscan_hook.cc b/components/utils/pm/hyperscan_hook.cc new file mode 100644 index 0000000..9f00073 --- /dev/null +++ b/components/utils/pm/hyperscan_hook.cc @@ -0,0 +1,135 @@ +// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef USE_HYPERSCAN +#include "hyperscan_hook.h" +#include +#include + +// Helper function to escape regex special characters for literal matching +static std::string escapeRegexChars(const std::string& input) { + std::string escaped; + for (char c : input) { + switch (c) { + case '.': + case '^': + case '$': + case '*': + case '+': + case '?': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '\\': + case '|': + escaped += '\\'; + escaped += c; + break; + default: + escaped += c; + break; + } + } + return escaped; +} + +HyperscanHook::HyperscanHook() : m_hsDatabase(nullptr), m_hsScratch(nullptr), m_hsReady(false) {} + +HyperscanHook::~HyperscanHook() { + if (m_hsScratch) hs_free_scratch(m_hsScratch); + if (m_hsDatabase) hs_free_database(m_hsDatabase); +} + +Maybe HyperscanHook::prepare(const std::set &patterns) { + m_hsPatterns.clear(); + m_idToPattern.clear(); for (const auto &pat : patterns) { + if (pat.empty()) continue; // Use pat.empty() instead of pat.pattern().empty() + + // Convert pattern data to string using the public interface + std::string pattern_str(reinterpret_cast(pat.data()), pat.size()); + + // Escape regex special characters for literal matching + std::string escaped_pattern = escapeRegexChars(pattern_str); + + m_hsPatterns.push_back(escaped_pattern); + m_idToPattern.push_back(pat); + } + + std::vector c_patterns; + std::vector flags; + std::vector ids; + + for (size_t i = 0; i < m_hsPatterns.size(); ++i) { + c_patterns.push_back(m_hsPatterns[i].c_str()); + flags.push_back(HS_FLAG_CASELESS); // adjust as needed + ids.push_back((unsigned int)i); + } hs_compile_error_t *compile_err = nullptr; + hs_error_t result = hs_compile_multi(c_patterns.data(), flags.data(), ids.data(), + (unsigned int)c_patterns.size(), HS_MODE_BLOCK, nullptr, + &m_hsDatabase, &compile_err); + + if (result != HS_SUCCESS) { + std::string error_msg = "Failed to compile Hyperscan database"; + if (compile_err) { + error_msg += ": "; + error_msg += compile_err->message; + hs_free_compile_error(compile_err); + } + return genError(error_msg); + }if (hs_alloc_scratch(m_hsDatabase, &m_hsScratch) != HS_SUCCESS) { + return genError("Failed to allocate Hyperscan scratch space"); + } + + m_hsReady = true; + return Maybe(); +} + +// TODO - No need for HS hook, scanning is done by WaapHyperscanEngine::scanSample() +std::set HyperscanHook::scanBuf(const Buffer &buf) const { + std::set results; + scanBufWithOffsetLambda(buf, [&results](uint, const PMPattern &pattern, bool) { + results.insert(pattern); + }); + return results; +} + +std::set> HyperscanHook::scanBufWithOffset(const Buffer &buf) const { + std::set> results; + scanBufWithOffsetLambda(buf, [&results](uint endMatchOffset, const PMPattern &pattern, bool) { + uint startOffset = endMatchOffset + 1 - pattern.size(); + results.insert(std::make_pair(startOffset, endMatchOffset)); + }); + return results; +} + +void HyperscanHook::scanBufWithOffsetLambda(const Buffer &buf, I_PMScan::CBFunction cb) const { + if (!m_hsReady) return; + struct HyperScanContext { + const HyperscanHook *self; + const Buffer *buffer; + I_PMScan::CBFunction cb; + }; + auto onMatch = [](unsigned int id, unsigned long long, unsigned long long to, unsigned int, + void *ctx) -> int { + HyperScanContext *hctx = (HyperScanContext*)ctx; + const HyperscanHook *self = hctx->self; + const PMPattern &pat = self->m_idToPattern[id]; + uint endMatchOffset = (uint)to - 1; + hctx->cb(endMatchOffset, pat, false); // matchAll logic can be extended if needed + return 0; + }; + HyperScanContext ctx{this, &buf, cb}; + hs_scan(m_hsDatabase, (const char*)buf.data(), (unsigned int)buf.size(), 0, m_hsScratch, onMatch, &ctx); +} + +#endif // USE_HYPERSCAN diff --git a/components/utils/utilities/nginx_conf_collector/CMakeLists.txt b/components/utils/utilities/nginx_conf_collector/CMakeLists.txt index 6e56c58..e362854 100755 --- a/components/utils/utilities/nginx_conf_collector/CMakeLists.txt +++ b/components/utils/utilities/nginx_conf_collector/CMakeLists.txt @@ -32,6 +32,7 @@ target_link_libraries(${EXECUTABLE_NAME} boost_context boost_regex pthread + ${Brotli_LIBRARIES} ) install(TARGETS ${EXECUTABLE_NAME} DESTINATION bin) diff --git a/core/compression/CMakeLists.txt b/core/compression/CMakeLists.txt index f9b5a18..1a30be1 100755 --- a/core/compression/CMakeLists.txt +++ b/core/compression/CMakeLists.txt @@ -4,6 +4,10 @@ add_definitions(-DZLIB_CONST) add_library(compression_utils SHARED compression_utils.cc) add_library(static_compression_utils compression_utils.cc) +target_link_libraries(compression_utils + ${Brotli_LIBRARIES} +) + add_subdirectory(compression_utils_ut) install(TARGETS compression_utils DESTINATION lib) diff --git a/core/config/config_cache_stats.cc b/core/config/config_cache_stats.cc new file mode 100644 index 0000000..c3b8ffe --- /dev/null +++ b/core/config/config_cache_stats.cc @@ -0,0 +1,13 @@ +#include + +// Forward declaration of CacheStats struct +struct CacheStats { + static std::atomic hits; + static std::atomic misses; + static bool tracking_enabled; +}; + +// Define static members for cache statistics +std::atomic CacheStats::hits{0}; +std::atomic CacheStats::misses{0}; +bool CacheStats::tracking_enabled = false; diff --git a/core/include/attachments/nano_attachment_common.h b/core/include/attachments/nano_attachment_common.h new file mode 100644 index 0000000..85aa1ed --- /dev/null +++ b/core/include/attachments/nano_attachment_common.h @@ -0,0 +1,672 @@ +#ifndef __NANO_ATTACHMENT_COMMON_H__ +#define __NANO_ATTACHMENT_COMMON_H__ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "compression_utils.h" + +typedef uint32_t SessionID; +typedef void* DataBuffer; +typedef int64_t NanoHttpCpInjectPos; + +#define MAX_NGINX_UID_LEN 32 +#define MAX_SHARED_MEM_PATH_LEN 128 +#define NUM_OF_NGINX_IPC_ELEMENTS 200 +#define DEFAULT_KEEP_ALIVE_INTERVAL_MSEC 300000u +#define SHARED_MEM_PATH "/dev/shm/" +#define SHARED_REGISTRATION_SIGNAL_PATH SHARED_MEM_PATH "check-point/cp-nano-attachment-registration" +#define SHARED_KEEP_ALIVE_PATH SHARED_MEM_PATH "check-point/cp-nano-attachment-registration-expiration-socket" +#define SHARED_VERDICT_SIGNAL_PATH SHARED_MEM_PATH "check-point/cp-nano-http-transaction-handler" +#define SHARED_ATTACHMENT_CONF_PATH SHARED_MEM_PATH "cp_nano_http_attachment_conf" +#define DEFAULT_STATIC_RESOURCES_PATH SHARED_MEM_PATH "static_resources" +#define INJECT_POS_IRRELEVANT -1 +#define CORRUPTED_SESSION_ID 0 +#define METRIC_PERIODIC_TIMEOUT 600 +#define MAX_CONTAINER_ID_LEN 12 +#define CONTAINER_ID_FILE_PATH "/proc/self/cgroup" +#define RESPONSE_PAGE_PARTS 4 +#define UUID_SIZE 64 +#define CUSTOM_RESPONSE_TITLE_SIZE 64 +#define CUSTOM_RESPONSE_BODY_SIZE 128 +#define REDIRECT_RESPONSE_LOCATION_SIZE 512 + +#ifdef __cplusplus +typedef enum class NanoWebResponseType +#else +typedef enum NanoWebResponseType +#endif +{ + CUSTOM_WEB_RESPONSE, + CUSTOM_WEB_BLOCK_PAGE_RESPONSE, + RESPONSE_CODE_ONLY, + REDIRECT_WEB_RESPONSE, + + NO_WEB_RESPONSE +} NanoWebResponseType; + +#ifdef __cplusplus +typedef enum class NanoHttpInspectionMode +#else +typedef enum NanoHttpInspectionMode +#endif +{ + NON_BLOCKING_THREAD, + BLOCKING_THREAD, + NO_THREAD, + + INSPECTION_MODE_COUNT +} NanoHttpInspectionMode; + +#ifdef __cplusplus +typedef enum class NanoCommunicationResult +#else +typedef enum NanoCommunicationResult +#endif +{ + NANO_OK, + NANO_ERROR, + NANO_ABORT, + NANO_AGAIN, + NANO_HTTP_FORBIDDEN, + NANO_DECLINED, + NANO_TIMEOUT +} NanoCommunicationResult; + +#ifdef __cplusplus +typedef enum class nano_http_cp_debug_level +#else +typedef enum nano_http_cp_debug_level +#endif +{ + DBG_LEVEL_TRACE, + DBG_LEVEL_DEBUG, + DBG_LEVEL_INFO, + DBG_LEVEL_WARNING, + DBG_LEVEL_ERROR, +#ifndef __cplusplus + DBG_LEVEL_ASSERT, +#endif + DBG_LEVEL_COUNT +} nano_http_cp_debug_level_e; + +#ifdef __cplusplus +typedef enum class AttachmentMetricType +#else +typedef enum AttachmentMetricType +#endif +{ + TRANSPARENTS_COUNT, + TOTAL_TRANSPARENTS_TIME, + INSPECTION_OPEN_FAILURES_COUNT, + INSPECTION_CLOSE_FAILURES_COUNT, + INSPECTION_SUCCESSES_COUNT, + INJECT_VERDICTS_COUNT, + DROP_VERDICTS_COUNT, + ACCEPT_VERDICTS_COUNT, + IRRELEVANT_VERDICTS_COUNT, + RECONF_VERDICTS_COUNT, + INSPECT_VERDICTS_COUNT, + HOLD_VERDICTS_COUNT, + AVERAGE_OVERALL_PPROCESSING_TIME_UNTIL_VERDICT, + MAX_OVERALL_PPROCESSING_TIME_UNTIL_VERDICT, + MIN_OVERALL_PPROCESSING_TIME_UNTIL_VERDICT, + AVERAGE_REQ_PPROCESSING_TIME_UNTIL_VERDICT, + MAX_REQ_PPROCESSING_TIME_UNTIL_VERDICT, + MIN_REQ_PPROCESSING_TIME_UNTIL_VERDICT, + AVERAGE_RES_PPROCESSING_TIME_UNTIL_VERDICT, + MAX_RES_PPROCESSING_TIME_UNTIL_VERDICT, + MIN_RES_PPROCESSING_TIME_UNTIL_VERDICT, + THREAD_TIMEOUT, + REG_THREAD_TIMEOUT, + REQ_METADATA_THREAD_TIMEOUT, + REQ_HEADER_THREAD_TIMEOUT, + REQ_BODY_THREAD_TIMEOUT, + REQ_END_THREAD_TIMEOUT, + AVERAGE_REQ_BODY_SIZE_UPON_TIMEOUT, + MAX_REQ_BODY_SIZE_UPON_TIMEOUT, + MIN_REQ_BODY_SIZE_UPON_TIMEOUT, + RES_HEADER_THREAD_TIMEOUT, + RES_BODY_THREAD_TIMEOUT, + RES_END_THREAD_TIMEOUT, + HOLD_THREAD_TIMEOUT, + AVERAGE_RES_BODY_SIZE_UPON_TIMEOUT, + MAX_RES_BODY_SIZE_UPON_TIMEOUT, + MIN_RES_BODY_SIZE_UPON_TIMEOUT, + THREAD_FAILURE, + REQ_PROCCESSING_TIMEOUT, + RES_PROCCESSING_TIMEOUT, + REQ_FAILED_TO_REACH_UPSTREAM, + REQ_FAILED_COMPRESSION_COUNT, + RES_FAILED_COMPRESSION_COUNT, + REQ_FAILED_DECOMPRESSION_COUNT, + RES_FAILED_DECOMPRESSION_COUNT, + REQ_SUCCESSFUL_COMPRESSION_COUNT, + RES_SUCCESSFUL_COMPRESSION_COUNT, + REQ_SUCCESSFUL_DECOMPRESSION_COUNT, + RES_SUCCESSFUL_DECOMPRESSION_COUNT, + CORRUPTED_ZIP_SKIPPED_SESSION_COUNT, + CPU_USAGE, + AVERAGE_VM_MEMORY_USAGE, + AVERAGE_RSS_MEMORY_USAGE, + MAX_VM_MEMORY_USAGE, + MAX_RSS_MEMORY_USAGE, + REQUEST_OVERALL_SIZE_COUNT, + RESPONSE_OVERALL_SIZE_COUNT, + + METRIC_TYPES_COUNT +} AttachmentMetricType; + +#ifdef __cplusplus +typedef enum class AttachmentDataType +#else +typedef enum AttachmentDataType +#endif +{ + REQUEST_START, + REQUEST_HEADER, + REQUEST_BODY, + REQUEST_END, + RESPONSE_CODE, + RESPONSE_HEADER, + RESPONSE_BODY, + RESPONSE_END, + CONTENT_LENGTH, + METRIC_DATA_FROM_PLUGIN, + REQUEST_DELAYED_VERDICT, + + COUNT +} AttachmentDataType; + +#ifdef __cplusplus +typedef enum class HttpChunkType +#else +typedef enum HttpChunkType +#endif +{ + HTTP_REQUEST_FILTER, + HTTP_REQUEST_METADATA, + HTTP_REQUEST_HEADER, + HTTP_REQUEST_BODY, + HTTP_REQUEST_END, + HTTP_RESPONSE_HEADER, + HTTP_RESPONSE_BODY, + HTTP_RESPONSE_END, + HOLD_DATA +} HttpChunkType; + +#ifdef __cplusplus +typedef enum class ServiceVerdict +#else +typedef enum ServiceVerdict +#endif +{ + TRAFFIC_VERDICT_INSPECT, + TRAFFIC_VERDICT_ACCEPT, + TRAFFIC_VERDICT_DROP, + TRAFFIC_VERDICT_INJECT, + TRAFFIC_VERDICT_IRRELEVANT, + TRAFFIC_VERDICT_RECONF, + TRAFFIC_VERDICT_DELAYED, + LIMIT_RESPONSE_HEADERS, + TRAFFIC_VERDICT_CUSTOM_RESPONSE +} ServiceVerdict; + +#ifdef __cplusplus +typedef enum class AttachmentContentType +#else +typedef enum AttachmentContentType +#endif +{ + CONTENT_TYPE_APPLICATION_JSON, + CONTENT_TYPE_TEXT_HTML, + CONTENT_TYPE_TEXT_PLAIN, + CONTENT_TYPE_OTHER +} AttachmentContentType; + +#ifdef __cplusplus +typedef enum class AttachmentVerdict +#else +typedef enum AttachmentVerdict +#endif +{ + ATTACHMENT_VERDICT_INSPECT, + ATTACHMENT_VERDICT_ACCEPT, + ATTACHMENT_VERDICT_DROP, + ATTACHMENT_VERDICT_INJECT +} AttachmentVerdict; + +#ifdef __cplusplus +typedef enum class HttpModificationType +#else +typedef enum HttpModificationType +#endif +{ + APPEND, + INJECT, + REPLACE +} HttpModificationType; + +typedef struct __attribute__((__packed__)) HttpInjectData { + NanoHttpCpInjectPos injection_pos; + HttpModificationType mod_type; + uint16_t injection_size; + uint8_t is_header; + uint8_t orig_buff_index; + char data[0]; +} HttpInjectData; + +typedef struct __attribute__((__packed__)) HttpWebResponseData { + uint8_t web_response_type; + uint8_t uuid_size; + + union { + struct __attribute__((__packed__)) NanoHttpCpCustomWebResponseData { + uint16_t response_code; + uint8_t title_size; + uint8_t body_size; + char data[0]; + } custom_response_data; + + struct __attribute__((__packed__)) NanoHttpCpRedirectData { + uint8_t unused_dummy; + uint8_t add_event_id; + uint16_t redirect_location_size; + char redirect_location[0]; + } redirect_data; + } response_data; +} HttpWebResponseData; + +typedef struct __attribute__((__packed__)) HttpJsonResponseData { + uint16_t response_code; + uint16_t body_size; + AttachmentContentType content_type; + char body[0]; +} HttpJsonResponseData; + +typedef struct { + size_t len; + unsigned char *data; +} nano_str_t; + +typedef struct CustomResponseData { + uint16_t response_code; + unsigned char title[CUSTOM_RESPONSE_TITLE_SIZE]; + unsigned char body[CUSTOM_RESPONSE_BODY_SIZE]; +} CustomResponseData; + +typedef struct RedirectData { + unsigned char redirect_location[REDIRECT_RESPONSE_LOCATION_SIZE]; +} RedirectData; + +typedef struct WebResponseData { + NanoWebResponseType web_response_type; + unsigned char uuid[UUID_SIZE]; + DataBuffer data; +} WebResponseData; + +#ifdef __cplusplus +typedef enum class HttpMetaDataType +#else +typedef enum HttpMetaDataType +#endif +{ + HTTP_PROTOCOL_SIZE, + HTTP_PROTOCOL_DATA, + HTTP_METHOD_SIZE, + HTTP_METHOD_DATA, + HOST_NAME_SIZE, + HOST_NAME_DATA, + LISTENING_ADDR_SIZE, + LISTENING_ADDR_DATA, + LISTENING_PORT, + URI_SIZE, + URI_DATA, + CLIENT_ADDR_SIZE, + CLIENT_ADDR_DATA, + CLIENT_PORT, + PARSED_HOST_SIZE, + PARSED_HOST_DATA, + PARSED_URI_SIZE, + PARSED_URI_DATA, + WAF_TAG_SIZE, + WAF_TAG_DATA, + + META_DATA_COUNT +} HttpMetaDataType; + +#ifdef __cplusplus +typedef enum class HttpHeaderDataType +#else +typedef enum HttpHeaderDataType +#endif +{ + HEADER_KEY_SIZE, + HEADER_KEY_DATA, + HEADER_VAL_SIZE, + HEADER_VAL_DATA, + + HEADER_DATA_COUNT +} HttpHeaderDataType; + +/// @struct NanoHttpModificationList +/// @brief A node that holds all the information regarding modifications. +typedef struct NanoHttpModificationList { + struct NanoHttpModificationList *next; ///< Next node. + HttpInjectData modification; ///< Modification data. + char *modification_buffer; ///< Modification buffer used to store extra needed data. +} NanoHttpModificationList; + +/// @struct NanoHttpResponseData +/// Holds all the data for Compression in a session. +typedef struct { + + /// Original compression type, can hold the following values: + /// - #GZIP + /// - #ZLIB + CompressionType compression_type; + + /// Compression stream + CompressionStream *compression_stream; + + /// Decompression stream + CompressionStream *decompression_stream; +} NanoHttpResponseData; + +/// @struct HttpSessionData +/// @brief Holds all the session's information needed to communicate with the nano service. +/// @details Such as to save verdict and session ID between the request and the response +typedef struct HttpSessionData { + int was_request_fully_inspected; ///< Holds if the request fully inspected. + ServiceVerdict verdict; ///< Holds the session's verdict from the Nano Service. + uint32_t session_id; ///< Current session's Id. + unsigned int remaining_messages_to_reply; ///< Remaining messages left for the agent to respond to. + + NanoHttpResponseData response_data; ///< Holds session's response data. + + double req_proccesing_time; ///< Holds session's request processing time. + double res_proccesing_time; ///< Holds session's response processing time. + uint64_t processed_req_body_size; ///< Holds session's request body's size. + uint64_t processed_res_body_size; ///< Holds session's response body's size'. +} HttpSessionData; + +typedef struct HttpMetaData { + nano_str_t http_protocol; + nano_str_t method_name; + nano_str_t host; + nano_str_t listening_ip; + uint16_t listening_port; + nano_str_t uri; + nano_str_t client_ip; + uint16_t client_port; + nano_str_t parsed_host; + nano_str_t parsed_uri; +} HttpMetaData; + +typedef struct HttpHeaderData { + nano_str_t key; + nano_str_t value; +} HttpHeaderData; + +typedef struct HttpHeaders { + HttpHeaderData *data; + size_t headers_count; +} HttpHeaders; + +typedef struct HttpRequestFilterData { + HttpMetaData *meta_data; + HttpHeaders *req_headers; + bool contains_body; +} HttpRequestFilterData; + +typedef struct ResHttpHeaders { + HttpHeaders *headers; + uint16_t response_code; + uint64_t content_length; +} ResHttpHeaders; + +typedef struct NanoHttpBody { + nano_str_t *data; + size_t bodies_count; +} NanoHttpBody; + +typedef struct AttachmentData { + SessionID session_id; + HttpChunkType chunk_type; + HttpSessionData *session_data; + DataBuffer data; +} AttachmentData; + +typedef union __attribute__((__packed__)) HttpModifyData { + HttpInjectData inject_data[0]; + HttpWebResponseData web_response_data[0]; + HttpJsonResponseData json_response_data[0]; +} HttpModifyData; + +typedef struct __attribute__((__packed__)) HttpReplyFromService { + uint16_t verdict; + SessionID session_id; + uint8_t modification_count; + HttpModifyData modify_data[0]; +} HttpReplyFromService; + +typedef struct AttachmentVerdictResponse { + AttachmentVerdict verdict; + SessionID session_id; + WebResponseData *web_response_data; + NanoHttpModificationList *modifications; +} AttachmentVerdictResponse; + +typedef struct __attribute__((__packed__)) AttachmentRequest { + struct __attribute__((__packed__)) connection { + int sockaddr; + int local_sockaddr; + } connection; + + struct __attribute__((__packed__)) http_protocol { + int len; + int data; + } http_protocol; + + struct __attribute__((__packed__)) method { + int name; + int data; + } method; + + struct __attribute__((__packed__)) uri { + int len; + int data; + } uri; + + struct __attribute__((__packed__)) unparsed_uri { + int len; + int data; + } unparsed_uri; +} AttachmentRequest; + +typedef struct BlockPageData { + uint16_t response_code; + nano_str_t title_prefix; + nano_str_t title; + nano_str_t body_prefix; + nano_str_t body; + nano_str_t uuid_prefix; + nano_str_t uuid; + nano_str_t uuid_suffix; +} BlockPageData; + +typedef struct RedirectPageData { + nano_str_t redirect_location; +} RedirectPageData; + +typedef struct NanoResponseModifications { + NanoHttpModificationList *modifications; +} NanoResponseModifications; + +typedef struct __attribute__((__packed__)) NanoHttpRequestData { + uint16_t data_type; + uint32_t session_id; + unsigned char data[0]; +} NanoHttpRequestData; + +typedef struct __attribute__((__packed__)) NanoHttpMetricData { + uint16_t data_type; +#ifdef __cplusplus + uint64_t data[static_cast(AttachmentMetricType::METRIC_TYPES_COUNT)]; +#else + uint64_t data[METRIC_TYPES_COUNT]; +#endif +} NanoHttpMetricData; + +// Simple but reliable hash function for generating consistent, well-distributed offsets +// Uses a basic polynomial hash that avoids large intermediate values +static inline uint32_t hash_string(const char *str) { + uint32_t hash = 0; + while (*str) { + hash = (hash * 31 + (unsigned char)*str++) % 10000; // Keep values under 10000 + } + return hash; // Return bounded hash - modulo will be applied by caller +} + +static inline int set_affinity_by_uid(uint32_t uid) { + int num_cores = sysconf(_SC_NPROCESSORS_CONF); + // Debug print for troubleshooting + fprintf(stderr, "[DEBUG] set_affinity_by_uid: num_cores=%d, uid=%u\n", num_cores, uid); + uint32_t core_num = (uid - 1) % num_cores; // Ensure core_num is within bounds + cpu_set_t mask, mask_check; + CPU_ZERO(&mask); + CPU_ZERO(&mask_check); + CPU_SET(core_num, &mask); + pid_t pid = getpid(); // Use process PID, not thread ID + + if (sched_setaffinity(pid, sizeof(mask), &mask) != 0) { + return -1; // Error setting affinity + } + if (sched_getaffinity(pid, sizeof(mask_check), &mask_check) != 0) { + return -2; // Error getting affinity + } + // Compare mask and mask_check + int i; + for (i = 0; i < num_cores; ++i) { + if (CPU_ISSET(i, &mask) != CPU_ISSET(i, &mask_check)) { + return -3; // Affinity not set as expected + } + } + return 0; // Success +} + +static inline int set_affinity_by_uid_with_offset(uint32_t uid, uint32_t offset) { + int num_cores = sysconf(_SC_NPROCESSORS_CONF); + // Debug print for troubleshooting + fprintf( + stderr, "[DEBUG] set_affinity_by_uid_with_offset: num_cores=%d, uid=%u, offset=%u\n", num_cores, uid, offset); + // Prevent integer overflow by applying modulo to offset first + uint32_t safe_offset = offset % num_cores; + uint32_t core_num = ((uid - 1) + safe_offset) % num_cores; + cpu_set_t mask, mask_check; + CPU_ZERO(&mask); + CPU_ZERO(&mask_check); + CPU_SET(core_num, &mask); + pid_t pid = getpid(); // Use process PID, not thread ID + + if (sched_setaffinity(pid, sizeof(mask), &mask) != 0) { + return -1; // Error setting affinity + } + if (sched_getaffinity(pid, sizeof(mask_check), &mask_check) != 0) { + return -2; // Error getting affinity + } + // Compare mask and mask_check + int i; + for (i = 0; i < num_cores; ++i) { + if (CPU_ISSET(i, &mask) != CPU_ISSET(i, &mask_check)) { + return -3; // Affinity not set as expected + } + } + return 0; // Success +} + +static inline int set_affinity_by_uid_with_offset_fixed_cores(uint32_t uid, uint32_t offset, int num_cores) { + // Debug print for troubleshooting + fprintf( + stderr, + "[DEBUG] set_affinity_by_uid_with_offset_fixed_cores: num_cores=%d, uid=%u, offset=%u\n", + num_cores, + uid, + offset + ); + // Prevent integer overflow by applying modulo to offset first + + uint32_t safe_offset = offset % num_cores; + uint32_t core_num = ((uid - 1) + safe_offset) % num_cores; + cpu_set_t mask, mask_check; + CPU_ZERO(&mask); + CPU_ZERO(&mask_check); + CPU_SET(core_num, &mask); + pid_t pid = getpid(); // Use process PID, not thread ID + + if (sched_setaffinity(pid, sizeof(mask), &mask) != 0) { + return -1; // Error setting affinity + } + if (sched_getaffinity(pid, sizeof(mask_check), &mask_check) != 0) { + return -2; // Error getting affinity + } + // Compare mask and mask_check + int i; + for (i = 0; i < num_cores; ++i) { + if (CPU_ISSET(i, &mask) != CPU_ISSET(i, &mask_check)) { + return -3; // Affinity not set as expected + } + } + return 0; // Success +} + +static inline int set_affinity_to_core(int target_core) { + // Debug print for troubleshooting + fprintf(stderr, "[DEBUG] set_affinity_to_core: target_core=%d\n", target_core); + cpu_set_t mask, mask_check; + CPU_ZERO(&mask); + CPU_ZERO(&mask_check); + CPU_SET(target_core, &mask); + pid_t pid = getpid(); // Use process PID, not thread ID + + if (sched_setaffinity(pid, sizeof(mask), &mask) != 0) { + return -1; // Error setting affinity + } + if (sched_getaffinity(pid, sizeof(mask_check), &mask_check) != 0) { + return -2; // Error getting affinity + } + // Compare mask and mask_check + int num_cores = sysconf(_SC_NPROCESSORS_CONF); + int i; + for (i = 0; i < num_cores; ++i) { + if (CPU_ISSET(i, &mask) != CPU_ISSET(i, &mask_check)) { + return -3; // Affinity not set as expected + } + } + return 0; // Success +} + +static inline int reset_affinity() { + int num_cores = sysconf(_SC_NPROCESSORS_CONF); + // Debug print for troubleshooting + fprintf(stderr, "[DEBUG] reset_affinity: num_cores=%d\n", num_cores); + cpu_set_t mask; + CPU_ZERO(&mask); + int i; + for (i = 0; i < num_cores; ++i) CPU_SET(i, &mask); + pid_t pid = getpid(); // Use process PID, not thread ID + if (sched_setaffinity(pid, sizeof(mask), &mask) != 0) { + return -1; // Error setting affinity + } + return 0; // Success +} + +#endif // __NANO_ATTACHMENT_COMMON_H__ diff --git a/core/include/general/intell_registration_event.h b/core/include/general/intell_registration_event.h new file mode 100644 index 0000000..3ea0d69 --- /dev/null +++ b/core/include/general/intell_registration_event.h @@ -0,0 +1,28 @@ +#ifndef __INTELL_REGISTRATION_EVENT_H__ +#define __INTELL_REGISTRATION_EVENT_H__ + +#include "event.h" + +class IntelligenceRegistrationEvent : public Event +{ +public: + IntelligenceRegistrationEvent(bool registration_successful, std::string registration_response) + : + registration_successful(registration_successful), + registration_response(registration_response) + {} + + IntelligenceRegistrationEvent(bool registration_successful) + : + IntelligenceRegistrationEvent(registration_successful, "") + {} + + bool isRegistrationSuccessful() const { return registration_successful; } + std::string getRegistrationResponse() const { return registration_response; } + +private: + bool registration_successful; + std::string registration_response; +}; + +#endif // __INTELL_REGISTRATION_EVENT_H__ diff --git a/nodes/attachment_registration_manager/CMakeLists.txt b/nodes/attachment_registration_manager/CMakeLists.txt index 02db893..d93a6d8 100755 --- a/nodes/attachment_registration_manager/CMakeLists.txt +++ b/nodes/attachment_registration_manager/CMakeLists.txt @@ -14,6 +14,7 @@ target_link_libraries(attachment_registration_manager attachment_registrator http_transaction_data + ${Brotli_LIBRARIES} -Wl,--end-group ) diff --git a/nodes/http_transaction_handler/CMakeLists.txt b/nodes/http_transaction_handler/CMakeLists.txt index f8f8f88..77933b0 100755 --- a/nodes/http_transaction_handler/CMakeLists.txt +++ b/nodes/http_transaction_handler/CMakeLists.txt @@ -41,6 +41,7 @@ target_link_libraries(cp-nano-http-transaction-handler l7_access_control geo_location http_geo_filter + ${Brotli_LIBRARIES} -Wl,--end-group ) diff --git a/nodes/orchestration/CMakeLists.txt b/nodes/orchestration/CMakeLists.txt index 229208b..bc6f853 100755 --- a/nodes/orchestration/CMakeLists.txt +++ b/nodes/orchestration/CMakeLists.txt @@ -32,6 +32,7 @@ target_link_libraries( curl external_sdk_server service_health_status + ${Brotli_LIBRARIES} -Wl,--end-group ) diff --git a/nodes/prometheus/CMakeLists.txt b/nodes/prometheus/CMakeLists.txt index 7f0ca0e..33e637a 100755 --- a/nodes/prometheus/CMakeLists.txt +++ b/nodes/prometheus/CMakeLists.txt @@ -5,7 +5,6 @@ add_executable(prometheus main.cc) target_link_libraries(prometheus -Wl,--start-group ${COMMON_LIBRARIES} - generic_rulebase generic_rulebase_evaluators ip_utilities @@ -15,6 +14,9 @@ target_link_libraries(prometheus prometheus_comp http_transaction_data -Wl,--end-group + -Wl,--no-as-needed + ${Brotli_LIBRARIES} + -Wl,--as-needed ) add_dependencies(prometheus ngen_core) diff --git a/unit_test.cmake b/unit_test.cmake index 6eacca4..996a1bb 100644 --- a/unit_test.cmake +++ b/unit_test.cmake @@ -2,7 +2,7 @@ enable_testing() function(add_unit_test ut_name ut_sources use_libs) add_executable(${ut_name} ${ut_sources}) - target_link_libraries(${ut_name} -Wl,--start-group ${use_libs} version debug_is report cptest pthread packet singleton environment metric event_is buffers rest config compression_utils z ${GTEST_BOTH_LIBRARIES} gmock boost_regex pthread dl -Wl,--end-group) + target_link_libraries(${ut_name} -Wl,--start-group ${use_libs} version debug_is report cptest pthread packet singleton environment metric event_is buffers rest config compression_utils z ${GTEST_BOTH_LIBRARIES} gmock boost_regex pthread dl ${Brotli_LIBRARIES} -Wl,--end-group) add_test(NAME ${ut_name} COMMAND ${ut_name}