From e91c75f1396aa10f63695f84cc1021ddfae32e54 Mon Sep 17 00:00:00 2001 From: Yoan Picchi Date: Fri, 16 May 2025 12:26:34 +0000 Subject: [PATCH 1/4] Implement Direct API The API now provide searches for: - short literal (up to 8 char) - long literal - (long) literals set - single char - char set - single pair - pair set Signed-off-by: Yoan Picchi --- CMakeLists.txt | 2 + hs.def | 21 ++ hs_runtime.def | 9 +- src/dispatcher.c | 94 ++++++ src/hs_common.h | 85 ++++++ src/hs_compile.h | 271 +++++++++++++++++ src/hs_direct_search.cpp | 435 +++++++++++++++++++++++++++ src/hs_direct_search.h | 207 +++++++++++++ src/hs_direct_search_compile.cpp | 495 +++++++++++++++++++++++++++++++ src/hs_direct_search_types.h | 87 ++++++ src/hs_runtime.h | 221 ++++++++++++++ 11 files changed, 1926 insertions(+), 1 deletion(-) create mode 100644 src/hs_direct_search.cpp create mode 100644 src/hs_direct_search.h create mode 100644 src/hs_direct_search_compile.cpp create mode 100644 src/hs_direct_search_types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cb77ffd..83aaaf25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -304,6 +304,7 @@ set (hs_exec_SRCS src/crc32.h src/report.h src/runtime.c + src/hs_direct_search.cpp src/stream_compress.c src/stream_compress.h src/stream_compress_impl.h @@ -484,6 +485,7 @@ SET (hs_compile_SRCS src/hs.cpp src/hs_internal.h src/hs_version.h.in + src/hs_direct_search_compile.cpp src/scratch.h src/state.h src/ue2common.h diff --git a/hs.def b/hs.def index 28f7877c..753e99b5 100644 --- a/hs.def +++ b/hs.def @@ -41,3 +41,24 @@ EXPORTS hs_stream_size hs_valid_platform hs_version + hs_short_literal_search + hs_long_literal_search + hs_multi_literal_search + hs_single_char_search + hs_char_set_search + hs_single_char_pair_search + hs_char_pair_set_search + hs_compile_short_literal_search + hs_compile_long_literal_search + hs_compile_multi_literal_search + hs_compile_single_char_search + hs_compile_char_set_search + hs_compile_single_char_pair_search + 
hs_compile_char_pair_set_search + hs_free_short_literal_pattern + hs_free_long_literal_pattern + hs_free_multi_literal_pattern + hs_free_single_char_pattern + hs_free_char_set_pattern + hs_free_single_char_pair_pattern + hs_free_char_pair_set_pattern \ No newline at end of file diff --git a/hs_runtime.def b/hs_runtime.def index 6c434bed..33e17de3 100644 --- a/hs_runtime.def +++ b/hs_runtime.def @@ -33,4 +33,11 @@ EXPORTS hs_set_stream_allocator hs_stream_size hs_valid_platform - hs_version \ No newline at end of file + hs_version + hs_short_literal_search + hs_long_literal_search + hs_multi_literal_search + hs_single_char_search + hs_char_set_search + hs_single_char_pair_search + hs_char_pair_set_search \ No newline at end of file diff --git a/src/dispatcher.c b/src/dispatcher.c index e213bbe6..cc55d382 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2016-2020, Intel Corporation * Copyright (c) 2024, VectorCamp PC + * Copyright (c) 2025, Arm ltd * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -352,6 +353,99 @@ CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_strea CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream, buf, buf_size, scratch, onEvent, context); +/** DIRECT API **/ + +CREATE_DISPATCH(hs_error_t, hs_short_literal_search, + const hs_short_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_short_literal_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_short_literal_search, + const hs_short_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_short_literal_search, database, data, length, + onEvent, context); + +CREATE_DISPATCH(hs_error_t, 
hs_long_literal_search, + const hs_long_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_long_literal_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_long_literal_search, + const hs_long_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_long_literal_search, database, data, length, + onEvent, context); + +CREATE_DISPATCH(hs_error_t, hs_multi_literal_search, + const hs_multi_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_multi_literal_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_multi_literal_search, + const hs_multi_literal_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_multi_literal_search, database, data, length, + onEvent, context); + +CREATE_DISPATCH(hs_error_t, hs_single_char_search, + const hs_single_char_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_single_char_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_single_char_search, + const hs_single_char_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_single_char_search, database, data, length, + onEvent, context); + +CREATE_DISPATCH(hs_error_t, hs_char_set_search, + const hs_char_set_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_char_set_search, database, data, length, onEvent, + context); 
+CONNECT_DISPATCH_2(hs_error_t, hs_char_set_search, + const hs_char_set_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_char_set_search, database, data, length, onEvent, + context); + +CREATE_DISPATCH(hs_error_t, hs_single_char_pair_search, + const hs_single_char_pair_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_single_char_pair_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_single_char_pair_search, + const hs_single_char_pair_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_single_char_pair_search, database, data, length, + onEvent, context); + +CREATE_DISPATCH(hs_error_t, hs_char_pair_set_search, + const hs_char_pair_set_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_1(hs_error_t, hs_char_pair_set_search, database, data, length, + onEvent, context); +CONNECT_DISPATCH_2(hs_error_t, hs_char_pair_set_search, + const hs_char_pair_set_compiled_pattern_t *database, + const char *data, size_t length, match_event_handler onEvent, + void *context); +CONNECT_ARGS_3(hs_error_t, hs_char_pair_set_search, database, data, length, + onEvent, context); + /** INTERNALS **/ CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); diff --git a/src/hs_common.h b/src/hs_common.h index 3078ad7b..b3af75f8 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2019, Intel Corporation + * Copyright (c) 2024-2025, Arm ltd * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -585,6 +586,90 @@ hs_error_t HS_CDECL 
hs_valid_platform(void); /** @} */ +/** + * The following functions are part of the extended API. + * This extension offers direct access to search algorithms + * allowing the user to minimise calling overhead for simple + * search use cases where type of the search is known. + */ + +/** + * @defgroup DIRECT_API_COMMON + * + * @{ + */ + +/** + * The size threshold after which a pattern is considered long and must be fed + * to @ref hs_compile_long_literal_search(). Patterns up to this length may be + * fed to hs_compile_short_literal_search() instead. + */ +#define HS_SHORT_PATTERN_THRESHOLD 8 + +/** + * The compiled pattern type for searching for short literals + * + * Generated by @ref hs_compile_short_literal_search() and to be freed with @ref + * hs_free_short_literal_pattern + */ +typedef struct hs_short_literal_compiled_pattern + hs_short_literal_compiled_pattern_t; + +/** + * The compiled pattern type for searching for long literals + * + * Generated by @ref hs_compile_long_literal_search() and to be freed with @ref + * hs_free_long_literal_pattern + */ +typedef struct hs_long_literal_compiled_pattern + hs_long_literal_compiled_pattern_t; + +/** + * The compiled pattern type for searching for several long literal + * + * Generated by @ref hs_compile_multi_literal_search() and to be freed with @ref + * hs_free_multi_literal_pattern + */ +typedef struct hs_multi_literal_compiled_pattern + hs_multi_literal_compiled_pattern_t; + +/** + * The compiled pattern type for searching for a single character + * + * Generated by @ref hs_compile_single_char_search() and to be freed with @ref + * hs_free_single_char_pattern + */ +typedef struct hs_single_char_compiled_pattern + hs_single_char_compiled_pattern_t; + +/** + * The compiled pattern type for searching for a character set + * + * Generated by @ref hs_compile_char_set_search() and to be freed with @ref + * hs_free_char_set_pattern + */ +typedef struct hs_char_set_compiled_pattern hs_char_set_compiled_pattern_t; + 
+/** + * The compiled pattern type for searching for a character pair + * + * Generated by @ref hs_compile_single_char_pair_search() and to be freed with @ref + * hs_free_single_char_pair_pattern + */ +typedef struct hs_single_char_pair_compiled_pattern + hs_single_char_pair_compiled_pattern_t; + +/** + * The compiled pattern type for searching for a set of character pairs + * + * Generated by @ref hs_compile_char_pair_set_search() and to be freed with + * @ref hs_free_char_pair_set_pattern + */ +typedef struct hs_char_pair_set_compiled_pattern + hs_char_pair_set_compiled_pattern_t; + +/** @} */ + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/hs_compile.h b/src/hs_compile.h index 5aa24188..1e2946ed 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2021, Intel Corporation + * Copyright (c) 2024-2025, Arm ltd * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1211,6 +1212,276 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); /** @} */ +/** + * The following functions are part of the extended API. + * This extension offers direct access to search algorithms + * allowing the user to minimise calling overhead for simple + * search use cases where type of the search is known. + * + * All search functions handle a limited type of pattern. + * For more generic patterns, use @ref hs_compile(). + * + * NOTE: All search functions are considered case-sensitive. + */ + +/** + * @defgroup DIRECT_API_COMPILE + * + * @{ + */ + +/** + * Compiles a short literal expression used in @ref hs_short_literal_search(). + * + * The expression must be at most @ref HS_SHORT_PATTERN_THRESHOLD characters + * long. For longer expressions, use @ref hs_compile_long_literal_search() and + * @ref hs_long_literal_search() instead. + * + * @param expression + * The expression to parse. 
Note that this string must represent ONLY the + * pattern to be matched, with no delimiters. Null characters are accepted + * as part of the expression. + * + * @param expression_length + * The length of the expression in bytes. Up to @ref + * HS_SHORT_PATTERN_THRESHOLD characters long. + * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_short_literal_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_short_literal_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_short_literal_search( + const char *expression, size_t expression_length, + hs_short_literal_compiled_pattern_t **output_database); + +/** + * Free a short literal pattern. + * + * @param database + * The @ref hs_short_literal_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_short_literal_pattern(hs_short_literal_compiled_pattern_t *database); + +/** + * Compiles a literal expression used in @ref hs_long_literal_search(). + * + * There is no size limit. For expressions up to @ref + * HS_SHORT_PATTERN_THRESHOLD character long, @ref + * hs_compile_short_literal_search() and @ref hs_short_literal_search() might be + * faster + * + * @param expression + * The expression to parse. Note that this string must represent ONLY the + * pattern to be matched, with no delimiters. Null characters are accepted + * as part of the expression. + * + * @param expression_length + * The length of the expression in bytes. + * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_long_literal_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_long_literal_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. 
+ */ +hs_error_t HS_CDECL hs_compile_long_literal_search( + const char *expression, size_t expression_length, + hs_long_literal_compiled_pattern_t **output_database); + +/** + * Free a long literal pattern. + * + * @param database + * The @ref hs_long_literal_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_long_literal_pattern(hs_long_literal_compiled_pattern_t *database); + +/** + * Compiles several literal expressions used in @ref hs_multi_literal_search(). + * + * There is no size limit. + * + * @param expression + * The array of expressions to parse. Note that the strings must represent + * ONLY the patterns to be matched, with no delimiters. Null characters are + * accepted as part of the expression. The expression id in + * @ref match_event_handler will match the order of the expression given + * here (ie: expression[0] will be id 0). + * + * @param pattern_count + * The number of expressions in the @p expression array. + * + * @param expression_length + * The array of length of each expression in the @p expression array. + * Expressed in bytes. + * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_multi_literal_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_multi_literal_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_multi_literal_search( + const char **expression, size_t pattern_count, + const size_t *expression_length, + hs_multi_literal_compiled_pattern_t **output_database); + +/** + * Free a multi literal pattern. + * + * @param database + * The @ref hs_multi_literal_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_multi_literal_pattern(hs_multi_literal_compiled_pattern_t *database); + +/** + * Compiles a single character used in @ref hs_single_char_search(). + * + * @param character + * The single character to be searched. It is case sensitive. 
+ * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_single_char_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_single_char_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_single_char_search( + const char character, hs_single_char_compiled_pattern_t **output_database); + +/** + * Free a single char pattern + * @param database + * The @ref hs_single_char_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_single_char_pattern(hs_single_char_compiled_pattern_t *database); + +/** + * Compiles a set of characters used in @ref hs_char_set_search(). + * + * @param character_array + * The string or character array containing all the characters in the set. + * It is case sensitive. Null terminator is optional. + * + * @param character_count + * The number of characters in @p character_array + * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_char_set_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_char_set_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_char_set_search( + const char *character_array, size_t character_count, + hs_char_set_compiled_pattern_t **output_database); + +/** + * Free a multi char pattern. + * + * @param database + * The @ref hs_char_set_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_char_set_pattern(hs_char_set_compiled_pattern_t *database); + +/** + * Compiles a pair of characters used in @ref hs_single_char_pair_search(). + * + * NOTE: The character order matters in the pair. "Aj" won't match "jA" + * + * @param pair + * The string or character array containing the pair. Null terminator is + * optional. 
+ * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_single_char_pair_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_single_char_pair_pattern. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_single_char_pair_search( + const char *pair, hs_single_char_pair_compiled_pattern_t **output_database); + +/** + * Free a single char pair pattern. + * + * @param database + * The @ref hs_single_char_pair_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL hs_free_single_char_pair_pattern( + hs_single_char_pair_compiled_pattern_t *database); + +/** + * Compiles several pairs used in @ref hs_char_pair_set_search(). + * + * IMPORTANT: Compilation is only guaranteed for up to 8 pairs. If you search + * for more, internal compression may attempt to merge adjacent patterns + * (e.g., [ab, ac, ad] becomes a[bcd]) to reduce the total to 8 pairs. If the + * compression is insufficient, compilation will fail with + * @ref HS_COMPILER_ERROR. In such cases, use @ref hs_multi_literal_search() instead. + * The compression does not affect the match IDs returned by + * @ref hs_char_pair_set_search(). For example, a[bcd] will still report "ab" as + * ID 0, "ac" as ID 1, and "ad" as ID 2. + * + * NOTE: The character order matters in the pair. "Aj" won't match "jA" + * + * @param expression + * The concatenation of all pairs to be parsed. If one wants to search for + * "ab" or "Cd", then @p expression would be ['a','b','C','d']. Null + * terminator is ignored, use @p pair_count to set the length. + * + * @param pair_count + * The number of character pairs in @p expression + * + * @param output_database + * Returns pointer to buffer containing @ref + * hs_char_pair_set_compiled_pattern_t. The buffer must be freed with + * @ref hs_free_char_pair_set_pattern. 
+ * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR otherwise. + */ +hs_error_t HS_CDECL hs_compile_char_pair_set_search( + const char *expression, size_t pair_count, + hs_char_pair_set_compiled_pattern_t **output_database); + +/** + * Free a multi char pairs pattern. + * + * @param database + * The @ref hs_char_pair_set_compiled_pattern_t pointer to be freed. + */ +void HS_CDECL +hs_free_char_pair_set_pattern(hs_char_pair_set_compiled_pattern_t *database); + +/** @} */ + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/hs_direct_search.cpp b/src/hs_direct_search.cpp new file mode 100644 index 00000000..c0c43397 --- /dev/null +++ b/src/hs_direct_search.cpp @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +#include "hs_common.h" +#include "hs_runtime.h" +#include "hs_direct_search.h" +#include "hs_direct_search_types.h" + +#include "scratch.h" +#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE +#include "util/bitutils.h" // ctz64() +#include "util/simd_utils.h" // load128() +#include "util/supervector/supervector.hpp" + +#include "fdr/fdr.h" +#include "hwlm/noodle_engine.h" +#include "nfa/shufti.h" +#include "nfa/truffle.h" + +typedef typename SuperVector::comparemask_type vector_mask_type; + +static_assert((uint64_t)CB_CONTINUE_MATCHING == HWLM_CONTINUE_MATCHING, + "CB_CONTINUE_MATCHING doesn't match HWLM_CONTINUE_MATCHING"); +static_assert((uint64_t)CB_TERMINATE_MATCHING == HWLM_TERMINATE_MATCHING, + "CB_TERMINATE_MATCHING doesn't match HWLM_TERMINATE_MATCHING"); + +static inline hs_error_t hwlm_to_hs_error(const hwlm_error_t error) { + switch (error) { + case HWLM_SUCCESS: + return HS_SUCCESS; + case HWLM_TERMINATED: + return HS_SCAN_TERMINATED; + case HWLM_ERROR_UNKNOWN: + return HS_UNKNOWN_ERROR; + case HWLM_LITERAL_MAX_LEN: + return HS_COMPILER_ERROR; + default: + return HS_UNKNOWN_ERROR; + } +} + +// convert the callback type of Noodle +hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id, + struct hs_scratch *scratch) { + struct noodle_context *storage = reinterpret_cast( + scratch->core_info.userContext); + // hwlm's end is the last char of the pattern, but hs's end is the first + // char 
after the pattern + size_t match_start = end + 1 - storage->pattern_length; + return (hwlmcb_rv_t)(scratch->core_info.userCallback( + id, match_start, end + 1, 0, storage->usr_context)); +} + +// Receive the FDR callback and perform the check for longer patterns (>8 char) +hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id, + struct hs_scratch *scratch) { + const struct FDR_cb_context *combined_ctx = + reinterpret_cast( + scratch->core_info.userContext); + const FDR_pattern_storage *ps = combined_ctx->patterns; + size_t pattern_length = get_const_pattern_sizes(ps)[id]; + size_t start_offset = + end + 1 - std::min(pattern_length, (size_t)HWLM_LITERAL_MAX_LEN); + if (pattern_length > HWLM_LITERAL_MAX_LEN) { + // long pattern for FDR, we need to confirm it. + const char *pattern = get_const_pattern_ptrs(ps)[id]; + const char *buffer = combined_ctx->buffer; + size_t buffer_length = combined_ctx->buffer_length; + + if (start_offset + pattern_length > buffer_length) { + // pattern too long for the remaining buffer, no match + return HWLM_CONTINUE_MATCHING; + } + + const char *confirm_buffer_start = + buffer + start_offset + HWLM_LITERAL_MAX_LEN; + const char *confirm_pattern_start = pattern + HWLM_LITERAL_MAX_LEN; + size_t confirm_len = pattern_length - HWLM_LITERAL_MAX_LEN; + + if (confirm_len >= VECTORSIZE) { + while (confirm_len > VECTORSIZE) { + SuperVector buffer_vector = + SuperVector::loadu(confirm_buffer_start); + SuperVector pattern_vector = + SuperVector::loadu(confirm_pattern_start); + vector_mask_type mask = buffer_vector.eqmask(pattern_vector); + if(~mask) + // don't match the pattern, continue searching + return HWLM_CONTINUE_MATCHING; + confirm_buffer_start += VECTORSIZE; + confirm_pattern_start += VECTORSIZE; + confirm_len -= VECTORSIZE; + } + + // unaligned load: we cannot risk loading any extra byte, so we run + // the vector one last time with an offset to overlap the previous + // check, but avoid overflowing. 
+ size_t overlap = VECTORSIZE - confirm_len; + SuperVector buffer_vector = + SuperVector::loadu(confirm_buffer_start - overlap); + SuperVector pattern_vector = + SuperVector::loadu(confirm_pattern_start - overlap); + vector_mask_type mask = buffer_vector.eqmask(pattern_vector); + if(~mask) + // don't match the pattern, continue searching + return HWLM_CONTINUE_MATCHING; + } else { + size_t confirm_64 = confirm_len / 8; + for (size_t i = 0; i < confirm_64; i++) { + if ((reinterpret_cast(confirm_buffer_start))[i] != + (reinterpret_cast(confirm_pattern_start))[i]) + // don't match the pattern, continue searching + return HWLM_CONTINUE_MATCHING; + } + confirm_len = confirm_len % 8; + + for (size_t i = 0; i < confirm_len; i++) { + if (confirm_buffer_start[i] != confirm_pattern_start[i]) + // don't match the pattern, continue searching + return HWLM_CONTINUE_MATCHING; + } + } + + // we have a valid match. Call the user callback + return (hwlmcb_rv_t)(scratch->core_info.userCallback( + id, start_offset, start_offset + pattern_length, 0, + combined_ctx->usr_context)); + } else { + // short pattern, no confirmation needed + return (hwlmcb_rv_t)(scratch->core_info.userCallback( + id, start_offset, end + 1, 0, combined_ctx->usr_context)); + } +} + + + + +// --- short_literal (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_short_literal_search( + const hs_short_literal_compiled_pattern *database, const char *data, + size_t length, match_event_handler onEvent, void *context) { + assert(onEvent != nullptr && + "hs_short_literal_search called with nullptr callback"); + assert(data != nullptr && + "hs_short_literal_search called with nullptr buffer"); + assert(database != nullptr && + "hs_short_literal_search called with nullptr database"); + struct noodle_context storage; + storage.usr_context = context; + storage.pattern_length = database->pattern_length; + struct hs_scratch scratch; + scratch.core_info.userContext = &storage; + scratch.core_info.userCallback = onEvent; + 
+ hwlm_error_t error = noodExec(&(database->noodle_database), + reinterpret_cast(data), + length, 0, noodle_to_hs_callback, &scratch); + return hwlm_to_hs_error(error); +} + + + + + +// --- long_literal (FDR) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_long_literal_search( + const hs_long_literal_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context) { + assert(onEvent != nullptr && + "hs_long_literal_search called with nullptr callback"); + assert(data != nullptr && + "hs_long_literal_search called with nullptr buffer"); + assert(database != nullptr && + "hs_long_literal_search called with nullptr database"); + + struct hs_scratch scratch; + struct FDR_cb_context combined_ctx = { + context, database->fdr_database.patterns, data, length}; + scratch.core_info.userContext = &combined_ctx; + scratch.core_info.userCallback = onEvent; + scratch.fdr_conf = nullptr; + hwlm_error_t error = + fdrExec(database->fdr_database.database, + reinterpret_cast(data), length, 0, + FDR_to_hs_callback, &scratch, HWLM_ALL_GROUPS); + return hwlm_to_hs_error(error); +} + + + + + + +// --- multi_literal (FDR) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_multi_literal_search( + const hs_multi_literal_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, void *context) { + assert(onEvent != nullptr && + "hs_multi_literal_search called with nullptr callback"); + assert(data != nullptr && + "hs_multi_literal_search called with nullptr buffer"); + assert(database != nullptr && + "hs_multi_literal_search called with nullptr database"); + + struct hs_scratch scratch; + struct FDR_cb_context combined_ctx = { + context, database->fdr_database.patterns, data, length}; + scratch.core_info.userContext = &combined_ctx; + scratch.core_info.userCallback = onEvent; + scratch.fdr_conf = nullptr; + hwlm_error_t error = + fdrExec(database->fdr_database.database, + reinterpret_cast(data), length, 0, + 
FDR_to_hs_callback, &scratch, HWLM_ALL_GROUPS); + return hwlm_to_hs_error(error); +} + + + + + +// --- single_char (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_single_char_search( + const hs_single_char_compiled_pattern *database, const char *data, + size_t length, match_event_handler onEvent, void *context) { + assert(onEvent != nullptr && + "hs_single_char_search called with nullptr callback"); + assert(data != nullptr && + "hs_single_char_search called with nullptr buffer"); + assert(database != nullptr && + "hs_single_char_search called with nullptr database"); + struct noodle_context storage; + storage.usr_context = context; + storage.pattern_length = 1; + struct hs_scratch scratch; + scratch.core_info.userContext = &storage; + scratch.core_info.userCallback = onEvent; + + hwlm_error_t error = noodExec(&(database->noodle_database), + reinterpret_cast(data), + length, 0, noodle_to_hs_callback, &scratch); + return hwlm_to_hs_error(error); +} + + + + + +// --- char_set (Truffle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_char_set_search( + const hs_char_set_compiled_pattern *database, const char *data, + size_t length, match_event_handler onEvent, void *context) { + assert(onEvent != nullptr && + "hs_char_set_search called with nullptr callback"); + assert(data != nullptr && + "hs_char_set_search called with nullptr buffer"); + assert(database != nullptr && + "hs_char_set_search called with nullptr database"); + + const u8 *current_buf = reinterpret_cast(data); + // buf_end must be the first char past the buffer, so current_buf==buf_end + // means current_buf is empty. 
+ const u8 *buf_end = reinterpret_cast(data) + length; + while(current_buf < buf_end) { + const u8 *current_match; +#ifdef CAN_USE_WIDE_TRUFFLE + current_match = truffleExecWide( + loadu256(database->wide_mask), current_buf, buf_end); +#else + current_match = truffleExec(load128(database->mask1), + load128(database->mask2), + current_buf, buf_end); +#endif + // current_match is the pointer to the matching char, NOT past the + // matching char. or buf_end if no match. + if(current_match < buf_end) { + size_t id = database->char_id_map[*current_match]; + size_t match_start = + current_match - reinterpret_cast(data); + if( ! onEvent(id, match_start, match_start + 1, 0, context)) { + // user requested to stop matching + break; + } + } + current_buf = current_match + 1; + } + + return HS_SUCCESS; +} + + + + + +// --- single_char_pair (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_single_char_pair_search( + const hs_single_char_pair_compiled_pattern *database, + const char *data, size_t length, match_event_handler onEvent, + void *context) { + assert(onEvent != nullptr && + "hs_single_char_pair_search called with nullptr callback"); + assert(data != nullptr && + "hs_single_char_pair_search called with nullptr buffer"); + assert(database != nullptr && + "hs_single_char_pair_search called with nullptr database"); + struct noodle_context storage; + storage.usr_context = context; + storage.pattern_length = 2; + struct hs_scratch scratch; + scratch.core_info.userContext = &storage; + scratch.core_info.userCallback = onEvent; + + hwlm_error_t error = noodExec(&(database->noodle_database), + reinterpret_cast(data), + length, 0, noodle_to_hs_callback, &scratch); + return hwlm_to_hs_error(error); +} + + + + + +// --- char_pair_set (Double shufti) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_char_pair_set_search( + const hs_char_pair_set_compiled_pattern *database, const char *data, + size_t length, match_event_handler onEvent, void *context) { + assert(onEvent != nullptr 
&& + "hs_char_pair_set_search called with nullptr callback"); + assert(data != nullptr && + "hs_char_pair_set_search called with nullptr buffer"); + assert(database != nullptr && + "hs_char_pair_set_search called with nullptr database"); + + const u8 *current_buf = reinterpret_cast(data); + // buf_end must be the first char past the buffer, so current_buf==buf_end + // means current_buf is empty. + const u8 *buf_end = reinterpret_cast(data) + length; + while(current_buf < buf_end) { + const u8 *current_match; + current_match = shuftiDoubleExec( + load128(database->dshufti_database.mask1), + load128(database->dshufti_database.mask2), + load128(database->dshufti_database.mask3), + load128(database->dshufti_database.mask4), current_buf, buf_end); + // current_match is the pointer to the matching char, NOT past the + // matching char. or buf_end if no match. + if (current_match < buf_end) { + // Shufti doesn't return which pair matched so we have to find out. + // Use a 16 bits vector search on the original pattern string, + // then return the /2 as ID. + SuperVector found_pair = SuperVector( + *reinterpret_cast(current_match)); + size_t width = SuperVector::mask_width(); + SuperVector all_pair; + vector_mask_type mask; + vector_mask_type merged_mask; + size_t loop = 0; + size_t vector_match_iterations_needed = + ((database->dshufti_database.pair_count - 1) / + (VECTORSIZE / 2)); + for (; loop <= vector_match_iterations_needed; loop++) { + all_pair = SuperVector::load( + database->dshufti_database.all_pairs + (VECTORSIZE * loop)); + // It is fine if the vector isn't filled as we are guaranteed to + // have a match before reaching the garbage data + mask = all_pair.eqmask(found_pair); + // now we have bit set to 1 when a char match. 
+ // first we merge the lane result to keep only consecutive + // matches + merged_mask = mask & (mask >> width); + // Then we filter to keep only a single bit per lane, and only + // every other lane + merged_mask = + merged_mask & database->dshufti_database.bit_filter_mask; + if (merged_mask) + break; + } + // And finaly we can ctz to get the first pair that match + unsigned int id = + (ctz64(merged_mask) / width / 2) + (loop * (VECTORSIZE / 2)); + size_t match_start = current_match - reinterpret_cast(data); + if (!onEvent(id, match_start, match_start + 2, 0, context)) { + // user requested to stop matching + break; + } + } + current_buf = current_match + 1; + } + + return HS_SUCCESS; +} diff --git a/src/hs_direct_search.h b/src/hs_direct_search.h new file mode 100644 index 00000000..9d8398d4 --- /dev/null +++ b/src/hs_direct_search.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DIRECT_SEARCH_H +#define DIRECT_SEARCH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#include "allocator.h" + +#include "fdr/fdr_internal.h" +#include "util/arch.h" + +/* + * FDR_pattern_storage memory layout: + * + * |-------------------------------------------------| + * | size_t pattern_count | + * |------------------------|------------------------| + * | pattern_raw_storage : char* pattern_ptrs[] | + * | :------------------------| + * | : size_t pattern_sizes[] | + * | :------------------------| + * | : char actual_storage[] | + * |------------------------|------------------------| + * + * Use size_fdr_pattern() to get the size to allocate. 
+ */
+
+struct FDR_pattern_storage {
+    size_t pattern_count;
+    // Holds, in order: pattern_ptrs[], pattern_sizes[], then the strings.
+    char pattern_raw_storage[];
+};
+
+/* Start of the pattern pointer array (first region of the raw storage). */
+static inline char **get_pattern_ptrs(struct FDR_pattern_storage *pat) {
+    // cppcheck-suppress cstyleCast
+    return (char **)((char *)pat +
+                     offsetof(struct FDR_pattern_storage, pattern_raw_storage));
+}
+
+/* Const flavour of get_pattern_ptrs(). */
+static inline char *const *
+get_const_pattern_ptrs(const struct FDR_pattern_storage *pat) {
+    // cppcheck-suppress cstyleCast
+    return (char *const *)((const char *)pat +
+                           offsetof(struct FDR_pattern_storage,
+                                    pattern_raw_storage));
+}
+
+/* Start of the pattern size array, located right after the pointer array. */
+static inline size_t *get_pattern_sizes(struct FDR_pattern_storage *pat) {
+    // cppcheck-suppress cstyleCast
+    return (size_t *)((char *)get_pattern_ptrs(pat) +
+                      pat->pattern_count * sizeof(char *));
+}
+
+/* Const flavour of get_pattern_sizes(). */
+static inline const size_t *
+get_const_pattern_sizes(const struct FDR_pattern_storage *pat) {
+    // cppcheck-suppress cstyleCast
+    return (const size_t *)((const char *)get_const_pattern_ptrs(pat) +
+                            pat->pattern_count * sizeof(char *));
+}
+
+/* Start of the string bytes, located right after the size array. */
+static inline char *
+get_pattern_string_storage(struct FDR_pattern_storage *pat) {
+    return (char *)get_pattern_sizes(pat) + pat->pattern_count * sizeof(size_t);
+}
+
+/* Const flavour of get_pattern_string_storage(). */
+static inline const char *
+get_const_pattern_string_storage(const struct FDR_pattern_storage *pat) {
+    return (const char *)get_const_pattern_sizes(pat) +
+           pat->pattern_count * sizeof(size_t);
+}
+
+/*
+ * Fill an already allocated storage (see size_fdr_pattern()) with a copy of
+ * the given patterns.
+ *
+ * pattern_count must be written first: every accessor derives its offset
+ * from it. The strings are packed back to back, without terminator, and
+ * each pointer/size entry is written alongside its string.
+ */
+static inline
+void init_pattern_store(struct FDR_pattern_storage *storage,
+                        const char **in_expression, size_t in_pattern_count,
+                        const size_t *in_expression_length) {
+    storage->pattern_count = in_pattern_count;
+    char **pattern_ptrs = get_pattern_ptrs(storage);
+    size_t *pattern_sizes = get_pattern_sizes(storage);
+    char *next_string = get_pattern_string_storage(storage);
+    // The sizes are written entry by entry below; the former bulk memcpy
+    // only copied pattern_count *bytes* of the size array and was redundant.
+    for (size_t i = 0; i < in_pattern_count; i++) {
+        memcpy(next_string, in_expression[i], in_expression_length[i]);
+        pattern_ptrs[i] = next_string;
+        pattern_sizes[i] = in_expression_length[i];
+        next_string += in_expression_length[i];
+    }
+}
+
+/* Single-pattern convenience wrapper around init_pattern_store(). */
+static inline
+void init_pattern_store_single(struct FDR_pattern_storage *storage,
+                               const char *in_expression,
+                               const size_t in_expression_length) {
+    const size_t single_pattern = 1;
+    init_pattern_store(storage, &in_expression, single_pattern,
+                       &in_expression_length);
+}
+
+/*
+ * Number of bytes needed by a FDR_pattern_storage holding the given
+ * patterns: header + pointer array + size array + all the string bytes.
+ */
+static
+size_t size_fdr_pattern(size_t in_pattern_count,
+                        const size_t *in_expression_length) {
+    // Fixed part: the header and the two per-pattern arrays.
+    size_t required_mem = sizeof(struct FDR_pattern_storage);
+    required_mem += in_pattern_count * sizeof(char *);
+    required_mem += in_pattern_count * sizeof(size_t);
+    // Variable part: the packed pattern strings.
+    const size_t *length_end = in_expression_length + in_pattern_count;
+    for (const size_t *length = in_expression_length; length != length_end;
+         ++length) {
+        required_mem += *length;
+    }
+    return required_mem;
+}
+
+/*
+ * combined_fdr_database memory layout:
+ *
+ * |-------------------------------------------------|
+ * |                FDR *database                    |
+ * |-------------------------------------------------|
+ * |          FDR_pattern_storage *patterns          |
+ * |------------------------|------------------------|
+ * | raw_storage            :   FDR fdr_storage      |
+ * |                        :------------------------|
+ * |                        :   FDR_pattern_storage  |
+ * |------------------------|------------------------|
+ *
+ * Use size_fdr_database() to get the size to allocate.
+ */ +struct combined_fdr_database { + struct FDR *database; + struct FDR_pattern_storage *patterns; + unsigned char raw_storage[]; +}; + +void init_combined_fdr_database(struct combined_fdr_database *database, + size_t fdr_size, const char **in_expression, + size_t in_pattern_count, + const size_t *in_expression_length); + +void init_combined_fdr_database_single(struct combined_fdr_database *database, + size_t fdr_size, + const char *in_expression, + const size_t in_expression_length); +static inline +size_t size_fdr_database(size_t fdr_size, size_t in_pattern_count, + const size_t *in_expression_length) { + return sizeof(struct combined_fdr_database) + + size_fdr_pattern(in_pattern_count, in_expression_length) + fdr_size; +} + +static inline +size_t size_fdr_database_single(size_t fdr_size, + const size_t in_expression_length) { + return size_fdr_database(fdr_size, 1, &in_expression_length); +} + +hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id, + struct hs_scratch *scratch); + +// Receive the FDR callback and perform the check for longer patterns (>8 char) +hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id, + struct hs_scratch *scratch); + +struct FDR_cb_context { + void *usr_context; + const struct FDR_pattern_storage *patterns; + const char *buffer; + size_t buffer_length; +}; + +struct noodle_context { + void *usr_context; + u8 pattern_length; +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // DIRECT_SEARCH_H diff --git a/src/hs_direct_search_compile.cpp b/src/hs_direct_search_compile.cpp new file mode 100644 index 00000000..725b1dc7 --- /dev/null +++ b/src/hs_direct_search_compile.cpp @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following 
disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +#include + +#include "hs_common.h" +#include "hs_compile.h" +#include "hs_direct_search.h" +#include "hs_direct_search_types.h" + +#include "allocator.h" // hs_database_alloc() +#include "grey.h" +#include "hwlm/hwlm.h" // HWLM_LITERAL_MAX_LEN +#include "hwlm/hwlm_internal.h" // HWLM_ENGINE_FDR +#include "hwlm/hwlm_literal.h" // ue2::hwlmLiteral +#include "hwlm/noodle_internal.h" // noodTable +#include "ue2common.h" // likely() - unlikely() +#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE +#include "util/bytecode_ptr.h" +#include "util/charreach.h" +#include "util/flat_containers.h" // flat_set +#include "util/supervector/supervector.hpp" +#include "util/target_info.h" // target_t + +#include "fdr/fdr_compile.h" +#include "hwlm/noodle_build.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" + +typedef typename SuperVector::comparemask_type vector_mask_type; + +void init_combined_fdr_database(struct combined_fdr_database *database, + size_t fdr_size, const char **in_expression, + size_t in_pattern_count, + const size_t *in_expression_length) { + database->database = reinterpret_cast(database->raw_storage); + database->patterns = reinterpret_cast( + database->raw_storage + fdr_size); + init_pattern_store(database->patterns, in_expression, in_pattern_count, + in_expression_length); +}; + +void init_combined_fdr_database_single(struct combined_fdr_database *database, + size_t fdr_size, + const char *in_expression, + const size_t in_expression_length) { + database->database = reinterpret_cast(database->raw_storage); + database->patterns = reinterpret_cast( + database->raw_storage + fdr_size); + init_pattern_store_single(database->patterns, in_expression, + in_expression_length); +}; + +inline void generic_free(void *database) { + if (likely(database)) { + hs_database_free(database); + } +} + + + + +// --- short_literal (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_short_literal_search( + const char *expression, size_t 
expression_length, + hs_short_literal_compiled_pattern **output_database) { + assert(expression_length > 0 && + "hs_compile_short_literal_search called with an empty pattern"); + assert(expression != nullptr && + "hs_compile_short_literal_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_short_literal_search called with nullptr"); + if (unlikely(expression_length > HS_SHORT_PATTERN_THRESHOLD)) { + return HS_INVALID; + } + /* + * Exposing caseness at the api level may restrict our ability to change + * the backing algorithm, so we decided to make all algo case sensitive + */ + bool is_case_insensitive = false; + bool only_need_first_match = false; + ue2::hwlmLiteral lit(std::string(expression, expression_length), + is_case_insensitive, only_need_first_match, 0, + HWLM_ALL_GROUPS, {}, {}); + + hs_short_literal_compiled_pattern *database = + reinterpret_cast(hs_database_alloc( + sizeof(hs_short_literal_compiled_pattern))); + if (unlikely(database == nullptr)) { + return HS_NOMEM; + } + ue2::bytecode_ptr bytecode_database = ue2::noodBuildTable(lit); + if (unlikely(bytecode_database.get() == nullptr)) { + return HS_UNKNOWN_ERROR; + } + database->pattern_length = expression_length; + memcpy(&(database->noodle_database), bytecode_database.get(), + sizeof(noodTable)); + *output_database = database; + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_short_literal_pattern( + hs_short_literal_compiled_pattern *database) { + generic_free(database); +} + + + + + +// --- long_literal (FDR) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_long_literal_search( + const char *expression, size_t expression_length, + hs_long_literal_compiled_pattern_t **output_database) { + assert(expression_length > 0 && + "hs_compile_long_literal_search called with an empty pattern"); + assert(expression != nullptr && + "hs_compile_long_literal_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_long_literal_search called with 
nullptr"); + /* + * Exposing caseness at the api level may restrict our ability to change + * the backing algorithm, so we decided to make all algo case sensitive + */ + bool is_case_insensitive = false; + bool only_need_first_match = false; + std::vector lits; + // longer strings are checked in the callback + ue2::hwlmLiteral lit( + std::string(expression, + std::min(expression_length, (size_t)HWLM_LITERAL_MAX_LEN)), + is_case_insensitive, only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {}); + lits.push_back(lit); + + ue2::Grey g = ue2::Grey(); + u8 engType = HWLM_ENGINE_FDR; + bool make_small = false; + + hs_platform_info platform_info; + hs_populate_platform(&platform_info); + + ue2::target_t target = ue2::target_t(platform_info); + + std::unique_ptr proto = + ue2::fdrBuildProto(engType, lits, make_small, target, g); + + ue2::bytecode_ptr bytecode_database = ue2::fdrBuildTable(*proto, g); + if (unlikely(bytecode_database.get() == nullptr)) { + return HS_UNKNOWN_ERROR; + } + size_t fdr_size = bytecode_database.get()->size; + + size_t mem_required = size_fdr_database_single(fdr_size, expression_length); + struct combined_fdr_database *combined_database = + reinterpret_cast( + hs_database_alloc(mem_required)); + if (unlikely(combined_database == nullptr)) { + return HS_NOMEM; + } + init_combined_fdr_database_single(combined_database, fdr_size, expression, + expression_length); + memcpy(combined_database->database, bytecode_database.get(), fdr_size); + *output_database = reinterpret_cast( + combined_database); + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_long_literal_pattern( + hs_long_literal_compiled_pattern_t *database) { + generic_free(database); +} + + + + + + +// --- multi_literal (FDR) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_multi_literal_search( + const char **expression, size_t pattern_count, + const size_t *expression_length, + hs_multi_literal_compiled_pattern_t **output_database) { + assert(pattern_count > 0 && + 
"hs_compile_multi_literal_search called with no pattern"); + assert(expression != nullptr && + "hs_compile_multi_literal_search called with nullptr"); + assert(expression_length != nullptr && + "hs_compile_multi_literal_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_multi_literal_search called with nullptr"); + /* + * Exposing caseness at the api level may restrict our ability to change + * the backing algorithm, so we decided to make all algo case sensitive + */ + bool is_case_insensitive = false; + bool only_need_first_match = false; + std::vector lits; + for (size_t i = 0; i < pattern_count; i++) { + assert(expression_length[i] > 0 && expression[i] && + "hs_compile_multi_literal_search called with an empty pattern"); + // longer strings are checked in the callback + ue2::hwlmLiteral lit( + std::string(expression[i], std::min(expression_length[i], + (size_t)HWLM_LITERAL_MAX_LEN)), + is_case_insensitive, only_need_first_match, i, HWLM_ALL_GROUPS, {}, + {}); + lits.push_back(lit); + } + + ue2::Grey g = ue2::Grey(); + u8 engType = HWLM_ENGINE_FDR; + bool make_small = false; + + hs_platform_info platform_info; + hs_populate_platform(&platform_info); + + ue2::target_t target = ue2::target_t(platform_info); + + std::unique_ptr proto = + ue2::fdrBuildProto(engType, lits, make_small, target, g); + + ue2::bytecode_ptr bytecode_database = ue2::fdrBuildTable(*proto, g); + if (unlikely(bytecode_database.get() == nullptr)) { + return HS_UNKNOWN_ERROR; + } + size_t fdr_size = bytecode_database.get()->size; + + size_t mem_required = + size_fdr_database(fdr_size, pattern_count, expression_length); + struct combined_fdr_database *combined_database = + reinterpret_cast( + hs_database_alloc(mem_required)); + if (unlikely(combined_database == nullptr)) { + return HS_NOMEM; + } + init_combined_fdr_database(combined_database, fdr_size, expression, + pattern_count, expression_length); + memcpy(combined_database->database, bytecode_database.get(), 
fdr_size); + *output_database = reinterpret_cast( + combined_database); + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_multi_literal_pattern( + hs_multi_literal_compiled_pattern_t *database) { + generic_free(database); +} + + + + + +// --- single_char (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_single_char_search( + const char character, hs_single_char_compiled_pattern **output_database) { + assert(output_database != nullptr && + "hs_compile_single_char_search called with nullptr"); + + /* + * Exposing caseness at the api level may restrict our ability to change + * the backing algorithm, so we decided to make all algo case sensitive + */ + bool is_case_insensitive = false; + bool only_need_first_match = false; + ue2::hwlmLiteral lit(std::string(&character, 1), is_case_insensitive, + only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {}); + + hs_single_char_compiled_pattern *database = + reinterpret_cast(hs_database_alloc( + sizeof(hs_single_char_compiled_pattern))); + if (unlikely(database == nullptr)) { + return HS_NOMEM; + } + ue2::bytecode_ptr bytecode_database = ue2::noodBuildTable(lit); + if (unlikely(bytecode_database.get() == nullptr)) { + return HS_UNKNOWN_ERROR; + } + memcpy(&(database->noodle_database), bytecode_database.get(), + sizeof(noodTable)); + *output_database = database; + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_single_char_pattern( + hs_single_char_compiled_pattern *database) { + generic_free(database); +} + + + + + +// --- char_set (Truffle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL +hs_compile_char_set_search(const char *character_array, size_t character_count, + hs_char_set_compiled_pattern **output_database) { + assert(character_count > 0 && + "hs_compile_char_set_search called with an empty set"); + assert(character_array != nullptr && + "hs_compile_char_set_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_char_set_search called with nullptr"); + + const 
ue2::CharReach cr = + ue2::CharReach(std::string(character_array, character_count)); + truffle_storage *database = reinterpret_cast( + hs_database_alloc(sizeof(truffle_storage))); + // hs_database_alloc is meant to align to a machine word (likely 64b), which + // is actually required here + assert((((intptr_t)(database) & 3) == 0) && + "user-provided alloc didn't meet alignment requirement in " + "hs_compile_char_set_search"); + for (u8 i = 0; i < character_count; i++) { + database->char_id_map[(u8)character_array[i]] = i; + } + +#ifdef CAN_USE_WIDE_TRUFFLE + ue2::truffleBuildMasksWide(cr, database->wide_mask); +#else + ue2::truffleBuildMasks(cr, database->mask1, + database->mask2); +#endif + + *output_database = database; + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_char_set_pattern(hs_char_set_compiled_pattern *database) { + generic_free(database); +} + + + + + +// --- single_char_pair (Noodle) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_single_char_pair_search( + const char *pair, hs_single_char_pair_compiled_pattern **output_database) { + assert(pair != nullptr && + "hs_compile_single_char_pair_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_single_char_pair_search called with nullptr"); + + /* + * Exposing caseness at the api level may restrict our ability to change + * the backing algorithm, so we decided to make all algo case sensitive + */ + bool is_case_insensitive = false; + bool only_need_first_match = false; + ue2::hwlmLiteral lit(std::string(pair, 2), is_case_insensitive, + only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {}); + + hs_single_char_pair_compiled_pattern *database = + reinterpret_cast( + hs_database_alloc(sizeof(hs_single_char_pair_compiled_pattern))); + if (unlikely(database == nullptr)) { + return HS_NOMEM; + } + ue2::bytecode_ptr bytecode_database = ue2::noodBuildTable(lit); + if (unlikely(bytecode_database.get() == nullptr)) { + return HS_UNKNOWN_ERROR; + } + 
memcpy(&(database->noodle_database), bytecode_database.get(), + sizeof(noodTable)); + *output_database = database; + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_single_char_pair_pattern( + hs_single_char_pair_compiled_pattern *database) { + generic_free(database); +} + + + + + +// --- char_pair_set (Double shufti) --- + +HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_char_pair_set_search( + const char *expression, size_t pair_count, + hs_char_pair_set_compiled_pattern **output_database) { + assert(pair_count > 0 && + "hs_compile_char_pair_set_search called with an empty set"); + assert(expression != nullptr && + "hs_compile_char_pair_set_search called with nullptr"); + assert(output_database != nullptr && + "hs_compile_char_pair_set_search called with nullptr"); + + ue2::flat_set> pairs; + for (u8 i = 0; i < pair_count; i++) { + pairs.insert( + std::make_pair((u8)expression[2 * i], (u8)expression[2 * i + 1])); + } + + hs_char_pair_set_compiled_pattern *database = + reinterpret_cast(hs_database_alloc( + sizeof(hs_char_pair_set_compiled_pattern) + + sizeof(char) * 2 * pair_count)); + // hs_database_alloc is meant to align to a machine word (likely 64b), which + // is actually required here + assert((((intptr_t)(database) & 3) == 0) && + "user-provided alloc didn't meet alignment requirement in " + "hs_compile_char_pair_set_search"); + + bool success = ue2::shuftiBuildDoubleMasks( + ue2::CharReach(), pairs, database->dshufti_database.mask1, + database->dshufti_database.mask2, database->dshufti_database.mask3, + database->dshufti_database.mask4); + + if (!success) { + return HS_COMPILER_ERROR; + } + + database->dshufti_database.pair_count = pair_count; + + size_t width = SuperVector::mask_width(); + assert(width <= 4 && + "Code needs rework if supervector's mask are bigger than 4"); + assert(width != 3 && + "Code needs rework if supervector's mask aren't a power of 2"); + // we need a mask such that every 2*width bits, only the lsb is set to 1 + // so for a 
width of 4, we repeat 0X01 + unsigned char bit_filter_mask = 0; + for (size_t i = 8; i > 0; i -= 2 * width) { + bit_filter_mask = bit_filter_mask << (2 * width) | 0x1; + } + memset(&(database->dshufti_database.bit_filter_mask), bit_filter_mask, + sizeof(vector_mask_type)); + memcpy(database->dshufti_database.all_pairs, expression, 2 * pair_count); + + *output_database = database; + + return HS_SUCCESS; +} + +HS_PUBLIC_API +void hs_free_char_pair_set_pattern( + hs_char_pair_set_compiled_pattern *database) { + generic_free(database); +} + diff --git a/src/hs_direct_search_types.h b/src/hs_direct_search_types.h new file mode 100644 index 00000000..ba53e2b0 --- /dev/null +++ b/src/hs_direct_search_types.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DIRECT_SEARCH_TYPES_H +#define DIRECT_SEARCH_TYPES_H + +#include + +#include "util/supervector/supervector.hpp" + +#include "fdr/fdr_internal.h" +#include "hwlm/noodle_internal.h" + + +struct hs_short_literal_compiled_pattern { + noodTable noodle_database; + u8 pattern_length; +}; + +struct hs_long_literal_compiled_pattern { + struct combined_fdr_database fdr_database; +}; + +struct hs_multi_literal_compiled_pattern { + struct combined_fdr_database fdr_database; +}; + +struct hs_single_char_compiled_pattern { + struct noodTable noodle_database; +}; + +struct hs_single_char_pair_compiled_pattern { + struct noodTable noodle_database; +}; + +typedef struct hs_char_set_compiled_pattern { + union + { + struct { + uint8_t mask1[16] __attribute__((aligned)); + uint8_t mask2[16] __attribute__((aligned)); + }; + uint8_t wide_mask[32] __attribute__((aligned)); + }; + // allows us to get the id from the character + u8 char_id_map[256]; +} truffle_storage; + +struct dshufti_storage { + alignas(16) uint8_t mask1[16]; + alignas(16) uint8_t mask2[16]; + alignas(16) uint8_t mask3[16]; + alignas(16) uint8_t mask4[16]; + size_t pair_count; + typename SuperVector::comparemask_type bit_filter_mask; + alignas(VECTORSIZE) uint8_t all_pairs[]; +}; + +struct hs_char_pair_set_compiled_pattern { + struct dshufti_storage dshufti_database; +}; +#endif // DIRECT_SEARCH_TYPES_H diff --git a/src/hs_runtime.h b/src/hs_runtime.h index 
6d34b6c4..92770358 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2024-2025, Arm ltd * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -614,6 +615,226 @@ hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch); */ #define HS_OFFSET_PAST_HORIZON (~0ULL) +/** @} */ + +/** + * The following functions are part of the extended API. + * This extension offers direct access to search algorithms + * allowing the user to minimise calling overhead for simple + * search use cases where type of the search is known. + * + * All search functions handle a limited kind of patterns. For more generic + * patterns, use @ref hs_scan() + * + * NOTE: All search functions are considered case-sensitive. + */ + +/** + * @defgroup DIRECT_API_RUNTIME + * + * @{ + */ + +/** Callback return value indicating that we should continue matching. */ +#define CB_CONTINUE_MATCHING (int)(~0U) + +/** Callback return value indicating that we should halt matching. */ +#define CB_TERMINATE_MATCHING (int)0 + +/** + * Search the given data for the short literal pattern up to + * @ref HS_SHORT_PATTERN_THRESHOLD chars long. For longer patterns, use @ref + * hs_long_literal_search(). Other options exists for character pairs or set. + * + * @param database + * The compiled pattern returned by @ref hs_compile_short_literal_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * @param context + * The user defined pointer which will be passed to the callback function. 
+ * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_short_literal_search( + const hs_short_literal_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for the long literal pattern. + * + * If the pattern length is less or equal to @ref HS_SHORT_PATTERN_THRESHOLD, + * @ref hs_short_literal_search() may be faster. + * + * @param database + * The compiled pattern returned by @ref hs_compile_long_literal_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * @param context + * The user defined pointer which will be passed to the callback function. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_long_literal_search( + const hs_long_literal_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for several long literal patterns at once. + * + * @param database + * The compiled pattern returned by @ref hs_compile_multi_literal_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * The reported ID is the index of the matching literal. + * @param context + * The user defined pointer which will be passed to the callback function. 
+ * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_multi_literal_search( + const hs_multi_literal_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for any occurrence of the given character. + * + * @param database + * The compiled pattern returned by @ref hs_compile_single_char_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * @param context + * The user defined pointer which will be passed to the callback function. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_single_char_search( + const hs_single_char_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for occurrences of any character from the given + * character set. + * + * @param database + * The compiled pattern returned by @ref hs_compile_char_set_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * The reported ID is the index of the matching char. + * @param context + * The user defined pointer which will be passed to the callback function. 
+ * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_char_set_search( + const hs_char_set_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for occurrences of the given ordered character pair + * ("Aj" won't match "jA"). + * + * @param database + * The compiled pattern returned by @ref hs_compile_single_char_pair_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * @param context + * The user defined pointer which will be passed to the callback function. + * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_single_char_pair_search( + const hs_single_char_pair_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + +/** + * Search the given data for occurrences of any of the ordered character pairs + * from the given set ("Aj" won't match "jA") + * + * @param database + * The compiled pattern returned by @ref + * hs_compile_char_pair_set_search() + * @param data + * Pointer to the data to be scanned. + * @param length + * The number of bytes to scan. + * @param onEvent + * Pointer to a @ref match_event_handler callback function. If a NULL + * pointer is given, no matches will be returned. + * The "flag" argument is unused. + * The reported ID is the index of the matching pair. + * @param context + * The user defined pointer which will be passed to the callback function.
+ * + * @return + * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the + * match callback indicated that scanning should stop; other values on + * error. + */ +hs_error_t HS_CDECL hs_char_pair_set_search( + const hs_char_pair_set_compiled_pattern_t *database, const char *data, + size_t length, match_event_handler onEvent, + void *context); + #ifdef __cplusplus } /* extern "C" */ #endif From 5782780e11f932348558190537cfe36e53b1f81b Mon Sep 17 00:00:00 2001 From: Yoan Picchi Date: Fri, 16 May 2025 12:29:26 +0000 Subject: [PATCH 2/4] Add test suite for the Direct API Signed-off-by: Yoan Picchi --- unit/CMakeLists.txt | 15 + unit/direct_API/char_pair_set.cpp | 378 ++++++++++++++++++++ unit/direct_API/char_set.cpp | 333 +++++++++++++++++ unit/direct_API/common.h | 210 +++++++++++ unit/direct_API/long_literal.cpp | 394 ++++++++++++++++++++ unit/direct_API/main.cpp | 36 ++ unit/direct_API/multi_literal.cpp | 515 +++++++++++++++++++++++++++ unit/direct_API/short_literal.cpp | 377 ++++++++++++++++++++ unit/direct_API/single_char.cpp | 293 +++++++++++++++ unit/direct_API/single_char_pair.cpp | 303 ++++++++++++++++ 10 files changed, 2854 insertions(+) create mode 100644 unit/direct_API/char_pair_set.cpp create mode 100644 unit/direct_API/char_set.cpp create mode 100644 unit/direct_API/common.h create mode 100644 unit/direct_API/long_literal.cpp create mode 100644 unit/direct_API/main.cpp create mode 100644 unit/direct_API/multi_literal.cpp create mode 100644 unit/direct_API/short_literal.cpp create mode 100644 unit/direct_API/single_char.cpp create mode 100644 unit/direct_API/single_char_pair.cpp diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 7e16f333..2ad2ea83 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -63,6 +63,21 @@ set(unit_hyperscan_SOURCES add_executable(unit-hyperscan ${unit_hyperscan_SOURCES}) target_link_libraries(unit-hyperscan hs expressionutil) +set(unit_direct_api_SOURCES + ${gtest_SOURCES} + 
direct_API/char_pair_set.cpp + direct_API/char_set.cpp + direct_API/common.h + direct_API/long_literal.cpp + direct_API/main.cpp + direct_API/multi_literal.cpp + direct_API/short_literal.cpp + direct_API/single_char_pair.cpp + direct_API/single_char.cpp + ) +add_executable(unit-direct-API ${unit_direct_api_SOURCES}) +target_link_libraries(unit-direct-API hs) + if (NOT FAT_RUNTIME AND BUILD_STATIC_LIBS) set(BUILD_UNIT_INTERNAL TRUE) set(unit_internal_SOURCES diff --git a/unit/direct_API/char_pair_set.cpp b/unit/direct_API/char_pair_set.cpp new file mode 100644 index 00000000..f371d996 --- /dev/null +++ b/unit/direct_API/char_pair_set.cpp @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "common.h" + +#include "hwlm/noodle_internal.h" + +#define COMPILE_CHAR_PAIR_SET(in_character_array, in_pair_count) \ + const size_t pair_count = (in_pair_count); \ + const char *character_array = (in_character_array); \ + hs_char_pair_set_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = hs_compile_char_pair_set_search( \ + character_array, pair_count, &database); \ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +#define SEARCH_CHAR_PAIR_SET(in_buffer, in_buffer_len, in_expected_match, \ + in_expected_start_array, in_expected_id_array) \ + { \ + buffer = (in_buffer); \ + const size_t buffer_len = (in_buffer_len); \ + const size_t expected_match = (in_expected_match); \ + size_t expected_start_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_end_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_id_array[expected_match] = \ + BRACED_INIT_LIST in_expected_id_array; \ + for (size_t i = 0; i < expected_match; i++) { \ + expected_end_array[i] += 2; \ + } \ + context.expected_start_array = expected_start_array; \ + context.expected_end_array = expected_end_array; \ + context.expected_id_array = expected_id_array; \ + context.array_size = expected_match; \ + 
context.number_matched = 0; \ + context.number_wrong = 0; \ + \ + ret = hs_char_pair_set_search(database, buffer, buffer_len, callback, \ + &context); \ + } + +// ------------------------free tests------------------------------------------- + +/* +hs_free_char_pair_set_pattern + nullptr + general +*/ + +TEST(char_pair_set_free, nullptr) { + hs_char_pair_set_compiled_pattern_t *database = nullptr; + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_free, general) { + SETUP_MEM_LEAK_TEST(); + noodTable *clear_database = + reinterpret_cast(test_malloc(sizeof(noodTable))); + + hs_char_pair_set_compiled_pattern_t *database = + reinterpret_cast(clear_database); + + hs_free_char_pair_set_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +// ------------------------compile tests---------------------------------------- + +/* +hs_compile_char_pair_set_search + single pair + multiple pair + pair duplicate + valid pair including null char + + empty char array + nullptr char array + nullptr output +*/ + +TEST(char_pair_set_compile, single_pair) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + (void) ret; + (void) buffer; + EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_single_pair"); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_compile, two_pairs) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2); + (void) ret; + (void) buffer; + EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_two_pairs"); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_compile, duplicate) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2); + (void) ret; + (void) buffer; + EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_duplicate"); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_compile, null_char) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1); + (void) ret; + (void) buffer; + EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_null_char"); + hs_free_char_pair_set_pattern(database); +} + +#if 
!defined(RELEASE_BUILD) +// test asserts + +TEST(char_pair_set_compile, no_expression) { + const size_t pair_count = 0; + const char *character_array = PAIR_SET_ABCD; + hs_char_pair_set_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_char_pair_set_search(character_array, pair_count, + &database), + "called with an empty set"); +} + +TEST(char_pair_set_compile, nullptr_char_array) { + hs_char_pair_set_compiled_pattern_t *database = nullptr; + EXPECT_DEATH( + hs_compile_char_pair_set_search(nullptr, 1, &database), + "called with nullptr"); +} + +TEST(char_pair_set_compile, nullptr_database) { + const size_t pair_count = 2; + const char *character_array = PAIR_SET_ABCD; + EXPECT_DEATH(hs_compile_char_pair_set_search(character_array, + pair_count, nullptr), + "called with nullptr"); +} + +#endif + +// ------------------------search tests----------------------------------------- + +/* +hs_char_pair_set_search + general pattern + match at start + match middle (general) + match index 15 (cross over vector) + match at end + match past end + match null char + bad caseness + search several times + match a pair duplicate + match several pattern in the same search + match when there's more pairs than fit in a vector + buffer containing null char + pattern with null char + general pattern + + buff size 0 + nullptr pattern + nullptr buffer + nullptr callback +*/ + +TEST(char_pair_set_search, start) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_start"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_general"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + 
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(char_pair_set_search, cross_vector) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_cross_vector"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, end) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_end"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, past_end) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_past_end"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, null_char) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, bad_case) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_bad_case"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); 
+} + +TEST(char_pair_set_search, several_search) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_several_search"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, duplicate) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2); + ASSERT_COMPILE_SUCCESS("char_pair_set_search_duplicate"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, match_multiple) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_match_multiple"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 7), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, last_of_long_pattern) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_LONG_PATTERN_AB, 9); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_last_of_long_pattern"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (8)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, null_char_buff_and_pattern) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 2); + ASSERT_COMPILE_SUCCESS( + "test_char_pair_set_search_null_char_buff_and_pattern"); + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, 
null_char_buff) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +TEST(char_pair_set_search, empty_buff) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_CHAR_PAIR_SET("", 0, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer); + hs_free_char_pair_set_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(char_pair_set_search, nullptr_pattern) { + const hs_char_pair_set_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr database"); +} + +TEST(char_pair_set_search, nullptr_buffer) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_CHAR_PAIR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr buffer"); +} + +TEST(char_pair_set_search, nullptr_callback) { + COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1); + ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + size_t expected_id_array[expected_match] = {0}; + for (size_t i = 0; i < expected_match; i++) { 
+ expected_end_array[i] += 2; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.expected_id_array = expected_id_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_char_pair_set_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/char_set.cpp b/unit/direct_API/char_set.cpp new file mode 100644 index 00000000..33909776 --- /dev/null +++ b/unit/direct_API/char_set.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "common.h" + +#define COMPILE_CHAR_SET(in_character_array, in_character_count) \ + const size_t character_count = (in_character_count); \ + const char *character_array = (in_character_array); \ + hs_char_set_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = hs_compile_char_set_search( \ + character_array, character_count, &database); \ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +#define SEARCH_CHAR_SET(in_buffer, in_buffer_len, in_expected_match, \ + in_expected_start_array, in_expected_id_array) \ + { \ + buffer = (in_buffer); \ + const size_t buffer_len = (in_buffer_len); \ + const size_t expected_match = (in_expected_match); \ + size_t expected_start_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_end_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_id_array[expected_match] = \ + BRACED_INIT_LIST in_expected_id_array; \ + for (size_t i = 0; i < expected_match; i++) { \ + expected_end_array[i] += 1; \ + } \ + context.expected_start_array = expected_start_array; \ + context.expected_end_array = expected_end_array; \ + context.expected_id_array = expected_id_array; \ + context.array_size = expected_match; \ + context.number_matched = 0; \ + context.number_wrong = 
0; \ + \ + ret = hs_char_set_search(database, buffer, buffer_len, callback, \ + &context); \ + } + +// ------------------------free tests------------------------------------------- + +/* +hs_free_char_set_pattern + nullptr + general +*/ + +TEST(char_set_free, nullptr) { + hs_char_set_compiled_pattern_t *database = nullptr; + hs_free_char_set_pattern(database); +} + +TEST(char_set_free, general) { + SETUP_MEM_LEAK_TEST(); + truffle_storage *clear_database = reinterpret_cast( + test_malloc(sizeof(truffle_storage))); + + hs_char_set_compiled_pattern_t *database = + reinterpret_cast(clear_database); + + hs_free_char_set_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +// ------------------------compile tests---------------------------------------- + +/* +hs_compile_char_set_search + single char + same char twice + general (several different chars) + null char + + empty char array + nullptr char array + nullptr output +*/ + +TEST(char_set_compile, single_char) { + COMPILE_CHAR_SET(CHAR_SET_A, 1); + EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char"); + hs_free_char_set_pattern(database); +} + +TEST(char_set_compile, single_char_twice) { + COMPILE_CHAR_SET(CHAR_SET_A, 2); + EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char_twice"); + hs_free_char_set_pattern(database); +} + +TEST(char_set_compile, general) { + COMPILE_CHAR_SET(CHAR_SET_ABCDE, 5); + EXPECT_COMPILE_SUCCESS("test_compile_char_set_general"); + hs_free_char_set_pattern(database); +} + +TEST(char_set_compile, null_char) { + COMPILE_CHAR_SET(CHAR_SET_NULL, 1); + EXPECT_COMPILE_SUCCESS("test_compile_char_set_null_char"); + hs_free_char_set_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(char_set_compile, no_expression) { + const size_t character_count = 0; + const char *character_array = CHAR_SET_AB; + hs_char_set_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_char_set_search(character_array, character_count, + &database), 
+ "called with an empty set"); +} + +TEST(char_set_compile, nullptr_char_array) { + hs_char_set_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_char_set_search(nullptr, 1, &database), + "called with nullptr"); +} + +TEST(char_set_compile, nullptr_database) { + const size_t character_count = 2; + const char *character_array = CHAR_SET_AB; + EXPECT_DEATH(hs_compile_char_set_search(character_array, + character_count, nullptr), + "called with nullptr"); +} + +#endif + +// ------------------------search tests----------------------------------------- + +/* +hs_char_set_search + match at start + match middle (general) + match index 15 (last char of a vector) + match at end + match past end + match null char + bad caseness + search several times + match first char + match last char + buff size 0 + + nullptr pattern + nullptr buffer + nullptr callback +*/ + +TEST(char_set_search, start) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_start"); + SEARCH_CHAR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 2, (0, 1), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_general"); + SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(char_set_search, cross_vector) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_cross_vector"); + SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16), + (0, 1, 0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, end) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + 
ASSERT_COMPILE_SUCCESS("test_char_set_search_end"); + SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 2, (30, 31), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, past_end) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_past_end"); + SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, null_char) { + COMPILE_CHAR_SET(CHAR_SET_NULL, 1); + ASSERT_COMPILE_SUCCESS("test_char_set_search_null_char"); + SEARCH_CHAR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, bad_case) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_bad_case"); + SEARCH_CHAR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 1, (16), (1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, several_search) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_several_search"); + SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16), + (0, 1, 0, 1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, first_char) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_first_char"); + SEARCH_CHAR_SET(EXPR_UNIFORM_1_A, EXPR_UNIFORM_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[0], buffer); + hs_free_char_set_pattern(database); 
+} + +TEST(char_set_search, last_char) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_last_char"); + SEARCH_CHAR_SET(EXPR_UNIFORM_1_B, EXPR_UNIFORM_LEN, 1, (5), (1)); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[1], buffer); + hs_free_char_set_pattern(database); +} + +TEST(char_set_search, empty_buff) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_CHAR_SET("", 0, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer); + hs_free_char_set_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(char_set_search, nullptr_pattern) { + const hs_char_set_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr database"); +} + +TEST(char_set_search, nullptr_buffer) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_CHAR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr buffer"); +} + +TEST(char_set_search, nullptr_callback) { + COMPILE_CHAR_SET(CHAR_SET_AB, 2); + ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 2; + size_t expected_start_array[expected_match] = {5, 6}; + size_t expected_end_array[expected_match] = {5, 6}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += 1; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = 
expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_char_set_search(database, buffer, buffer_len, nullptr, &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/common.h b/unit/direct_API/common.h new file mode 100644 index 00000000..9aaaa355 --- /dev/null +++ b/unit/direct_API/common.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <cstdio>         // printf in test_free; header name lost in extraction, reconstructed from usage - confirm
+#include <unordered_set>  // std::unordered_set for alloced_mem; reconstructed from usage - confirm
+
+#include "hs_common.h"
+#include "hs_compile.h"
+#include "hs_runtime.h"
+#include "hs_direct_search.h"
+#include "hs_direct_search_types.h"
+
+#include "gtest/gtest.h"
+
+// -----------------------------------------------------------------------------
+
+#define PATTERN_0_CHAR ""
+#define PATTERN_1_CHAR "a"
+#define PATTERN_1_CHAR_NULL "\0"
+#define PATTERN_2_CHAR "aB"
+#define PATTERN_2_WITH_NULL "a\0"
+#define PATTERN_3_CHAR "aBc"
+#define PATTERN_5_CHAR "aBcde"
+#define PATTERN_5_WITH_NULL "a\0Bcd"
+#define PATTERN_8_CHAR "aBcdeoAb"
+#define PATTERN_10_CHAR "aBcdeoAbCD"
+#define PATTERN_25_CHAR "aBcdeoAbCDumefnvqmuz,crhUq"
+
+#define CHAR_SET_NULL "\0"
+#define CHAR_SET_A "aaAA"
+#define CHAR_SET_AB "aB"
+#define CHAR_SET_ABCDE "aBcde"
+
+#define PAIR_SET_ABCD "aBcd"
+#define PAIR_SET_A_NULL_BC "a\0Bc"
+#define PAIR_SET_AB_DUPLICATE "aBaB"
+#define PAIR_SET_LONG_PATTERN_AB "u0u1u2u3u4u5u6u7aB"
+
+#define PATTERN_ARRAY_CONTAIN_EMPTY_0 {""}
+#define PATTERN_ARRAY_SINGLE_CHAR_PAT_1 {"a"}
+#define PATTERN_ARRAY_SINGLE_PAT_5 {"aBcde"}
+#define PATTERN_ARRAY_GENERAL_5_5 {"aBcde","fghij"}
+#define PATTERN_ARRAY_GENERAL_5_DUPLICATE {"aBcde","aBcde"}
+#define PATTERN_ARRAY_LONG_10_10 {"aBcdeoAbCD","muz,crhUqu"}
+#define PATTERN_ARRAY_CONTAIN_NULLPTR_5_0 {"aBcde",nullptr}
+#define PATTERN_ARRAY_CONTAIN_EMPTY_0 {""} // NOTE(review): identical duplicate of the definition above; safe to drop
+#define PATTERN_ARRAY_WITH_NULL_5_5 {"a\0Bcd","aBcde"}
+#define PATTERN_ARRAY_OVERLAP_5_8 {"aBcde","cdeoAbCD"}
+#define PATTERN_ARRAY_NULLPTR ((char**)nullptr)
+
+// -----------------------------------------------------------------------------
+
+#define EXPR_NOISE_LEN 32
+#define EXPR_NOISE "zmeh vnMezr,xbzumefnvqmuz,crhUqu"
+#define EXPR_NOISE_0 "aBcdeoAbCDr,xbzumefnvqmuz,crhUqu"
+#define EXPR_NOISE_5 "zmeh aBcdeoAbCDumefnvqmuz,crhUqu"
+#define EXPR_NOISE_5_NULL "zmeh a\0Bcdr,xbzumefnvqmuz,crhUqu"
+#define EXPR_NOISE_5_15 "zmeh aBcdeoAbCDaBcdeoAbCD,crhUqu"
+#define EXPR_NOISE_5_15_BAD_CASE "zmeh AbcdeoAbCDABcdeoAbCD,crhUqu"
+#define EXPR_NOISE_MIX "zmeh fgcder,xbzumefnvqmuz,crhUqu"
+#define EXPR_NOISE_PAT2_5 "zmeh fghijr,xbzumefnvqmuz,crhUqu"
+#define EXPR_NOISE_DUO_5_15 "zmeh aBcdeoAbCDfghijvqmuz,crhUqu"
+#define EXPR_NOISE_SHORT_ONLY_5 "zmeh aBcdeoAbHHumefnvqmuz,crhUqu"
+#define EXPR_NOISE_5_AB "zmeh aBMezr,xbzumefnvqmuz,crhUqu"
+
+#define EXPR_NOISE_A_END_31 "zmeh vnMezr,xbzumefnvqmuz,crhUqa"
+#define EXPR_NOISE_AB_END_30 "zmeh vnMezr,xbzumefnvqmuz,crhUaB"
+#define EXPR_NOISE_ABCDE_END_27 "zmeh vnMezr,xbzumefnvqmuz,caBcde"
+#define EXPR_NOISE_ABCDEOABCD_END_22 "zmeh vnMezr,xbzumefnvqaBcdeoAbCD"
+
+#define EXPR_UNIFORM_LEN 32
+#define EXPR_UNIFORM "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu"
+#define EXPR_UNIFORM_1_A "uuuuuauuuuuuuuuuuuuuuuuuuuuuuuuu"
+#define EXPR_UNIFORM_1_B "uuuuuBuuuuuuuuuuuuuuuuuuuuuuuuuu"
+
+// -----------------------------------------------------------------------------
+
+#define BRACED_INIT_LIST(...) {__VA_ARGS__}
+
+#define EXPECT_COMPILE_SUCCESS(func_name) \
+    EXPECT_EQ(compile_ret, HS_SUCCESS) \
+        << "Fail to build the pattern in " << (func_name) << "\n"; \
+    EXPECT_NE(database, nullptr) \
+        << "Compilation returned nullptr database " << (func_name) << "\n";
+
+#define EXPECT_COMPILE_FAILURE(func_name) \
+    EXPECT_NE(compile_ret, HS_SUCCESS) \
+        << "Pattern built fine when error was expected in " << (func_name) \
+        << "\n";
+
+#define ASSERT_COMPILE_SUCCESS(func_name) \
+    ASSERT_EQ(compile_ret, HS_SUCCESS) \
+        << "Fail to build the pattern in " << (func_name) << "\n"; \
+    ASSERT_NE(database, nullptr) \
+        << "Compilation returned nullptr database " << (func_name) << "\n";
+
+#define ASSERT_COMPILE_FAILURE(func_name) \
+    ASSERT_NE(compile_ret, HS_SUCCESS) \
+        << "Pattern built fine when error was expected in " << (func_name) \
+        << "\n";
+
+#define EXPECT_SEARCH_SUCCESS(search_func_name, pattern, buffer) \
+    EXPECT_EQ(HS_SUCCESS, ret) \
+        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
+        << (buffer) << "\"\n Search failed"; \
+    EXPECT_EQ(context.array_size, context.number_matched) \
+        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
+        << (buffer) << "\"\n Missed some matches.\n"; \
+    EXPECT_EQ(0u, context.number_wrong) /* was EXPECT_LE(0, ...): always true for size_t */ \
+        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
+        << (buffer) << "\"\n Unexpected matches.\n";
+
+// -----------------------------------------------------------------------------
+
+typedef struct callback_context {
+    /* start offsets at which a match is expected (array_size entries) */
+    size_t *expected_start_array;
+    /* end offsets at which a match is expected (array_size entries) */
+    size_t *expected_end_array;
+    /* pattern id expected for each entry above (array_size entries) */
+    size_t *expected_id_array;
+    size_t array_size;
+    /* number of reported matches found in the expected_* arrays */
+    size_t number_matched;
+    /* number of reported matches NOT found in the expected_* arrays */
+    size_t number_wrong;
+} context_t;
+
+static
+int callback(unsigned int id, unsigned long long start,
+             unsigned long long end_offset, unsigned int flags,
+             void *raw_context) {
+    (void)flags;
+    context_t *context = reinterpret_cast<context_t *>(raw_context);
+    bool matched = false;
+    // A match is expected when its (start, end, id) triple equals entry i
+    for (size_t i = 0; i < context->array_size; i++) {
+        if (end_offset == context->expected_end_array[i] &&
+            start == context->expected_start_array[i] &&
+            id == context->expected_id_array[i]) {
+            matched = true;
+        }
+    }
+    // Bump exactly one counter per reported match
+    if (matched) {
+        context->number_matched += 1;
+        // printf("match at index %llu\n", end_offset);
+    } else {
+        context->number_wrong += 1;
+        // printf("unplanned match at index %llu\n", end_offset);
+    }
+    return CB_CONTINUE_MATCHING;
+}
+
+static std::unordered_set<void *> alloced_mem; // pointers handed out by test_malloc and not yet freed
+
+static void* test_malloc(size_t size) {
+    void * mem = malloc(size);
+    alloced_mem.insert(mem); // recorded so test_free can detect foreign or double frees
+    return mem;
+}
+
+static void test_free(void *ptr) {
+    size_t erased_count = alloced_mem.erase(ptr);
+    if(erased_count == 1) {
+        free(ptr);
+    } else {
+        printf("all currently allocated memory:\n"); // ptr was not tracked: dump the live set and fail
+        for (const void *elem : alloced_mem)
+            printf("%p ", elem);
+        printf("\nTrying to free: %p\n", ptr);
+        FAIL();
+    }
+}
+
+#define SETUP_MEM_LEAK_TEST() hs_set_allocator(test_malloc, test_free);
+#define UNSET_MEM_LEAK_TEST() hs_set_allocator(nullptr, nullptr);
+#define EXPECT_MEMORY_CLEAN() \
+    EXPECT_TRUE(alloced_mem.empty()); \
+    alloced_mem.clear();
+
+#endif // COMMON_H
diff --git a/unit/direct_API/long_literal.cpp b/unit/direct_API/long_literal.cpp
new file mode 100644
index 00000000..31bace2f
--- /dev/null
+++ b/unit/direct_API/long_literal.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2024-2025, Arm ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "common.h"
+
+#include "fdr/fdr_internal.h"
+
+#define COMPILE_LONG_LITERAL(in_pattern, in_pattern_len) \
+    size_t pattern_len = (in_pattern_len); \
+    const char *pattern = (in_pattern); \
+    hs_long_literal_compiled_pattern_t *database = nullptr; \
+    hs_error_t compile_ret = \
+        hs_compile_long_literal_search(pattern, pattern_len, &database); \
+    hs_error_t ret = 0; \
+    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
+    const char *buffer = nullptr; \
+    (void)buffer; \
+    context_t context = {}; \
+    (void) context;
+
+// in_expected_start_array lists match start offsets; ends are start + pattern_len.
+#define SEARCH_LONG_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
+                            in_expected_start_array) \
+    { \
+        buffer = (in_buffer); \
+        const size_t buffer_len = (in_buffer_len); \
+        const size_t expected_match = (in_expected_match); \
+        size_t expected_start_array[expected_match] = \
+            BRACED_INIT_LIST in_expected_start_array; \
+        size_t expected_end_array[expected_match] = \
+            BRACED_INIT_LIST in_expected_start_array; \
+        size_t expected_id_array[expected_match]; \
+        for (size_t i = 0; i < expected_match; i++) { \
+            expected_end_array[i] += pattern_len; \
+            expected_id_array[i] = 0; \
+        } \
+        context.expected_start_array = expected_start_array; \
+        context.expected_end_array = expected_end_array; \
+        context.expected_id_array = expected_id_array; \
+        context.array_size = expected_match; \
+        context.number_matched = 0; \
+        context.number_wrong = 0; \
+        \
+        ret = hs_long_literal_search(database, buffer, buffer_len, callback, \
+                                     &context); \
+    }
+
+static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
+              "changing the threshold for short/long literal require changing "
+              "the tests to still test the threshold behavior");
+
+// ------------------------free tests-------------------------------------------
+
+/*
+hs_free_long_literal_pattern
+    nullptr
+    general
+*/
+
+TEST(long_literal_free, nullptr) {
+    hs_long_literal_compiled_pattern_t *database = nullptr;
+    hs_free_long_literal_pattern(database);
+}
+
+TEST(long_literal_free, general) {
+    SETUP_MEM_LEAK_TEST();
+    combined_fdr_database *clear_database =
+        reinterpret_cast<combined_fdr_database *>(
+            test_malloc(sizeof(combined_fdr_database)));
+
+    hs_long_literal_compiled_pattern_t *database =
+        reinterpret_cast<hs_long_literal_compiled_pattern_t *>(clear_database);
+
+    hs_free_long_literal_pattern(database);
+    EXPECT_MEMORY_CLEAN();
+    UNSET_MEM_LEAK_TEST();
+}
+
+// ------------------------compile tests----------------------------------------
+
+/*
+hs_compile_long_literal_search
+    <=8 char
+    general (>8 char)
+    valid pattern including null char
+
+    empty expression
+    nullptr expression
+    nullptr output
+*/
+
+TEST(long_literal_compile, short) {
+    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
+    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_short");
+    hs_free_long_literal_pattern(database); // free only after `database` has been inspected
+}
+
+TEST(long_literal_compile, general) {
+    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
+    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_general");
+    hs_free_long_literal_pattern(database); // free only after `database` has been inspected
+}
+
+TEST(long_literal_compile, null_char) {
+    COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5);
+    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_null_char");
+    hs_free_long_literal_pattern(database); // free only after `database` has been inspected
+}
+
+#if !defined(RELEASE_BUILD)
+// death tests: each call below is expected to abort with the quoted message
+
+TEST(long_literal_compile, empty_pattern) {
+    hs_long_literal_compiled_pattern_t *database = nullptr;
+    EXPECT_DEATH(
+        hs_compile_long_literal_search(PATTERN_0_CHAR, 0, &database),
+        "called with an empty pattern");
+}
+
+TEST(long_literal_compile, nullptr_pattern) {
+    hs_long_literal_compiled_pattern_t *database = nullptr;
+    EXPECT_DEATH(hs_compile_long_literal_search(nullptr, 5, &database),
+                 "called with nullptr");
+}
+
+TEST(long_literal_compile, nullptr_database) {
+    EXPECT_DEATH(hs_compile_long_literal_search(PATTERN_5_CHAR, 5, nullptr),
+                 "called with nullptr");
+}
+
+#endif
+
+// ------------------------search tests-----------------------------------------
+
+/*
+hs_long_literal_search
+    short pattern
+        positive match
+        negative match
+    general pattern
+        general pattern but the buffer only has the short part of it
+        extra long pattern (vectorized confirm)
+        match at start
+        match middle (general)
+        match index 15 (cross over vector)
+        match at end
+        match past end (a few chars ok, then end, so missing some chars)
+        wrong letter case
+        search several times
+    single char pattern
+        general match
+        match at end
+        no match
+    buffer containing null char
+        pattern with null char
+        general pattern (no null char searched for)
+    buff size 0
+    nullptr pattern
+    nullptr buffer
+    nullptr callback
+*/
+
+TEST(long_literal_search, short_positive) { + COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_general"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, short_negative) { + COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_general"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, short_but_negative_long) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_but_negative_long"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, start) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_start"); + SEARCH_LONG_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_general"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(long_literal_search, extra_long) { + COMPILE_LONG_LITERAL(PATTERN_25_CHAR, 25); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_extra_long"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5, 
EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, cross_vector) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_cross_vector"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, end) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_end"); + SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN, 1, (22)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, past_end) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_past_end"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN - 3, 0, + ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, bad_case) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_bad_case"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, several_search) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_several_search"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + 
hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, single_char) { + COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, single_char_end) { + COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_end"); + SEARCH_LONG_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, single_char_no_match) { + COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_no_match"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, null_char_buff_and_pattern) { + COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5); + ASSERT_COMPILE_SUCCESS( + "test_long_literal_search_null_char_buff_and_pattern"); + SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, null_char_buff) { + COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_null_char_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +TEST(long_literal_search, empty_buff) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + 
ASSERT_COMPILE_SUCCESS("test_long_literal_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_LONG_LITERAL("", 0, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer); + hs_free_long_literal_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(long_literal_search, nullptr_pattern) { + const hs_long_literal_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + size_t pattern_len = 5; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr database"); +} + +TEST(long_literal_search, nullptr_buffer) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_LONG_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr buffer"); +} + +TEST(long_literal_search, nullptr_callback) { + COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10); + ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += pattern_len; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_long_literal_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/main.cpp b/unit/direct_API/main.cpp new file mode 100644 index 
00000000..7a504574 --- /dev/null +++ b/unit/direct_API/main.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "gtest/gtest.h" + +// Driver: run all the tests (defined in other source files in this directory) +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/unit/direct_API/multi_literal.cpp b/unit/direct_API/multi_literal.cpp new file mode 100644 index 00000000..aa0797d2 --- /dev/null +++ b/unit/direct_API/multi_literal.cpp @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "common.h" + +#include "fdr/fdr_internal.h" + +#define COMPILE_MULTI_LITERAL(in_pattern, in_pattern_count, in_pattern_len) \ + const size_t pattern_count = (in_pattern_count); \ + size_t pattern_len[pattern_count] = BRACED_INIT_LIST in_pattern_len; \ + const char *pattern_storage[] = in_pattern; \ + const char **pattern = pattern_storage; \ + hs_multi_literal_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = hs_compile_multi_literal_search( \ + pattern, pattern_count, pattern_len, &database); \ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +// expected match array here is the index of the start of match, assuming it +// match a pattern with the same length as pattern 0 +#define SEARCH_MULTI_LITERAL(in_buffer, in_buffer_len, in_expected_match, \ + in_expected_start_array, in_expected_id_array) \ + { \ + buffer = (in_buffer); \ + const size_t buffer_len = (in_buffer_len); \ + const size_t expected_match = (in_expected_match); \ + size_t expected_start_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_end_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_id_array[expected_match] = \ + BRACED_INIT_LIST in_expected_id_array; \ + for (size_t i = 0; i < expected_match; i++) { \ + expected_end_array[i] += pattern_len[0]; \ + } \ + context.expected_start_array = expected_start_array; \ + context.expected_end_array = expected_end_array; \ + context.expected_id_array = expected_id_array; \ + context.array_size = expected_match; \ + context.number_matched = 0; \ + context.number_wrong = 0; \ + \ + ret = hs_multi_literal_search(database, buffer, buffer_len, callback, \ + &context); \ + } + +// ------------------------free tests------------------------------------------- + +/* +hs_free_multi_literal_pattern + 
nullptr + general +*/ + +TEST(multi_literal_free, nullptr) { + hs_multi_literal_compiled_pattern_t *database = nullptr; + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_free, general) { + SETUP_MEM_LEAK_TEST(); + combined_fdr_database *clear_database = + reinterpret_cast( + test_malloc(sizeof(combined_fdr_database))); + + hs_multi_literal_compiled_pattern_t *database = + reinterpret_cast(clear_database); + + hs_free_multi_literal_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +// ------------------------compile tests---------------------------------------- + +/* +hs_compile_multi_literal_search + single expression + single char expression + general (several expressions) + pattern duplicate + valid pattern including null char + overlaping patterns (eg, "abba" and "bb") + + no expressions + empty expression + nullptr expression array + one of the expression is nullptr + nullptr output +*/ + +TEST(multi_literal_compile, single_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_PAT_5, 1, (5)); + EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_single_pattern"); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_compile, single_pattern_single_char) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1)); + EXPECT_COMPILE_SUCCESS( + "test_compile_multi_literal_single_pattern_single_char"); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_compile, general) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_general"); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_compile, duplicate) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5)); + EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_duplicate"); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_compile, with_null_char) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5)); + 
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_with_null_char"); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_compile, overlapping_patterns) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8)); + EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_overlapping_patterns"); + hs_free_multi_literal_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(multi_literal_compile, no_expression) { + const size_t pattern_count = 0; + const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5; + const char **pattern = pattern_storage; + hs_multi_literal_compiled_pattern_t *database = nullptr; + EXPECT_DEATH( + { + size_t pattern_len[2]; + pattern_len[0] = 5; + pattern_len[1] = 5; + hs_compile_multi_literal_search(pattern, pattern_count, pattern_len, + &database); + }, + "called with no pattern"); +} + +TEST(multi_literal_compile, empty_expression) { + const size_t pattern_count = 1; + const size_t pattern_len[pattern_count] = {0}; + const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_EMPTY_0; + const char **pattern = pattern_storage; + hs_multi_literal_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count, + pattern_len, &database), + "called with an empty pattern"); +} + +TEST(multi_literal_compile, nullptr_pattern_array) { + const size_t pattern_count = 1; + const size_t pattern_len[pattern_count] = {5}; + const char **pattern = nullptr; + hs_multi_literal_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count, + pattern_len, &database), + "called with nullptr"); +} + +TEST(multi_literal_compile, nullptr_pattern_in_array) { + const size_t pattern_count = 2; + const size_t pattern_len[pattern_count] = {5, 5}; + const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_NULLPTR_5_0; + const char **pattern = pattern_storage; + hs_multi_literal_compiled_pattern_t *database = nullptr; + 
EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count, + pattern_len, &database), + "called with an empty pattern"); +} + +TEST(multi_literal_compile, nullptr_database) { + const size_t pattern_count = 2; + const size_t pattern_len[pattern_count] = {5, 5}; + const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5; + const char **pattern = pattern_storage; + EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count, + pattern_len, nullptr), + "called with nullptr"); +} + +#endif + +// ------------------------search tests----------------------------------------- + +/* +hs_multi_literal_search + general pattern + match at start + match middle (general) + match index 15 (cross over vector) + match at end + match past end (a few char ok, then end, so missing some chars) + match long patterns + long pattern but the buffer only have the short part of it + bad caseness + search several times + match first pattern + match last pattern + match several pattern in the same search + match overlapping patterns + pattern mix (start with pattern A, finish with pattern B. 
Expect no +match) + match a pattern duplicate + single char pattern + general match + match at end + no match + buffer containing null char + pattern with null char + general pattern (no null char searched for) + buff size 0 + nullptr pattern + nullptr buffer + nullptr callback +*/ + +TEST(multi_literal_search, start) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_start"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_general"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(multi_literal_search, cross_vector) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_cross_vector"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, end) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_end"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, past_end) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_past_end"); + // cppcheck-suppress unsignedLessThanZero 
+ SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, long_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_long_pattern"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 22), (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, short_but_negative_long) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_short_but_negative_long"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 1, (22), (1)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, bad_case) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_bad_case"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, several_search) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_several_search"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, first_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + 
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_first_pattern"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, last_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_last_pattern"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_PAT2_5, EXPR_NOISE_LEN, 1, (5), (1)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, multi_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_multi_pattern"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_DUO_5_15, EXPR_NOISE_LEN, 2, (5, 15), + (0, 1)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, overlap) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_overlap"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 2; + size_t expected_start_array[expected_match] = {5, 7}; + size_t expected_end_array[expected_match] = {5, 7}; + size_t expected_id_array[expected_match] = {0, 1}; + for (size_t i = 0; i < expected_match; i++) { + // we need the length of the second pattern, hence not using the macro + expected_end_array[i] += pattern_len[i]; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.expected_id_array = expected_id_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + ret = hs_multi_literal_search(database, buffer, buffer_len, + callback, &context); + + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", 
pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, pattern_mix) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_pattern_mix"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_MULTI_LITERAL(EXPR_NOISE_MIX, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, duplicate) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_duplicate"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, single_char) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, single_char_end) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_end"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30), + (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, single_char_no_match) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_no_match"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_MULTI_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + 
hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, null_char_buff_and_pattern) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS( + "test_multi_literal_search_null_char_buff_and_pattern"); + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0)); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, null_char_buff) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_null_char_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +TEST(multi_literal_search, empty_buff) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_MULTI_LITERAL("", 0, 0, (), ()); + EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer); + hs_free_multi_literal_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(multi_literal_search, nullptr_pattern) { + const hs_multi_literal_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + size_t pattern_len[2]; + pattern_len[0] = 5; + pattern_len[1] = 5; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr database"); +} + +TEST(multi_literal_search, nullptr_buffer) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // 
cppcheck-suppress unreadVariable + SEARCH_MULTI_LITERAL(nullptr, EXPR_NOISE_LEN, 0, (), ()); + }, + "called with nullptr buffer"); +} + +TEST(multi_literal_search, nullptr_callback) { + COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5)); + ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += pattern_len[0]; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_multi_literal_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/short_literal.cpp b/unit/direct_API/short_literal.cpp new file mode 100644 index 00000000..016a3dec --- /dev/null +++ b/unit/direct_API/short_literal.cpp @@ -0,0 +1,377 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "direct_API/common.h" + +#include "hwlm/noodle_internal.h" + +#define COMPILE_SHORT_LITERAL(in_pattern, in_pattern_len) \ + size_t pattern_len = (in_pattern_len); \ + const char *pattern = (in_pattern); \ + hs_short_literal_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = \ + hs_compile_short_literal_search(pattern, pattern_len, &database); \ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +// expected match array here is the index of the start of match. 
+#define SEARCH_SHORT_LITERAL(in_buffer, in_buffer_len, in_expected_match,      \
+                             in_expected_start_array)                          \
+    {                                                                          \
+        buffer = (in_buffer);                                                  \
+        const size_t buffer_len = (in_buffer_len);                             \
+        const size_t expected_match = (in_expected_match);                     \
+        size_t expected_start_array[expected_match] =                          \
+            BRACED_INIT_LIST in_expected_start_array;                          \
+        size_t expected_end_array[expected_match] =                            \
+            BRACED_INIT_LIST in_expected_start_array;                          \
+        size_t expected_id_array[expected_match];                              \
+        for (size_t i = 0; i < expected_match; i++) {                          \
+            expected_end_array[i] += pattern_len;                              \
+            expected_id_array[i] = 0;                                          \
+        }                                                                      \
+        context.expected_start_array = expected_start_array;                   \
+        context.expected_end_array = expected_end_array;                       \
+        context.expected_id_array = expected_id_array;                         \
+        context.array_size = expected_match;                                   \
+        context.number_matched = 0;                                            \
+        context.number_wrong = 0;                                              \
+                                                                               \
+        ret = hs_short_literal_search(database, buffer, buffer_len, callback,  \
+                                      &context);                               \
+    }
+
+static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
+              "changing the threshold for short/long literal require changing "
+              "the tests to still test the threshold behavior");
+
+// ------------------------free tests-------------------------------------------
+
+/*
+hs_free_short_literal_pattern
+    nullptr
+    general
+*/
+
+TEST(short_literal_free, nullptr) {
+    hs_short_literal_compiled_pattern_t *database = nullptr;
+    hs_free_short_literal_pattern(database);
+}
+
+TEST(short_literal_free, general) {
+    SETUP_MEM_LEAK_TEST();
+    noodTable *clear_database =
+        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
+    hs_short_literal_compiled_pattern_t *database =
+        reinterpret_cast<hs_short_literal_compiled_pattern_t *>(
+            clear_database);
+
+    hs_free_short_literal_pattern(database);
+    EXPECT_MEMORY_CLEAN();
+    UNSET_MEM_LEAK_TEST();
+}
+
+// ------------------------compile tests----------------------------------------
+
+/*
+hs_compile_short_literal_search
+    single char
+    general
+    8 char
+    >8 char
+    valid pattern including null char
+    empty expression
+    nullptr expression
+ nullptr output +*/ + +TEST(short_literal_compile, single_char) { + COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1); + EXPECT_COMPILE_SUCCESS("test_compile_short_literal_single_char"); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_compile, general) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + EXPECT_COMPILE_SUCCESS("test_compile_short_literal_general"); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_compile, max_length) { + COMPILE_SHORT_LITERAL(PATTERN_8_CHAR, 8); + EXPECT_COMPILE_SUCCESS("test_compile_short_literal_max_len"); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_compile, too_long) { + COMPILE_SHORT_LITERAL(PATTERN_10_CHAR, 10); + EXPECT_COMPILE_FAILURE("test_compile_short_literal_too_long"); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_compile, null_char) { + COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5); + EXPECT_COMPILE_SUCCESS("test_compile_short_literal_null_char"); + hs_free_short_literal_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(short_literal_compile, empty_pattern) { + hs_short_literal_compiled_pattern_t *database = nullptr; + EXPECT_DEATH( + hs_compile_short_literal_search(PATTERN_0_CHAR, 0, &database), + "called with an empty pattern"); +} + +TEST(short_literal_compile, nullptr_pattern) { + hs_short_literal_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_short_literal_search(nullptr, 5, &database), + "called with nullptr"); +} + +TEST(short_literal_compile, nullptr_database) { + EXPECT_DEATH(hs_compile_short_literal_search(PATTERN_5_CHAR, 5, nullptr), + "called with nullptr"); +} + +#endif + +// ------------------------search tests----------------------------------------- + +/* +hs_short_literal_search + general pattern + match at start + match middle (general) + match index 15 (noodle cross over vector) + match at end + match the full pattern, not just the first pair + match past end (2 char ok, then end, so 
missing some chars) + bad caseness + search several times + single char pattern + general match + match at end + no match + buffer containing null char + pattern with null char + general pattern + buff size 0 + nullptr pattern + nullptr buffer + nullptr callback +*/ + +TEST(short_literal_search, start) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_start"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_general"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(short_literal_search, cross_vector) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_cross_vector"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, end) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_end"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, past_end) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_past_end"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + 
hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, short_no_match) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_short_no_match"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_AB, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, bad_case) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_bad_case"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, several_search) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_several_search"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + // cppcheck-suppress redundantAssignment + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, single_char) { + COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, single_char_end) { + COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_end"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + 
hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, single_char_no_match) { + COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_no_match"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, null_char_buff_and_pattern) { + COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5); + ASSERT_COMPILE_SUCCESS( + "test_short_literal_search_null_char_buff_and_pattern"); + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, null_char_buff) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_null_char_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +TEST(short_literal_search, empty_buff) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SHORT_LITERAL("", 0, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer); + hs_free_short_literal_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(short_literal_search, nullptr_pattern) { + const hs_short_literal_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + size_t pattern_len = 5; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr 
database"); +} + +TEST(short_literal_search, nullptr_buffer) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_SHORT_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr buffer"); +} + +TEST(short_literal_search, nullptr_callback) { + COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5); + ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += pattern_len; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_short_literal_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/single_char.cpp b/unit/direct_API/single_char.cpp new file mode 100644 index 00000000..a9ec0d68 --- /dev/null +++ b/unit/direct_API/single_char.cpp @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "common.h" + +#include "hwlm/noodle_internal.h" + +#define COMPILE_SINGLE_CHAR(in_pattern) \ + const char pattern = *(in_pattern); \ + hs_single_char_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = hs_compile_single_char_search(pattern, &database);\ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +// expected match array here is the index of the start of match. 
+#define SEARCH_SINGLE_CHAR(in_buffer, in_buffer_len, in_expected_match,        \
+                           in_expected_start_array)                            \
+    {                                                                          \
+        buffer = (in_buffer);                                                  \
+        const size_t buffer_len = (in_buffer_len);                             \
+        const size_t expected_match = (in_expected_match);                     \
+        size_t expected_start_array[expected_match] =                          \
+            BRACED_INIT_LIST in_expected_start_array;                          \
+        size_t expected_end_array[expected_match] =                            \
+            BRACED_INIT_LIST in_expected_start_array;                          \
+        size_t expected_id_array[expected_match];                              \
+        for (size_t i = 0; i < expected_match; i++) {                          \
+            expected_end_array[i] += 1;                                        \
+            expected_id_array[i] = 0;                                          \
+        }                                                                      \
+        context.expected_start_array = expected_start_array;                   \
+        context.expected_end_array = expected_end_array;                       \
+        context.expected_id_array = expected_id_array;                         \
+        context.array_size = expected_match;                                   \
+        context.number_matched = 0;                                            \
+        context.number_wrong = 0;                                              \
+                                                                               \
+        ret = hs_single_char_search(database, buffer, buffer_len, callback,    \
+                                    &context);                                 \
+    }
+
+// ------------------------free tests-------------------------------------------
+
+/*
+hs_free_single_char_pattern
+    nullptr
+    general
+*/
+
+TEST(single_char_free, nullptr) {
+    hs_single_char_compiled_pattern_t *database = nullptr;
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_free, general) {
+    SETUP_MEM_LEAK_TEST();
+    truffle_storage *clear_database = reinterpret_cast<truffle_storage *>(
+        test_malloc(sizeof(truffle_storage)));
+    hs_single_char_compiled_pattern_t *database =
+        reinterpret_cast<hs_single_char_compiled_pattern_t *>(clear_database);
+
+    hs_free_single_char_pattern(database);
+    EXPECT_MEMORY_CLEAN();
+    UNSET_MEM_LEAK_TEST();
+}
+
+// ------------------------compile tests----------------------------------------
+
+/*
+hs_compile_single_char_search
+    general (1 char)
+    null char pattern
+
+    nullptr output
+*/
+
+TEST(single_char_compile, general) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    EXPECT_COMPILE_SUCCESS("test_compile_single_char_general");
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_compile, null_char) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL);
+    EXPECT_COMPILE_SUCCESS("test_compile_single_char_null_char");
+    hs_free_single_char_pattern(database);
+}
+
+#if !defined(RELEASE_BUILD)
+// test asserts
+
+TEST(single_char_compile, nullptr_database) {
+    EXPECT_DEATH(hs_compile_single_char_search(*PATTERN_1_CHAR, nullptr),
+                 "called with nullptr");
+}
+
+#endif
+
+// ------------------------search tests-----------------------------------------
+
+/*
+hs_single_char_search
+    general pattern
+        match at start
+        match middle (general)
+        match vector end
+        match at buffer end
+        match past end
+        bad caseness
+        search several times
+    buffer containing null char
+        null char pattern
+        general pattern
+    buff size 0
+    nullptr pattern
+    nullptr buffer
+    nullptr callback
+*/
+
+TEST(single_char_search, start) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_start");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, general) {
+    SETUP_MEM_LEAK_TEST();
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_general");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+    EXPECT_MEMORY_CLEAN();
+    UNSET_MEM_LEAK_TEST();
+}
+
+TEST(single_char_search, end_vector) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_end_vector");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, end) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_end");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN, 1, (31));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, past_end) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_past_end");
+    // cppcheck-suppress unsignedLessThanZero
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN - 1, 0, ());
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, bad_case) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_bad_case");
+    // cppcheck-suppress unsignedLessThanZero
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, several_search) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_several_search");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, null_char_buff_and_pattern) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL);
+    ASSERT_COMPILE_SUCCESS(
+        "test_single_char_search_null_char_buff_and_pattern");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, null_char_buff) {
+    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
+    ASSERT_COMPILE_SUCCESS("test_single_char_search_null_char_buff");
+    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
+    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
+    hs_free_single_char_pattern(database);
+}
+
+TEST(single_char_search, empty_buff) { + COMPILE_SINGLE_CHAR(PATTERN_1_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SINGLE_CHAR("", 0, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer); + hs_free_single_char_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts + +TEST(single_char_search, nullptr_pattern) { + const hs_single_char_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr database"); +} + +TEST(single_char_search, nullptr_buffer) { + COMPILE_SINGLE_CHAR(PATTERN_1_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_SINGLE_CHAR(nullptr, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr buffer"); +} + +TEST(single_char_search, nullptr_callback) { + COMPILE_SINGLE_CHAR(PATTERN_1_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += 1; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_single_char_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif diff --git a/unit/direct_API/single_char_pair.cpp b/unit/direct_API/single_char_pair.cpp 
new file mode 100644 index 00000000..10307743 --- /dev/null +++ b/unit/direct_API/single_char_pair.cpp @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2024-2025, Arm ltd + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "common.h" + +#include "hwlm/noodle_internal.h" + +#define COMPILE_SINGLE_CHAR_PAIR(in_pattern) \ + const char *pattern = (in_pattern); \ + hs_single_char_pair_compiled_pattern_t *database = nullptr; \ + hs_error_t compile_ret = \ + hs_compile_single_char_pair_search(pattern, &database); \ + hs_error_t ret = 0; \ + (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \ + const char *buffer = nullptr; \ + (void)buffer; \ + context_t context = {}; \ + (void) context; + +// expected_start_array holds each match's start index; the end is start + 2. +#define SEARCH_SINGLE_CHAR_PAIR(in_buffer, in_buffer_len, in_expected_match, \ + in_expected_start_array) \ + { \ + buffer = (in_buffer); \ + const size_t buffer_len = (in_buffer_len); \ + const size_t expected_match = (in_expected_match); \ + size_t expected_start_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_end_array[expected_match] = \ + BRACED_INIT_LIST in_expected_start_array; \ + size_t expected_id_array[expected_match]; \ + for (size_t i = 0; i < expected_match; i++) { \ + expected_end_array[i] += 2; \ + expected_id_array[i] = 0; \ + } \ + context.expected_start_array = expected_start_array; \ + context.expected_end_array = expected_end_array; \ + context.expected_id_array = expected_id_array; \ + context.array_size = expected_match; \ + context.number_matched = 0; \ + context.number_wrong = 0; \ + \ + ret = hs_single_char_pair_search(database, buffer, buffer_len, \ + callback, &context); \ + } + +// ------------------------free tests------------------------------------------- + +/* +hs_free_single_char_pair_pattern + nullptr (freeing a null pattern must not crash) + general (a test_malloc'ed pattern must be fully released) +*/ + +TEST(single_char_pair_free, nullptr) { + hs_single_char_pair_compiled_pattern_t *database = nullptr; + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_free, general) { + SETUP_MEM_LEAK_TEST(); + noodTable *clear_database = + reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable))); +
hs_single_char_pair_compiled_pattern_t *database = + reinterpret_cast<hs_single_char_pair_compiled_pattern_t *>( + clear_database); + + hs_free_single_char_pair_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +// ------------------------compile tests---------------------------------------- + +/* +hs_compile_single_char_pair_search + general (2 char) + valid pattern including null char + + nullptr expression + nullptr output +*/ + +TEST(single_char_pair_compile, general) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR) + EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_general") + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_compile, with_null_char) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL) + EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_with_null_char") + hs_free_single_char_pair_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts: each call below must abort with the quoted message + +TEST(single_char_pair_compile, nullptr_pattern) { + hs_single_char_pair_compiled_pattern_t *database = nullptr; + EXPECT_DEATH(hs_compile_single_char_pair_search(nullptr, &database), + "called with nullptr"); +} + +TEST(single_char_pair_compile, nullptr_database) { + EXPECT_DEATH(hs_compile_single_char_pair_search(PATTERN_2_CHAR, nullptr), + "called with nullptr"); +} + +#endif + +// ------------------------search tests----------------------------------------- + +/* +hs_single_char_pair_search + general pattern + match at start + match middle (general) + match index 15 (cross over vector) + match at end + match past end (first char present, then buffer ends, so one char is missing) + bad caseness + search several times + buffer containing null char + pattern with null char + general pattern + buff size 0 + nullptr pattern + nullptr buffer + nullptr callback +*/ + +TEST(single_char_pair_search, start) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_start"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0)); +
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, general) { + SETUP_MEM_LEAK_TEST(); + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_general"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); + EXPECT_MEMORY_CLEAN(); + UNSET_MEM_LEAK_TEST(); +} + +TEST(single_char_pair_search, cross_vector) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_cross_vector"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, end) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_end"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, past_end) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_past_end"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, bad_case) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_bad_case"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); +
hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, several_search) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_several_search"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, null_char_buff_and_pattern) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL); + ASSERT_COMPILE_SUCCESS( + "test_single_char_pair_search_null_char_buff_and_pattern"); + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5)); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, null_char_buff) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_null_char_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +TEST(single_char_pair_search, empty_buff) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_empty_buff"); + // cppcheck-suppress unsignedLessThanZero + SEARCH_SINGLE_CHAR_PAIR("", 0, 0, ()); + EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer); + hs_free_single_char_pair_pattern(database); +} + +#if !defined(RELEASE_BUILD) +// test asserts: each call below must abort with the quoted message + +TEST(single_char_pair_search, nullptr_pattern) { + const hs_single_char_pair_compiled_pattern_t *database = nullptr; + context_t context; + EXPECT_DEATH( + { + const char *buffer; + hs_error_t ret; + // cppcheck-suppress unsignedLessThanZero +
// cppcheck-suppress unreadVariable + SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr database"); +} + +TEST(single_char_pair_search, nullptr_buffer) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_buffer"); + EXPECT_DEATH( + { + // cppcheck-suppress unsignedLessThanZero + // cppcheck-suppress unreadVariable + SEARCH_SINGLE_CHAR_PAIR(nullptr, EXPR_NOISE_LEN, 0, ()); + }, + "called with nullptr buffer"); +} + +TEST(single_char_pair_search, nullptr_callback) { + COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR); + ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_callback"); + + buffer = EXPR_NOISE_5; + const size_t buffer_len = EXPR_NOISE_LEN; + const size_t expected_match = 1; + size_t expected_start_array[expected_match] = {5}; + size_t expected_end_array[expected_match] = {5}; + for (size_t i = 0; i < expected_match; i++) { + expected_end_array[i] += 2; + } + context.expected_start_array = expected_start_array; + context.expected_end_array = expected_end_array; + context.array_size = expected_match; + context.number_matched = 0; + context.number_wrong = 0; + + EXPECT_DEATH( + { + hs_single_char_pair_search(database, buffer, buffer_len, nullptr, + &context); + }, + "called with nullptr callback"); +} + +#endif From 28986c364a514097fbb159f232cf1153b1c8a767 Mon Sep 17 00:00:00 2001 From: Yoan Picchi Date: Thu, 19 Jun 2025 13:55:23 +0000 Subject: [PATCH 3/4] Add Direct API documentation Signed-off-by: Yoan Picchi --- doc/dev-reference/api_constants.rst | 7 +++++++ doc/dev-reference/compilation.rst | 3 +++ doc/dev-reference/direct_api.rst | 28 ++++++++++++++++++++++++++++ doc/dev-reference/index.rst | 1 + 4 files changed, 39 insertions(+) create mode 100644 doc/dev-reference/direct_api.rst diff --git a/doc/dev-reference/api_constants.rst b/doc/dev-reference/api_constants.rst index bbe229de..912c1917 100644 --- a/doc/dev-reference/api_constants.rst +++ 
b/doc/dev-reference/api_constants.rst @@ -51,3 +51,10 @@ Compile mode flags .. doxygengroup:: HS_MODE_FLAG :content-only: :no-link: + +****************************** +Other Constants +****************************** + +.. doxygendefine:: HS_SHORT_PATTERN_THRESHOLD + :no-link: diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index a0ae8c8b..21066c80 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -123,6 +123,9 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`, The new literal APIs introduced here are designed for rule sets containing only pure literal expressions. +In tight loops where performance is critical, further specializations of the +literal search are available through the :ref:`direct_api`. + *************** Pattern Support *************** diff --git a/doc/dev-reference/direct_api.rst b/doc/dev-reference/direct_api.rst new file mode 100644 index 00000000..8942633a --- /dev/null +++ b/doc/dev-reference/direct_api.rst @@ -0,0 +1,30 @@ +.. _direct_api: + +#################### +Direct API extension +#################### + +Even though pure literal searches are fast, there is still some overhead. +In tight loops where both the pattern and the data are small (say, a +4-character pattern with a 32-character data buffer), this overhead can +become noticeable. In such cases, the functions provided by the Direct API +offer a minimal-overhead alternative, at the cost of a reduced set of +functionality. + +Each type of call is designed for a specific pattern type: + +- Strings +- Pairs of characters +- Single characters + +Each type comes in a ``single`` search and a ``set`` search variant, depending +on whether you need to search for one or multiple patterns. + +For each case, compile, search, and free functions are provided. + +All search functions are case-sensitive.
+ +The single string search has an additional specialization based on the length +of the pattern. If the pattern is "short", i.e. no longer than +:c:member:`HS_SHORT_PATTERN_THRESHOLD` characters, then +:c:func:`hs_compile_short_literal_search` may be used instead. diff --git a/doc/dev-reference/index.rst b/doc/dev-reference/index.rst index 4046a298..5845f9e1 100644 --- a/doc/dev-reference/index.rst +++ b/doc/dev-reference/index.rst @@ -21,3 +21,4 @@ Vectorscan |version| Developer's Reference Guide api_constants api_files chimera + direct_api From 9f3867c5d20fbc8c7cdeeac62374dc4cdab65948 Mon Sep 17 00:00:00 2001 From: Yoan Picchi Date: Thu, 19 Jun 2025 14:00:36 +0000 Subject: [PATCH 4/4] Update to modern Sphinx The doc generation used a function removed in Sphinx 4.0. This replaces it with the modern equivalent. Signed-off-by: Yoan Picchi --- doc/dev-reference/conf.py.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/dev-reference/conf.py.in b/doc/dev-reference/conf.py.in index 298a54b1..8adff2cb 100644 --- a/doc/dev-reference/conf.py.in +++ b/doc/dev-reference/conf.py.in @@ -23,7 +23,7 @@ import os # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +needs_sphinx = '4.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -272,4 +272,4 @@ breathe_domain_by_extension = {"h" : "c"} # -- Add some customisation ----------------------------------------------- def setup(app): - app.add_stylesheet("hyperscan.css") # Custom stylesheet for e.g. :regex: + app.add_css_file("hyperscan.css") # Custom stylesheet for e.g. :regex: