Merge 9f3867c5d20fbc8c7cdeeac62374dc4cdab65948 into 9e9a10ad01fceb2032ae6e36cb0262c4dbba90c7

This commit is contained in:
ypicchi-arm 2025-06-27 16:29:24 +00:00 committed by GitHub
commit 272888499d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 4821 additions and 3 deletions

View File

@ -304,6 +304,7 @@ set (hs_exec_SRCS
src/crc32.h
src/report.h
src/runtime.c
src/hs_direct_search.cpp
src/stream_compress.c
src/stream_compress.h
src/stream_compress_impl.h
@ -484,6 +485,7 @@ SET (hs_compile_SRCS
src/hs.cpp
src/hs_internal.h
src/hs_version.h.in
src/hs_direct_search_compile.cpp
src/scratch.h
src/state.h
src/ue2common.h

View File

@ -51,3 +51,10 @@ Compile mode flags
.. doxygengroup:: HS_MODE_FLAG
:content-only:
:no-link:
******************************
Other Constants
******************************
.. doxygendefine:: HS_SHORT_PATTERN_THRESHOLD
:no-link:

View File

@ -123,6 +123,9 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
The new literal APIs introduced here are designed for rule sets
containing only pure literal expressions.
In tight loops where performance is critical, a further specialization of the
literal search is available in the form of the :ref:`direct_api`.
***************
Pattern Support
***************

View File

@ -23,7 +23,7 @@ import os
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
needs_sphinx = '4.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
@ -272,4 +272,4 @@ breathe_domain_by_extension = {"h" : "c"}
# -- Add some customisation -----------------------------------------------
def setup(app):
app.add_stylesheet("hyperscan.css") # Custom stylesheet for e.g. :regex:
app.add_css_file("hyperscan.css") # Custom stylesheet for e.g. :regex:

View File

@ -0,0 +1,28 @@
.. _direct_api:
####################
Direct API extension
####################
Even though pure literal searches are fast, there is still some overhead.
In tight loops where both the pattern and the data are small (say, a
4-character pattern with a 32-character data buffer), this overhead can
become noticeable. In such cases, the functions provided by the Direct API
offer a minimal-overhead alternative, at the cost of a reduced set of
functionality.
Each type of call is designed for a specific pattern type:
- Strings
- Pairs of two characters
- Single characters
Each type comes in a ``single`` search and ``set`` search variant, depending
on whether you need to search for one or multiple patterns.
For each case, compile, search, and free functions are provided.
All search functions are case-sensitive.
The single string search has an additional specialization based on the length
of the pattern. If the pattern is "short", i.e. no longer than
:c:member:`HS_SHORT_PATTERN_THRESHOLD` characters, then
:c:func:`hs_compile_short_literal_search` may be used instead.

View File

@ -21,3 +21,4 @@ Vectorscan |version| Developer's Reference Guide
api_constants
api_files
chimera
direct_api

21
hs.def
View File

@ -41,3 +41,24 @@ EXPORTS
hs_stream_size
hs_valid_platform
hs_version
hs_short_literal_search
hs_long_literal_search
hs_multi_literal_search
hs_single_char_search
hs_char_set_search
hs_single_char_pair_search
hs_char_pair_set_search
hs_compile_short_literal_search
hs_compile_long_literal_search
hs_compile_multi_literal_search
hs_compile_single_char_search
hs_compile_char_set_search
hs_compile_single_char_pair_search
hs_compile_char_pair_set_search
hs_free_short_literal_pattern
hs_free_long_literal_pattern
hs_free_multi_literal_pattern
hs_free_single_char_pattern
hs_free_char_set_pattern
hs_free_single_char_pair_pattern
hs_free_char_pair_set_pattern

View File

@ -33,4 +33,11 @@ EXPORTS
hs_set_stream_allocator
hs_stream_size
hs_valid_platform
hs_version
hs_version
hs_short_literal_search
hs_long_literal_search
hs_multi_literal_search
hs_single_char_search
hs_char_set_search
hs_single_char_pair_search
hs_char_pair_set_search

View File

@ -1,6 +1,7 @@
/*
* Copyright (c) 2016-2020, Intel Corporation
* Copyright (c) 2024, VectorCamp PC
* Copyright (c) 2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -352,6 +353,99 @@ CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_strea
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
buf, buf_size, scratch, onEvent, context);
/** DIRECT API **/
/*
 * Dispatch wiring for the seven direct-API search entry points.
 * Each entry point is declared with the same four-macro sequence
 * (CREATE_DISPATCH, CONNECT_ARGS_1, CONNECT_DISPATCH_2, CONNECT_ARGS_3) and
 * forwards the same five arguments: database, data, length, onEvent, context.
 * Only the function name and the compiled-pattern type differ between groups.
 * NOTE(review): the macros themselves are defined earlier in this file,
 * outside this excerpt - confirm their expansion there.
 */
/* hs_short_literal_search */
CREATE_DISPATCH(hs_error_t, hs_short_literal_search,
const hs_short_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_short_literal_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_short_literal_search,
const hs_short_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_short_literal_search, database, data, length,
onEvent, context);
/* hs_long_literal_search */
CREATE_DISPATCH(hs_error_t, hs_long_literal_search,
const hs_long_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_long_literal_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_long_literal_search,
const hs_long_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_long_literal_search, database, data, length,
onEvent, context);
/* hs_multi_literal_search */
CREATE_DISPATCH(hs_error_t, hs_multi_literal_search,
const hs_multi_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_multi_literal_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_multi_literal_search,
const hs_multi_literal_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_multi_literal_search, database, data, length,
onEvent, context);
/* hs_single_char_search */
CREATE_DISPATCH(hs_error_t, hs_single_char_search,
const hs_single_char_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_single_char_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_single_char_search,
const hs_single_char_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_single_char_search, database, data, length,
onEvent, context);
/* hs_char_set_search */
CREATE_DISPATCH(hs_error_t, hs_char_set_search,
const hs_char_set_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_char_set_search, database, data, length, onEvent,
context);
CONNECT_DISPATCH_2(hs_error_t, hs_char_set_search,
const hs_char_set_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_char_set_search, database, data, length, onEvent,
context);
/* hs_single_char_pair_search */
CREATE_DISPATCH(hs_error_t, hs_single_char_pair_search,
const hs_single_char_pair_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_single_char_pair_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_single_char_pair_search,
const hs_single_char_pair_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_single_char_pair_search, database, data, length,
onEvent, context);
/* hs_char_pair_set_search */
CREATE_DISPATCH(hs_error_t, hs_char_pair_set_search,
const hs_char_pair_set_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_1(hs_error_t, hs_char_pair_set_search, database, data, length,
onEvent, context);
CONNECT_DISPATCH_2(hs_error_t, hs_char_pair_set_search,
const hs_char_pair_set_compiled_pattern_t *database,
const char *data, size_t length, match_event_handler onEvent,
void *context);
CONNECT_ARGS_3(hs_error_t, hs_char_pair_set_search, database, data, length,
onEvent, context);
/** INTERNALS **/
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2019, Intel Corporation
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -585,6 +586,90 @@ hs_error_t HS_CDECL hs_valid_platform(void);
/** @} */
/**
* The following functions are part of the extended API.
* This extension offers direct access to the search algorithms,
* allowing the user to minimise calling overhead for simple
* search use cases where the type of search is known in advance.
*/
/**
* @defgroup DIRECT_API_COMMON
*
* @{
*/
/**
* The size threshold after which a pattern is considered long and must be fed
* to @ref hs_compile_long_literal_search(). Patterns up to this length may be
* fed to hs_compile_short_literal_search() instead.
*/
#define HS_SHORT_PATTERN_THRESHOLD 8
/**
* The compiled pattern type for searching for short literals
*
* Generated by @ref hs_compile_short_literal_search() and to be freed with @ref
* hs_free_short_literal_pattern
*/
typedef struct hs_short_literal_compiled_pattern
hs_short_literal_compiled_pattern_t;
/**
* The compiled pattern type for searching for long literals
*
* Generated by @ref hs_compile_long_literal_search() and to be freed with @ref
* hs_free_long_literal_pattern
*/
typedef struct hs_long_literal_compiled_pattern
hs_long_literal_compiled_pattern_t;
/**
* The compiled pattern type for searching for several literals at once
*
* Generated by @ref hs_compile_multi_literal_search() and to be freed with @ref
* hs_free_multi_literal_pattern
*/
typedef struct hs_multi_literal_compiled_pattern
hs_multi_literal_compiled_pattern_t;
/**
* The compiled pattern type for searching for a single character
*
* Generated by @ref hs_compile_single_char_search() and to be freed with @ref
* hs_free_single_char_pattern
*/
typedef struct hs_single_char_compiled_pattern
hs_single_char_compiled_pattern_t;
/**
* The compiled pattern type for searching for a character set
*
* Generated by @ref hs_compile_char_set_search() and to be freed with @ref
* hs_free_char_set_pattern
*/
typedef struct hs_char_set_compiled_pattern hs_char_set_compiled_pattern_t;
/**
* The compiled pattern type for searching for a character pair
*
* Generated by @ref hs_compile_single_char_pair_search() and to be freed with
* @ref hs_free_single_char_pair_pattern
*/
typedef struct hs_single_char_pair_compiled_pattern
hs_single_char_pair_compiled_pattern_t;
/**
* The compiled pattern type for searching for a set of character pairs
*
* Generated by @ref hs_compile_char_pair_set_search() and to be freed with
* @ref hs_free_char_pair_set_pattern
*/
typedef struct hs_char_pair_set_compiled_pattern
hs_char_pair_set_compiled_pattern_t;
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2021, Intel Corporation
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -1211,6 +1212,276 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
/** @} */
/**
* The following functions are part of the extended API.
* This extension offers direct access to the search algorithms,
* allowing the user to minimise calling overhead for simple
* search use cases where the type of search is known in advance.
*
* All search functions handle a limited type of pattern.
* For more generic patterns, use @ref hs_compile().
*
* NOTE: All search functions are considered case-sensitive.
*/
/**
* @defgroup DIRECT_API_COMPILE
*
* @{
*/
/**
* Compiles a short literal expression used in @ref hs_short_literal_search().
*
* The expression must be at most @ref HS_SHORT_PATTERN_THRESHOLD characters
* long. For longer expressions, use @ref hs_compile_long_literal_search() and
* @ref hs_long_literal_search() instead.
*
* @param expression
* The expression to parse. Note that this string must represent ONLY the
* pattern to be matched, with no delimiters. Null characters are accepted
* as part of the expression.
*
* @param expression_length
* The length of the expression in bytes. Up to @ref
* HS_SHORT_PATTERN_THRESHOLD characters long.
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_short_literal_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_short_literal_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_short_literal_search(
const char *expression, size_t expression_length,
hs_short_literal_compiled_pattern_t **output_database);
/**
* Free a short literal pattern.
*
* @param database
* The @ref hs_short_literal_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_short_literal_pattern(hs_short_literal_compiled_pattern_t *database);
/**
* Compiles a literal expression used in @ref hs_long_literal_search().
*
* There is no size limit. For expressions up to @ref
* HS_SHORT_PATTERN_THRESHOLD characters long, @ref
* hs_compile_short_literal_search() and @ref hs_short_literal_search() might be
* faster.
*
* @param expression
* The expression to parse. Note that this string must represent ONLY the
* pattern to be matched, with no delimiters. Null characters are accepted
* as part of the expression.
*
* @param expression_length
* The length of the expression in bytes.
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_long_literal_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_long_literal_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_long_literal_search(
const char *expression, size_t expression_length,
hs_long_literal_compiled_pattern_t **output_database);
/**
* Free a long literal pattern.
*
* @param database
* The @ref hs_long_literal_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_long_literal_pattern(hs_long_literal_compiled_pattern_t *database);
/**
* Compiles several literal expressions used in @ref hs_multi_literal_search().
*
* There is no size limit.
*
* @param expression
* The array of expressions to parse. Note that the strings must represent
* ONLY the patterns to be matched, with no delimiters. Null characters are
* accepted as part of an expression. The expression id passed to
* @ref match_event_handler follows the order of the expressions given
* here (i.e. expression[0] will be id 0).
*
* @param pattern_count
* The number of expressions in the @p expression array.
*
* @param expression_length
* The array holding the length of each expression in the @p expression
* array, expressed in bytes.
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_multi_literal_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_multi_literal_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_multi_literal_search(
const char **expression, size_t pattern_count,
const size_t *expression_length,
hs_multi_literal_compiled_pattern_t **output_database);
/**
* Free a multi literal pattern.
*
* @param database
* The @ref hs_multi_literal_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_multi_literal_pattern(hs_multi_literal_compiled_pattern_t *database);
/**
* Compiles a single character used in @ref hs_single_char_search().
*
* @param character
* The single character to be searched. It is case sensitive.
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_single_char_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_single_char_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_single_char_search(
const char character, hs_single_char_compiled_pattern_t **output_database);
/**
* Free a single char pattern.
*
* @param database
* The @ref hs_single_char_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_single_char_pattern(hs_single_char_compiled_pattern_t *database);
/**
* Compiles a set of characters used in @ref hs_char_set_search().
*
* @param character_array
* The string or character array containing all the characters in the set.
* It is case sensitive. Null terminator is optional.
*
* @param character_count
* The number of characters in @p character_array
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_char_set_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_char_set_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_char_set_search(
const char *character_array, size_t character_count,
hs_char_set_compiled_pattern_t **output_database);
/**
* Free a char set pattern.
*
* @param database
* The @ref hs_char_set_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_char_set_pattern(hs_char_set_compiled_pattern_t *database);
/**
* Compiles a pair of characters used in @ref hs_single_char_pair_search().
*
* NOTE: The character order matters in the pair. "Aj" won't match "jA"
*
* @param pair
* The string or character array containing the pair. Null terminator is
* optional.
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_single_char_pair_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_single_char_pair_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_single_char_pair_search(
const char *pair, hs_single_char_pair_compiled_pattern_t **output_database);
/**
* Free a single char pair pattern.
*
* @param database
* The @ref hs_single_char_pair_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL hs_free_single_char_pair_pattern(
hs_single_char_pair_compiled_pattern_t *database);
/**
* Compiles several pairs used in @ref hs_char_pair_set_search().
*
* IMPORTANT: Compilation is only guaranteed for up to 8 pairs. If you search
* for more, internal compression may attempt to merge adjacent patterns
* (e.g., [ab, ac, ad] becomes a[bcd]) to reduce the total to 8 pairs. If the
* compression is insufficient, compilation will fail with
* @ref HS_COMPILER_ERROR. In such cases, use @ref hs_multi_literal_search()
* instead.
* The compression does not affect the match IDs returned by
* @ref hs_char_pair_set_search(). For example, a[bcd] will still report "ab" as
* ID 0, "ac" as ID 1, and "ad" as ID 2.
*
* NOTE: The character order matters in the pair. "Aj" won't match "jA"
*
* @param expression
* The concatenation of all pairs to be parsed. If one wants to search for
* "ab" or "Cd", then @p expression would be ['a','b','C','d']. Any null
* terminator is ignored; use @p pair_count to set the length.
*
* @param pair_count
* The number of character pairs in @p expression
*
* @param output_database
* Returns pointer to buffer containing @ref
* hs_char_pair_set_compiled_pattern_t. The buffer must be freed with
* @ref hs_free_char_pair_set_pattern.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR otherwise.
*/
hs_error_t HS_CDECL hs_compile_char_pair_set_search(
const char *expression, size_t pair_count,
hs_char_pair_set_compiled_pattern_t **output_database);
/**
* Free a char pair set pattern.
*
* @param database
* The @ref hs_char_pair_set_compiled_pattern_t pointer to be freed.
*/
void HS_CDECL
hs_free_char_pair_set_pattern(hs_char_pair_set_compiled_pattern_t *database);
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif

435
src/hs_direct_search.cpp Normal file
View File

@ -0,0 +1,435 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <cstring>
#include "hs_common.h"
#include "hs_runtime.h"
#include "hs_direct_search.h"
#include "hs_direct_search_types.h"
#include "scratch.h"
#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE
#include "util/bitutils.h" // ctz64()
#include "util/simd_utils.h" // load128()
#include "util/supervector/supervector.hpp"
#include "fdr/fdr.h"
#include "hwlm/noodle_engine.h"
#include "nfa/shufti.h"
#include "nfa/truffle.h"
typedef typename SuperVector<VECTORSIZE>::comparemask_type vector_mask_type;
static_assert((uint64_t)CB_CONTINUE_MATCHING == HWLM_CONTINUE_MATCHING,
"CB_CONTINUE_MATCHING doesn't match HWLM_CONTINUE_MATCHING");
static_assert((uint64_t)CB_TERMINATE_MATCHING == HWLM_TERMINATE_MATCHING,
"CB_TERMINATE_MATCHING doesn't match HWLM_TERMINATE_MATCHING");
// Translate an HWLM status code into the corresponding public hs_error_t.
// Any code without an explicit mapping is reported as HS_UNKNOWN_ERROR.
static inline hs_error_t hwlm_to_hs_error(const hwlm_error_t error) {
    if (error == HWLM_SUCCESS) {
        return HS_SUCCESS;
    }
    if (error == HWLM_TERMINATED) {
        return HS_SCAN_TERMINATED;
    }
    if (error == HWLM_LITERAL_MAX_LEN) {
        return HS_COMPILER_ERROR;
    }
    // HWLM_ERROR_UNKNOWN and every unrecognised code end up here.
    return HS_UNKNOWN_ERROR;
}
// Adapter between Noodle's match callback and the user's match_event_handler.
// The noodle_context stored in scratch->core_info.userContext supplies the
// pattern length and the user's own context pointer.
hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id,
                                           struct hs_scratch *scratch) {
    struct noodle_context *ctx = reinterpret_cast<struct noodle_context *>(
        scratch->core_info.userContext);
    // hwlm reports the offset of the pattern's last byte, whereas the hs
    // convention is the offset of the first byte after the pattern.
    const size_t match_end = end + 1;
    const size_t match_begin = match_end - ctx->pattern_length;
    return (hwlmcb_rv_t)(scratch->core_info.userCallback(
        id, match_begin, match_end, 0, ctx->usr_context));
}
// Receive the FDR callback and perform the check for longer patterns (>8 char)
//
// end     - offset reported by FDR; as with the other hwlm callbacks in this
//           file it is treated as the last matched byte (inclusive).
// id      - index of the pattern inside the FDR_pattern_storage.
// scratch - carries the FDR_cb_context in core_info.userContext and the user's
//           handler in core_info.userCallback.
//
// For patterns longer than HWLM_LITERAL_MAX_LEN the first HWLM_LITERAL_MAX_LEN
// bytes are taken as matched and the remaining bytes are confirmed here
// against the scanned buffer. Returns HWLM_CONTINUE_MATCHING when a candidate
// is rejected, otherwise whatever the user callback returns.
hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id,
                                        struct hs_scratch *scratch) {
    const struct FDR_cb_context *combined_ctx =
        reinterpret_cast<struct FDR_cb_context *>(
            scratch->core_info.userContext);
    const FDR_pattern_storage *ps = combined_ctx->patterns;
    size_t pattern_length = get_const_pattern_sizes(ps)[id];
    size_t start_offset =
        end + 1 - std::min(pattern_length, (size_t)HWLM_LITERAL_MAX_LEN);

    if (pattern_length <= HWLM_LITERAL_MAX_LEN) {
        // short pattern, no confirmation needed
        return (hwlmcb_rv_t)(scratch->core_info.userCallback(
            id, start_offset, end + 1, 0, combined_ctx->usr_context));
    }

    // long pattern for FDR, we need to confirm it.
    const char *pattern = get_const_pattern_ptrs(ps)[id];
    const char *buffer = combined_ctx->buffer;
    size_t buffer_length = combined_ctx->buffer_length;
    if (start_offset + pattern_length > buffer_length) {
        // pattern too long for the remaining buffer, no match
        return HWLM_CONTINUE_MATCHING;
    }

    const char *confirm_buffer_start =
        buffer + start_offset + HWLM_LITERAL_MAX_LEN;
    const char *confirm_pattern_start = pattern + HWLM_LITERAL_MAX_LEN;
    size_t confirm_len = pattern_length - HWLM_LITERAL_MAX_LEN;

    if (confirm_len >= VECTORSIZE) {
        // Full vectors first; the loop deliberately stops with between 1 and
        // VECTORSIZE bytes left so the final check below always has work.
        while (confirm_len > VECTORSIZE) {
            SuperVector<VECTORSIZE> buffer_vector =
                SuperVector<VECTORSIZE>::loadu(confirm_buffer_start);
            SuperVector<VECTORSIZE> pattern_vector =
                SuperVector<VECTORSIZE>::loadu(confirm_pattern_start);
            vector_mask_type mask = buffer_vector.eqmask(pattern_vector);
            // NOTE(review): this assumes eqmask() sets every bit of
            // comparemask_type when all lanes are equal - confirm that this
            // holds for every SuperVector backend.
            if (~mask) {
                // don't match the pattern, continue searching
                return HWLM_CONTINUE_MATCHING;
            }
            confirm_buffer_start += VECTORSIZE;
            confirm_pattern_start += VECTORSIZE;
            confirm_len -= VECTORSIZE;
        }
        // unaligned load: we cannot risk loading any extra byte, so we run
        // the vector one last time with an offset to overlap the previous
        // check, but avoid overflowing.
        size_t overlap = VECTORSIZE - confirm_len;
        SuperVector<VECTORSIZE> buffer_vector =
            SuperVector<VECTORSIZE>::loadu(confirm_buffer_start - overlap);
        SuperVector<VECTORSIZE> pattern_vector =
            SuperVector<VECTORSIZE>::loadu(confirm_pattern_start - overlap);
        vector_mask_type mask = buffer_vector.eqmask(pattern_vector);
        if (~mask) {
            // don't match the pattern, continue searching
            return HWLM_CONTINUE_MATCHING;
        }
    } else if (std::memcmp(confirm_buffer_start, confirm_pattern_start,
                           confirm_len) != 0) {
        // Fewer than VECTORSIZE bytes to confirm. memcmp is used rather than
        // reading the bytes through uint64_t pointers because neither pointer
        // is guaranteed to be 8-byte aligned.
        // don't match the pattern, continue searching
        return HWLM_CONTINUE_MATCHING;
    }

    // we have a valid match. Call the user callback
    return (hwlmcb_rv_t)(scratch->core_info.userCallback(
        id, start_offset, start_offset + pattern_length, 0,
        combined_ctx->usr_context));
}
// --- short_literal (Noodle) ---
// Scan `data` (`length` bytes) for the short literal compiled into `database`
// by running noodExec. Matches reach `onEvent` through noodle_to_hs_callback,
// and the HWLM status is converted with hwlm_to_hs_error.
// Null arguments are only caught by assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_short_literal_search(
    const hs_short_literal_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_short_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_short_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_short_literal_search called with nullptr database");

    // What noodle_to_hs_callback needs to rebuild the match offsets.
    struct noodle_context noodle_ctx;
    noodle_ctx.pattern_length = database->pattern_length;
    noodle_ctx.usr_context = context;

    // Stack scratch: only the callback plumbing is filled in.
    struct hs_scratch local_scratch;
    local_scratch.core_info.userCallback = onEvent;
    local_scratch.core_info.userContext = &noodle_ctx;

    const uint8_t *haystack = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t status =
        noodExec(&(database->noodle_database), haystack, length, 0,
                 noodle_to_hs_callback, &local_scratch);
    return hwlm_to_hs_error(status);
}
// --- long_literal (FDR) ---
// Scan `data` (`length` bytes) with the FDR database held in `database`.
// Matches reach `onEvent` through FDR_to_hs_callback, and the HWLM status is
// converted with hwlm_to_hs_error. Null arguments are only caught by assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_long_literal_search(
    const hs_long_literal_compiled_pattern_t *database, const char *data,
    size_t length, match_event_handler onEvent,
    void *context) {
    assert(onEvent != nullptr &&
           "hs_long_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_long_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_long_literal_search called with nullptr database");

    // User context, pattern storage and the scanned buffer, bundled for
    // FDR_to_hs_callback.
    struct FDR_cb_context cb_ctx = {
        context, database->fdr_database.patterns, data, length};

    struct hs_scratch local_scratch;
    local_scratch.fdr_conf = nullptr;
    local_scratch.core_info.userCallback = onEvent;
    local_scratch.core_info.userContext = &cb_ctx;

    const uint8_t *haystack = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t status =
        fdrExec(database->fdr_database.database, haystack, length, 0,
                FDR_to_hs_callback, &local_scratch, HWLM_ALL_GROUPS);
    return hwlm_to_hs_error(status);
}
// --- multi_literal (FDR) ---
// Scan `data` (`length` bytes) with the multi-literal FDR database held in
// `database`. Matches reach `onEvent` through FDR_to_hs_callback, and the HWLM
// status is converted with hwlm_to_hs_error. Null arguments are only caught by
// assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_multi_literal_search(
    const hs_multi_literal_compiled_pattern_t *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_multi_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_multi_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_multi_literal_search called with nullptr database");

    // User context, pattern storage and the scanned buffer, bundled for
    // FDR_to_hs_callback.
    struct FDR_cb_context cb_ctx = {
        context, database->fdr_database.patterns, data, length};

    struct hs_scratch local_scratch;
    local_scratch.fdr_conf = nullptr;
    local_scratch.core_info.userCallback = onEvent;
    local_scratch.core_info.userContext = &cb_ctx;

    const uint8_t *haystack = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t status =
        fdrExec(database->fdr_database.database, haystack, length, 0,
                FDR_to_hs_callback, &local_scratch, HWLM_ALL_GROUPS);
    return hwlm_to_hs_error(status);
}
// --- single_char (Noodle) ---
// Scan `data` (`length` bytes) for the single character compiled into
// `database` by running noodExec with a fixed pattern length of 1. Matches
// reach `onEvent` through noodle_to_hs_callback, and the HWLM status is
// converted with hwlm_to_hs_error. Null arguments are only caught by assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_single_char_search(
    const hs_single_char_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_single_char_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_single_char_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_single_char_search called with nullptr database");

    struct noodle_context noodle_ctx;
    noodle_ctx.pattern_length = 1;
    noodle_ctx.usr_context = context;

    // Stack scratch: only the callback plumbing is filled in.
    struct hs_scratch local_scratch;
    local_scratch.core_info.userCallback = onEvent;
    local_scratch.core_info.userContext = &noodle_ctx;

    const uint8_t *haystack = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t status =
        noodExec(&(database->noodle_database), haystack, length, 0,
                 noodle_to_hs_callback, &local_scratch);
    return hwlm_to_hs_error(status);
}
// --- char_set (Truffle) ---
// Scan `data` (`length` bytes) for any character of the set compiled into
// `database`, calling `onEvent` once per matching byte with the id taken from
// database->char_id_map.
//
// Returns HS_SUCCESS when the whole buffer was scanned, or HS_SCAN_TERMINATED
// when `onEvent` asked to stop (returned 0), in line with the status the
// Noodle/FDR based searches in this file produce through hwlm_to_hs_error.
// Null arguments are only caught by assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_char_set_search(
    const hs_char_set_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_char_set_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_char_set_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_char_set_search called with nullptr database");

    const u8 *const buf_start = reinterpret_cast<const u8 *>(data);
    // buf_end must be the first char past the buffer, so current_buf==buf_end
    // means current_buf is empty.
    const u8 *const buf_end = buf_start + length;
    const u8 *current_buf = buf_start;

    while (current_buf < buf_end) {
        const u8 *current_match;
#ifdef CAN_USE_WIDE_TRUFFLE
        current_match = truffleExecWide(
            loadu256(database->wide_mask), current_buf, buf_end);
#else
        current_match = truffleExec(load128(database->mask1),
                                    load128(database->mask2),
                                    current_buf, buf_end);
#endif
        // current_match is the pointer to the matching char, NOT past the
        // matching char. or buf_end if no match.
        if (current_match >= buf_end) {
            // Nothing left to report. Leaving here also avoids forming
            // buf_end + 1, which is outside the valid pointer range.
            break;
        }

        size_t id = database->char_id_map[*current_match];
        size_t match_start = current_match - buf_start;
        if (!onEvent(id, match_start, match_start + 1, 0, context)) {
            // user requested to stop matching
            return HS_SCAN_TERMINATED;
        }
        // Resume right after the byte that was just reported.
        current_buf = current_match + 1;
    }
    return HS_SUCCESS;
}
// --- single_char_pair (Noodle) ---
// Scan `data` (`length` bytes) for the character pair compiled into `database`
// by running noodExec with a fixed pattern length of 2. Matches reach
// `onEvent` through noodle_to_hs_callback, and the HWLM status is converted
// with hwlm_to_hs_error. Null arguments are only caught by assert().
HS_PUBLIC_API
hs_error_t HS_CDECL hs_single_char_pair_search(
    const hs_single_char_pair_compiled_pattern *database,
    const char *data, size_t length, match_event_handler onEvent,
    void *context) {
    assert(onEvent != nullptr &&
           "hs_single_char_pair_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_single_char_pair_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_single_char_pair_search called with nullptr database");

    struct noodle_context noodle_ctx;
    noodle_ctx.pattern_length = 2;
    noodle_ctx.usr_context = context;

    // Stack scratch: only the callback plumbing is filled in.
    struct hs_scratch local_scratch;
    local_scratch.core_info.userCallback = onEvent;
    local_scratch.core_info.userContext = &noodle_ctx;

    const uint8_t *haystack = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t status =
        noodExec(&(database->noodle_database), haystack, length, 0,
                 noodle_to_hs_callback, &local_scratch);
    return hwlm_to_hs_error(status);
}
// --- char_pair_set (Double shufti) ---
/**
 * Scan @p data for any of the compiled two-character pairs (double shufti)
 * and report each hit through @p onEvent.
 *
 * Shufti only tells us WHERE a pair starts, not WHICH pair it is, so for each
 * hit the two bytes are compared against the stored pair list with vector
 * compares; the index of the first equal pair is reported as the match id.
 *
 * @return HS_SUCCESS when the whole buffer was scanned, HS_SCAN_TERMINATED
 *         when the callback returned 0 to stop the scan.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_char_pair_set_search(
    const hs_char_pair_set_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_char_pair_set_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_char_pair_set_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_char_pair_set_search called with nullptr database");

    const u8 *const buf_start = reinterpret_cast<const u8 *>(data);
    // buf_end is the first byte past the buffer, so current_buf == buf_end
    // means there is nothing left to scan.
    const u8 *const buf_end = buf_start + length;
    const u8 *current_buf = buf_start;

    // Loop invariants for the pair identification step.
    const size_t width = SuperVector<VECTORSIZE>::mask_width();
    const size_t pairs_per_vector = VECTORSIZE / 2;
    const size_t last_vector_index =
        (database->dshufti_database.pair_count - 1) / pairs_per_vector;

    while (current_buf < buf_end) {
        const u8 *current_match = shuftiDoubleExec(
            load128(database->dshufti_database.mask1),
            load128(database->dshufti_database.mask2),
            load128(database->dshufti_database.mask3),
            load128(database->dshufti_database.mask4), current_buf, buf_end);
        // current_match points AT the first byte of the pair, or at buf_end
        // when nothing matched. A full pair needs two bytes, so a hit on the
        // very last byte cannot be a real match and must not be read as one.
        if (buf_end - current_match < 2) {
            break;
        }

        // Load the pair with memcpy: the match can sit at any alignment.
        u16 raw_pair;
        memcpy(&raw_pair, current_match, sizeof(raw_pair));
        SuperVector<VECTORSIZE> found_pair = SuperVector<VECTORSIZE>(raw_pair);

        vector_mask_type merged_mask = 0;
        size_t loop = 0;
        for (; loop <= last_vector_index; loop++) {
            SuperVector<VECTORSIZE> all_pair = SuperVector<VECTORSIZE>::load(
                database->dshufti_database.all_pairs + (VECTORSIZE * loop));
            // A partially filled last vector is fine: a real pair is found
            // before the trailing bytes are reached.
            vector_mask_type mask = all_pair.eqmask(found_pair);
            // mask has <width> bits set per equal byte. Merge neighbouring
            // lanes to keep only positions where both bytes are equal, then
            // keep a single bit per lane on every other lane (pair starts).
            merged_mask = mask & (mask >> width) &
                          database->dshufti_database.bit_filter_mask;
            if (merged_mask) {
                break;
            }
        }

        // Only report when a stored pair really equals the bytes found;
        // ctz on an empty mask would yield a meaningless id.
        if (merged_mask) {
            // And finally ctz gives the first pair that matches.
            unsigned int id = static_cast<unsigned int>(
                (ctz64(merged_mask) / width / 2) + (loop * pairs_per_vector));
            size_t match_start = current_match - buf_start;
            if (!onEvent(id, match_start, match_start + 2, 0, context)) {
                // The user asked to stop: report it like the other search
                // functions do instead of pretending the scan completed.
                return HS_SCAN_TERMINATED;
            }
        }
        current_buf = current_match + 1;
    }
    return HS_SUCCESS;
}

207
src/hs_direct_search.h Normal file
View File

@ -0,0 +1,207 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DIRECT_SEARCH_H
#define DIRECT_SEARCH_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <string.h>
#include <stdint.h>
#include "allocator.h"
#include "fdr/fdr_internal.h"
#include "util/arch.h"
/*
* FDR_pattern_storage memory layout:
*
* |-------------------------------------------------|
* | size_t pattern_count |
* |------------------------|------------------------|
* | pattern_raw_storage : char* pattern_ptrs[] |
* | :------------------------|
* | : size_t pattern_sizes[] |
* | :------------------------|
* | : char actual_storage[] |
* |------------------------|------------------------|
*
* Use size_fdr_pattern() to get the size to allocate.
*/
/*
 * Header of a variable-size pattern store. pattern_raw_storage holds, back to
 * back: pattern_count string pointers, pattern_count string lengths, then the
 * concatenated pattern bytes (no terminators).
 */
struct FDR_pattern_storage {
    size_t pattern_count;
    char pattern_raw_storage[];
};

/* Start of the pointer table (first region of pattern_raw_storage). */
static inline char **get_pattern_ptrs(struct FDR_pattern_storage *pat) {
    // cppcheck-suppress cstyleCast
    return (char **)((char *)pat +
                     offsetof(struct FDR_pattern_storage, pattern_raw_storage));
}

/* Read-only view of the pointer table. */
static inline char *const *
get_const_pattern_ptrs(const struct FDR_pattern_storage *pat) {
    // cppcheck-suppress cstyleCast
    return (char *const *)((const char *)pat +
                           offsetof(struct FDR_pattern_storage,
                                    pattern_raw_storage));
}

/* Start of the length table, located right after the pointer table. */
static inline size_t *get_pattern_sizes(struct FDR_pattern_storage *pat) {
    // cppcheck-suppress cstyleCast
    return (size_t *)((char *)get_pattern_ptrs(pat) +
                      pat->pattern_count * sizeof(char *));
}

/* Read-only view of the length table. */
static inline const size_t *
get_const_pattern_sizes(const struct FDR_pattern_storage *pat) {
    // cppcheck-suppress cstyleCast
    return (const size_t *)((const char *)get_const_pattern_ptrs(pat) +
                            pat->pattern_count * sizeof(char *));
}

/* Start of the concatenated pattern bytes, right after the length table. */
static inline char *
get_pattern_string_storage(struct FDR_pattern_storage *pat) {
    return (char *)get_pattern_sizes(pat) + pat->pattern_count * sizeof(size_t);
}

/* Read-only view of the concatenated pattern bytes. */
static inline const char *
get_const_pattern_string_storage(const struct FDR_pattern_storage *pat) {
    return (const char *)get_const_pattern_sizes(pat) +
           pat->pattern_count * sizeof(size_t);
}

/*
 * Fill a pattern store that was allocated with size_fdr_pattern() bytes.
 *
 * storage               destination, large enough for all tables and bytes
 * in_expression         in_pattern_count pattern pointers
 * in_pattern_count      number of patterns
 * in_expression_length  length in bytes of each pattern
 *
 * pattern_count must be written first: the accessors derive the table
 * positions from it. Each pattern is copied without a terminator and its
 * pointer/length entries are set in the same pass.
 */
static inline
void init_pattern_store(struct FDR_pattern_storage *storage,
                        const char **in_expression, size_t in_pattern_count,
                        const size_t *in_expression_length) {
    storage->pattern_count = in_pattern_count;

    char **ptrs = get_pattern_ptrs(storage);
    size_t *sizes = get_pattern_sizes(storage);
    char *next_string = get_pattern_string_storage(storage);

    for (size_t i = 0; i < in_pattern_count; i++) {
        memcpy(next_string, in_expression[i], in_expression_length[i]);
        ptrs[i] = next_string;
        sizes[i] = in_expression_length[i];
        next_string += in_expression_length[i];
    }
}

/* Convenience wrapper: fill a store holding exactly one pattern. */
static inline
void init_pattern_store_single(struct FDR_pattern_storage *storage,
                               const char *in_expression,
                               const size_t in_expression_length) {
    init_pattern_store(storage, &in_expression, 1, &in_expression_length);
}

/*
 * Number of bytes to allocate for an FDR_pattern_storage holding the given
 * patterns: header + pointer table + length table + all pattern bytes.
 */
static inline
size_t size_fdr_pattern(size_t in_pattern_count,
                        const size_t *in_expression_length) {
    size_t total_string_size = 0;
    for (size_t i = 0; i < in_pattern_count; i++) {
        total_string_size += in_expression_length[i];
    }
    size_t ptr_array_size = in_pattern_count * sizeof(char *);
    size_t pattern_sizes_array_size = in_pattern_count * sizeof(size_t);
    return sizeof(struct FDR_pattern_storage) + ptr_array_size +
           pattern_sizes_array_size + total_string_size;
}
/*
* combined_fdr_database memory layout:
*
* |-------------------------------------------------|
* | FDR *database |
* |-------------------------------------------------|
* | FDR_pattern_storage *patterns |
* |------------------------|------------------------|
* | raw_storage : FDR fdr_storage |
* | :------------------------|
* | : FDR_pattern_storage |
* |------------------------|------------------------|
*
* Use size_fdr_database() to get the size to allocate.
*/
// Single-allocation container for an FDR engine plus the original patterns.
// Both pointers refer into raw_storage of the same allocation.
struct combined_fdr_database {
// FDR bytecode, located at the start of raw_storage
struct FDR *database;
// pattern store, located fdr_size bytes into raw_storage
struct FDR_pattern_storage *patterns;
// backing bytes for both regions above
unsigned char raw_storage[];
};
/*
 * Set up the internal pointers of a combined_fdr_database allocated with
 * size_fdr_database() bytes and fill its pattern store from the given
 * expressions. The FDR bytecode region is only pointed to, not written.
 */
void init_combined_fdr_database(struct combined_fdr_database *database,
size_t fdr_size, const char **in_expression,
size_t in_pattern_count,
const size_t *in_expression_length);
/* Same as init_combined_fdr_database() for exactly one expression. */
void init_combined_fdr_database_single(struct combined_fdr_database *database,
size_t fdr_size,
const char *in_expression,
const size_t in_expression_length);
/*
 * Bytes to allocate for a combined_fdr_database: the fixed header, the FDR
 * bytecode (fdr_size) and the pattern store for the given patterns.
 */
static inline
size_t size_fdr_database(size_t fdr_size, size_t in_pattern_count,
                         const size_t *in_expression_length) {
    const size_t header_bytes = sizeof(struct combined_fdr_database);
    const size_t pattern_bytes =
        size_fdr_pattern(in_pattern_count, in_expression_length);
    return header_bytes + pattern_bytes + fdr_size;
}
/* Bytes to allocate for a combined_fdr_database holding a single pattern. */
static inline
size_t size_fdr_database_single(size_t fdr_size,
                                const size_t in_expression_length) {
    const size_t *only_length = &in_expression_length;
    return size_fdr_database(fdr_size, 1, only_length);
}
// Engine callback passed to noodExec() by the noodle-backed search functions.
// NOTE(review): presumably forwards the match to the user callback stored in
// scratch->core_info — confirm against the definition.
hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id,
struct hs_scratch *scratch);
// Receive the FDR callback and perform the check for longer patterns (>8 char)
hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id,
struct hs_scratch *scratch);
// Per-scan state for the FDR-backed searches.
// NOTE(review): presumably consumed by FDR_to_hs_callback to re-check long
// patterns against the scanned buffer — confirm against the definition.
struct FDR_cb_context {
// opaque pointer supplied by the API user
void *usr_context;
// original patterns of the compiled database
const struct FDR_pattern_storage *patterns;
// buffer being scanned and its length in bytes
const char *buffer;
size_t buffer_length;
};
// Per-scan state for the noodle-backed searches; the search functions store
// its address in scratch.core_info.userContext before calling noodExec().
struct noodle_context {
// opaque pointer supplied by the API user
void *usr_context;
// length in bytes of the searched literal (e.g. 2 for a character pair)
u8 pattern_length;
};
#ifdef __cplusplus
} // extern "C"
#endif
#endif // DIRECT_SEARCH_H

View File

@ -0,0 +1,495 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <string>
#include <cstring>
#include "hs_common.h"
#include "hs_compile.h"
#include "hs_direct_search.h"
#include "hs_direct_search_types.h"
#include "allocator.h" // hs_database_alloc()
#include "grey.h"
#include "hwlm/hwlm.h" // HWLM_LITERAL_MAX_LEN
#include "hwlm/hwlm_internal.h" // HWLM_ENGINE_FDR
#include "hwlm/hwlm_literal.h" // ue2::hwlmLiteral
#include "hwlm/noodle_internal.h" // noodTable
#include "ue2common.h" // likely() - unlikely()
#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE
#include "util/bytecode_ptr.h"
#include "util/charreach.h"
#include "util/flat_containers.h" // flat_set
#include "util/supervector/supervector.hpp"
#include "util/target_info.h" // target_t
#include "fdr/fdr_compile.h"
#include "hwlm/noodle_build.h"
#include "nfa/shufticompile.h"
#include "nfa/trufflecompile.h"
typedef typename SuperVector<VECTORSIZE>::comparemask_type vector_mask_type;
// Point the two internal pointers into raw_storage (FDR first, pattern store
// fdr_size bytes later), then copy the expressions into the pattern store.
void init_combined_fdr_database(struct combined_fdr_database *database,
                                size_t fdr_size, const char **in_expression,
                                size_t in_pattern_count,
                                const size_t *in_expression_length) {
    unsigned char *const fdr_region = database->raw_storage;
    unsigned char *const pattern_region = fdr_region + fdr_size;

    database->database = reinterpret_cast<FDR *>(fdr_region);
    database->patterns =
        reinterpret_cast<FDR_pattern_storage *>(pattern_region);

    init_pattern_store(database->patterns, in_expression, in_pattern_count,
                       in_expression_length);
}
// Single-expression variant: same pointer set-up inside raw_storage, then
// the one expression is copied into the pattern store.
void init_combined_fdr_database_single(struct combined_fdr_database *database,
                                       size_t fdr_size,
                                       const char *in_expression,
                                       const size_t in_expression_length) {
    unsigned char *const fdr_region = database->raw_storage;
    unsigned char *const pattern_region = fdr_region + fdr_size;

    database->database = reinterpret_cast<FDR *>(fdr_region);
    database->patterns =
        reinterpret_cast<FDR_pattern_storage *>(pattern_region);

    init_pattern_store_single(database->patterns, in_expression,
                              in_expression_length);
}
inline void generic_free(void *database) {
if (likely(database)) {
hs_database_free(database);
}
}
// --- short_literal (Noodle) ---
/**
 * Compile a short, case-sensitive literal for hs_short_literal_search().
 *
 * @param expression         pattern bytes (not required to be terminated)
 * @param expression_length  number of bytes, 1..HS_SHORT_PATTERN_THRESHOLD
 * @param output_database    receives the database on success; release it
 *                           with hs_free_short_literal_pattern()
 *
 * @return HS_SUCCESS, HS_INVALID for an unsupported length,
 *         HS_UNKNOWN_ERROR if the noodle table could not be built, or
 *         HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_short_literal_search(
    const char *expression, size_t expression_length,
    hs_short_literal_compiled_pattern **output_database) {
    assert(expression_length > 0 &&
           "hs_compile_short_literal_search called with an empty pattern");
    assert(expression != nullptr &&
           "hs_compile_short_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_short_literal_search called with nullptr");
    // The empty pattern is rejected here as well so release builds (where
    // the asserts vanish) fail cleanly.
    if (unlikely(expression_length == 0 ||
                 expression_length > HS_SHORT_PATTERN_THRESHOLD)) {
        return HS_INVALID;
    }
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(expression, expression_length),
                         is_case_insensitive, only_need_first_match, 0,
                         HWLM_ALL_GROUPS, {}, {});

    // Build the table BEFORE allocating the output so that a build failure
    // cannot leak the database.
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_short_literal_compiled_pattern *database =
        reinterpret_cast<hs_short_literal_compiled_pattern *>(hs_database_alloc(
            sizeof(hs_short_literal_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    // The length was range-checked above, so it fits the u8 field.
    database->pattern_length = static_cast<u8>(expression_length);
    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_short_literal_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_short_literal_pattern(
hs_short_literal_compiled_pattern *database) {
generic_free(database);
}
// --- long_literal (FDR) ---
/**
 * Compile one case-sensitive literal of any length for
 * hs_long_literal_search().
 *
 * Only the first HWLM_LITERAL_MAX_LEN bytes are given to the FDR engine; the
 * complete pattern is stored next to the bytecode so the remainder can be
 * verified at match time.
 *
 * @param expression         pattern bytes (not required to be terminated)
 * @param expression_length  number of bytes, must be > 0
 * @param output_database    receives the database on success; release it
 *                           with hs_free_long_literal_pattern()
 *
 * @return HS_SUCCESS, HS_UNKNOWN_ERROR if the FDR engine could not be built,
 *         or HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_long_literal_search(
    const char *expression, size_t expression_length,
    hs_long_literal_compiled_pattern_t **output_database) {
    assert(expression_length > 0 &&
           "hs_compile_long_literal_search called with an empty pattern");
    assert(expression != nullptr &&
           "hs_compile_long_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_long_literal_search called with nullptr");
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    std::vector<ue2::hwlmLiteral> lits;
    // longer strings are checked in the callback
    ue2::hwlmLiteral lit(
        std::string(expression,
                    std::min(expression_length,
                             static_cast<size_t>(HWLM_LITERAL_MAX_LEN))),
        is_case_insensitive, only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});
    lits.push_back(lit);

    ue2::Grey g = ue2::Grey();
    u8 engType = HWLM_ENGINE_FDR;
    bool make_small = false;
    hs_platform_info platform_info;
    hs_populate_platform(&platform_info);
    ue2::target_t target = ue2::target_t(platform_info);

    std::unique_ptr<ue2::HWLMProto> proto =
        ue2::fdrBuildProto(engType, lits, make_small, target, g);
    // Guard the dereference below: a null prototype means no engine could
    // be selected for this target.
    if (unlikely(proto == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }
    ue2::bytecode_ptr<FDR> bytecode_database = ue2::fdrBuildTable(*proto, g);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    // One allocation holds the header, the FDR bytecode and the full pattern.
    size_t fdr_size = bytecode_database.get()->size;
    size_t mem_required = size_fdr_database_single(fdr_size, expression_length);
    struct combined_fdr_database *combined_database =
        reinterpret_cast<struct combined_fdr_database *>(
            hs_database_alloc(mem_required));
    if (unlikely(combined_database == nullptr)) {
        return HS_NOMEM;
    }
    init_combined_fdr_database_single(combined_database, fdr_size, expression,
                                      expression_length);
    memcpy(combined_database->database, bytecode_database.get(), fdr_size);
    *output_database = reinterpret_cast<hs_long_literal_compiled_pattern_t *>(
        combined_database);
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_long_literal_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_long_literal_pattern(
hs_long_literal_compiled_pattern_t *database) {
generic_free(database);
}
// --- multi_literal (FDR) ---
/**
 * Compile several case-sensitive literals for hs_multi_literal_search().
 * The id given to each literal is its index in @p expression.
 *
 * Only the first HWLM_LITERAL_MAX_LEN bytes of each literal are given to the
 * FDR engine; the complete patterns are stored next to the bytecode so the
 * remainder can be verified at match time.
 *
 * @param expression         pattern_count pattern pointers
 * @param pattern_count      number of patterns, must be > 0
 * @param expression_length  length in bytes of each pattern, each > 0
 * @param output_database    receives the database on success; release it
 *                           with hs_free_multi_literal_pattern()
 *
 * @return HS_SUCCESS, HS_UNKNOWN_ERROR if the FDR engine could not be built,
 *         or HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_multi_literal_search(
    const char **expression, size_t pattern_count,
    const size_t *expression_length,
    hs_multi_literal_compiled_pattern_t **output_database) {
    assert(pattern_count > 0 &&
           "hs_compile_multi_literal_search called with no pattern");
    assert(expression != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");
    assert(expression_length != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    std::vector<ue2::hwlmLiteral> lits;
    lits.reserve(pattern_count);
    for (size_t i = 0; i < pattern_count; i++) {
        assert(expression_length[i] > 0 && expression[i] &&
               "hs_compile_multi_literal_search called with an empty pattern");
        // longer strings are checked in the callback
        ue2::hwlmLiteral lit(
            std::string(expression[i],
                        std::min(expression_length[i],
                                 static_cast<size_t>(HWLM_LITERAL_MAX_LEN))),
            is_case_insensitive, only_need_first_match, static_cast<u32>(i),
            HWLM_ALL_GROUPS, {}, {});
        lits.push_back(lit);
    }

    ue2::Grey g = ue2::Grey();
    u8 engType = HWLM_ENGINE_FDR;
    bool make_small = false;
    hs_platform_info platform_info;
    hs_populate_platform(&platform_info);
    ue2::target_t target = ue2::target_t(platform_info);

    std::unique_ptr<ue2::HWLMProto> proto =
        ue2::fdrBuildProto(engType, lits, make_small, target, g);
    // Guard the dereference below: a null prototype means no engine could
    // be selected for this target.
    if (unlikely(proto == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }
    ue2::bytecode_ptr<FDR> bytecode_database = ue2::fdrBuildTable(*proto, g);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    // One allocation holds the header, the FDR bytecode and all patterns.
    size_t fdr_size = bytecode_database.get()->size;
    size_t mem_required =
        size_fdr_database(fdr_size, pattern_count, expression_length);
    struct combined_fdr_database *combined_database =
        reinterpret_cast<struct combined_fdr_database *>(
            hs_database_alloc(mem_required));
    if (unlikely(combined_database == nullptr)) {
        return HS_NOMEM;
    }
    init_combined_fdr_database(combined_database, fdr_size, expression,
                               pattern_count, expression_length);
    memcpy(combined_database->database, bytecode_database.get(), fdr_size);
    *output_database = reinterpret_cast<hs_multi_literal_compiled_pattern_t *>(
        combined_database);
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_multi_literal_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_multi_literal_pattern(
hs_multi_literal_compiled_pattern_t *database) {
generic_free(database);
}
// --- single_char (Noodle) ---
/**
 * Compile a single character (case-sensitive) for hs_single_char_search().
 *
 * @param character        the byte to search for
 * @param output_database  receives the database on success; release it with
 *                         hs_free_single_char_pattern()
 *
 * @return HS_SUCCESS, HS_UNKNOWN_ERROR if the noodle table could not be
 *         built, or HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_single_char_search(
    const char character, hs_single_char_compiled_pattern **output_database) {
    assert(output_database != nullptr &&
           "hs_compile_single_char_search called with nullptr");
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(&character, 1), is_case_insensitive,
                         only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});

    // Build the table BEFORE allocating the output so that a build failure
    // cannot leak the database.
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_single_char_compiled_pattern *database =
        reinterpret_cast<hs_single_char_compiled_pattern *>(hs_database_alloc(
            sizeof(hs_single_char_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_single_char_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_single_char_pattern(
hs_single_char_compiled_pattern *database) {
generic_free(database);
}
// --- char_set (Truffle) ---
/**
 * Compile a set of characters (truffle) for hs_char_set_search().
 *
 * The id reported for a matching byte is its index in @p character_array;
 * when a byte appears more than once, the last index wins.
 *
 * @param character_array  the bytes forming the set
 * @param character_count  number of bytes, 1..256 (ids are stored as u8)
 * @param output_database  receives the database on success; release it with
 *                         hs_free_char_set_pattern()
 *
 * @return HS_SUCCESS, HS_INVALID for an unsupported count, or HS_NOMEM if
 *         the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL
hs_compile_char_set_search(const char *character_array, size_t character_count,
                           hs_char_set_compiled_pattern **output_database) {
    assert(character_count > 0 &&
           "hs_compile_char_set_search called with an empty set");
    assert(character_array != nullptr &&
           "hs_compile_char_set_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_char_set_search called with nullptr");
    // Ids live in a u8 table, so an index above 255 cannot be represented.
    if (unlikely(character_count == 0 || character_count > 256)) {
        return HS_INVALID;
    }

    const ue2::CharReach cr =
        ue2::CharReach(std::string(character_array, character_count));

    truffle_storage *database = reinterpret_cast<truffle_storage *>(
        hs_database_alloc(sizeof(truffle_storage)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }
    // hs_database_alloc is meant to align to a machine word (likely 64b), which
    // is actually required here
    assert((((intptr_t)(database) & 3) == 0) &&
           "user-provided alloc didn't meet alignment requirement in "
           "hs_compile_char_set_search");

    // Entries for bytes outside the set are never looked up by the search,
    // but zero them so the database content is deterministic.
    memset(database->char_id_map, 0, sizeof(database->char_id_map));
    // size_t index: a u8 counter would wrap and never reach a count >= 256.
    for (size_t i = 0; i < character_count; i++) {
        database->char_id_map[(u8)character_array[i]] = static_cast<u8>(i);
    }

#ifdef CAN_USE_WIDE_TRUFFLE
    ue2::truffleBuildMasksWide(cr, database->wide_mask);
#else
    ue2::truffleBuildMasks(cr, database->mask1,
                           database->mask2);
#endif
    *output_database = database;
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_char_set_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_char_set_pattern(hs_char_set_compiled_pattern *database) {
generic_free(database);
}
// --- single_char_pair (Noodle) ---
/**
 * Compile one ordered, case-sensitive character pair for
 * hs_single_char_pair_search().
 *
 * @param pair             pointer to exactly two bytes (no terminator needed)
 * @param output_database  receives the database on success; release it with
 *                         hs_free_single_char_pair_pattern()
 *
 * @return HS_SUCCESS, HS_UNKNOWN_ERROR if the noodle table could not be
 *         built, or HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_single_char_pair_search(
    const char *pair, hs_single_char_pair_compiled_pattern **output_database) {
    assert(pair != nullptr &&
           "hs_compile_single_char_pair_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_single_char_pair_search called with nullptr");
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(pair, 2), is_case_insensitive,
                         only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});

    // Build the table BEFORE allocating the output so that a build failure
    // cannot leak the database.
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_single_char_pair_compiled_pattern *database =
        reinterpret_cast<hs_single_char_pair_compiled_pattern *>(
            hs_database_alloc(sizeof(hs_single_char_pair_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_single_char_pair_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_single_char_pair_pattern(
hs_single_char_pair_compiled_pattern *database) {
generic_free(database);
}
// --- char_pair_set (Double shufti) ---
/**
 * Compile a set of ordered character pairs (double shufti) for
 * hs_char_pair_set_search().
 *
 * @param expression       2 * pair_count bytes: the pairs laid out back to
 *                         back (no terminator needed)
 * @param pair_count       number of pairs, must be > 0
 * @param output_database  receives the database on success; release it with
 *                         hs_free_char_pair_set_pattern()
 *
 * @return HS_SUCCESS, HS_NOMEM if the allocation failed, or
 *         HS_COMPILER_ERROR if the shufti masks could not be built for this
 *         set of pairs.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_char_pair_set_search(
    const char *expression, size_t pair_count,
    hs_char_pair_set_compiled_pattern **output_database) {
    assert(pair_count > 0 &&
           "hs_compile_char_pair_set_search called with an empty set");
    assert(expression != nullptr &&
           "hs_compile_char_pair_set_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_char_pair_set_search called with nullptr");

    ue2::flat_set<std::pair<u8, u8>> pairs;
    // size_t index: a u8 counter would wrap and never reach a count >= 256.
    for (size_t i = 0; i < pair_count; i++) {
        pairs.insert(
            std::make_pair((u8)expression[2 * i], (u8)expression[2 * i + 1]));
    }

    // The search loads the pair list one whole vector at a time, so pad the
    // list to a multiple of VECTORSIZE to keep those loads inside the
    // allocation.
    const size_t pairs_bytes = 2 * pair_count;
    const size_t padded_pairs_bytes =
        ((pairs_bytes + VECTORSIZE - 1) / VECTORSIZE) * VECTORSIZE;

    hs_char_pair_set_compiled_pattern *database =
        reinterpret_cast<hs_char_pair_set_compiled_pattern *>(hs_database_alloc(
            sizeof(hs_char_pair_set_compiled_pattern) + padded_pairs_bytes));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }
    // hs_database_alloc is meant to align to a machine word (likely 64b), which
    // is actually required here
    // NOTE(review): the struct members are declared with 16 / VECTORSIZE
    // alignment; confirm the allocator guarantees that much.
    assert((((intptr_t)(database) & 3) == 0) &&
           "user-provided alloc didn't meet alignment requirement in "
           "hs_compile_char_pair_set_search");

    bool success = ue2::shuftiBuildDoubleMasks(
        ue2::CharReach(), pairs, database->dshufti_database.mask1,
        database->dshufti_database.mask2, database->dshufti_database.mask3,
        database->dshufti_database.mask4);
    if (!success) {
        // Do not leak the half-initialised database.
        hs_database_free(database);
        return HS_COMPILER_ERROR;
    }
    database->dshufti_database.pair_count = pair_count;

    size_t width = SuperVector<VECTORSIZE>::mask_width();
    assert(width <= 4 &&
           "Code needs rework if supervector's mask are bigger than 4");
    assert(width != 3 &&
           "Code needs rework if supervector's mask aren't a power of 2");
    // we need a mask such that every 2*width bits, only the lsb is set to 1
    // so for a width of 4, we repeat 0X01
    unsigned char bit_filter_mask = 0;
    for (size_t i = 8; i > 0; i -= 2 * width) {
        bit_filter_mask = bit_filter_mask << (2 * width) | 0x1;
    }
    // Replicate the one-byte pattern over the whole compare-mask type.
    memset(&(database->dshufti_database.bit_filter_mask), bit_filter_mask,
           sizeof(vector_mask_type));

    memcpy(database->dshufti_database.all_pairs, expression, pairs_bytes);
    // Zero the padding so the database content is deterministic.
    memset(database->dshufti_database.all_pairs + pairs_bytes, 0,
           padded_pairs_bytes - pairs_bytes);

    *output_database = database;
    return HS_SUCCESS;
}
/**
 * Release a database produced by hs_compile_char_pair_set_search().
 * Forwards to generic_free(), so a null pointer is ignored.
 */
HS_PUBLIC_API
void hs_free_char_pair_set_pattern(
hs_char_pair_set_compiled_pattern *database) {
generic_free(database);
}

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DIRECT_SEARCH_TYPES_H
#define DIRECT_SEARCH_TYPES_H
#include <stdalign.h>
#include "util/supervector/supervector.hpp"
#include "fdr/fdr_internal.h"
#include "hwlm/noodle_internal.h"
// Database built by hs_compile_short_literal_search().
struct hs_short_literal_compiled_pattern {
// copy of the table returned by noodBuildTable()
noodTable noodle_database;
// length in bytes of the compiled literal
u8 pattern_length;
};
// Database type for the long-literal search. The compile function allocates
// a combined_fdr_database and casts it to the public handle type.
// NOTE(review): presumably hs_long_literal_compiled_pattern_t is a typedef
// of this struct — confirm in the public header.
struct hs_long_literal_compiled_pattern {
struct combined_fdr_database fdr_database;
};
// Database type for the multi-literal search. The compile function allocates
// a combined_fdr_database and casts it to the public handle type.
// NOTE(review): presumably hs_multi_literal_compiled_pattern_t is a typedef
// of this struct — confirm in the public header.
struct hs_multi_literal_compiled_pattern {
struct combined_fdr_database fdr_database;
};
// Database built by hs_compile_single_char_search().
struct hs_single_char_compiled_pattern {
// copy of the table returned by noodBuildTable() for the one-byte literal
struct noodTable noodle_database;
};
// Database built by hs_compile_single_char_pair_search().
struct hs_single_char_pair_compiled_pattern {
// copy of the table returned by noodBuildTable() for the two-byte literal
struct noodTable noodle_database;
};
// Database built by hs_compile_char_set_search(). Depending on
// CAN_USE_WIDE_TRUFFLE, either wide_mask or the mask1/mask2 pair is filled
// at compile time and read by the search; they share the same storage.
typedef struct hs_char_set_compiled_pattern {
union
{
struct {
// filled by truffleBuildMasks()
uint8_t mask1[16] __attribute__((aligned));
uint8_t mask2[16] __attribute__((aligned));
};
// filled by truffleBuildMasksWide()
uint8_t wide_mask[32] __attribute__((aligned));
};
// allows us to get the id from the character
// (indexed by byte value; the id is the index in the user's array)
u8 char_id_map[256];
} truffle_storage;
// Payload of a character-pair-set database (double shufti).
struct dshufti_storage {
// the four masks filled by shuftiBuildDoubleMasks()
alignas(16) uint8_t mask1[16];
alignas(16) uint8_t mask2[16];
alignas(16) uint8_t mask3[16];
alignas(16) uint8_t mask4[16];
// number of pairs stored in all_pairs
size_t pair_count;
// compare-mask filter used by the search to keep one bit per pair start
typename SuperVector<VECTORSIZE>::comparemask_type bit_filter_mask;
// raw copy of the user's pairs (2 bytes each), used to find which pair hit
alignas(VECTORSIZE) uint8_t all_pairs[];
};
// Database built by hs_compile_char_pair_set_search(). It is allocated with
// extra trailing bytes for the flexible all_pairs array of dshufti_storage.
struct hs_char_pair_set_compiled_pattern {
struct dshufti_storage dshufti_database;
};
#endif // DIRECT_SEARCH_TYPES_H

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015-2018, Intel Corporation
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -614,6 +615,226 @@ hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
*/
#define HS_OFFSET_PAST_HORIZON (~0ULL)
/** @} */
/**
* The following functions are part of the extended API.
* This extension offers direct access to search algorithms
* allowing the user to minimise calling overhead for simple
* search use cases where type of the search is known.
*
* All search functions handle a limited kind of patterns. For more generic
* patterns, use @ref hs_scan()
*
* NOTE: All search functions are considered case-sensitive.
*/
/**
* @defgroup DIRECT_API_RUNTIME
*
* @{
*/
/**
 * Callback return value indicating that we should continue matching.
 * Any non-zero return value continues the scan.
 */
#define CB_CONTINUE_MATCHING ((int)(~0U))
/** Callback return value indicating that we should halt matching. */
#define CB_TERMINATE_MATCHING ((int)0)
/**
* Search the given data for the short literal pattern up to
* @ref HS_SHORT_PATTERN_THRESHOLD chars long. For longer patterns, use @ref
* hs_long_literal_search(). Other options exist for character pairs or sets.
*
* @param database
* The compiled pattern returned by @ref hs_compile_short_literal_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_short_literal_search(
const hs_short_literal_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for the long literal pattern.
*
* If the pattern length is less or equal to @ref HS_SHORT_PATTERN_THRESHOLD,
* @ref hs_short_literal_search() may be faster.
*
* @param database
* The compiled pattern returned by @ref hs_compile_long_literal_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_long_literal_search(
const hs_long_literal_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for several long literal patterns at once.
*
* @param database
* The compiled pattern returned by @ref hs_compile_multi_literal_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* The reported ID is the index of the matching literal.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_multi_literal_search(
const hs_multi_literal_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for any occurrence of the given character.
*
* @param database
* The compiled pattern returned by @ref hs_compile_single_char_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_single_char_search(
const hs_single_char_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for occurrences of any character from the given
* character set.
*
* @param database
* The compiled pattern returned by @ref hs_compile_char_set_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* The reported ID is the index of the matching char.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_char_set_search(
const hs_char_set_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for occurrences of the given ordered character pair
* ("Aj" won't match "jA").
*
* @param database
* The compiled pattern returned by @ref hs_compile_char_pair_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_single_char_pair_search(
const hs_single_char_pair_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
/**
* Search the given data for occurrences of any of the ordered character pairs
* from the given set ("Aj" won't match "jA").
*
* @param database
* The compiled pattern returned by @ref
* hs_compile_char_pair_set_search()
* @param data
* Pointer to the data to be scanned.
* @param length
* The number of bytes to scan.
* @param onEvent
* Pointer to a @ref match_event_handler callback function. If a NULL
* pointer is given, no matches will be returned.
* The "flag" argument is unused.
* The reported ID is the index of the matching pair.
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t HS_CDECL hs_char_pair_set_search(
const hs_char_pair_set_compiled_pattern_t *database, const char *data,
size_t length, match_event_handler onEvent,
void *context);
#ifdef __cplusplus
} /* extern "C" */
#endif

View File

@ -63,6 +63,21 @@ set(unit_hyperscan_SOURCES
add_executable(unit-hyperscan ${unit_hyperscan_SOURCES})
target_link_libraries(unit-hyperscan hs expressionutil)
# Unit tests for the direct search API: one source file per search flavour
# under direct_API/, built into the unit-direct-API executable and linked
# against the hs library.
set(unit_direct_api_SOURCES
${gtest_SOURCES}
direct_API/char_pair_set.cpp
direct_API/char_set.cpp
direct_API/common.h
direct_API/long_literal.cpp
direct_API/main.cpp
direct_API/multi_literal.cpp
direct_API/short_literal.cpp
direct_API/single_char_pair.cpp
direct_API/single_char.cpp
)
add_executable(unit-direct-API ${unit_direct_api_SOURCES})
target_link_libraries(unit-direct-API hs)
if (NOT FAT_RUNTIME AND BUILD_STATIC_LIBS)
set(BUILD_UNIT_INTERNAL TRUE)
set(unit_internal_SOURCES

View File

@ -0,0 +1,378 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "hwlm/noodle_internal.h"
/*
 * Declares, in the enclosing scope, the locals shared by the char-pair-set
 * tests and compiles the pattern:
 *   pair_count / character_array - copies of the macro arguments
 *   database                     - output of hs_compile_char_pair_set_search()
 *   compile_ret                  - return code of that compile call
 *   ret, buffer, context         - placeholders later filled in by
 *                                  SEARCH_CHAR_PAIR_SET
 * The (void) casts only silence "unused variable" diagnostics for tests that
 * never run a search.
 * Not wrapped in a block on purpose: the declared names must stay visible to
 * the rest of the test body.
 */
#define COMPILE_CHAR_PAIR_SET(in_character_array, in_pair_count) \
const size_t pair_count = (in_pair_count); \
const char *character_array = (in_character_array); \
hs_char_pair_set_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = hs_compile_char_pair_set_search( \
character_array, pair_count, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
/*
 * Runs hs_char_pair_set_search() on in_buffer and stores the result in `ret`.
 * Relies on `database`, `buffer`, `ret` and `context` already being declared
 * in the enclosing scope (see COMPILE_CHAR_PAIR_SET).
 *
 *   in_buffer / in_buffer_len - data to scan and number of bytes to scan
 *   in_expected_match         - number of matches the test expects
 *   in_expected_start_array   - parenthesised list of expected start offsets,
 *                               e.g. (5, 15); turned into a braced
 *                               initialiser by BRACED_INIT_LIST
 *   in_expected_id_array      - parenthesised list of expected pair ids
 *
 * The expected end offsets are derived from the start offsets by adding 2
 * (the length of a pair). The expectation arrays live in the macro's own
 * block and `context` points at them, so `context` must not be dereferenced
 * for those arrays after the macro's block has closed.
 */
#define SEARCH_CHAR_PAIR_SET(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array, in_expected_id_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match] = \
BRACED_INIT_LIST in_expected_id_array; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += 2; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_char_pair_set_search(database, buffer, buffer_len, callback, \
&context); \
}
// ------------------------free tests-------------------------------------------
/*
hs_free_char_pair_set_pattern
nullptr
general
*/
// Calls the free function with a null handle; the test passes if the call
// simply returns.
TEST(char_pair_set_free, nullptr) {
    hs_char_pair_set_compiled_pattern_t *const no_pattern = nullptr;
    hs_free_char_pair_set_pattern(no_pattern);
}
// Allocates a raw noodTable-sized buffer through test_malloc, hands it to the
// free function as if it were a compiled pattern, and then checks the
// allocation bookkeeping with EXPECT_MEMORY_CLEAN.
TEST(char_pair_set_free, general) {
    SETUP_MEM_LEAK_TEST();

    auto *raw_table =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    auto *pattern =
        reinterpret_cast<hs_char_pair_set_compiled_pattern_t *>(raw_table);
    hs_free_char_pair_set_pattern(pattern);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_char_pair_set_search
single pair
multiple pair
pair duplicate
valid pair including null char
empty char array
nullptr char array
nullptr output
*/
// Compiles a set holding only the first pair of PAIR_SET_ABCD ("aB").
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, single_pair) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_single_pair");
    hs_free_char_pair_set_pattern(database);
}
// Compiles a set holding both pairs of PAIR_SET_ABCD ("aB" and "cd").
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, two_pairs) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_two_pairs");
    hs_free_char_pair_set_pattern(database);
}
// Compiles a set in which the same pair ("aB") appears twice.
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, duplicate) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_duplicate");
    hs_free_char_pair_set_pattern(database);
}
// Compiles a single pair whose second byte is NUL ("a\0"), the first pair of
// PAIR_SET_A_NULL_BC.
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, null_char) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_null_char");
    hs_free_char_pair_set_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Debug builds only: compiling with a pair count of zero is expected to abort
// with a message containing "called with an empty set".
TEST(char_pair_set_compile, no_expression) {
    hs_char_pair_set_compiled_pattern_t *database = nullptr;
    const char *const pairs = PAIR_SET_ABCD;
    const size_t no_pairs = 0;

    EXPECT_DEATH(hs_compile_char_pair_set_search(pairs, no_pairs, &database),
                 "called with an empty set");
}
// Debug builds only: a null character array is expected to abort with a
// message containing "called with nullptr".
TEST(char_pair_set_compile, nullptr_char_array) {
    hs_char_pair_set_compiled_pattern_t *database = nullptr;
    const char *const missing_pairs = nullptr;
    const size_t one_pair = 1;

    EXPECT_DEATH(
        hs_compile_char_pair_set_search(missing_pairs, one_pair, &database),
        "called with nullptr");
}
// Debug builds only: a null output pointer is expected to abort with a
// message containing "called with nullptr".
TEST(char_pair_set_compile, nullptr_database) {
    const char *const pairs = PAIR_SET_ABCD;
    const size_t two_pairs = 2;

    EXPECT_DEATH(hs_compile_char_pair_set_search(pairs, two_pairs, nullptr),
                 "called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_char_pair_set_search
general pattern
match at start
match middle (general)
match index 15 (cross over vector)
match at end
match past end
match null char
bad caseness
search several times
match a pair duplicate
match several pattern in the same search
match when there's more pairs than fit in a vector
buffer containing null char
pattern with null char
general pattern
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Pair "aB" sits at the very beginning of EXPR_NOISE_0: one match expected at
// offset 0 with pair id 0.
TEST(char_pair_set_search, start) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_start");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Pair "aB" in the middle of the buffer (offset 5). The whole
// compile/search/free cycle is bracketed by the memory-leak helper macros
// (defined outside this file; presumably they track allocations - confirm).
TEST(char_pair_set_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_general");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Pair "aB" occurs twice in EXPR_NOISE_5_15, at offsets 5 and 15; the second
// occurrence starts on byte 15 and ends on byte 16.
TEST(char_pair_set_search, cross_vector) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_cross_vector");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Pair "aB" occupies the last two bytes of the buffer (offset 30 of 32).
TEST(char_pair_set_search, end) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_end");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30), (0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Same buffer as the "end" test but scanned one byte short, so the second
// byte of the pair lies outside the scanned range: no match expected.
TEST(char_pair_set_search, past_end) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_past_end");
// cppcheck-suppress unsignedLessThanZero
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// The compiled pair is "a\0" (first pair of PAIR_SET_A_NULL_BC); the buffer
// contains 'a' followed by a NUL byte at offset 5.
TEST(char_pair_set_search, null_char) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// The buffer only contains "Ab" and "AB", never "aB": the test expects the
// search to report nothing for these wrongly-cased occurrences.
TEST(char_pair_set_search, bad_case) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_bad_case");
// cppcheck-suppress unsignedLessThanZero
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Reuses one compiled database for two consecutive searches over different
// buffers and checks the results of each search independently.
TEST(char_pair_set_search, several_search) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_several_search");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// The set holds the pair "aB" twice. The test expects a single report at
// offset 5, carrying the id of the first occurrence of the pair (0).
// The diagnostic strings use the same "test_" / "hs_" prefixes as every other
// test in this file so that failure output names the real API function.
TEST(char_pair_set_search, duplicate) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_duplicate");
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
    hs_free_char_pair_set_pattern(database);
}
// Two different pairs ("aB" id 0, "cd" id 1) both occur in the same buffer,
// at offsets 5 and 7 respectively.
TEST(char_pair_set_search, match_multiple) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_match_multiple");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 7), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Nine pairs are compiled ("u0".."u7" followed by "aB"); only the last one
// (id 8) occurs in the buffer, at offset 5.
TEST(char_pair_set_search, last_of_long_pattern) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_LONG_PATTERN_AB, 9);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_last_of_long_pattern");
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (8));
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
// Both the compiled set and the scanned buffer contain a NUL byte.
// The set is {"a\0" (id 0), "Bc" (id 1)} and EXPR_NOISE_5_NULL holds
// "a\0Bcd" starting at offset 5, so "a\0" occurs at offset 5 and "Bc" at
// offset 7. Both occurrences are listed: leaving "Bc" out would make it an
// unexpected match.
TEST(char_pair_set_search, null_char_buff_and_pattern) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 2);
    ASSERT_COMPILE_SUCCESS(
        "test_char_pair_set_search_null_char_buff_and_pattern");
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 2, (5, 7), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
    hs_free_char_pair_set_pattern(database);
}
// The buffer contains a NUL byte but the compiled pairs ("aB" id 0, "cd"
// id 1) do not. In EXPR_NOISE_5_NULL the 'a' at offset 5 is followed by NUL,
// so "aB" must not be reported; "cd" however does occur at offset 8 and is
// therefore listed as the single expected match.
TEST(char_pair_set_search, null_char_buff) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char_buff");
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (8), (1));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
    hs_free_char_pair_set_pattern(database);
}
// Scanning zero bytes: the search is expected to succeed and report nothing.
TEST(char_pair_set_search, empty_buff) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_CHAR_PAIR_SET("", 0, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);
hs_free_char_pair_set_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Debug builds only: searching with a null database is expected to abort
// with "called with nullptr database".
// `buffer` and `ret` are declared inside the death statement because
// SEARCH_CHAR_PAIR_SET assigns to them and COMPILE_CHAR_PAIR_SET (which would
// normally declare them) is not used here.
TEST(char_pair_set_search, nullptr_pattern) {
const hs_char_pair_set_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
},
"called with nullptr database");
}
// Debug builds only: searching a null buffer with a non-zero length is
// expected to abort with "called with nullptr buffer".
// NOTE(review): `database` is not freed after the death check - confirm
// whether that is intentional.
TEST(char_pair_set_search, nullptr_buffer) {
COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_buffer");
EXPECT_DEATH(
{
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_CHAR_PAIR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ());
},
"called with nullptr buffer");
}
// Debug builds only: searching with a null match callback is expected to
// abort with "called with nullptr callback". The context is filled in by hand
// (one expected "aB" match starting at 5 and ending at 7, pair id 0) because
// SEARCH_CHAR_PAIR_SET always passes the real callback.
TEST(char_pair_set_search, nullptr_callback) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_callback");

    const size_t match_total = 1;
    size_t starts[match_total] = {5};
    size_t ends[match_total] = {5 + 2}; // a pair is two bytes long
    size_t ids[match_total] = {0};

    context.number_wrong = 0;
    context.number_matched = 0;
    context.array_size = match_total;
    context.expected_id_array = ids;
    context.expected_end_array = ends;
    context.expected_start_array = starts;

    buffer = EXPR_NOISE_5;
    const size_t scan_len = EXPR_NOISE_LEN;
    EXPECT_DEATH(hs_char_pair_set_search(database, buffer, scan_len, nullptr,
                                         &context),
                 "called with nullptr callback");
}
#endif

View File

@ -0,0 +1,333 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
/*
 * Declares, in the enclosing scope, the locals shared by the char-set tests
 * and compiles the pattern:
 *   character_count / character_array - copies of the macro arguments
 *   database                          - output of hs_compile_char_set_search()
 *   compile_ret                       - return code of that compile call
 *   ret, buffer, context              - placeholders later filled in by
 *                                       SEARCH_CHAR_SET
 * The (void) casts only silence "unused variable" diagnostics for tests that
 * never run a search.
 * Not wrapped in a block on purpose: the declared names must stay visible to
 * the rest of the test body.
 */
#define COMPILE_CHAR_SET(in_character_array, in_character_count) \
const size_t character_count = (in_character_count); \
const char *character_array = (in_character_array); \
hs_char_set_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = hs_compile_char_set_search( \
character_array, character_count, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
/*
 * Runs hs_char_set_search() on in_buffer and stores the result in `ret`.
 * Relies on `database`, `buffer`, `ret` and `context` already being declared
 * in the enclosing scope (see COMPILE_CHAR_SET).
 *
 *   in_buffer / in_buffer_len - data to scan and number of bytes to scan
 *   in_expected_match         - number of matches the test expects
 *   in_expected_start_array   - parenthesised list of expected start offsets,
 *                               e.g. (5, 6); turned into a braced initialiser
 *                               by BRACED_INIT_LIST
 *   in_expected_id_array      - parenthesised list of expected character ids
 *
 * The expected end offsets are derived from the start offsets by adding 1
 * (a match is a single character). The expectation arrays live in the macro's
 * own block and `context` points at them, so `context` must not be
 * dereferenced for those arrays after the macro's block has closed.
 */
#define SEARCH_CHAR_SET(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array, in_expected_id_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match] = \
BRACED_INIT_LIST in_expected_id_array; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += 1; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_char_set_search(database, buffer, buffer_len, callback, \
&context); \
}
// ------------------------free tests-------------------------------------------
/*
hs_free_char_set_pattern
nullptr
general
*/
// Calls the free function with a null handle; the test passes if the call
// simply returns.
TEST(char_set_free, nullptr) {
    hs_char_set_compiled_pattern_t *const no_pattern = nullptr;
    hs_free_char_set_pattern(no_pattern);
}
// Allocates a raw truffle_storage-sized buffer through test_malloc, hands it
// to the free function as if it were a compiled pattern, and then checks the
// allocation bookkeeping with EXPECT_MEMORY_CLEAN.
TEST(char_set_free, general) {
    SETUP_MEM_LEAK_TEST();

    auto *raw_storage = reinterpret_cast<truffle_storage *>(
        test_malloc(sizeof(truffle_storage)));
    auto *pattern =
        reinterpret_cast<hs_char_set_compiled_pattern_t *>(raw_storage);
    hs_free_char_set_pattern(pattern);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_char_set_search
single char
same char twice
general (several different chars)
null char
empty char array
nullptr char array
nullptr output
*/
// Compiles a set made of only the first character of CHAR_SET_A ('a').
TEST(char_set_compile, single_char) {
COMPILE_CHAR_SET(CHAR_SET_A, 1);
EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char");
hs_free_char_set_pattern(database);
}
// Compiles the first two characters of CHAR_SET_A, i.e. the same character
// ('a') listed twice.
TEST(char_set_compile, single_char_twice) {
COMPILE_CHAR_SET(CHAR_SET_A, 2);
EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char_twice");
hs_free_char_set_pattern(database);
}
// Compiles a set of five distinct characters ("aBcde").
TEST(char_set_compile, general) {
COMPILE_CHAR_SET(CHAR_SET_ABCDE, 5);
EXPECT_COMPILE_SUCCESS("test_compile_char_set_general");
hs_free_char_set_pattern(database);
}
// Compiles a set whose only member is the NUL byte.
TEST(char_set_compile, null_char) {
COMPILE_CHAR_SET(CHAR_SET_NULL, 1);
EXPECT_COMPILE_SUCCESS("test_compile_char_set_null_char");
hs_free_char_set_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Debug builds only: compiling with a character count of zero is expected to
// abort with a message containing "called with an empty set".
TEST(char_set_compile, no_expression) {
    hs_char_set_compiled_pattern_t *database = nullptr;
    const char *const characters = CHAR_SET_AB;
    const size_t no_characters = 0;

    EXPECT_DEATH(
        hs_compile_char_set_search(characters, no_characters, &database),
        "called with an empty set");
}
// Debug builds only: a null character array is expected to abort with a
// message containing "called with nullptr".
TEST(char_set_compile, nullptr_char_array) {
    hs_char_set_compiled_pattern_t *database = nullptr;
    const char *const missing_characters = nullptr;
    const size_t one_character = 1;

    EXPECT_DEATH(hs_compile_char_set_search(missing_characters, one_character,
                                            &database),
                 "called with nullptr");
}
// Debug builds only: a null output pointer is expected to abort with a
// message containing "called with nullptr".
TEST(char_set_compile, nullptr_database) {
    const char *const characters = CHAR_SET_AB;
    const size_t two_characters = 2;

    EXPECT_DEATH(
        hs_compile_char_set_search(characters, two_characters, nullptr),
        "called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_char_set_search
match at start
match middle (general)
match index 15 (last char of a vector)
match at end
match past end
match null char
bad caseness
search several times
match first char
match last char
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Set {'a' (id 0), 'B' (id 1)}: both characters open EXPR_NOISE_0, at offsets
// 0 and 1.
TEST(char_set_search, start) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_start");
SEARCH_CHAR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 2, (0, 1), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// 'a' and 'B' in the middle of the buffer (offsets 5 and 6). The whole
// compile/search/free cycle is bracketed by the memory-leak helper macros
// (defined outside this file; presumably they track allocations - confirm).
TEST(char_set_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_general");
SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Four matches: "aB" appears at offsets 5-6 and again at 15-16, so byte 15
// and byte 16 each carry one match.
TEST(char_set_search, cross_vector) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_cross_vector");
SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16),
(0, 1, 0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// 'a' and 'B' are the last two bytes of the buffer (offsets 30 and 31).
TEST(char_set_search, end) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_end");
SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 2, (30, 31), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// Same buffer as the "end" test but scanned one byte short: the 'B' at
// offset 31 is outside the scanned range, so only the 'a' at 30 is expected.
TEST(char_set_search, past_end) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_past_end");
SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30), (0));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// The set contains only the NUL byte, which sits at offset 6 of
// EXPR_NOISE_5_NULL.
TEST(char_set_search, null_char) {
COMPILE_CHAR_SET(CHAR_SET_NULL, 1);
ASSERT_COMPILE_SUCCESS("test_char_set_search_null_char");
SEARCH_CHAR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6), (0));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// The buffer holds 'A' and 'b' (wrong case) plus a single correctly-cased
// 'B' at offset 16; only that 'B' (id 1) is expected.
TEST(char_set_search, bad_case) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_bad_case");
SEARCH_CHAR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 1, (16), (1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// Reuses one compiled database for two consecutive searches over different
// buffers and checks the results of each search independently.
TEST(char_set_search, several_search) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_several_search");
SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16),
(0, 1, 0, 1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
// Only the first member of the set ('a', id 0) appears in the otherwise
// uniform buffer, at offset 5. The failure message prints that single
// character rather than the whole set.
TEST(char_set_search, first_char) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_first_char");
SEARCH_CHAR_SET(EXPR_UNIFORM_1_A, EXPR_UNIFORM_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[0], buffer);
hs_free_char_set_pattern(database);
}
// Only the last member of the set ('B', id 1) appears in the otherwise
// uniform buffer, at offset 5. The failure message prints that single
// character rather than the whole set.
TEST(char_set_search, last_char) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_last_char");
SEARCH_CHAR_SET(EXPR_UNIFORM_1_B, EXPR_UNIFORM_LEN, 1, (5), (1));
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[1], buffer);
hs_free_char_set_pattern(database);
}
// Scanning zero bytes: the search is expected to succeed and report nothing.
TEST(char_set_search, empty_buff) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_CHAR_SET("", 0, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);
hs_free_char_set_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Debug builds only: searching with a null database is expected to abort
// with "called with nullptr database".
// `buffer` and `ret` are declared inside the death statement because
// SEARCH_CHAR_SET assigns to them and COMPILE_CHAR_SET (which would normally
// declare them) is not used here.
TEST(char_set_search, nullptr_pattern) {
const hs_char_set_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
},
"called with nullptr database");
}
// Debug builds only: searching a null buffer with a non-zero length is
// expected to abort with "called with nullptr buffer".
// NOTE(review): `database` is not freed after the death check - confirm
// whether that is intentional.
TEST(char_set_search, nullptr_buffer) {
COMPILE_CHAR_SET(CHAR_SET_AB, 2);
ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_buffer");
EXPECT_DEATH(
{
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_CHAR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ());
},
"called with nullptr buffer");
}
// Debug builds only: searching with a null match callback is expected to
// abort with "called with nullptr callback". The context is filled in by hand
// (two expected single-character matches at offsets 5 and 6) because
// SEARCH_CHAR_SET always passes the real callback. The expected-id array is
// left as zero-initialised by COMPILE_CHAR_SET.
TEST(char_set_search, nullptr_callback) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_callback");

    const size_t match_total = 2;
    size_t starts[match_total] = {5, 6};
    size_t ends[match_total] = {5 + 1, 6 + 1}; // a match is one byte long

    context.number_wrong = 0;
    context.number_matched = 0;
    context.array_size = match_total;
    context.expected_end_array = ends;
    context.expected_start_array = starts;

    buffer = EXPR_NOISE_5;
    const size_t scan_len = EXPR_NOISE_LEN;
    EXPECT_DEATH(
        hs_char_set_search(database, buffer, scan_len, nullptr, &context),
        "called with nullptr callback");
}
#endif

210
unit/direct_API/common.h Normal file
View File

@ -0,0 +1,210 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COMMON_H
#define COMMON_H
#include <iostream>
#include <unordered_set>
#include "hs_common.h"
#include "hs_compile.h"
#include "hs_runtime.h"
#include "hs_direct_search.h"
#include "hs_direct_search_types.h"
#include "gtest/gtest.h"
// -----------------------------------------------------------------------------
/*
 * Literal patterns. The number in each name is the intended pattern length;
 * the *_NULL variants embed a NUL byte, so tests must pass the length
 * explicitly instead of relying on strlen().
 */
#define PATTERN_0_CHAR ""
#define PATTERN_1_CHAR "a"
#define PATTERN_1_CHAR_NULL "\0"
#define PATTERN_2_CHAR "aB"
#define PATTERN_2_WITH_NULL "a\0"
#define PATTERN_3_CHAR "aBc"
#define PATTERN_5_CHAR "aBcde"
#define PATTERN_5_WITH_NULL "a\0Bcd"
#define PATTERN_8_CHAR "aBcdeoAb"
#define PATTERN_10_CHAR "aBcdeoAbCD"
/* NOTE(review): this literal holds 26 characters, not 25 - confirm whether
 * callers pass an explicit length of 25 or the name/literal should change. */
#define PATTERN_25_CHAR "aBcdeoAbCDumefnvqmuz,crhUq"
/* Character sets: each byte of the literal is one member of the set. */
#define CHAR_SET_NULL "\0"
#define CHAR_SET_A "aaAA"
#define CHAR_SET_AB "aB"
#define CHAR_SET_ABCDE "aBcde"
/* Character-pair sets: every two consecutive bytes form one ordered pair. */
#define PAIR_SET_ABCD "aBcd"
#define PAIR_SET_A_NULL_BC "a\0Bc"
#define PAIR_SET_AB_DUPLICATE "aBaB"
#define PAIR_SET_LONG_PATTERN_AB "u0u1u2u3u4u5u6u7aB"
/*
 * Brace-enclosed initialisers for arrays of literal patterns. The numeric
 * suffixes mirror the length of each literal in the array, in order.
 * PATTERN_ARRAY_CONTAIN_EMPTY_0 is defined exactly once (it used to be
 * repeated further down the list).
 */
#define PATTERN_ARRAY_CONTAIN_EMPTY_0 {""}
#define PATTERN_ARRAY_SINGLE_CHAR_PAT_1 {"a"}
#define PATTERN_ARRAY_SINGLE_PAT_5 {"aBcde"}
#define PATTERN_ARRAY_GENERAL_5_5 {"aBcde","fghij"}
#define PATTERN_ARRAY_GENERAL_5_DUPLICATE {"aBcde","aBcde"}
#define PATTERN_ARRAY_LONG_10_10 {"aBcdeoAbCD","muz,crhUqu"}
#define PATTERN_ARRAY_CONTAIN_NULLPTR_5_0 {"aBcde",nullptr}
#define PATTERN_ARRAY_WITH_NULL_5_5 {"a\0Bcd","aBcde"}
#define PATTERN_ARRAY_OVERLAP_5_8 {"aBcde","cdeoAbCD"}
/* A null pattern array, used to exercise argument checking. */
#define PATTERN_ARRAY_NULLPTR ((char**)nullptr)
// -----------------------------------------------------------------------------
/*
 * Buffers to scan. Every EXPR_NOISE_* literal is EXPR_NOISE_LEN (32)
 * characters of filler text with test patterns spliced in; numbers in a name
 * give the offset(s) at which a pattern starts (e.g. EXPR_NOISE_5_15 holds
 * "aBcdeoAbCD" at offsets 5 and 15, EXPR_NOISE_AB_END_30 ends with "aB").
 * EXPR_NOISE_5_NULL contains an embedded NUL byte at offset 6.
 */
#define EXPR_NOISE_LEN 32
#define EXPR_NOISE "zmeh vnMezr,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_0 "aBcdeoAbCDr,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_5 "zmeh aBcdeoAbCDumefnvqmuz,crhUqu"
#define EXPR_NOISE_5_NULL "zmeh a\0Bcdr,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_5_15 "zmeh aBcdeoAbCDaBcdeoAbCD,crhUqu"
#define EXPR_NOISE_5_15_BAD_CASE "zmeh AbcdeoAbCDABcdeoAbCD,crhUqu"
#define EXPR_NOISE_MIX "zmeh fgcder,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_PAT2_5 "zmeh fghijr,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_DUO_5_15 "zmeh aBcdeoAbCDfghijvqmuz,crhUqu"
#define EXPR_NOISE_SHORT_ONLY_5 "zmeh aBcdeoAbHHumefnvqmuz,crhUqu"
#define EXPR_NOISE_5_AB "zmeh aBMezr,xbzumefnvqmuz,crhUqu"
#define EXPR_NOISE_A_END_31 "zmeh vnMezr,xbzumefnvqmuz,crhUqa"
#define EXPR_NOISE_AB_END_30 "zmeh vnMezr,xbzumefnvqmuz,crhUaB"
#define EXPR_NOISE_ABCDE_END_27 "zmeh vnMezr,xbzumefnvqmuz,caBcde"
#define EXPR_NOISE_ABCDEOABCD_END_22 "zmeh vnMezr,xbzumefnvqaBcdeoAbCD"
/* Uniform buffers: 32 'u' characters, optionally with one 'a' or 'B' at
 * offset 5. */
#define EXPR_UNIFORM_LEN 32
#define EXPR_UNIFORM "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu"
#define EXPR_UNIFORM_1_A "uuuuuauuuuuuuuuuuuuuuuuuuuuuuuuu"
#define EXPR_UNIFORM_1_B "uuuuuBuuuuuuuuuuuuuuuuuuuuuuuuuu"
// -----------------------------------------------------------------------------
// Turns a parenthesised argument list into a braced initializer: writing
// `BRACED_INIT_LIST (1, 2)` expands to `{1, 2}`. Used so that callers can pass
// a comma-separated list as a single macro argument.
#define BRACED_INIT_LIST(...) {__VA_ARGS__}
// The four macros below inspect `compile_ret` and `database`, which must be in
// scope at the expansion site (the COMPILE_* macros declare them).
// `func_name` is only used to label the failure message.
// EXPECT_* variants record a failure and continue; ASSERT_* variants abort the
// current test function.
#define EXPECT_COMPILE_SUCCESS(func_name) \
EXPECT_EQ(compile_ret, HS_SUCCESS) \
<< "Fail to build the pattern in " << (func_name) << "\n"; \
EXPECT_NE(database, nullptr) \
<< "Compilation returned nullptr database " << (func_name) << "\n";
#define EXPECT_COMPILE_FAILURE(func_name) \
EXPECT_NE(compile_ret, HS_SUCCESS) \
<< "Pattern built fine when error was expected in " << (func_name) \
<< "\n";
#define ASSERT_COMPILE_SUCCESS(func_name) \
ASSERT_EQ(compile_ret, HS_SUCCESS) \
<< "Fail to build the pattern in " << (func_name) << "\n"; \
ASSERT_NE(database, nullptr) \
<< "Compilation returned nullptr database " << (func_name) << "\n";
#define ASSERT_COMPILE_FAILURE(func_name) \
ASSERT_NE(compile_ret, HS_SUCCESS) \
<< "Pattern built fine when error was expected in " << (func_name) \
<< "\n";
// Checks the outcome of a search performed by one of the SEARCH_* macros.
// Relies on `ret` and `context` being in scope at the expansion site.
//   - the search call must have returned HS_SUCCESS;
//   - the callback must have seen exactly as many expected matches as were
//     listed (context.array_size);
//   - the callback must not have seen any match outside the expected set.
// `search_func_name`, `pattern` and `buffer` only label the failure message.
//
// The last check compares against zero with EXPECT_EQ: number_wrong is an
// unsigned counter, so a "0 <= number_wrong" comparison can never fail and
// would silently accept unexpected matches.
#define EXPECT_SEARCH_SUCCESS(search_func_name, pattern, buffer)               \
    EXPECT_EQ(HS_SUCCESS, ret)                                                 \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \""  \
        << (buffer) << "\"\n Search failed";                                   \
    EXPECT_EQ(context.array_size, context.number_matched)                      \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \""  \
        << (buffer) << "\"\n Missed some matches.\n";                          \
    EXPECT_EQ(0u, context.number_wrong)                                        \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \""  \
        << (buffer) << "\"\n Unexpected matches.\n";
// -----------------------------------------------------------------------------
/*
 * State shared between a test and the match callback.
 * The three expected_* arrays are parallel: entry i describes one expected
 * match as a (start, end, pattern id) triple. They all hold array_size
 * entries and are owned by the test, not by this structure.
 */
typedef struct callback_context {
/* offsets in the searched buffer where each expected match starts */
size_t *expected_start_array;
/* offsets in the searched buffer where each expected match ends */
size_t *expected_end_array;
/* pattern id expected to be reported for each expected match */
size_t *expected_id_array;
/* number of entries in each of the three arrays above */
size_t array_size;
/* number of reported matches that were found in the expected arrays */
size_t number_matched;
/* number of reported matches that were NOT found in the expected arrays */
size_t number_wrong;
} context_t;
static
int callback(unsigned int id, unsigned long long start,
unsigned long long end_offset, unsigned int flags,
void *raw_context) {
(void)flags;
context_t *context = reinterpret_cast<context_t*>(raw_context);
bool matched = false;
// Check if the match is expected
for (size_t i = 0; i < context->array_size; i++) {
if (end_offset == context->expected_end_array[i] &&
start == context->expected_start_array[i] &&
id == context->expected_id_array[i]) {
matched = true;
}
}
// Tally the right counter whether the match was expected or not
if (matched) {
context->number_matched += 1;
// printf("match at index %llu\n", end_offset);
} else {
context->number_wrong += 1;
// printf("unplanned match at index %llu\n", end_offset);
}
return CB_CONTINUE_MATCHING;
}
// Addresses handed out by test_malloc() that have not been released yet.
static std::unordered_set<void *> alloced_mem;

// Allocation hook: forwards to malloc() and records the returned address so
// that the tests can later check that everything was released.
static void* test_malloc(size_t size) {
    void *block = malloc(size);
    alloced_mem.insert(block);
    return block;
}
// Release hook paired with test_malloc(). An address that is currently
// tracked is untracked and freed; any other address (including one that was
// already released) is reported together with the list of tracked addresses
// and fails the running test.
static void test_free(void *ptr) {
    if (alloced_mem.erase(ptr) == 1) {
        free(ptr);
        return;
    }

    // Unknown address: dump what is tracked to help diagnose the mismatch.
    printf("all currently allocated memory:\n");
    for (const void *tracked : alloced_mem) {
        printf("%p ", tracked);
    }
    printf("\nTrying to free: %p\n", ptr);
    FAIL();
}
// Installs the tracking allocator pair (test_malloc / test_free) through
// hs_set_allocator for the duration of a test.
#define SETUP_MEM_LEAK_TEST() hs_set_allocator(test_malloc, test_free);
// Passes null hooks to hs_set_allocator once the test is done.
// NOTE(review): presumably this restores the library's default allocator -
// confirm against the hs_set_allocator documentation.
#define UNSET_MEM_LEAK_TEST() hs_set_allocator(nullptr, nullptr);
// Fails the test if any tracked allocation is still outstanding, then forgets
// the outstanding addresses so that later tests start from an empty set.
#define EXPECT_MEMORY_CLEAN() \
EXPECT_TRUE(alloced_mem.empty()); \
alloced_mem.clear();
#endif // COMMON_H

View File

@ -0,0 +1,394 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "fdr/fdr_internal.h"
// Compiles `in_pattern` (first `in_pattern_len` bytes) with
// hs_compile_long_literal_search and declares, in the enclosing scope, the
// locals the other test macros rely on:
//   pattern, pattern_len - the compiled pattern and its length
//   database             - output of the compile call (nullptr beforehand)
//   compile_ret          - return code of the compile call
//   ret, buffer, context - filled in later by SEARCH_LONG_LITERAL
// Because it declares variables it can be used only once per scope. The caller
// remains responsible for freeing `database`.
#define COMPILE_LONG_LITERAL(in_pattern, in_pattern_len) \
size_t pattern_len = (in_pattern_len); \
const char *pattern = (in_pattern); \
hs_long_literal_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = \
hs_compile_long_literal_search(pattern, pattern_len, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
// Runs hs_long_literal_search on the first `in_buffer_len` bytes of
// `in_buffer` and stores the return code in `ret`.
// `in_expected_start_array` is a parenthesised list of `in_expected_match`
// offsets at which a match is expected to START, e.g. (5, 15), or () for none.
// The expected end of each match is derived as start + pattern_len and the
// expected pattern id is always 0.
// Requires `database`, `pattern_len`, `buffer`, `ret` and `context` to be in
// scope (see COMPILE_LONG_LITERAL). The expected arrays are local to the
// macro's block, so `context` must only be read for its counters afterwards.
#define SEARCH_LONG_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match]; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += pattern_len; \
expected_id_array[i] = 0; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_long_literal_search(database, buffer, buffer_len, callback, \
&context); \
}
// The tests below use 5-byte patterns as "short" and 10-byte patterns as
// "long"; fail the build if the threshold moves so the tests get revisited.
static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
"changing the threshold for short/long literal require changing "
"the tests to still test the threshold behavior");
// ------------------------free tests-------------------------------------------
/*
hs_free_long_literal_pattern
nullptr
general
*/
// Calls the free function on a null handle; the test passes as long as the
// call returns.
TEST(long_literal_free, nullptr) {
hs_long_literal_compiled_pattern_t *database = nullptr;
hs_free_long_literal_pattern(database);
}
// Allocates a raw combined_fdr_database through the tracking allocator, hands
// it to the public free function and checks that no tracked allocation is
// left afterwards. The block's contents are never initialised.
TEST(long_literal_free, general) {
SETUP_MEM_LEAK_TEST();
combined_fdr_database *clear_database =
reinterpret_cast<combined_fdr_database *>(
test_malloc(sizeof(combined_fdr_database)));
hs_long_literal_compiled_pattern_t *database =
reinterpret_cast<hs_long_literal_compiled_pattern_t*>(clear_database);
hs_free_long_literal_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_long_literal_search
<=8 char
general (>8 char)
valid pattern including null char
empty expression
nullptr expression
nullptr output
*/
// A 5-byte pattern (below HS_SHORT_PATTERN_THRESHOLD) must compile.
TEST(long_literal_compile, short) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // Inspect the handle before releasing it: comparing a pointer that has
    // already been freed is not a reliable check.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_short");
    hs_free_long_literal_pattern(database);
}
// A 10-byte pattern (above HS_SHORT_PATTERN_THRESHOLD) must compile.
TEST(long_literal_compile, general) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    // Inspect the handle before releasing it: comparing a pointer that has
    // already been freed is not a reliable check.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_general");
    hs_free_long_literal_pattern(database);
}
// A pattern containing an embedded NUL byte must compile when its length is
// given explicitly.
TEST(long_literal_compile, null_char) {
    COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5);
    // Inspect the handle before releasing it: comparing a pointer that has
    // already been freed is not a reliable check.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_null_char");
    hs_free_long_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Compiling a zero-length pattern is expected to terminate the process with a
// message matching "called with an empty pattern".
TEST(long_literal_compile, empty_pattern) {
hs_long_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(
hs_compile_long_literal_search(PATTERN_0_CHAR, 0, &database),
"called with an empty pattern");
}
// Compiling a null pattern pointer is expected to terminate the process with
// a message matching "called with nullptr".
TEST(long_literal_compile, nullptr_pattern) {
hs_long_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(hs_compile_long_literal_search(nullptr, 5, &database),
"called with nullptr");
}
// Passing a null output pointer is expected to terminate the process with a
// message matching "called with nullptr".
TEST(long_literal_compile, nullptr_database) {
EXPECT_DEATH(hs_compile_long_literal_search(PATTERN_5_CHAR, 5, nullptr),
"called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_long_literal_search
short pattern
positive match
negative match
general pattern
general pattern but the buffer only has the short part of it
extra long pattern (vectorized confirm)
match at start
match middle (general)
match index 15 (cross over vector)
match at end
match past end (a few char ok, then end, so missing some chars)
bad caseness
search several times
single char pattern
general match
match at end
no match
buffer containing null char
pattern with null char
general pattern (no null char searched for)
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// A 5-byte pattern is found once, at offset 5 of the buffer.
TEST(long_literal_search, short_positive) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // Label names this test so that a failure points at the right place.
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_positive");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
// A 5-byte pattern that does not occur in the buffer yields no match.
TEST(long_literal_search, short_negative) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // Label names this test so that a failure points at the right place.
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_negative");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
// The buffer contains only the first 8 bytes of the 10-byte pattern (followed
// by different bytes), so no match is expected.
TEST(long_literal_search, short_but_negative_long) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_but_negative_long");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// The pattern sits at the very beginning of the buffer (offset 0).
TEST(long_literal_search, start) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_start");
SEARCH_LONG_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Nominal case: one match at offset 5. Runs with the tracking allocator
// installed and checks that compile + search + free leaves nothing allocated.
TEST(long_literal_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_general");
SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// A 25-byte pattern is found once, at offset 5. Only the first 25 bytes of
// PATTERN_25_CHAR are compiled.
TEST(long_literal_search, extra_long) {
COMPILE_LONG_LITERAL(PATTERN_25_CHAR, 25);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_extra_long");
SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Two occurrences, at offsets 5 and 15. The second one is the case the test
// plan in this file labels "cross over vector".
TEST(long_literal_search, cross_vector) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_cross_vector");
SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// The pattern occupies the last 10 bytes of the buffer (starts at offset 22).
TEST(long_literal_search, end) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_end");
SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN, 1, (22));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Same buffer as the "end" test but searched with a length 3 bytes shorter, so
// the pattern is cut off by the end of the searched range: no match expected.
TEST(long_literal_search, past_end) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_past_end");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN - 3, 0,
());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// The buffer holds the pattern's letters with a different case; the test
// expects the search to report nothing.
TEST(long_literal_search, bad_case) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_bad_case");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Reuses one compiled database for two consecutive searches over different
// buffers and checks each result independently.
TEST(long_literal_search, several_search) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_several_search");
SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// One-byte pattern, expected at offsets 5 and 15.
// NOTE(review): PATTERN_1_CHAR is defined outside this view; the expected
// offsets imply it is "a" - confirm in common.h.
TEST(long_literal_search, single_char) {
COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char");
SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// One-byte pattern expected at offset 30 while only EXPR_NOISE_LEN - 1 (31)
// bytes are searched, i.e. the match is on the last searched byte.
TEST(long_literal_search, single_char_end) {
COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_end");
SEARCH_LONG_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// One-byte pattern searched in the baseline buffer; no match is expected.
TEST(long_literal_search, single_char_no_match) {
COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_no_match");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Both the pattern and the buffer contain an embedded NUL byte; the pattern is
// expected to be found at offset 5.
TEST(long_literal_search, null_char_buff_and_pattern) {
COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5);
ASSERT_COMPILE_SUCCESS(
"test_long_literal_search_null_char_buff_and_pattern");
SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// The buffer contains a NUL byte but the pattern (without NUL) does not occur
// in it; no match is expected.
TEST(long_literal_search, null_char_buff) {
COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_null_char_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
// Searching a zero-length buffer must succeed and report nothing.
TEST(long_literal_search, empty_buff) {
COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
ASSERT_COMPILE_SUCCESS("test_long_literal_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_LONG_LITERAL("", 0, 0, ());
EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
hs_free_long_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Searching with a null database is expected to terminate the process with a
// message matching "called with nullptr database".
// The locals declared inside the death-test block (buffer, ret, pattern_len)
// exist only because SEARCH_LONG_LITERAL refers to them by name.
TEST(long_literal_search, nullptr_pattern) {
const hs_long_literal_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
size_t pattern_len = 5;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
},
"called with nullptr database");
}
// Searching a null buffer is expected to terminate the process with a message
// matching "called with nullptr buffer".
TEST(long_literal_search, nullptr_buffer) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_LONG_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");
    // GoogleTest runs the death-test statement in a separate process, so this
    // process still owns the compiled database and has to release it.
    hs_free_long_literal_pattern(database);
}
// Searching with a null callback is expected to terminate the process with a
// message matching "called with nullptr callback".
// The search call is written out by hand because SEARCH_LONG_LITERAL always
// passes the shared `callback`.
TEST(long_literal_search, nullptr_callback) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_callback");
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += pattern_len;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;
    EXPECT_DEATH(
        {
            hs_long_literal_search(database, buffer, buffer_len, nullptr,
                                   &context);
        },
        "called with nullptr callback");
    // GoogleTest runs the death-test statement in a separate process, so this
    // process still owns the compiled database and has to release it.
    hs_free_long_literal_pattern(database);
}
#endif

36
unit/direct_API/main.cpp Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gtest/gtest.h"
// Driver: run all the tests (defined in other source files in this directory)
int main(int argc, char **argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -0,0 +1,515 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "fdr/fdr_internal.h"
// Compiles a set of literals with hs_compile_multi_literal_search and
// declares, in the enclosing scope, the locals the other test macros rely on.
//   in_pattern       - braced initializer of C strings (a PATTERN_ARRAY_* macro)
//   in_pattern_count - number of patterns
//   in_pattern_len   - parenthesised list of the pattern lengths, e.g. (5, 5)
// Declares: pattern_count, pattern_len[], pattern_storage[], pattern, database,
// compile_ret, ret, buffer and context. Because it declares variables it can be
// used only once per scope. The caller remains responsible for freeing
// `database`.
#define COMPILE_MULTI_LITERAL(in_pattern, in_pattern_count, in_pattern_len) \
const size_t pattern_count = (in_pattern_count); \
size_t pattern_len[pattern_count] = BRACED_INIT_LIST in_pattern_len; \
const char *pattern_storage[] = in_pattern; \
const char **pattern = pattern_storage; \
hs_multi_literal_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = hs_compile_multi_literal_search( \
pattern, pattern_count, pattern_len, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
// Runs hs_multi_literal_search on the first `in_buffer_len` bytes of
// `in_buffer` and stores the return code in `ret`.
//   in_expected_start_array - parenthesised list of offsets where a match is
//                             expected to START, e.g. (5, 15), or () for none
//   in_expected_id_array    - parenthesised list of the pattern id expected for
//                             each of those matches
// The expected end of every match is computed as start + pattern_len[0], so
// the macro is only usable when every expected match has the same length as
// pattern 0; tests with mixed lengths must build the arrays by hand.
// Requires `database`, `pattern_len`, `buffer`, `ret` and `context` to be in
// scope (see COMPILE_MULTI_LITERAL).
#define SEARCH_MULTI_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array, in_expected_id_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match] = \
BRACED_INIT_LIST in_expected_id_array; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += pattern_len[0]; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_multi_literal_search(database, buffer, buffer_len, callback, \
&context); \
}
// ------------------------free tests-------------------------------------------
/*
hs_free_multi_literal_pattern
nullptr
general
*/
// Calls the free function on a null handle; the test passes as long as the
// call returns.
TEST(multi_literal_free, nullptr) {
hs_multi_literal_compiled_pattern_t *database = nullptr;
hs_free_multi_literal_pattern(database);
}
// Allocates a raw combined_fdr_database through the tracking allocator, hands
// it to the public free function and checks that no tracked allocation is
// left afterwards. The block's contents are never initialised.
TEST(multi_literal_free, general) {
SETUP_MEM_LEAK_TEST();
combined_fdr_database *clear_database =
reinterpret_cast<combined_fdr_database *>(
test_malloc(sizeof(combined_fdr_database)));
hs_multi_literal_compiled_pattern_t *database =
reinterpret_cast<hs_multi_literal_compiled_pattern_t*>(clear_database);
hs_free_multi_literal_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_multi_literal_search
single expression
single char expression
general (several expressions)
pattern duplicate
valid pattern including null char
overlapping patterns (eg, "abba" and "bb")
no expressions
empty expression
nullptr expression array
one of the expression is nullptr
nullptr output
*/
// A set holding a single 5-byte pattern must compile.
TEST(multi_literal_compile, single_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_PAT_5, 1, (5));
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_single_pattern");
hs_free_multi_literal_pattern(database);
}
// A set holding a single 1-byte pattern must compile.
TEST(multi_literal_compile, single_pattern_single_char) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
EXPECT_COMPILE_SUCCESS(
"test_compile_multi_literal_single_pattern_single_char");
hs_free_multi_literal_pattern(database);
}
// Nominal case: two distinct 5-byte patterns must compile.
TEST(multi_literal_compile, general) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_general");
hs_free_multi_literal_pattern(database);
}
// A set containing the same pattern twice must compile.
TEST(multi_literal_compile, duplicate) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5));
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_duplicate");
hs_free_multi_literal_pattern(database);
}
// A set in which one pattern contains an embedded NUL byte must compile.
TEST(multi_literal_compile, with_null_char) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5));
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_with_null_char");
hs_free_multi_literal_pattern(database);
}
// Two patterns of different lengths (5 and 8) where the second begins with
// the tail of the first must compile.
TEST(multi_literal_compile, overlapping_patterns) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8));
EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_overlapping_patterns");
hs_free_multi_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Compiling with a pattern count of 0 is expected to terminate the process
// with a message matching "called with no pattern".
// pattern_len is filled element by element inside the block: a braced
// initializer would put a top-level comma inside the EXPECT_DEATH argument.
TEST(multi_literal_compile, no_expression) {
const size_t pattern_count = 0;
const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5;
const char **pattern = pattern_storage;
hs_multi_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(
{
size_t pattern_len[2];
pattern_len[0] = 5;
pattern_len[1] = 5;
hs_compile_multi_literal_search(pattern, pattern_count, pattern_len,
&database);
},
"called with no pattern");
}
// Compiling a set whose only pattern has length 0 is expected to terminate the
// process with a message matching "called with an empty pattern".
TEST(multi_literal_compile, empty_expression) {
const size_t pattern_count = 1;
const size_t pattern_len[pattern_count] = {0};
const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_EMPTY_0;
const char **pattern = pattern_storage;
hs_multi_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
pattern_len, &database),
"called with an empty pattern");
}
// Passing a null pattern array is expected to terminate the process with a
// message matching "called with nullptr".
TEST(multi_literal_compile, nullptr_pattern_array) {
const size_t pattern_count = 1;
const size_t pattern_len[pattern_count] = {5};
const char **pattern = nullptr;
hs_multi_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
pattern_len, &database),
"called with nullptr");
}
// A pattern array whose second entry is a null pointer is expected to
// terminate the process. The expected message is the "empty pattern" one,
// not the "nullptr" one.
TEST(multi_literal_compile, nullptr_pattern_in_array) {
const size_t pattern_count = 2;
const size_t pattern_len[pattern_count] = {5, 5};
const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_NULLPTR_5_0;
const char **pattern = pattern_storage;
hs_multi_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
pattern_len, &database),
"called with an empty pattern");
}
// Passing a null output pointer is expected to terminate the process with a
// message matching "called with nullptr".
TEST(multi_literal_compile, nullptr_database) {
const size_t pattern_count = 2;
const size_t pattern_len[pattern_count] = {5, 5};
const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5;
const char **pattern = pattern_storage;
EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
pattern_len, nullptr),
"called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_multi_literal_search
general pattern
match at start
match middle (general)
match index 15 (cross over vector)
match at end
match past end (a few char ok, then end, so missing some chars)
match long patterns
long pattern but the buffer only has the short part of it
bad caseness
search several times
match first pattern
match last pattern
match several pattern in the same search
match overlapping patterns
pattern mix (start with pattern A, finish with pattern B. Expect no
match)
match a pattern duplicate
single char pattern
general match
match at end
no match
buffer containing null char
pattern with null char
general pattern (no null char searched for)
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Pattern 0 sits at the very beginning of the buffer (offset 0).
TEST(multi_literal_search, start) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_start");
SEARCH_MULTI_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Nominal case: pattern 0 matches once at offset 5. Runs with the tracking
// allocator installed and checks that compile + search + free leaves nothing
// allocated.
TEST(multi_literal_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_general");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Pattern 0 occurs at offsets 5 and 15. The second occurrence is the case the
// test plan in this file labels "cross over vector".
TEST(multi_literal_search, cross_vector) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_cross_vector");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Pattern 0 occupies the last 5 bytes of the buffer (starts at offset 27).
TEST(multi_literal_search, end) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_end");
SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Same buffer as the "end" test but searched with a length 3 bytes shorter, so
// the pattern is cut off by the end of the searched range: no match expected.
TEST(multi_literal_search, past_end) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_past_end");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Two 10-byte patterns, both present in the buffer: pattern 0 at offset 5 and
// pattern 1 at offset 22.
TEST(multi_literal_search, long_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_long_pattern");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 22), (0, 1));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// The buffer holds only the first 8 bytes of pattern 0, so pattern 0 must not
// be reported; pattern 1 is still present at offset 22 and is the only
// expected match.
TEST(multi_literal_search, short_but_negative_long) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_short_but_negative_long");
SEARCH_MULTI_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 1, (22), (1));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer);
hs_free_multi_literal_pattern(database);
}
// The buffer holds pattern 0's letters with a different case; the test expects
// the search to report nothing.
TEST(multi_literal_search, bad_case) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_bad_case");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Reuses one compiled database for two consecutive searches over different
// buffers and checks each result independently.
TEST(multi_literal_search, several_search) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_several_search");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Only the first pattern of the set (id 0) occurs in the buffer, at offset 5.
TEST(multi_literal_search, first_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_first_pattern");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Only the last pattern of the set (id 1) occurs in the buffer, at offset 5.
TEST(multi_literal_search, last_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_last_pattern");
SEARCH_MULTI_LITERAL(EXPR_NOISE_PAT2_5, EXPR_NOISE_LEN, 1, (5), (1));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer);
hs_free_multi_literal_pattern(database);
}
// Both patterns occur in the same buffer: id 0 at offset 5 and id 1 at
// offset 15. Both must be reported by a single search.
TEST(multi_literal_search, multi_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_multi_pattern");
SEARCH_MULTI_LITERAL(EXPR_NOISE_DUO_5_15, EXPR_NOISE_LEN, 2, (5, 15),
(0, 1));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Two patterns whose occurrences overlap in the buffer: id 0 (5 bytes) at
// offset 5 and id 1 (8 bytes) at offset 7. Both must be reported.
// The search is written out by hand instead of using SEARCH_MULTI_LITERAL
// because the expected end offsets depend on each pattern's own length.
TEST(multi_literal_search, overlap) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_overlap");
buffer = EXPR_NOISE_5;
const size_t buffer_len = EXPR_NOISE_LEN;
const size_t expected_match = 2;
size_t expected_start_array[expected_match] = {5, 7};
size_t expected_end_array[expected_match] = {5, 7};
size_t expected_id_array[expected_match] = {0, 1};
for (size_t i = 0; i < expected_match; i++) {
// we need the length of the second pattern, hence not using the macro
expected_end_array[i] += pattern_len[i];
}
context.expected_start_array = expected_start_array;
context.expected_end_array = expected_end_array;
context.expected_id_array = expected_id_array;
context.array_size = expected_match;
context.number_matched = 0;
context.number_wrong = 0;
ret = hs_multi_literal_search(database, buffer, buffer_len,
callback, &context);
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Expects zero matches for EXPR_NOISE_MIX. Presumably the buffer mixes
// fragments of both patterns without containing either one in full -- TODO
// confirm against the EXPR_NOISE_MIX definition.
TEST(multi_literal_search, pattern_mix) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_pattern_mix");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL(EXPR_NOISE_MIX, EXPR_NOISE_LEN, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Compiles a pattern array that, going by its name, holds the same literal
// twice, and expects a single match (presumably offset 5, id 0) rather than
// one match per duplicate.
TEST(multi_literal_search, duplicate) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_duplicate");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// A pattern set containing one pattern of length 1; expects two matches,
// presumably at offsets 5 and 15.
TEST(multi_literal_search, single_char) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Single-character pattern searched in a buffer whose length is shortened by
// one (EXPR_NOISE_LEN - 1); expects one match, presumably at offset 30, i.e.
// on the last byte that is still inside the searched range.
TEST(multi_literal_search, single_char_end) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_end");
SEARCH_MULTI_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30),
(0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Single-character pattern searched in EXPR_NOISE; expects zero matches.
TEST(multi_literal_search, single_char_no_match) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_no_match");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Going by the names, both the pattern set and the buffer contain a '\0'
// byte; expects one match, presumably at offset 5 for pattern id 0.
TEST(multi_literal_search, null_char_buff_and_pattern) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS(
"test_multi_literal_search_null_char_buff_and_pattern");
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0));
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Ordinary patterns searched in EXPR_NOISE_5_NULL (by its name, a buffer that
// contains a '\0' byte); expects zero matches.
TEST(multi_literal_search, null_char_buff) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_null_char_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
// Searches an empty buffer ("" with length 0); expects zero matches.
TEST(multi_literal_search, empty_buff) {
COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
ASSERT_COMPILE_SUCCESS("test_multi_literal_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_MULTI_LITERAL("", 0, 0, (), ());
EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
hs_free_multi_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Death test: searching with a null database must abort with a message
// containing "called with nullptr database". COMPILE_MULTI_LITERAL is not
// used here, so the locals the search macro presumably relies on (database,
// context, buffer, ret, pattern_len) are declared by hand.
TEST(multi_literal_search, nullptr_pattern) {
const hs_multi_literal_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
size_t pattern_len[2];
pattern_len[0] = 5;
pattern_len[1] = 5;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
},
"called with nullptr database");
}
// Death test: searching a null buffer with a valid database must abort with
// a message containing "called with nullptr buffer".
TEST(multi_literal_search, nullptr_buffer) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_MULTI_LITERAL(nullptr, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr buffer");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_multi_literal_pattern(database);
}
// Death test: calling hs_multi_literal_search with a null callback must
// abort with a message containing "called with nullptr callback". The
// context is filled in as for a normal one-match search so that the callback
// is the only invalid argument.
TEST(multi_literal_search, nullptr_callback) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_callback");
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        // end offset = start offset + length of the first pattern
        expected_end_array[i] += pattern_len[0];
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;
    EXPECT_DEATH(
        {
            hs_multi_literal_search(database, buffer, buffer_len, nullptr,
                                    &context);
        },
        "called with nullptr callback");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_multi_literal_pattern(database);
}
#endif

View File

@ -0,0 +1,377 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "direct_API/common.h"
#include "hwlm/noodle_internal.h"
// Declares, in the enclosing scope, the locals shared by the short-literal
// tests: `pattern` / `pattern_len`, the `database` handle, `compile_ret`
// (result of hs_compile_short_literal_search), plus `ret`, `buffer` and a
// value-initialised `context` that SEARCH_SHORT_LITERAL fills in later.
// It expands to plain declarations, so it can appear only once per scope.
#define COMPILE_SHORT_LITERAL(in_pattern, in_pattern_len) \
size_t pattern_len = (in_pattern_len); \
const char *pattern = (in_pattern); \
hs_short_literal_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = \
hs_compile_short_literal_search(pattern, pattern_len, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
// Runs hs_short_literal_search over in_buffer and stores the result in `ret`.
// Requires `buffer`, `ret`, `context`, `database`, `pattern_len` and
// `callback` to be in scope.
//   in_expected_match       - number of matches the callback should see
//   in_expected_start_array - parenthesised list of the expected start
//                             offsets, handed to BRACED_INIT_LIST (presumably
//                             turning it into a braced initialiser)
// Expected end offsets are derived as start + pattern_len and every expected
// id is 0. With in_expected_match == 0 the arrays have zero length.
// NOTE(review): the expected_* arrays are local to the macro's braces while
// `context` keeps pointing at them after the block ends -- presumably later
// checks only read the counters in `context`; confirm.
#define SEARCH_SHORT_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match]; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += pattern_len; \
expected_id_array[i] = 0; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_short_literal_search(database, buffer, buffer_len, callback, \
&context); \
}
// The compile tests in this file hard-code lengths on either side of the
// threshold (8 is expected to compile, 10 is expected to fail), so fail the
// build if the threshold moves without the tests being revisited.
static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
"changing the threshold for short/long literal require changing "
"the tests to still test the threshold behavior");
// ------------------------free tests-------------------------------------------
/*
hs_free_short_literal_pattern
nullptr
general
*/
// Passing a null handle to hs_free_short_literal_pattern must not crash.
TEST(short_literal_free, nullptr) {
    hs_short_literal_compiled_pattern_t *const null_handle = nullptr;
    hs_free_short_literal_pattern(null_handle);
}
// Allocates a noodTable-sized chunk with test_malloc, hands it to
// hs_free_short_literal_pattern as if it were a compiled pattern, and checks
// that the leak-tracking helpers report no outstanding memory afterwards.
TEST(short_literal_free, general) {
    SETUP_MEM_LEAK_TEST();
    auto *raw_table =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    hs_free_short_literal_pattern(
        reinterpret_cast<hs_short_literal_compiled_pattern_t *>(raw_table));
    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_short_literal_search
single char
general
8 char
>8 char
valid pattern including null char
empty expression
nullptr expression
nullptr output
*/
// A pattern of length 1 (the shortest non-empty pattern) must compile.
TEST(short_literal_compile, single_char) {
COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
EXPECT_COMPILE_SUCCESS("test_compile_short_literal_single_char");
hs_free_short_literal_pattern(database);
}
// A pattern of length 5 must compile.
TEST(short_literal_compile, general) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
EXPECT_COMPILE_SUCCESS("test_compile_short_literal_general");
hs_free_short_literal_pattern(database);
}
// A pattern of length 8, equal to HS_SHORT_PATTERN_THRESHOLD as asserted in
// this file, must still compile.
TEST(short_literal_compile, max_length) {
COMPILE_SHORT_LITERAL(PATTERN_8_CHAR, 8);
EXPECT_COMPILE_SUCCESS("test_compile_short_literal_max_len");
hs_free_short_literal_pattern(database);
}
// A pattern of length 10, above the threshold of 8, must be rejected.
// hs_free_short_literal_pattern is still called on whatever `database` holds
// after the failed compile (presumably still nullptr -- confirm).
TEST(short_literal_compile, too_long) {
COMPILE_SHORT_LITERAL(PATTERN_10_CHAR, 10);
EXPECT_COMPILE_FAILURE("test_compile_short_literal_too_long");
hs_free_short_literal_pattern(database);
}
// A length-5 pattern that, going by its name, contains a '\0' byte must
// compile; the length is passed explicitly rather than derived from the
// string.
TEST(short_literal_compile, null_char) {
COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5);
EXPECT_COMPILE_SUCCESS("test_compile_short_literal_null_char");
hs_free_short_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Death test: compiling a zero-length pattern must abort with a message
// containing "called with an empty pattern".
TEST(short_literal_compile, empty_pattern) {
hs_short_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(
hs_compile_short_literal_search(PATTERN_0_CHAR, 0, &database),
"called with an empty pattern");
}
// Death test: compiling with a null pattern pointer must abort with a
// message containing "called with nullptr".
TEST(short_literal_compile, nullptr_pattern) {
hs_short_literal_compiled_pattern_t *database = nullptr;
EXPECT_DEATH(hs_compile_short_literal_search(nullptr, 5, &database),
"called with nullptr");
}
// Death test: compiling with a null output pointer must abort with a message
// containing "called with nullptr".
TEST(short_literal_compile, nullptr_database) {
EXPECT_DEATH(hs_compile_short_literal_search(PATTERN_5_CHAR, 5, nullptr),
"called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_short_literal_search
general pattern
match at start
match middle (general)
match index 15 (noodle cross over vector)
match at end
match the full pattern, not just the first pair
match past end (2 char ok, then end, so missing some chars)
bad caseness
search several times
single char pattern
general match
match at end
no match
buffer containing null char
pattern with null char
general pattern
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Expects exactly one match starting at offset 0, i.e. at the very beginning
// of the buffer.
TEST(short_literal_search, start) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_start");
SEARCH_SHORT_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Expects exactly one match starting at offset 5. The whole
// compile/search/free cycle is wrapped in the leak-tracking helpers, so it
// also checks that nothing stays allocated once the pattern is freed.
TEST(short_literal_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_general");
SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Expects two matches, starting at offsets 5 and 15. Per the test plan in
// this file, the match at index 15 is meant to cross a vector boundary in the
// noodle implementation.
TEST(short_literal_search, cross_vector) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_cross_vector");
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Expects exactly one match starting at offset 27; with a 5-byte pattern the
// match presumably finishes on the last byte of the buffer (confirm against
// EXPR_NOISE_LEN).
TEST(short_literal_search, end) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_end");
SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Same buffer as the `end` test, but the searched length is shortened by 3,
// so the pattern occurrence at offset 27 is cut off; expects zero matches.
TEST(short_literal_search, past_end) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_past_end");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Expects zero matches. Per the test plan in this file, this covers matching
// the full pattern rather than just its first pair of characters; presumably
// EXPR_NOISE_5_AB holds only a prefix of the pattern -- confirm.
TEST(short_literal_search, short_no_match) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_short_no_match");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_AB, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Expects zero matches for a buffer that, going by its name, contains the
// pattern only with different letter case.
TEST(short_literal_search, bad_case) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_bad_case");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Runs two searches with the same compiled database: first expecting one
// match at offset 5, then two matches at offsets 5 and 15 in another buffer.
TEST(short_literal_search, several_search) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_several_search");
SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
// cppcheck-suppress redundantAssignment
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// One-character pattern; expects two matches, at offsets 5 and 15.
TEST(short_literal_search, single_char) {
COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char");
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// One-character pattern searched with the length shortened by one
// (EXPR_NOISE_LEN - 1); expects one match at offset 30, presumably the last
// byte still inside the searched range.
TEST(short_literal_search, single_char_end) {
COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_end");
SEARCH_SHORT_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// One-character pattern searched in EXPR_NOISE; expects zero matches.
TEST(short_literal_search, single_char_no_match) {
COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_no_match");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Going by the names, both the pattern and the buffer contain a '\0' byte;
// expects one match starting at offset 5.
TEST(short_literal_search, null_char_buff_and_pattern) {
COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5);
ASSERT_COMPILE_SUCCESS(
"test_short_literal_search_null_char_buff_and_pattern");
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Ordinary 5-character pattern searched in EXPR_NOISE_5_NULL (by its name, a
// buffer that contains a '\0' byte); expects zero matches.
TEST(short_literal_search, null_char_buff) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_null_char_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
// Searches an empty buffer ("" with length 0); expects zero matches.
TEST(short_literal_search, empty_buff) {
COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
ASSERT_COMPILE_SUCCESS("test_short_literal_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SHORT_LITERAL("", 0, 0, ());
EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);
hs_free_short_literal_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Death test: searching with a null database must abort with a message
// containing "called with nullptr database". COMPILE_SHORT_LITERAL is not
// used here, so the locals SEARCH_SHORT_LITERAL relies on (database, context,
// buffer, ret, pattern_len) are declared by hand.
TEST(short_literal_search, nullptr_pattern) {
const hs_short_literal_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
size_t pattern_len = 5;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
},
"called with nullptr database");
}
// Death test: searching a null buffer with a valid database must abort with
// a message containing "called with nullptr buffer".
TEST(short_literal_search, nullptr_buffer) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SHORT_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_short_literal_pattern(database);
}
// Death test: calling hs_short_literal_search with a null callback must
// abort with a message containing "called with nullptr callback". The
// context is filled in as for a normal one-match search so that the callback
// is the only invalid argument.
TEST(short_literal_search, nullptr_callback) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_callback");
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        // end offset = start offset + pattern length
        expected_end_array[i] += pattern_len;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;
    EXPECT_DEATH(
        {
            hs_short_literal_search(database, buffer, buffer_len, nullptr,
                                    &context);
        },
        "called with nullptr callback");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_short_literal_pattern(database);
}
#endif

View File

@ -0,0 +1,293 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "hwlm/noodle_internal.h"
// Declares, in the enclosing scope, the locals shared by the single-char
// tests: `pattern` (the first character of in_pattern, held as a char rather
// than a pointer), the `database` handle, `compile_ret` (result of
// hs_compile_single_char_search), plus `ret`, `buffer` and a
// value-initialised `context` that SEARCH_SINGLE_CHAR fills in later.
// It expands to plain declarations, so it can appear only once per scope.
#define COMPILE_SINGLE_CHAR(in_pattern) \
const char pattern = *(in_pattern); \
hs_single_char_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = hs_compile_single_char_search(pattern, &database);\
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
// Runs hs_single_char_search over in_buffer and stores the result in `ret`.
// Requires `buffer`, `ret`, `context`, `database` and `callback` to be in
// scope.
//   in_expected_match       - number of matches the callback should see
//   in_expected_start_array - parenthesised list of the expected start
//                             offsets, handed to BRACED_INIT_LIST (presumably
//                             turning it into a braced initialiser)
// Expected end offsets are derived as start + 1 and every expected id is 0.
// With in_expected_match == 0 the arrays have zero length.
// NOTE(review): the expected_* arrays are local to the macro's braces while
// `context` keeps pointing at them after the block ends -- presumably later
// checks only read the counters in `context`; confirm.
#define SEARCH_SINGLE_CHAR(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match]; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += 1; \
expected_id_array[i] = 0; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_single_char_search(database, buffer, buffer_len, callback, \
&context); \
}
// ------------------------free tests-------------------------------------------
/*
hs_free_single_char_pattern
nullptr
general
*/
// Passing a null handle to hs_free_single_char_pattern must not crash.
TEST(single_char_free, nullptr) {
    hs_single_char_compiled_pattern_t *const null_handle = nullptr;
    hs_free_single_char_pattern(null_handle);
}
// Allocates a truffle_storage-sized chunk with test_malloc, hands it to
// hs_free_single_char_pattern as if it were a compiled pattern, and checks
// that the leak-tracking helpers report no outstanding memory afterwards.
TEST(single_char_free, general) {
    SETUP_MEM_LEAK_TEST();
    auto *raw_storage = reinterpret_cast<truffle_storage *>(
        test_malloc(sizeof(truffle_storage)));
    hs_free_single_char_pattern(
        reinterpret_cast<hs_single_char_compiled_pattern_t*>(raw_storage));
    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_single_char_search
general (1 char)
null char pattern
nullptr output
*/
// Compiling an ordinary single-character pattern must succeed.
TEST(single_char_compile, general) {
    // Macro invocations are terminated with ';' like everywhere else in the
    // file, so the test does not depend on how the macros happen to end.
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_general");
    hs_free_single_char_pattern(database);
}
// Compiling the '\0' character as the pattern must succeed.
TEST(single_char_compile, null_char) {
    // PATTERN_1_CHAR_NULL (the pattern used by the null-char search test) is
    // compiled here; PATTERN_1_CHAR would only repeat the `general` test.
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL);
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_null_char");
    hs_free_single_char_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Death test: compiling with a null output pointer must abort with a message
// containing "called with nullptr".
TEST(single_char_compile, nullptr_database) {
EXPECT_DEATH(hs_compile_single_char_search(*PATTERN_1_CHAR, nullptr),
"called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_single_char_search
general pattern
match at start
match middle (general)
match vector end
match at buffer end
match past end
bad caseness
search several times
buffer containing null char
null char pattern
general pattern
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Expects exactly one match at offset 0, i.e. on the first byte of the
// buffer.
TEST(single_char_search, start) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_start");
SEARCH_SINGLE_CHAR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Expects exactly one match at offset 5. The whole compile/search/free cycle
// is wrapped in the leak-tracking helpers, so it also checks that nothing
// stays allocated once the pattern is freed.
TEST(single_char_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_general");
SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Expects two matches, at offsets 5 and 15. Per the test plan in this file
// this covers a match at the end of a vector (presumably offset 15 is the
// last lane of a 16-byte vector -- confirm).
TEST(single_char_search, end_vector) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_end_vector");
SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Expects exactly one match at offset 31, presumably the last byte of the
// buffer (confirm against EXPR_NOISE_LEN).
TEST(single_char_search, end) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_end");
SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN, 1, (31));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Same buffer as the `end` test, but the searched length is shortened by one
// so the character at offset 31 lies outside the searched range; expects zero
// matches.
TEST(single_char_search, past_end) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_past_end");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN - 1, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Expects zero matches for a buffer that, going by its name, contains the
// character only with different letter case.
TEST(single_char_search, bad_case) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_bad_case");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Runs two searches with the same compiled database: first expecting one
// match at offset 5, then two matches at offsets 5 and 15 in another buffer.
TEST(single_char_search, several_search) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_several_search");
SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Searches for PATTERN_1_CHAR_NULL (by its name, the '\0' character) in
// EXPR_NOISE_5_NULL; expects exactly one match, at offset 6.
TEST(single_char_search, null_char_buff_and_pattern) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL);
ASSERT_COMPILE_SUCCESS(
"test_single_char_search_null_char_buff_and_pattern");
SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Ordinary character searched in EXPR_NOISE_5_NULL (by its name, a buffer
// that contains a '\0' byte); expects exactly one match, at offset 5.
TEST(single_char_search, null_char_buff) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_null_char_buff");
SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
// Searches an empty buffer ("" with length 0); expects zero matches.
TEST(single_char_search, empty_buff) {
COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_search_empty_buff");
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR("", 0, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);
hs_free_single_char_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Death test: searching with a null database must abort with a message
// containing "called with nullptr database". COMPILE_SINGLE_CHAR is not used
// here, so the locals SEARCH_SINGLE_CHAR relies on (database, context,
// buffer, ret) are declared by hand.
TEST(single_char_search, nullptr_pattern) {
const hs_single_char_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
const char *buffer;
hs_error_t ret;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
},
"called with nullptr database");
}
// Death test: searching a null buffer with a valid database must abort with
// a message containing "called with nullptr buffer".
TEST(single_char_search, nullptr_buffer) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_single_char_pattern(database);
}
// Death test: calling hs_single_char_search with a null callback must abort
// with a message containing "called with nullptr callback". The context is
// filled in as for a normal one-match search so that the callback is the
// only invalid argument.
TEST(single_char_search, nullptr_callback) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_callback");
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        // a single-character match ends one byte after its start
        expected_end_array[i] += 1;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;
    EXPECT_DEATH(
        {
            hs_single_char_search(database, buffer, buffer_len, nullptr,
                                  &context);
        },
        "called with nullptr callback");
    // The death-test statement runs in a child process, so the database
    // compiled above is still live in this process and has to be released.
    hs_free_single_char_pattern(database);
}
#endif

View File

@ -0,0 +1,303 @@
/*
* Copyright (c) 2024-2025, Arm ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "common.h"
#include "hwlm/noodle_internal.h"
// Declares, in the enclosing scope, the locals shared by the char-pair tests:
// `pattern`, the `database` handle, `compile_ret` (result of
// hs_compile_single_char_pair_search, which takes no length argument), plus
// `ret`, `buffer` and a value-initialised `context` that
// SEARCH_SINGLE_CHAR_PAIR fills in later.
// It expands to plain declarations, so it can appear only once per scope.
#define COMPILE_SINGLE_CHAR_PAIR(in_pattern) \
const char *pattern = (in_pattern); \
hs_single_char_pair_compiled_pattern_t *database = nullptr; \
hs_error_t compile_ret = \
hs_compile_single_char_pair_search(pattern, &database); \
hs_error_t ret = 0; \
(void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
const char *buffer = nullptr; \
(void)buffer; \
context_t context = {}; \
(void) context;
// Runs hs_single_char_pair_search over in_buffer and stores the result in
// `ret`. Requires `buffer`, `ret`, `context`, `database` and `callback` to be
// in scope.
//   in_expected_match       - number of matches the callback should see
//   in_expected_start_array - parenthesised list of the expected start
//                             offsets, handed to BRACED_INIT_LIST (presumably
//                             turning it into a braced initialiser)
// Expected end offsets are derived as start + 2 and every expected id is 0.
// With in_expected_match == 0 the arrays have zero length.
// NOTE(review): the expected_* arrays are local to the macro's braces while
// `context` keeps pointing at them after the block ends -- presumably later
// checks only read the counters in `context`; confirm.
#define SEARCH_SINGLE_CHAR_PAIR(in_buffer, in_buffer_len, in_expected_match, \
in_expected_start_array) \
{ \
buffer = (in_buffer); \
const size_t buffer_len = (in_buffer_len); \
const size_t expected_match = (in_expected_match); \
size_t expected_start_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_end_array[expected_match] = \
BRACED_INIT_LIST in_expected_start_array; \
size_t expected_id_array[expected_match]; \
for (size_t i = 0; i < expected_match; i++) { \
expected_end_array[i] += 2; \
expected_id_array[i] = 0; \
} \
context.expected_start_array = expected_start_array; \
context.expected_end_array = expected_end_array; \
context.expected_id_array = expected_id_array; \
context.array_size = expected_match; \
context.number_matched = 0; \
context.number_wrong = 0; \
\
ret = hs_single_char_pair_search(database, buffer, buffer_len, \
callback, &context); \
}
// ------------------------free tests-------------------------------------------
/*
hs_free_single_char_pair_pattern
nullptr
general
*/
// Passing a null handle to hs_free_single_char_pair_pattern must not crash.
TEST(single_char_pair_free, nullptr) {
    hs_single_char_pair_compiled_pattern_t *const null_handle = nullptr;
    hs_free_single_char_pair_pattern(null_handle);
}
// Allocates a noodTable-sized chunk with test_malloc, hands it to
// hs_free_single_char_pair_pattern as if it were a compiled pattern, and
// checks that the leak-tracking helpers report no outstanding memory
// afterwards.
TEST(single_char_pair_free, general) {
    SETUP_MEM_LEAK_TEST();
    auto *raw_table =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    hs_free_single_char_pair_pattern(
        reinterpret_cast<hs_single_char_pair_compiled_pattern_t*>(raw_table));
    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
// ------------------------compile tests----------------------------------------
/*
hs_compile_single_char_pair_search
general (2 char)
valid pattern including null char
nullptr expression
nullptr output
*/
// Compiling an ordinary two-character pattern must succeed.
TEST(single_char_pair_compile, general) {
    // Macro invocations are terminated with ';' like everywhere else in the
    // file, so the test does not depend on how the macros happen to end.
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_general");
    hs_free_single_char_pair_pattern(database);
}
// Compiling PATTERN_2_WITH_NULL (by its name, a two-character pattern that
// contains a '\0' byte) must succeed.
TEST(single_char_pair_compile, with_null_char) {
    // Macro invocations are terminated with ';' like everywhere else in the
    // file, so the test does not depend on how the macros happen to end.
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL);
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_with_null_char");
    hs_free_single_char_pair_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Passing a null expression to the compile function is expected to abort with
// a message matching "called with nullptr".
TEST(single_char_pair_compile, nullptr_pattern) {
    hs_single_char_pair_compiled_pattern_t *compiled = nullptr;
    EXPECT_DEATH(
        { hs_compile_single_char_pair_search(nullptr, &compiled); },
        "called with nullptr");
}
// Passing a null output pointer to the compile function is expected to abort
// with a message matching "called with nullptr".
TEST(single_char_pair_compile, nullptr_database) {
    EXPECT_DEATH(
        { hs_compile_single_char_pair_search(PATTERN_5_CHAR, nullptr); },
        "called with nullptr");
}
#endif
// ------------------------search tests-----------------------------------------
/*
hs_single_char_pair_search
general pattern
match at start
match middle (general)
match index 15 (cross over vector)
match at end
match past end (1 char ok, then end, so missing one char)
bad caseness
search several times
buffer containing null char
pattern with null char
general pattern
buff size 0
nullptr pattern
nullptr buffer
nullptr callback
*/
// Match at the very start of the buffer: the search over EXPR_NOISE_0 is set
// up to expect exactly one match with start offset 0. The search macro derives
// each expected end offset as start + 2.
TEST(single_char_pair_search, start) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_start");
// Arguments: buffer, buffer length, expected match count, expected starts.
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Match in the middle of the buffer (one expected match, start offset 5), with
// the whole compile/search/free sequence bracketed by the leak-test macros.
TEST(single_char_pair_search, general) {
SETUP_MEM_LEAK_TEST();
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_general");
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
// NOTE(review): presumably checks that nothing allocated since
// SETUP_MEM_LEAK_TEST() is still outstanding - confirm against the macro.
EXPECT_MEMORY_CLEAN();
UNSET_MEM_LEAK_TEST();
}
// Two expected matches, at start offsets 5 and 15. The test plan above labels
// the one at index 15 as the "cross over vector" case.
TEST(single_char_pair_search, cross_vector) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_cross_vector");
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Match at the end of the buffer: one expected match with start offset 30
// when the full EXPR_NOISE_LEN bytes are searched.
TEST(single_char_pair_search, end) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_end");
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Same buffer as the "end" test, but searched with a length one byte shorter
// (EXPR_NOISE_LEN - 1), so the pair's second character falls outside the
// searched range and no match is expected.
TEST(single_char_pair_search, past_end) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_past_end");
// Zero expected matches. The suppression below presumably covers the macro's
// `i < expected_match` loop, which cannot run when the count is 0.
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// "Bad caseness" case from the test plan: searching
// EXPR_NOISE_5_15_BAD_CASE is expected to produce no match.
// NOTE(review): presumably the buffer holds the pair with different letter
// case at offsets 5 and 15 - confirm against the buffer definition.
TEST(single_char_pair_search, bad_case) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_bad_case");
// Zero expected matches, hence the empty start list.
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Reuses one compiled pattern for two consecutive searches over different
// buffers, checking the result after each search.
TEST(single_char_pair_search, several_search) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_several_search");
// First search: one expected match at start offset 5.
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
// Second search with the same database: expected matches at 5 and 15.
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Pattern PATTERN_2_WITH_NULL searched in EXPR_NOISE_5_NULL: one match is
// expected at start offset 5.
TEST(single_char_pair_search, null_char_buff_and_pattern) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL);
ASSERT_COMPILE_SUCCESS(
"test_single_char_pair_search_null_char_buff_and_pattern");
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// The regular PATTERN_2_CHAR searched in EXPR_NOISE_5_NULL (the buffer used by
// the null-char pattern test): no match is expected.
TEST(single_char_pair_search, null_char_buff) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_null_char_buff");
// Zero expected matches, hence the empty start list.
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
// Buffer of size 0: searching an empty string with length 0 is expected to
// produce no match.
TEST(single_char_pair_search, empty_buff) {
COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_empty_buff");
// Zero expected matches, hence the empty start list.
// cppcheck-suppress unsignedLessThanZero
SEARCH_SINGLE_CHAR_PAIR("", 0, 0, ());
EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);
hs_free_single_char_pair_pattern(database);
}
#if !defined(RELEASE_BUILD)
// test asserts
// Searching with a null database is expected to abort with a message matching
// "called with nullptr database". Nothing is compiled here, so the locals the
// search macro relies on are declared by hand.
TEST(single_char_pair_search, nullptr_pattern) {
const hs_single_char_pair_compiled_pattern_t *database = nullptr;
context_t context;
EXPECT_DEATH(
{
// The search macro uses `buffer` and assigns `ret`, so both must exist
// in this scope.
const char *buffer;
hs_error_t ret;
// cppcheck-suppress unsignedLessThanZero
// cppcheck-suppress unreadVariable
SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
},
"called with nullptr database");
}
// Searching a null buffer with a valid compiled pattern is expected to abort
// with a message matching "called with nullptr buffer".
TEST(single_char_pair_search, nullptr_buffer) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR_PAIR(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");
    // GoogleTest runs the death statement in a child process, so the pattern
    // compiled above is still owned by this process and must be released,
    // as every other test that compiles a pattern does.
    hs_free_single_char_pair_pattern(database);
}
// Searching with a null callback is expected to abort with a message matching
// "called with nullptr callback". The search macro always passes `callback`,
// so its context setup is reproduced by hand and the search function is
// called directly.
TEST(single_char_pair_search, nullptr_callback) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;

    // One expected match starting at offset 5; as in the search macro, the
    // end offset is start + 2 and the expected id is 0.
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    size_t expected_id_array[expected_match];
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += 2;
        expected_id_array[i] = 0;
    }

    // Populate every field the search macro populates, so the context is
    // never handed to the API with an unset id array pointer.
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.expected_id_array = expected_id_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_single_char_pair_search(database, buffer, buffer_len, nullptr,
                                       &context);
        },
        "called with nullptr callback");

    // GoogleTest runs the death statement in a child process, so the pattern
    // compiled above is still owned by this process and must be released.
    hs_free_single_char_pair_pattern(database);
}
#endif