mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge 9f3867c5d20fbc8c7cdeeac62374dc4cdab65948 into 9e9a10ad01fceb2032ae6e36cb0262c4dbba90c7
This commit is contained in:
commit
272888499d
@ -304,6 +304,7 @@ set (hs_exec_SRCS
|
|||||||
src/crc32.h
|
src/crc32.h
|
||||||
src/report.h
|
src/report.h
|
||||||
src/runtime.c
|
src/runtime.c
|
||||||
|
src/hs_direct_search.cpp
|
||||||
src/stream_compress.c
|
src/stream_compress.c
|
||||||
src/stream_compress.h
|
src/stream_compress.h
|
||||||
src/stream_compress_impl.h
|
src/stream_compress_impl.h
|
||||||
@ -484,6 +485,7 @@ SET (hs_compile_SRCS
|
|||||||
src/hs.cpp
|
src/hs.cpp
|
||||||
src/hs_internal.h
|
src/hs_internal.h
|
||||||
src/hs_version.h.in
|
src/hs_version.h.in
|
||||||
|
src/hs_direct_search_compile.cpp
|
||||||
src/scratch.h
|
src/scratch.h
|
||||||
src/state.h
|
src/state.h
|
||||||
src/ue2common.h
|
src/ue2common.h
|
||||||
|
@ -51,3 +51,10 @@ Compile mode flags
|
|||||||
.. doxygengroup:: HS_MODE_FLAG
|
.. doxygengroup:: HS_MODE_FLAG
|
||||||
:content-only:
|
:content-only:
|
||||||
:no-link:
|
:no-link:
|
||||||
|
|
||||||
|
******************************
|
||||||
|
Other Constants
|
||||||
|
******************************
|
||||||
|
|
||||||
|
.. doxygendefine:: HS_SHORT_PATTERN_THRESHOLD
|
||||||
|
:no-link:
|
||||||
|
@ -123,6 +123,9 @@ Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
|
|||||||
The new literal APIs introduced here are designed for rule sets
|
The new literal APIs introduced here are designed for rule sets
|
||||||
containing only pure literal expressions.
|
containing only pure literal expressions.
|
||||||
|
|
||||||
|
In tight loops where performance is critical, some further specialization of the
|
||||||
|
literal search exists in the form of :ref:`direct_api`.
|
||||||
|
|
||||||
***************
|
***************
|
||||||
Pattern Support
|
Pattern Support
|
||||||
***************
|
***************
|
||||||
|
@ -23,7 +23,7 @@ import os
|
|||||||
# -- General configuration ------------------------------------------------
|
# -- General configuration ------------------------------------------------
|
||||||
|
|
||||||
# If your documentation needs a minimal Sphinx version, state it here.
|
# If your documentation needs a minimal Sphinx version, state it here.
|
||||||
#needs_sphinx = '1.0'
|
needs_sphinx = '4.0'
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be
|
# Add any Sphinx extension module names here, as strings. They can be
|
||||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||||
@ -272,4 +272,4 @@ breathe_domain_by_extension = {"h" : "c"}
|
|||||||
# -- Add some customisation -----------------------------------------------
|
# -- Add some customisation -----------------------------------------------
|
||||||
|
|
||||||
def setup(app):
|
def setup(app):
|
||||||
app.add_stylesheet("hyperscan.css") # Custom stylesheet for e.g. :regex:
|
app.add_css_file("hyperscan.css") # Custom stylesheet for e.g. :regex:
|
||||||
|
28
doc/dev-reference/direct_api.rst
Normal file
28
doc/dev-reference/direct_api.rst
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
.. _direct_api:
|
||||||
|
|
||||||
|
####################
|
||||||
|
Direct API extension
|
||||||
|
####################
|
||||||
|
|
||||||
|
Even though pure literal searches are fast, there is still some overhead.
|
||||||
|
In tight loops where both the pattern and the data are small (say, a
|
||||||
|
4-character pattern with a 32-character data buffer), this overhead can
|
||||||
|
become noticeable. In such cases, the functions provided by the Direct API
|
||||||
|
offer a minimal-overhead alternative, at the cost of a reduced set of
|
||||||
|
functionality.
|
||||||
|
|
||||||
|
Each type of call is designed for a specific pattern type:
|
||||||
|
- Strings
|
||||||
|
- Pairs of two characters
|
||||||
|
- Single characters
|
||||||
|
Each type comes in a ``single`` search and ``set`` search variant, depending
|
||||||
|
on whether you need to search for one or multiple patterns.
|
||||||
|
|
||||||
|
For each case, compile, search, and free functions are provided.
|
||||||
|
|
||||||
|
All search functions are case-sensitive.
|
||||||
|
|
||||||
|
The single string search has an additional specialization based on the length
|
||||||
|
of the pattern. If the pattern is "short", i.e. shorter than or equal to
|
||||||
|
:c:member:`HS_SHORT_PATTERN_THRESHOLD` characters, then
|
||||||
|
:c:func:`hs_compile_short_literal_search` may be used instead.
|
@ -21,3 +21,4 @@ Vectorscan |version| Developer's Reference Guide
|
|||||||
api_constants
|
api_constants
|
||||||
api_files
|
api_files
|
||||||
chimera
|
chimera
|
||||||
|
direct_api
|
||||||
|
21
hs.def
21
hs.def
@ -41,3 +41,24 @@ EXPORTS
|
|||||||
hs_stream_size
|
hs_stream_size
|
||||||
hs_valid_platform
|
hs_valid_platform
|
||||||
hs_version
|
hs_version
|
||||||
|
hs_short_literal_search
|
||||||
|
hs_long_literal_search
|
||||||
|
hs_multi_literal_search
|
||||||
|
hs_single_char_search
|
||||||
|
hs_char_set_search
|
||||||
|
hs_single_char_pair_search
|
||||||
|
hs_char_pair_set_search
|
||||||
|
hs_compile_short_literal_search
|
||||||
|
hs_compile_long_literal_search
|
||||||
|
hs_compile_multi_literal_search
|
||||||
|
hs_compile_single_char_search
|
||||||
|
hs_compile_char_set_search
|
||||||
|
hs_compile_single_char_pair_search
|
||||||
|
hs_compile_char_pair_set_search
|
||||||
|
hs_free_short_literal_pattern
|
||||||
|
hs_free_long_literal_pattern
|
||||||
|
hs_free_multi_literal_pattern
|
||||||
|
hs_free_single_char_pattern
|
||||||
|
hs_free_char_set_pattern
|
||||||
|
hs_free_single_char_pair_pattern
|
||||||
|
hs_free_char_pair_set_pattern
|
@ -33,4 +33,11 @@ EXPORTS
|
|||||||
hs_set_stream_allocator
|
hs_set_stream_allocator
|
||||||
hs_stream_size
|
hs_stream_size
|
||||||
hs_valid_platform
|
hs_valid_platform
|
||||||
hs_version
|
hs_version
|
||||||
|
hs_short_literal_search
|
||||||
|
hs_long_literal_search
|
||||||
|
hs_multi_literal_search
|
||||||
|
hs_single_char_search
|
||||||
|
hs_char_set_search
|
||||||
|
hs_single_char_pair_search
|
||||||
|
hs_char_pair_set_search
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2020, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
* Copyright (c) 2024, VectorCamp PC
|
* Copyright (c) 2024, VectorCamp PC
|
||||||
|
* Copyright (c) 2025, Arm ltd
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -352,6 +353,99 @@ CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_strea
|
|||||||
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
|
||||||
buf, buf_size, scratch, onEvent, context);
|
buf, buf_size, scratch, onEvent, context);
|
||||||
|
|
||||||
|
/** DIRECT API **/
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_short_literal_search,
|
||||||
|
const hs_short_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_short_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_short_literal_search,
|
||||||
|
const hs_short_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_short_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_long_literal_search,
|
||||||
|
const hs_long_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_long_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_long_literal_search,
|
||||||
|
const hs_long_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_long_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_multi_literal_search,
|
||||||
|
const hs_multi_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_multi_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_multi_literal_search,
|
||||||
|
const hs_multi_literal_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_multi_literal_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_single_char_search,
|
||||||
|
const hs_single_char_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_single_char_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_single_char_search,
|
||||||
|
const hs_single_char_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_single_char_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_char_set_search,
|
||||||
|
const hs_char_set_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_char_set_search, database, data, length, onEvent,
|
||||||
|
context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_char_set_search,
|
||||||
|
const hs_char_set_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_char_set_search, database, data, length, onEvent,
|
||||||
|
context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_single_char_pair_search,
|
||||||
|
const hs_single_char_pair_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_single_char_pair_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_single_char_pair_search,
|
||||||
|
const hs_single_char_pair_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_single_char_pair_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
|
CREATE_DISPATCH(hs_error_t, hs_char_pair_set_search,
|
||||||
|
const hs_char_pair_set_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_1(hs_error_t, hs_char_pair_set_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
CONNECT_DISPATCH_2(hs_error_t, hs_char_pair_set_search,
|
||||||
|
const hs_char_pair_set_compiled_pattern_t *database,
|
||||||
|
const char *data, size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
CONNECT_ARGS_3(hs_error_t, hs_char_pair_set_search, database, data, length,
|
||||||
|
onEvent, context);
|
||||||
|
|
||||||
/** INTERNALS **/
|
/** INTERNALS **/
|
||||||
|
|
||||||
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2019, Intel Corporation
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -585,6 +586,90 @@ hs_error_t HS_CDECL hs_valid_platform(void);
|
|||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The following functions are part of the extended API.
|
||||||
|
* This extension offers direct access to search algorithms
|
||||||
|
* allowing the user to minimise calling overhead for simple
|
||||||
|
* search use cases where the type of the search is known.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup DIRECT_API_COMMON
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The size threshold after which a pattern is considered long and must be fed
|
||||||
|
* to @ref hs_compile_long_literal_search(). Patterns up to this length may be
|
||||||
|
* fed to @ref hs_compile_short_literal_search() instead.
|
||||||
|
*/
|
||||||
|
#define HS_SHORT_PATTERN_THRESHOLD 8
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for short literals
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_short_literal_search() and to be freed with @ref
|
||||||
|
* hs_free_short_literal_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_short_literal_compiled_pattern
|
||||||
|
hs_short_literal_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for long literals
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_long_literal_search() and to be freed with @ref
|
||||||
|
* hs_free_long_literal_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_long_literal_compiled_pattern
|
||||||
|
hs_long_literal_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for several literals
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_multi_literal_search() and to be freed with @ref
|
||||||
|
* hs_free_multi_literal_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_multi_literal_compiled_pattern
|
||||||
|
hs_multi_literal_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for a single character
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_single_char_search() and to be freed with @ref
|
||||||
|
* hs_free_single_char_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_single_char_compiled_pattern
|
||||||
|
hs_single_char_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for a character set
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_char_set_search() and to be freed with @ref
|
||||||
|
* hs_free_char_set_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_char_set_compiled_pattern hs_char_set_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for a character pair
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_single_char_pair_search() and to be freed with @ref
|
||||||
|
* hs_free_single_char_pair_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_single_char_pair_compiled_pattern
|
||||||
|
hs_single_char_pair_compiled_pattern_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The compiled pattern type for searching for a set of character pairs
|
||||||
|
*
|
||||||
|
* Generated by @ref hs_compile_char_pair_set_search() and to be freed with
|
||||||
|
* @ref hs_free_char_pair_set_pattern
|
||||||
|
*/
|
||||||
|
typedef struct hs_char_pair_set_compiled_pattern
|
||||||
|
hs_char_pair_set_compiled_pattern_t;
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
271
src/hs_compile.h
271
src/hs_compile.h
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2021, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -1211,6 +1212,276 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The following functions are part of the extended API.
|
||||||
|
* This extension offers direct access to search algorithms
|
||||||
|
* allowing the user to minimise calling overhead for simple
|
||||||
|
* search use cases where the type of the search is known.
|
||||||
|
*
|
||||||
|
* All search functions handle a limited type of pattern.
|
||||||
|
* For more generic patterns, use @ref hs_compile().
|
||||||
|
*
|
||||||
|
* NOTE: All search functions are considered case-sensitive.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup DIRECT_API_COMPILE
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a short literal expression used in @ref hs_short_literal_search().
|
||||||
|
*
|
||||||
|
* The expression must be at most @ref HS_SHORT_PATTERN_THRESHOLD characters
|
||||||
|
* long. For longer expressions, use @ref hs_compile_long_literal_search() and
|
||||||
|
* @ref hs_long_literal_search() instead.
|
||||||
|
*
|
||||||
|
* @param expression
|
||||||
|
* The expression to parse. Note that this string must represent ONLY the
|
||||||
|
* pattern to be matched, with no delimiters. Null characters are accepted
|
||||||
|
* as part of the expression.
|
||||||
|
*
|
||||||
|
* @param expression_length
|
||||||
|
* The length of the expression in bytes. Up to @ref
|
||||||
|
* HS_SHORT_PATTERN_THRESHOLD characters long.
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_short_literal_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_short_literal_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_short_literal_search(
|
||||||
|
const char *expression, size_t expression_length,
|
||||||
|
hs_short_literal_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a short literal pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_short_literal_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_short_literal_pattern(hs_short_literal_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a literal expression used in @ref hs_long_literal_search().
|
||||||
|
*
|
||||||
|
* There is no size limit. For expressions up to @ref
|
||||||
|
* HS_SHORT_PATTERN_THRESHOLD characters long, @ref
|
||||||
|
* hs_compile_short_literal_search() and @ref hs_short_literal_search() might be
|
||||||
|
* faster.
|
||||||
|
*
|
||||||
|
* @param expression
|
||||||
|
* The expression to parse. Note that this string must represent ONLY the
|
||||||
|
* pattern to be matched, with no delimiters. Null characters are accepted
|
||||||
|
* as part of the expression.
|
||||||
|
*
|
||||||
|
* @param expression_length
|
||||||
|
* The length of the expression in bytes.
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_long_literal_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_long_literal_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_long_literal_search(
|
||||||
|
const char *expression, size_t expression_length,
|
||||||
|
hs_long_literal_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a long literal pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_long_literal_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_long_literal_pattern(hs_long_literal_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles several literal expressions used in @ref hs_multi_literal_search().
|
||||||
|
*
|
||||||
|
* There is no size limit.
|
||||||
|
*
|
||||||
|
* @param expression
|
||||||
|
* The array of expressions to parse. Note that the strings must represent
|
||||||
|
* ONLY the patterns to be matched, with no delimiters. Null characters are
|
||||||
|
* accepted as part of the expression. The expression id in
|
||||||
|
* @ref match_event_handler will match the order of the expression given
|
||||||
|
* here (ie: expression[0] will be id 0).
|
||||||
|
*
|
||||||
|
* @param pattern_count
|
||||||
|
* The number of expressions in the @p expression array.
|
||||||
|
*
|
||||||
|
* @param expression_length
|
||||||
|
* The array of length of each expression in the @p expression array.
|
||||||
|
* Expressed in bytes.
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_multi_literal_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_multi_literal_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_multi_literal_search(
|
||||||
|
const char **expression, size_t pattern_count,
|
||||||
|
const size_t *expression_length,
|
||||||
|
hs_multi_literal_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a multi literal pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_multi_literal_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_multi_literal_pattern(hs_multi_literal_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a single character used in @ref hs_single_char_search().
|
||||||
|
*
|
||||||
|
* @param character
|
||||||
|
* The single character to be searched. It is case sensitive.
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_single_char_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_single_char_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_single_char_search(
|
||||||
|
const char character, hs_single_char_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a single char pattern.
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_single_char_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_single_char_pattern(hs_single_char_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a set of characters used in @ref hs_char_set_search().
|
||||||
|
*
|
||||||
|
* @param character_array
|
||||||
|
* The string or character array containing all the characters in the set.
|
||||||
|
* It is case sensitive. Null terminator is optional.
|
||||||
|
*
|
||||||
|
* @param character_count
|
||||||
|
* The number of characters in @p character_array
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_char_set_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_char_set_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_char_set_search(
|
||||||
|
const char *character_array, size_t character_count,
|
||||||
|
hs_char_set_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a char set pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_char_set_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_char_set_pattern(hs_char_set_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles a pair of characters used in @ref hs_single_char_pair_search().
|
||||||
|
*
|
||||||
|
* NOTE: The character order matters in the pair. "Aj" won't match "jA"
|
||||||
|
*
|
||||||
|
* @param pair
|
||||||
|
* The string or character array containing the pair. Null terminator is
|
||||||
|
* optional.
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_single_char_pair_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_single_char_pair_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_single_char_pair_search(
|
||||||
|
const char *pair, hs_single_char_pair_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a single char pair pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_single_char_pair_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL hs_free_single_char_pair_pattern(
|
||||||
|
hs_single_char_pair_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles several pairs used in @ref hs_char_pair_set_search().
|
||||||
|
*
|
||||||
|
* IMPORTANT: Compilation is only guaranteed for up to 8 pairs. If you search
|
||||||
|
* for more, internal compression may attempt to merge adjacent patterns
|
||||||
|
* (e.g., [ab, ac, ad] becomes a[bcd]) to reduce the total to 8 pairs. If the
|
||||||
|
* compression is insufficient, compilation will fail with
|
||||||
|
* @ref HS_COMPILER_ERROR. In such cases, use @ref hs_multi_literal_search() instead.
|
||||||
|
* The compression does not affect the match IDs returned by
|
||||||
|
* @ref hs_char_pair_set_search(). For example, a[bcd] will still report "ab" as
|
||||||
|
* ID 0, "ac" as ID 1, and "ad" as ID 2.
|
||||||
|
*
|
||||||
|
* NOTE: The character order matters in the pair. "Aj" won't match "jA"
|
||||||
|
*
|
||||||
|
* @param expression
|
||||||
|
* The concatenation of all pairs to be parsed. If one wants to search for
|
||||||
|
* "ab" or "Cd", then @p expression would be ['a','b','C','d']. Null
|
||||||
|
* terminator is ignored, use @p pair_count to set the length.
|
||||||
|
*
|
||||||
|
* @param pair_count
|
||||||
|
* The number of character pairs in @p expression
|
||||||
|
*
|
||||||
|
* @param output_database
|
||||||
|
* Returns pointer to buffer containing @ref
|
||||||
|
* hs_char_pair_set_compiled_pattern_t. The buffer must be freed with
|
||||||
|
* @ref hs_free_char_pair_set_pattern.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||||
|
* HS_COMPILER_ERROR otherwise.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_compile_char_pair_set_search(
|
||||||
|
const char *expression, size_t pair_count,
|
||||||
|
hs_char_pair_set_compiled_pattern_t **output_database);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free a char pair set pattern.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The @ref hs_char_pair_set_compiled_pattern_t pointer to be freed.
|
||||||
|
*/
|
||||||
|
void HS_CDECL
|
||||||
|
hs_free_char_pair_set_pattern(hs_char_pair_set_compiled_pattern_t *database);
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
435
src/hs_direct_search.cpp
Normal file
435
src/hs_direct_search.cpp
Normal file
@ -0,0 +1,435 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "hs_common.h"
|
||||||
|
#include "hs_runtime.h"
|
||||||
|
#include "hs_direct_search.h"
|
||||||
|
#include "hs_direct_search_types.h"
|
||||||
|
|
||||||
|
#include "scratch.h"
|
||||||
|
#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE
|
||||||
|
#include "util/bitutils.h" // ctz64()
|
||||||
|
#include "util/simd_utils.h" // load128()
|
||||||
|
#include "util/supervector/supervector.hpp"
|
||||||
|
|
||||||
|
#include "fdr/fdr.h"
|
||||||
|
#include "hwlm/noodle_engine.h"
|
||||||
|
#include "nfa/shufti.h"
|
||||||
|
#include "nfa/truffle.h"
|
||||||
|
|
||||||
|
typedef typename SuperVector<VECTORSIZE>::comparemask_type vector_mask_type;
|
||||||
|
|
||||||
|
static_assert((uint64_t)CB_CONTINUE_MATCHING == HWLM_CONTINUE_MATCHING,
|
||||||
|
"CB_CONTINUE_MATCHING doesn't match HWLM_CONTINUE_MATCHING");
|
||||||
|
static_assert((uint64_t)CB_TERMINATE_MATCHING == HWLM_TERMINATE_MATCHING,
|
||||||
|
"CB_TERMINATE_MATCHING doesn't match HWLM_TERMINATE_MATCHING");
|
||||||
|
|
||||||
|
/**
 * Translate an HWLM engine status into the public hs_error_t space.
 *
 * @param error status returned by an HWLM scan routine.
 * @return HS_SUCCESS for HWLM_SUCCESS, HS_SCAN_TERMINATED for
 *         HWLM_TERMINATED, and HS_UNKNOWN_ERROR for HWLM_ERROR_UNKNOWN or any
 *         other value.
 */
static inline hs_error_t hwlm_to_hs_error(const hwlm_error_t error) {
    switch (error) {
    case HWLM_SUCCESS:
        return HS_SUCCESS;
    case HWLM_TERMINATED:
        return HS_SCAN_TERMINATED;
    // HWLM_LITERAL_MAX_LEN is intentionally not a case here: this file uses it
    // as a literal length limit, not as a status code.
    case HWLM_ERROR_UNKNOWN:
    default:
        return HS_UNKNOWN_ERROR;
    }
}
|
||||||
|
|
||||||
|
// convert the callback type of Noodle
|
||||||
|
hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id,
|
||||||
|
struct hs_scratch *scratch) {
|
||||||
|
struct noodle_context *storage = reinterpret_cast<struct noodle_context *>(
|
||||||
|
scratch->core_info.userContext);
|
||||||
|
// hwlm's end is the last char of the pattern, but hs's end is the first
|
||||||
|
// char after the pattern
|
||||||
|
size_t match_start = end + 1 - storage->pattern_length;
|
||||||
|
return (hwlmcb_rv_t)(scratch->core_info.userCallback(
|
||||||
|
id, match_start, end + 1, 0, storage->usr_context));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Receive the FDR callback and perform the check for longer patterns (>8 char)
|
||||||
|
hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id,
|
||||||
|
struct hs_scratch *scratch) {
|
||||||
|
const struct FDR_cb_context *combined_ctx =
|
||||||
|
reinterpret_cast<struct FDR_cb_context *>(
|
||||||
|
scratch->core_info.userContext);
|
||||||
|
const FDR_pattern_storage *ps = combined_ctx->patterns;
|
||||||
|
size_t pattern_length = get_const_pattern_sizes(ps)[id];
|
||||||
|
size_t start_offset =
|
||||||
|
end + 1 - std::min(pattern_length, (size_t)HWLM_LITERAL_MAX_LEN);
|
||||||
|
if (pattern_length > HWLM_LITERAL_MAX_LEN) {
|
||||||
|
// long pattern for FDR, we need to confirm it.
|
||||||
|
const char *pattern = get_const_pattern_ptrs(ps)[id];
|
||||||
|
const char *buffer = combined_ctx->buffer;
|
||||||
|
size_t buffer_length = combined_ctx->buffer_length;
|
||||||
|
|
||||||
|
if (start_offset + pattern_length > buffer_length) {
|
||||||
|
// pattern too long for the remaining buffer, no match
|
||||||
|
return HWLM_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *confirm_buffer_start =
|
||||||
|
buffer + start_offset + HWLM_LITERAL_MAX_LEN;
|
||||||
|
const char *confirm_pattern_start = pattern + HWLM_LITERAL_MAX_LEN;
|
||||||
|
size_t confirm_len = pattern_length - HWLM_LITERAL_MAX_LEN;
|
||||||
|
|
||||||
|
if (confirm_len >= VECTORSIZE) {
|
||||||
|
while (confirm_len > VECTORSIZE) {
|
||||||
|
SuperVector<VECTORSIZE> buffer_vector =
|
||||||
|
SuperVector<VECTORSIZE>::loadu(confirm_buffer_start);
|
||||||
|
SuperVector<VECTORSIZE> pattern_vector =
|
||||||
|
SuperVector<VECTORSIZE>::loadu(confirm_pattern_start);
|
||||||
|
vector_mask_type mask = buffer_vector.eqmask(pattern_vector);
|
||||||
|
if(~mask)
|
||||||
|
// don't match the pattern, continue searching
|
||||||
|
return HWLM_CONTINUE_MATCHING;
|
||||||
|
confirm_buffer_start += VECTORSIZE;
|
||||||
|
confirm_pattern_start += VECTORSIZE;
|
||||||
|
confirm_len -= VECTORSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// unaligned load: we cannot risk loading any extra byte, so we run
|
||||||
|
// the vector one last time with an offset to overlap the previous
|
||||||
|
// check, but avoid overflowing.
|
||||||
|
size_t overlap = VECTORSIZE - confirm_len;
|
||||||
|
SuperVector<VECTORSIZE> buffer_vector =
|
||||||
|
SuperVector<VECTORSIZE>::loadu(confirm_buffer_start - overlap);
|
||||||
|
SuperVector<VECTORSIZE> pattern_vector =
|
||||||
|
SuperVector<VECTORSIZE>::loadu(confirm_pattern_start - overlap);
|
||||||
|
vector_mask_type mask = buffer_vector.eqmask(pattern_vector);
|
||||||
|
if(~mask)
|
||||||
|
// don't match the pattern, continue searching
|
||||||
|
return HWLM_CONTINUE_MATCHING;
|
||||||
|
} else {
|
||||||
|
size_t confirm_64 = confirm_len / 8;
|
||||||
|
for (size_t i = 0; i < confirm_64; i++) {
|
||||||
|
if ((reinterpret_cast<const uint64_t *>(confirm_buffer_start))[i] !=
|
||||||
|
(reinterpret_cast<const uint64_t *>(confirm_pattern_start))[i])
|
||||||
|
// don't match the pattern, continue searching
|
||||||
|
return HWLM_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
confirm_len = confirm_len % 8;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < confirm_len; i++) {
|
||||||
|
if (confirm_buffer_start[i] != confirm_pattern_start[i])
|
||||||
|
// don't match the pattern, continue searching
|
||||||
|
return HWLM_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we have a valid match. Call the user callback
|
||||||
|
return (hwlmcb_rv_t)(scratch->core_info.userCallback(
|
||||||
|
id, start_offset, start_offset + pattern_length, 0,
|
||||||
|
combined_ctx->usr_context));
|
||||||
|
} else {
|
||||||
|
// short pattern, no confirmation needed
|
||||||
|
return (hwlmcb_rv_t)(scratch->core_info.userCallback(
|
||||||
|
id, start_offset, end + 1, 0, combined_ctx->usr_context));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- short_literal (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Search @p data for the single short literal held in @p database.
 *
 * Runs noodExec() on the embedded Noodle table; each hit goes through
 * noodle_to_hs_callback(), which forwards it to @p onEvent together with
 * @p context. A stack-local hs_scratch is used, with only
 * core_info.userContext and core_info.userCallback filled in.
 *
 * @param database compiled pattern; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return the status of noodExec() translated by hwlm_to_hs_error().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_short_literal_search(
    const hs_short_literal_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_short_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_short_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_short_literal_search called with nullptr database");

    // Per-call state read back by noodle_to_hs_callback().
    struct noodle_context storage;
    storage.pattern_length = database->pattern_length;
    storage.usr_context = context;

    struct hs_scratch scratch;
    scratch.core_info.userCallback = onEvent;
    scratch.core_info.userContext = &storage;

    const uint8_t *buf = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t rv = noodExec(&(database->noodle_database), buf, length,
                                     0, noodle_to_hs_callback, &scratch);
    return hwlm_to_hs_error(rv);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- long_literal (FDR) ---
|
||||||
|
|
||||||
|
/**
 * Search @p data for the single literal held in @p database using FDR.
 *
 * Runs fdrExec() over the whole buffer with HWLM_ALL_GROUPS; each candidate
 * goes through FDR_to_hs_callback(), which receives the pattern store and the
 * scanned buffer through an FDR_cb_context. A stack-local hs_scratch is used,
 * with only core_info.userContext, core_info.userCallback and fdr_conf set.
 *
 * @param database compiled pattern; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return the status of fdrExec() translated by hwlm_to_hs_error().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_long_literal_search(
    const hs_long_literal_compiled_pattern_t *database, const char *data,
    size_t length, match_event_handler onEvent,
    void *context) {
    assert(onEvent != nullptr &&
           "hs_long_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_long_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_long_literal_search called with nullptr database");

    // Everything FDR_to_hs_callback() needs to confirm and report a match.
    struct FDR_cb_context combined_ctx;
    combined_ctx.usr_context = context;
    combined_ctx.patterns = database->fdr_database.patterns;
    combined_ctx.buffer = data;
    combined_ctx.buffer_length = length;

    struct hs_scratch scratch;
    scratch.core_info.userCallback = onEvent;
    scratch.core_info.userContext = &combined_ctx;
    scratch.fdr_conf = nullptr;

    const uint8_t *buf = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t rv =
        fdrExec(database->fdr_database.database, buf, length, 0,
                FDR_to_hs_callback, &scratch, HWLM_ALL_GROUPS);
    return hwlm_to_hs_error(rv);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- multi_literal (FDR) ---
|
||||||
|
|
||||||
|
/**
 * Search @p data for the set of literals held in @p database using FDR.
 *
 * Runs fdrExec() over the whole buffer with HWLM_ALL_GROUPS; each candidate
 * goes through FDR_to_hs_callback(), which receives the pattern store and the
 * scanned buffer through an FDR_cb_context. A stack-local hs_scratch is used,
 * with only core_info.userContext, core_info.userCallback and fdr_conf set.
 *
 * @param database compiled patterns; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return the status of fdrExec() translated by hwlm_to_hs_error().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_multi_literal_search(
    const hs_multi_literal_compiled_pattern_t *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_multi_literal_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_multi_literal_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_multi_literal_search called with nullptr database");

    // Everything FDR_to_hs_callback() needs to confirm and report a match.
    struct FDR_cb_context combined_ctx;
    combined_ctx.usr_context = context;
    combined_ctx.patterns = database->fdr_database.patterns;
    combined_ctx.buffer = data;
    combined_ctx.buffer_length = length;

    struct hs_scratch scratch;
    scratch.core_info.userCallback = onEvent;
    scratch.core_info.userContext = &combined_ctx;
    scratch.fdr_conf = nullptr;

    const uint8_t *buf = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t rv =
        fdrExec(database->fdr_database.database, buf, length, 0,
                FDR_to_hs_callback, &scratch, HWLM_ALL_GROUPS);
    return hwlm_to_hs_error(rv);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- single_char (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Search @p data for the single character held in @p database.
 *
 * Runs noodExec() on the embedded Noodle table with a fixed pattern length of
 * 1; each hit goes through noodle_to_hs_callback(), which forwards it to
 * @p onEvent together with @p context.
 *
 * @param database compiled pattern; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return the status of noodExec() translated by hwlm_to_hs_error().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_single_char_search(
    const hs_single_char_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_single_char_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_single_char_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_single_char_search called with nullptr database");

    // Per-call state read back by noodle_to_hs_callback().
    struct noodle_context storage;
    storage.pattern_length = 1;
    storage.usr_context = context;

    struct hs_scratch scratch;
    scratch.core_info.userCallback = onEvent;
    scratch.core_info.userContext = &storage;

    const uint8_t *buf = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t rv = noodExec(&(database->noodle_database), buf, length,
                                     0, noodle_to_hs_callback, &scratch);
    return hwlm_to_hs_error(rv);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- char_set (Truffle) ---
|
||||||
|
|
||||||
|
/**
 * Report every byte of @p data that belongs to the character set held in
 * @p database.
 *
 * Truffle is called repeatedly, each time restarting one byte after the
 * previous hit. For every hit @p onEvent receives the id stored in
 * database->char_id_map for the matched byte and the range
 * [offset, offset + 1). The scan stops when the buffer is exhausted or when
 * @p onEvent returns 0.
 *
 * NOTE(review): HS_SUCCESS is returned even when the callback asked to stop,
 * whereas the Noodle/FDR based searches in this file can return
 * HS_SCAN_TERMINATED through hwlm_to_hs_error() - confirm this is intended.
 *
 * @param database compiled set; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_char_set_search(
    const hs_char_set_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_char_set_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_char_set_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_char_set_search called with nullptr database");

    const u8 *const buf_begin = reinterpret_cast<const u8 *>(data);
    // One past the last byte: a cursor equal to buf_end means nothing is left
    // to scan.
    const u8 *const buf_end = buf_begin + length;

    for (const u8 *cursor = buf_begin; cursor < buf_end;) {
        const u8 *hit;
#ifdef CAN_USE_WIDE_TRUFFLE
        hit = truffleExecWide(loadu256(database->wide_mask), cursor, buf_end);
#else
        hit = truffleExec(load128(database->mask1), load128(database->mask2),
                          cursor, buf_end);
#endif
        // The engine returns a pointer to the matching byte itself (not past
        // it), or buf_end when nothing matched.
        if (hit >= buf_end) {
            break;
        }

        size_t id = database->char_id_map[*hit];
        size_t match_start = hit - buf_begin;
        if (!onEvent(id, match_start, match_start + 1, 0, context)) {
            // user requested to stop matching
            break;
        }
        cursor = hit + 1;
    }

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- single_char_pair (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Search @p data for the single two-character pair held in @p database.
 *
 * Runs noodExec() on the embedded Noodle table with a fixed pattern length of
 * 2; each hit goes through noodle_to_hs_callback(), which forwards it to
 * @p onEvent together with @p context.
 *
 * @param database compiled pattern; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return the status of noodExec() translated by hwlm_to_hs_error().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_single_char_pair_search(
    const hs_single_char_pair_compiled_pattern *database,
    const char *data, size_t length, match_event_handler onEvent,
    void *context) {
    assert(onEvent != nullptr &&
           "hs_single_char_pair_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_single_char_pair_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_single_char_pair_search called with nullptr database");

    // Per-call state read back by noodle_to_hs_callback().
    struct noodle_context storage;
    storage.pattern_length = 2;
    storage.usr_context = context;

    struct hs_scratch scratch;
    scratch.core_info.userCallback = onEvent;
    scratch.core_info.userContext = &storage;

    const uint8_t *buf = reinterpret_cast<const uint8_t *>(data);
    const hwlm_error_t rv = noodExec(&(database->noodle_database), buf, length,
                                     0, noodle_to_hs_callback, &scratch);
    return hwlm_to_hs_error(rv);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- char_pair_set (Double shufti) ---
|
||||||
|
|
||||||
|
/**
 * Report every occurrence in @p data of one of the character pairs held in
 * @p database.
 *
 * Double shufti is called repeatedly, each time restarting one byte after the
 * previous hit. Because the engine only returns a position, the two bytes at
 * that position are looked up in dshufti_database.all_pairs with a vector
 * compare; the index of the first pair found there is the id passed to
 * @p onEvent, together with the range [offset, offset + 2). The scan stops
 * when the buffer is exhausted or when @p onEvent returns 0.
 *
 * NOTE(review): HS_SUCCESS is returned even when the callback asked to stop,
 * whereas the Noodle/FDR based searches in this file can return
 * HS_SCAN_TERMINATED through hwlm_to_hs_error() - confirm this is intended.
 *
 * @param database compiled set; must not be null (checked by assert only).
 * @param data     buffer to scan; must not be null (checked by assert only).
 * @param length   number of bytes in @p data.
 * @param onEvent  match callback; must not be null (checked by assert only).
 * @param context  opaque pointer handed back to @p onEvent.
 * @return always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_char_pair_set_search(
    const hs_char_pair_set_compiled_pattern *database, const char *data,
    size_t length, match_event_handler onEvent, void *context) {
    assert(onEvent != nullptr &&
           "hs_char_pair_set_search called with nullptr callback");
    assert(data != nullptr &&
           "hs_char_pair_set_search called with nullptr buffer");
    assert(database != nullptr &&
           "hs_char_pair_set_search called with nullptr database");

    const u8 *current_buf = reinterpret_cast<const u8 *>(data);
    // buf_end must be the first char past the buffer, so current_buf==buf_end
    // means current_buf is empty.
    const u8 *buf_end = reinterpret_cast<const u8 *>(data) + length;
    while (current_buf < buf_end) {
        const u8 *current_match = shuftiDoubleExec(
            load128(database->dshufti_database.mask1),
            load128(database->dshufti_database.mask2),
            load128(database->dshufti_database.mask3),
            load128(database->dshufti_database.mask4), current_buf, buf_end);
        // current_match is the pointer to the matching char, NOT past the
        // matching char. or buf_end if no match.
        if (current_match < buf_end) {
            // Shufti doesn't return which pair matched so we have to find out.
            // Use a 16 bits vector search on the original pattern string,
            // then return the <first match>/2 as ID.
            //
            // The pair is fetched with memcpy because current_match can sit at
            // any byte offset; dereferencing it as a u16 pointer would be a
            // misaligned access.
            u16 pair_bytes;
            std::memcpy(&pair_bytes, current_match, sizeof(pair_bytes));
            SuperVector<VECTORSIZE> found_pair =
                SuperVector<VECTORSIZE>(pair_bytes);
            size_t width = SuperVector<VECTORSIZE>::mask_width();
            SuperVector<VECTORSIZE> all_pair;
            vector_mask_type mask;
            vector_mask_type merged_mask;
            size_t loop = 0;
            size_t vector_match_iterations_needed =
                ((database->dshufti_database.pair_count - 1) /
                 (VECTORSIZE / 2));
            for (; loop <= vector_match_iterations_needed; loop++) {
                all_pair = SuperVector<VECTORSIZE>::load(
                    database->dshufti_database.all_pairs + (VECTORSIZE * loop));
                // It is fine if the vector isn't filled as we are guaranteed to
                // have a match before reaching the garbage data
                mask = all_pair.eqmask(found_pair);
                // now we have <width> bit set to 1 when a char match.
                // first we merge the lane result to keep only consecutive
                // matches
                merged_mask = mask & (mask >> width);
                // Then we filter to keep only a single bit per lane, and only
                // every other lane
                merged_mask =
                    merged_mask & database->dshufti_database.bit_filter_mask;
                if (merged_mask) {
                    break;
                }
            }
            // And finally we can ctz to get the first pair that match.
            // This relies on the pair being present in all_pairs, as stated
            // above; merged_mask would be zero otherwise.
            unsigned int id =
                (ctz64(merged_mask) / width / 2) + (loop * (VECTORSIZE / 2));
            size_t match_start =
                current_match - reinterpret_cast<const u8 *>(data);
            if (!onEvent(id, match_start, match_start + 2, 0, context)) {
                // user requested to stop matching
                break;
            }
        }
        current_buf = current_match + 1;
    }

    return HS_SUCCESS;
}
|
207
src/hs_direct_search.h
Normal file
207
src/hs_direct_search.h
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DIRECT_SEARCH_H
|
||||||
|
#define DIRECT_SEARCH_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "allocator.h"
|
||||||
|
|
||||||
|
#include "fdr/fdr_internal.h"
|
||||||
|
#include "util/arch.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FDR_pattern_storage memory layout:
|
||||||
|
*
|
||||||
|
* |-------------------------------------------------|
|
||||||
|
* | size_t pattern_count |
|
||||||
|
* |------------------------|------------------------|
|
||||||
|
* | pattern_raw_storage : char* pattern_ptrs[] |
|
||||||
|
* | :------------------------|
|
||||||
|
* | : size_t pattern_sizes[] |
|
||||||
|
* | :------------------------|
|
||||||
|
* | : char actual_storage[] |
|
||||||
|
* |------------------------|------------------------|
|
||||||
|
*
|
||||||
|
* Use size_fdr_pattern() to get the size to allocate.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Variable-size pattern store. Only the count is a named field; the pointer
 * table, the size table and the pattern bytes all live in pattern_raw_storage
 * and are reached through the get_*() helpers in this header.
 */
struct FDR_pattern_storage {
    /* Number of patterns held in the store. */
    size_t pattern_count;
    /* Pointer table, then size table, then the pattern bytes themselves. */
    char pattern_raw_storage[];
};
|
||||||
|
|
||||||
|
static inline char **get_pattern_ptrs(struct FDR_pattern_storage *pat) {
|
||||||
|
// cppcheck-suppress cstyleCast
|
||||||
|
return (char **)((char *)pat +
|
||||||
|
offsetof(struct FDR_pattern_storage, pattern_raw_storage));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline char *const *
|
||||||
|
get_const_pattern_ptrs(const struct FDR_pattern_storage *pat) {
|
||||||
|
// cppcheck-suppress cstyleCast
|
||||||
|
return (char *const *)((const char *)pat +
|
||||||
|
offsetof(struct FDR_pattern_storage,
|
||||||
|
pattern_raw_storage));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline size_t *get_pattern_sizes(struct FDR_pattern_storage *pat) {
|
||||||
|
// cppcheck-suppress cstyleCast
|
||||||
|
return (size_t *)((char *)get_pattern_ptrs(pat) +
|
||||||
|
pat->pattern_count * sizeof(char *));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const size_t *
|
||||||
|
get_const_pattern_sizes(const struct FDR_pattern_storage *pat) {
|
||||||
|
// cppcheck-suppress cstyleCast
|
||||||
|
return (const size_t *)((const char *)get_const_pattern_ptrs(pat) +
|
||||||
|
pat->pattern_count * sizeof(char *));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline char *
|
||||||
|
get_pattern_string_storage(struct FDR_pattern_storage *pat) {
|
||||||
|
return (char *)get_pattern_sizes(pat) + pat->pattern_count * sizeof(size_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const char *
|
||||||
|
get_const_pattern_string_storage(const struct FDR_pattern_storage *pat) {
|
||||||
|
return (const char *)get_const_pattern_sizes(pat) +
|
||||||
|
pat->pattern_count * sizeof(size_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void init_pattern_store(struct FDR_pattern_storage *storage,
|
||||||
|
const char **in_expression, size_t in_pattern_count,
|
||||||
|
const size_t *in_expression_length) {
|
||||||
|
storage->pattern_count = in_pattern_count;
|
||||||
|
memcpy(get_pattern_sizes(storage), in_expression_length,
|
||||||
|
storage->pattern_count);
|
||||||
|
char *next_string = get_pattern_string_storage(storage);
|
||||||
|
for (size_t i = 0; i < storage->pattern_count; i++) {
|
||||||
|
memcpy(next_string, in_expression[i], in_expression_length[i]);
|
||||||
|
get_pattern_ptrs(storage)[i] = next_string;
|
||||||
|
get_pattern_sizes(storage)[i] = in_expression_length[i];
|
||||||
|
next_string += in_expression_length[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Convenience wrapper around init_pattern_store() for a store that holds
 * exactly one pattern.
 *
 * storage              destination store.
 * in_expression        the pattern bytes.
 * in_expression_length length of the pattern, in bytes.
 */
static inline
void init_pattern_store_single(struct FDR_pattern_storage *storage,
                               const char *in_expression,
                               const size_t in_expression_length) {
    /* Present the single pattern as one-element arrays. */
    const char **single_expression = &in_expression;
    const size_t *single_length = &in_expression_length;
    init_pattern_store(storage, single_expression, 1, single_length);
}
|
||||||
|
|
||||||
|
static
|
||||||
|
size_t size_fdr_pattern(size_t in_pattern_count,
|
||||||
|
const size_t *in_expression_length) {
|
||||||
|
size_t total_string_size = 0;
|
||||||
|
for (size_t i = 0; i < in_pattern_count; i++) {
|
||||||
|
total_string_size += in_expression_length[i];
|
||||||
|
}
|
||||||
|
size_t ptr_array_size = in_pattern_count * sizeof(char *);
|
||||||
|
size_t pattern_sizes_array_size = in_pattern_count * sizeof(size_t);
|
||||||
|
size_t required_mem = sizeof(struct FDR_pattern_storage) + ptr_array_size +
|
||||||
|
pattern_sizes_array_size + total_string_size;
|
||||||
|
return required_mem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* combined_fdr_database memory layout:
|
||||||
|
*
|
||||||
|
* |-------------------------------------------------|
|
||||||
|
* | FDR *database |
|
||||||
|
* |-------------------------------------------------|
|
||||||
|
* | FDR_pattern_storage *patterns |
|
||||||
|
* |------------------------|------------------------|
|
||||||
|
* | raw_storage : FDR fdr_storage |
|
||||||
|
* | :------------------------|
|
||||||
|
* | : FDR_pattern_storage |
|
||||||
|
* |------------------------|------------------------|
|
||||||
|
*
|
||||||
|
* Use size_fdr_database() to get the size to allocate.
|
||||||
|
*/
|
||||||
|
/*
 * FDR bytecode and its pattern store kept in one allocation. Both pointers
 * refer to locations inside raw_storage once init_combined_fdr_database() has
 * run.
 */
struct combined_fdr_database {
    /* Start of raw_storage, viewed as the FDR engine data. */
    struct FDR *database;
    /* Pattern store, placed fdr_size bytes into raw_storage. */
    struct FDR_pattern_storage *patterns;
    /* Backing bytes for both of the above. */
    unsigned char raw_storage[];
};
|
||||||
|
|
||||||
|
/*
 * Point database->database at the start of raw_storage and
 * database->patterns fdr_size bytes further, then fill the pattern store with
 * copies of the given patterns. The FDR bytes themselves are not written by
 * this function.
 *
 * database             allocation of at least size_fdr_database() bytes.
 * fdr_size             number of bytes reserved for the FDR data.
 * in_expression        array of in_pattern_count pattern pointers.
 * in_pattern_count     number of patterns.
 * in_expression_length array of in_pattern_count pattern lengths, in bytes.
 */
void init_combined_fdr_database(struct combined_fdr_database *database,
                                size_t fdr_size, const char **in_expression,
                                size_t in_pattern_count,
                                const size_t *in_expression_length);

/*
 * Same as init_combined_fdr_database() for a database holding exactly one
 * pattern.
 */
void init_combined_fdr_database_single(struct combined_fdr_database *database,
                                       size_t fdr_size,
                                       const char *in_expression,
                                       const size_t in_expression_length);
|
||||||
|
static inline
|
||||||
|
size_t size_fdr_database(size_t fdr_size, size_t in_pattern_count,
|
||||||
|
const size_t *in_expression_length) {
|
||||||
|
return sizeof(struct combined_fdr_database) +
|
||||||
|
size_fdr_pattern(in_pattern_count, in_expression_length) + fdr_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * size_fdr_database() for a database holding exactly one pattern of
 * in_expression_length bytes.
 */
static inline
size_t size_fdr_database_single(size_t fdr_size,
                                const size_t in_expression_length) {
    const size_t *single_length = &in_expression_length;
    return size_fdr_database(fdr_size, 1, single_length);
}
|
||||||
|
|
||||||
|
hwlmcb_rv_t HS_CDECL noodle_to_hs_callback(size_t end, u32 id,
|
||||||
|
struct hs_scratch *scratch);
|
||||||
|
|
||||||
|
// Receive the FDR callback and perform the check for longer patterns (>8 char)
|
||||||
|
hwlmcb_rv_t HS_CDECL FDR_to_hs_callback(size_t end, u32 id,
|
||||||
|
struct hs_scratch *scratch);
|
||||||
|
|
||||||
|
/*
 * Per-search state handed to FDR_to_hs_callback() through
 * scratch->core_info.userContext.
 */
struct FDR_cb_context {
    /* Opaque user pointer, passed back to the user callback. */
    void *usr_context;
    /* Pattern store used to look up the length and bytes of a literal. */
    const struct FDR_pattern_storage *patterns;
    /* Buffer being scanned; read when confirming long literals. */
    const char *buffer;
    /* Length of buffer, in bytes. */
    size_t buffer_length;
};
|
||||||
|
|
||||||
|
struct noodle_context {
|
||||||
|
void *usr_context;
|
||||||
|
u8 pattern_length;
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // DIRECT_SEARCH_H
|
495
src/hs_direct_search_compile.cpp
Normal file
495
src/hs_direct_search_compile.cpp
Normal file
@ -0,0 +1,495 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "hs_common.h"
|
||||||
|
#include "hs_compile.h"
|
||||||
|
#include "hs_direct_search.h"
|
||||||
|
#include "hs_direct_search_types.h"
|
||||||
|
|
||||||
|
#include "allocator.h" // hs_database_alloc()
|
||||||
|
#include "grey.h"
|
||||||
|
#include "hwlm/hwlm.h" // HWLM_LITERAL_MAX_LEN
|
||||||
|
#include "hwlm/hwlm_internal.h" // HWLM_ENGINE_FDR
|
||||||
|
#include "hwlm/hwlm_literal.h" // ue2::hwlmLiteral
|
||||||
|
#include "hwlm/noodle_internal.h" // noodTable
|
||||||
|
#include "ue2common.h" // likely() - unlikely()
|
||||||
|
#include "util/arch.h" // CAN_USE_WIDE_TRUFFLE
|
||||||
|
#include "util/bytecode_ptr.h"
|
||||||
|
#include "util/charreach.h"
|
||||||
|
#include "util/flat_containers.h" // flat_set
|
||||||
|
#include "util/supervector/supervector.hpp"
|
||||||
|
#include "util/target_info.h" // target_t
|
||||||
|
|
||||||
|
#include "fdr/fdr_compile.h"
|
||||||
|
#include "hwlm/noodle_build.h"
|
||||||
|
#include "nfa/shufticompile.h"
|
||||||
|
#include "nfa/trufflecompile.h"
|
||||||
|
|
||||||
|
typedef typename SuperVector<VECTORSIZE>::comparemask_type vector_mask_type;
|
||||||
|
|
||||||
|
/**
 * Lay out a combined_fdr_database inside its own raw_storage.
 *
 * database->database is pointed at the start of raw_storage and
 * database->patterns @p fdr_size bytes further; the pattern store is then
 * filled by init_pattern_store(). The FDR bytes themselves are not written
 * here.
 *
 * NOTE(review): the pattern store begins at raw_storage + fdr_size, so its
 * pointer and size_t tables are only suitably aligned if fdr_size keeps that
 * offset aligned - confirm with the callers.
 *
 * @param database             destination allocation.
 * @param fdr_size             bytes reserved for the FDR data.
 * @param in_expression        array of @p in_pattern_count pattern pointers.
 * @param in_pattern_count     number of patterns.
 * @param in_expression_length array of @p in_pattern_count pattern lengths.
 */
void init_combined_fdr_database(struct combined_fdr_database *database,
                                size_t fdr_size, const char **in_expression,
                                size_t in_pattern_count,
                                const size_t *in_expression_length) {
    unsigned char *const storage_base = database->raw_storage;
    database->database = reinterpret_cast<FDR *>(storage_base);
    database->patterns =
        reinterpret_cast<FDR_pattern_storage *>(storage_base + fdr_size);
    init_pattern_store(database->patterns, in_expression, in_pattern_count,
                       in_expression_length);
}
|
||||||
|
|
||||||
|
/**
 * Single-pattern variant of init_combined_fdr_database().
 *
 * database->database is pointed at the start of raw_storage and
 * database->patterns @p fdr_size bytes further; the pattern store is then
 * filled by init_pattern_store_single().
 *
 * @param database             destination allocation.
 * @param fdr_size             bytes reserved for the FDR data.
 * @param in_expression        the pattern bytes.
 * @param in_expression_length length of the pattern, in bytes.
 */
void init_combined_fdr_database_single(struct combined_fdr_database *database,
                                       size_t fdr_size,
                                       const char *in_expression,
                                       const size_t in_expression_length) {
    unsigned char *const storage_base = database->raw_storage;
    database->database = reinterpret_cast<FDR *>(storage_base);
    database->patterns =
        reinterpret_cast<FDR_pattern_storage *>(storage_base + fdr_size);
    init_pattern_store_single(database->patterns, in_expression,
                              in_expression_length);
}
|
||||||
|
|
||||||
|
inline void generic_free(void *database) {
|
||||||
|
if (likely(database)) {
|
||||||
|
hs_database_free(database);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- short_literal (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Compile a single short literal for hs_short_literal_search().
 *
 * The literal is compiled case-sensitively into a Noodle table, which is
 * copied into a block obtained from hs_database_alloc(). On success the block
 * is stored in @p output_database; on failure @p output_database is left
 * untouched.
 *
 * @param expression        literal bytes; must not be null (assert only).
 * @param expression_length number of bytes in @p expression; must be greater
 *                          than zero (assert only) and at most
 *                          HS_SHORT_PATTERN_THRESHOLD.
 * @param output_database   receives the compiled pattern; must not be null
 *                          (assert only).
 * @return HS_SUCCESS on success, HS_INVALID if the literal is longer than
 *         HS_SHORT_PATTERN_THRESHOLD, HS_UNKNOWN_ERROR if the Noodle table
 *         could not be built, HS_NOMEM if the allocation failed.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_short_literal_search(
    const char *expression, size_t expression_length,
    hs_short_literal_compiled_pattern **output_database) {
    assert(expression_length > 0 &&
           "hs_compile_short_literal_search called with an empty pattern");
    assert(expression != nullptr &&
           "hs_compile_short_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_short_literal_search called with nullptr");
    if (unlikely(expression_length > HS_SHORT_PATTERN_THRESHOLD)) {
        return HS_INVALID;
    }
    /*
     * Exposing caseness at the api level may restrict our ability to change
     * the backing algorithm, so we decided to make all algo case sensitive
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(expression, expression_length),
                         is_case_insensitive, only_need_first_match, 0,
                         HWLM_ALL_GROUPS, {}, {});

    // Build the table before allocating the output block: a failed build then
    // has nothing to release, whereas allocating first leaked the block on
    // the HS_UNKNOWN_ERROR path.
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_short_literal_compiled_pattern *database =
        reinterpret_cast<hs_short_literal_compiled_pattern *>(hs_database_alloc(
            sizeof(hs_short_literal_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    database->pattern_length = expression_length;
    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a pattern produced by hs_compile_short_literal_search().
 *
 * @param database pattern to release, handed to generic_free().
 */
HS_PUBLIC_API
void hs_free_short_literal_pattern(
    hs_short_literal_compiled_pattern *database) {
    void *raw = database;
    generic_free(raw);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- long_literal (FDR) ---
|
||||||
|
|
||||||
|
/**
 * Compile a single literal of arbitrary length into an FDR-backed pattern.
 *
 * Only the first HWLM_LITERAL_MAX_LEN bytes are handed to the FDR builder;
 * the complete expression and its length are passed to
 * init_combined_fdr_database_single() so that longer strings can be checked
 * in the match callback.
 *
 * @param expression
 *      Literal bytes to search for; must not be null (asserted).
 * @param expression_length
 *      Number of bytes in @p expression; must be non-zero (asserted).
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_UNKNOWN_ERROR if the FDR prototype or table
 *      could not be built; HS_NOMEM if hs_database_alloc() fails.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_long_literal_search(
    const char *expression, size_t expression_length,
    hs_long_literal_compiled_pattern_t **output_database) {
    assert(expression_length > 0 &&
           "hs_compile_long_literal_search called with an empty pattern");
    assert(expression != nullptr &&
           "hs_compile_long_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_long_literal_search called with nullptr");

    /*
     * Exposing caseness at the API level may restrict our ability to change
     * the backing algorithm, so every direct-search algorithm is
     * case-sensitive.
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;

    // Longer strings are checked in the callback, so the matcher only sees a
    // prefix of at most HWLM_LITERAL_MAX_LEN bytes.
    std::vector<ue2::hwlmLiteral> lits;
    ue2::hwlmLiteral lit(
        std::string(expression,
                    std::min(expression_length, (size_t)HWLM_LITERAL_MAX_LEN)),
        is_case_insensitive, only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});
    lits.push_back(lit);

    ue2::Grey g = ue2::Grey();
    u8 engType = HWLM_ENGINE_FDR;
    bool make_small = false;

    hs_platform_info platform_info;
    hs_populate_platform(&platform_info);
    ue2::target_t target = ue2::target_t(platform_info);

    std::unique_ptr<ue2::HWLMProto> proto =
        ue2::fdrBuildProto(engType, lits, make_small, target, g);
    // Dereferencing an empty unique_ptr is undefined behaviour; treat a
    // missing prototype like a failed table build.
    if (unlikely(!proto)) {
        return HS_UNKNOWN_ERROR;
    }

    ue2::bytecode_ptr<FDR> bytecode_database = ue2::fdrBuildTable(*proto, g);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }
    size_t fdr_size = bytecode_database.get()->size;

    // One allocation holds the combined header, the literal and the FDR
    // bytecode; its size comes from size_fdr_database_single().
    size_t mem_required = size_fdr_database_single(fdr_size, expression_length);
    struct combined_fdr_database *combined_database =
        reinterpret_cast<struct combined_fdr_database *>(
            hs_database_alloc(mem_required));
    if (unlikely(combined_database == nullptr)) {
        return HS_NOMEM;
    }

    init_combined_fdr_database_single(combined_database, fdr_size, expression,
                                      expression_length);
    memcpy(combined_database->database, bytecode_database.get(), fdr_size);
    *output_database = reinterpret_cast<hs_long_literal_compiled_pattern_t *>(
        combined_database);

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a long-literal compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_long_literal_pattern(
    hs_long_literal_compiled_pattern_t *database) {
    generic_free(database);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- multi_literal (FDR) ---
|
||||||
|
|
||||||
|
/**
 * Compile a set of literals into a single FDR-backed pattern.
 *
 * Each literal is registered with its index in @p expression as its id. Only
 * the first HWLM_LITERAL_MAX_LEN bytes of each literal are handed to the FDR
 * builder; the complete expressions and lengths are passed to
 * init_combined_fdr_database() so that longer strings can be checked in the
 * match callback.
 *
 * @param expression
 *      Array of @p pattern_count literal pointers; neither the array nor any
 *      entry may be null (asserted).
 * @param pattern_count
 *      Number of literals; must be non-zero (asserted).
 * @param expression_length
 *      Array of @p pattern_count byte lengths, each non-zero (asserted).
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_UNKNOWN_ERROR if the FDR prototype or table
 *      could not be built; HS_NOMEM if hs_database_alloc() fails.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_multi_literal_search(
    const char **expression, size_t pattern_count,
    const size_t *expression_length,
    hs_multi_literal_compiled_pattern_t **output_database) {
    assert(pattern_count > 0 &&
           "hs_compile_multi_literal_search called with no pattern");
    assert(expression != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");
    assert(expression_length != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_multi_literal_search called with nullptr");

    /*
     * Exposing caseness at the API level may restrict our ability to change
     * the backing algorithm, so every direct-search algorithm is
     * case-sensitive.
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;

    std::vector<ue2::hwlmLiteral> lits;
    for (size_t i = 0; i < pattern_count; i++) {
        assert(expression_length[i] > 0 && expression[i] &&
               "hs_compile_multi_literal_search called with an empty pattern");
        // Longer strings are checked in the callback, so the matcher only
        // sees a prefix of at most HWLM_LITERAL_MAX_LEN bytes.
        ue2::hwlmLiteral lit(
            std::string(expression[i], std::min(expression_length[i],
                                                (size_t)HWLM_LITERAL_MAX_LEN)),
            is_case_insensitive, only_need_first_match, i, HWLM_ALL_GROUPS, {},
            {});
        lits.push_back(lit);
    }

    ue2::Grey g = ue2::Grey();
    u8 engType = HWLM_ENGINE_FDR;
    bool make_small = false;

    hs_platform_info platform_info;
    hs_populate_platform(&platform_info);
    ue2::target_t target = ue2::target_t(platform_info);

    std::unique_ptr<ue2::HWLMProto> proto =
        ue2::fdrBuildProto(engType, lits, make_small, target, g);
    // Dereferencing an empty unique_ptr is undefined behaviour; treat a
    // missing prototype like a failed table build.
    if (unlikely(!proto)) {
        return HS_UNKNOWN_ERROR;
    }

    ue2::bytecode_ptr<FDR> bytecode_database = ue2::fdrBuildTable(*proto, g);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }
    size_t fdr_size = bytecode_database.get()->size;

    // One allocation holds the combined header, the literals and the FDR
    // bytecode; its size comes from size_fdr_database().
    size_t mem_required =
        size_fdr_database(fdr_size, pattern_count, expression_length);
    struct combined_fdr_database *combined_database =
        reinterpret_cast<struct combined_fdr_database *>(
            hs_database_alloc(mem_required));
    if (unlikely(combined_database == nullptr)) {
        return HS_NOMEM;
    }

    init_combined_fdr_database(combined_database, fdr_size, expression,
                               pattern_count, expression_length);
    memcpy(combined_database->database, bytecode_database.get(), fdr_size);
    *output_database = reinterpret_cast<hs_multi_literal_compiled_pattern_t *>(
        combined_database);

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a multi-literal compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_multi_literal_pattern(
    hs_multi_literal_compiled_pattern_t *database) {
    generic_free(database);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- single_char (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Compile a single-character search into a Noodle-backed pattern.
 *
 * @param character
 *      The byte to search for.
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_UNKNOWN_ERROR if the Noodle table could not
 *      be built; HS_NOMEM if hs_database_alloc() fails.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_single_char_search(
    const char character, hs_single_char_compiled_pattern **output_database) {
    assert(output_database != nullptr &&
           "hs_compile_single_char_search called with nullptr");

    /*
     * Exposing caseness at the API level may restrict our ability to change
     * the backing algorithm, so every direct-search algorithm is
     * case-sensitive.
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(&character, 1), is_case_insensitive,
                         only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});

    // Build the table before allocating the output structure, so that a
    // failed build has nothing to release (the previous ordering leaked the
    // allocation on this path).
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_single_char_compiled_pattern *database =
        reinterpret_cast<hs_single_char_compiled_pattern *>(
            hs_database_alloc(sizeof(hs_single_char_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a single-character compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_single_char_pattern(
    hs_single_char_compiled_pattern *database) {
    generic_free(database);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- char_set (Truffle) ---
|
||||||
|
|
||||||
|
/**
 * Compile a character-set search into a Truffle-backed pattern.
 *
 * Besides the Truffle masks, the pattern stores a byte-to-id map: for every
 * character in @p character_array, char_id_map[character] is set to that
 * character's index in the array (a later duplicate overwrites an earlier
 * one). Map entries for bytes outside the set are not written.
 *
 * @param character_array
 *      The characters making up the set; must not be null (asserted).
 * @param character_count
 *      Number of entries in @p character_array; must be non-zero (asserted)
 *      and at most 256, because ids are stored in a u8.
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_INVALID if @p character_count exceeds 256;
 *      HS_NOMEM if hs_database_alloc() fails.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL
hs_compile_char_set_search(const char *character_array, size_t character_count,
                           hs_char_set_compiled_pattern **output_database) {
    assert(character_count > 0 &&
           "hs_compile_char_set_search called with an empty set");
    assert(character_array != nullptr &&
           "hs_compile_char_set_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_char_set_search called with nullptr");

    // Ids are stored as u8, so indices above 255 cannot be represented. The
    // previous u8 loop counter also never terminated for such inputs.
    const size_t max_set_size = 256;
    if (unlikely(character_count > max_set_size)) {
        return HS_INVALID;
    }

    const ue2::CharReach cr =
        ue2::CharReach(std::string(character_array, character_count));

    truffle_storage *database = reinterpret_cast<truffle_storage *>(
        hs_database_alloc(sizeof(truffle_storage)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }
    // hs_database_alloc is meant to align to a machine word (likely 64b),
    // which is actually required here
    assert((((intptr_t)(database) & 3) == 0) &&
           "user-provided alloc didn't meet alignment requirement in "
           "hs_compile_char_set_search");

    for (size_t i = 0; i < character_count; i++) {
        database->char_id_map[(u8)character_array[i]] = static_cast<u8>(i);
    }

#ifdef CAN_USE_WIDE_TRUFFLE
    ue2::truffleBuildMasksWide(cr, database->wide_mask);
#else
    ue2::truffleBuildMasks(cr, database->mask1, database->mask2);
#endif

    *output_database = database;

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a character-set compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_char_set_pattern(hs_char_set_compiled_pattern *database) {
    generic_free(database);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- single_char_pair (Noodle) ---
|
||||||
|
|
||||||
|
/**
 * Compile an ordered two-character search into a Noodle-backed pattern.
 *
 * @param pair
 *      Pointer to the two bytes to search for, in order; exactly two bytes
 *      are read. Must not be null (asserted).
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_UNKNOWN_ERROR if the Noodle table could not
 *      be built; HS_NOMEM if hs_database_alloc() fails.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_single_char_pair_search(
    const char *pair, hs_single_char_pair_compiled_pattern **output_database) {
    assert(pair != nullptr &&
           "hs_compile_single_char_pair_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_single_char_pair_search called with nullptr");

    /*
     * Exposing caseness at the API level may restrict our ability to change
     * the backing algorithm, so every direct-search algorithm is
     * case-sensitive.
     */
    bool is_case_insensitive = false;
    bool only_need_first_match = false;
    ue2::hwlmLiteral lit(std::string(pair, 2), is_case_insensitive,
                         only_need_first_match, 0, HWLM_ALL_GROUPS, {}, {});

    // Build the table before allocating the output structure, so that a
    // failed build has nothing to release (the previous ordering leaked the
    // allocation on this path).
    ue2::bytecode_ptr<noodTable> bytecode_database = ue2::noodBuildTable(lit);
    if (unlikely(bytecode_database.get() == nullptr)) {
        return HS_UNKNOWN_ERROR;
    }

    hs_single_char_pair_compiled_pattern *database =
        reinterpret_cast<hs_single_char_pair_compiled_pattern *>(
            hs_database_alloc(sizeof(hs_single_char_pair_compiled_pattern)));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }

    memcpy(&(database->noodle_database), bytecode_database.get(),
           sizeof(noodTable));
    *output_database = database;

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a single-character-pair compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_single_char_pair_pattern(
    hs_single_char_pair_compiled_pattern *database) {
    generic_free(database);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// --- char_pair_set (Double shufti) ---
|
||||||
|
|
||||||
|
/**
 * Compile a set of ordered character pairs into a double-shufti pattern.
 *
 * @param expression
 *      The pairs laid out back to back: bytes 2*i and 2*i+1 form pair i.
 *      Exactly 2 * @p pair_count bytes are read. Must not be null (asserted).
 * @param pair_count
 *      Number of pairs; must be non-zero (asserted).
 * @param output_database
 *      Receives the newly allocated pattern on success; must not be null
 *      (asserted). Only written on success.
 *
 * @return
 *      HS_SUCCESS on success; HS_NOMEM if hs_database_alloc() fails;
 *      HS_COMPILER_ERROR if shuftiBuildDoubleMasks() rejects the pair set.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_char_pair_set_search(
    const char *expression, size_t pair_count,
    hs_char_pair_set_compiled_pattern **output_database) {
    assert(pair_count > 0 &&
           "hs_compile_char_pair_set_search called with an empty set");
    assert(expression != nullptr &&
           "hs_compile_char_pair_set_search called with nullptr");
    assert(output_database != nullptr &&
           "hs_compile_char_pair_set_search called with nullptr");

    // The index must be as wide as pair_count: the previous u8 counter
    // wrapped at 256 and never terminated for larger sets.
    ue2::flat_set<std::pair<u8, u8>> pairs;
    for (size_t i = 0; i < pair_count; i++) {
        pairs.insert(
            std::make_pair((u8)expression[2 * i], (u8)expression[2 * i + 1]));
    }

    // The raw pairs are stored after the fixed-size part of the structure.
    hs_char_pair_set_compiled_pattern *database =
        reinterpret_cast<hs_char_pair_set_compiled_pattern *>(hs_database_alloc(
            sizeof(hs_char_pair_set_compiled_pattern) +
            sizeof(char) * 2 * pair_count));
    if (unlikely(database == nullptr)) {
        return HS_NOMEM;
    }
    // hs_database_alloc is meant to align to a machine word (likely 64b),
    // which is actually required here
    assert((((intptr_t)(database) & 3) == 0) &&
           "user-provided alloc didn't meet alignment requirement in "
           "hs_compile_char_pair_set_search");

    bool success = ue2::shuftiBuildDoubleMasks(
        ue2::CharReach(), pairs, database->dshufti_database.mask1,
        database->dshufti_database.mask2, database->dshufti_database.mask3,
        database->dshufti_database.mask4);
    if (!success) {
        // Nothing has been handed to the caller yet, so release the
        // allocation here instead of leaking it.
        generic_free(database);
        return HS_COMPILER_ERROR;
    }

    database->dshufti_database.pair_count = pair_count;

    size_t width = SuperVector<VECTORSIZE>::mask_width();
    assert(width <= 4 &&
           "Code needs rework if supervector's mask are bigger than 4");
    assert(width != 3 &&
           "Code needs rework if supervector's mask aren't a power of 2");
    // We need a mask such that, in every group of 2*width bits, only the lsb
    // is set to 1; so for a width of 4, we repeat 0x01. Counting upwards
    // yields the same mask for widths 1, 2 and 4 without depending on
    // unsigned wrap-around landing exactly on zero.
    unsigned char bit_filter_mask = 0;
    for (size_t covered_bits = 0; width > 0 && covered_bits < 8;
         covered_bits += 2 * width) {
        bit_filter_mask = bit_filter_mask << (2 * width) | 0x1;
    }
    // NOTE(review): the fill size uses vector_mask_type while the field is
    // declared as SuperVector<VECTORSIZE>::comparemask_type; confirm the two
    // have the same size.
    memset(&(database->dshufti_database.bit_filter_mask), bit_filter_mask,
           sizeof(vector_mask_type));
    memcpy(database->dshufti_database.all_pairs, expression, 2 * pair_count);

    *output_database = database;

    return HS_SUCCESS;
}
|
||||||
|
|
||||||
|
/**
 * Release a character-pair-set compiled pattern.
 *
 * The pointer is forwarded unchanged to generic_free().
 */
HS_PUBLIC_API
void hs_free_char_pair_set_pattern(
    hs_char_pair_set_compiled_pattern *database) {
    generic_free(database);
}
|
||||||
|
|
87
src/hs_direct_search_types.h
Normal file
87
src/hs_direct_search_types.h
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DIRECT_SEARCH_TYPES_H
|
||||||
|
#define DIRECT_SEARCH_TYPES_H
|
||||||
|
|
||||||
|
#include <stdalign.h>
|
||||||
|
|
||||||
|
#include "util/supervector/supervector.hpp"
|
||||||
|
|
||||||
|
#include "fdr/fdr_internal.h"
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct hs_short_literal_compiled_pattern {
|
||||||
|
noodTable noodle_database;
|
||||||
|
u8 pattern_length;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hs_long_literal_compiled_pattern {
|
||||||
|
struct combined_fdr_database fdr_database;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hs_multi_literal_compiled_pattern {
|
||||||
|
struct combined_fdr_database fdr_database;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hs_single_char_compiled_pattern {
|
||||||
|
struct noodTable noodle_database;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hs_single_char_pair_compiled_pattern {
|
||||||
|
struct noodTable noodle_database;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct hs_char_set_compiled_pattern {
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct {
|
||||||
|
uint8_t mask1[16] __attribute__((aligned));
|
||||||
|
uint8_t mask2[16] __attribute__((aligned));
|
||||||
|
};
|
||||||
|
uint8_t wide_mask[32] __attribute__((aligned));
|
||||||
|
};
|
||||||
|
// allows us to get the id from the character
|
||||||
|
u8 char_id_map[256];
|
||||||
|
} truffle_storage;
|
||||||
|
|
||||||
|
struct dshufti_storage {
|
||||||
|
alignas(16) uint8_t mask1[16];
|
||||||
|
alignas(16) uint8_t mask2[16];
|
||||||
|
alignas(16) uint8_t mask3[16];
|
||||||
|
alignas(16) uint8_t mask4[16];
|
||||||
|
size_t pair_count;
|
||||||
|
typename SuperVector<VECTORSIZE>::comparemask_type bit_filter_mask;
|
||||||
|
alignas(VECTORSIZE) uint8_t all_pairs[];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hs_char_pair_set_compiled_pattern {
|
||||||
|
struct dshufti_storage dshufti_database;
|
||||||
|
};
|
||||||
|
#endif // DIRECT_SEARCH_TYPES_H
|
221
src/hs_runtime.h
221
src/hs_runtime.h
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2018, Intel Corporation
|
* Copyright (c) 2015-2018, Intel Corporation
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -614,6 +615,226 @@ hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
|
|||||||
*/
|
*/
|
||||||
#define HS_OFFSET_PAST_HORIZON (~0ULL)
|
#define HS_OFFSET_PAST_HORIZON (~0ULL)
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The following functions are part of the extended API.
|
||||||
|
* This extension offers direct access to search algorithms
|
||||||
|
* allowing the user to minimise calling overhead for simple
|
||||||
|
* search use cases where the type of search is known.
|
||||||
|
*
|
||||||
|
* All search functions handle a limited kind of patterns. For more generic
|
||||||
|
* patterns, use @ref hs_scan()
|
||||||
|
*
|
||||||
|
* NOTE: All search functions are considered case-sensitive.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup DIRECT_API_RUNTIME
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Callback return value indicating that we should continue matching. */
|
||||||
|
#define CB_CONTINUE_MATCHING (int)(~0U)
|
||||||
|
|
||||||
|
/** Callback return value indicating that we should halt matching. */
|
||||||
|
#define CB_TERMINATE_MATCHING (int)0
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for the short literal pattern up to
|
||||||
|
* @ref HS_SHORT_PATTERN_THRESHOLD chars long. For longer patterns, use @ref
|
||||||
|
* hs_long_literal_search(). Other options exist for character pairs or sets.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_short_literal_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_short_literal_search(
|
||||||
|
const hs_short_literal_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for the long literal pattern.
|
||||||
|
*
|
||||||
|
* If the pattern length is less than or equal to @ref HS_SHORT_PATTERN_THRESHOLD,
|
||||||
|
* @ref hs_short_literal_search() may be faster.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_long_literal_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_long_literal_search(
|
||||||
|
const hs_long_literal_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for several long literal patterns at once.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_multi_literal_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* The reported ID is the index of the matching literal.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_multi_literal_search(
|
||||||
|
const hs_multi_literal_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for any occurrence of the given character.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_single_char_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_single_char_search(
|
||||||
|
const hs_single_char_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for occurrences of any character from the given
|
||||||
|
* character set.
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_char_set_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* The reported ID is the index of the matching char.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_char_set_search(
|
||||||
|
const hs_char_set_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for occurrences of the given ordered character pair
|
||||||
|
* ("Aj" won't match "jA").
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref hs_compile_single_char_pair_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_single_char_pair_search(
|
||||||
|
const hs_single_char_pair_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search the given data for occurrences of any of the ordered character pairs
|
||||||
|
* from the given set ("Aj" won't match "jA")
|
||||||
|
*
|
||||||
|
* @param database
|
||||||
|
* The compiled pattern returned by @ref
|
||||||
|
* hs_compile_char_pair_set_search()
|
||||||
|
* @param data
|
||||||
|
* Pointer to the data to be scanned.
|
||||||
|
* @param length
|
||||||
|
* The number of bytes to scan.
|
||||||
|
* @param onEvent
|
||||||
|
* Pointer to a @ref match_event_handler callback function. If a NULL
|
||||||
|
* pointer is given, no matches will be returned.
|
||||||
|
* The "flag" argument is unused.
|
||||||
|
* The reported ID is the index of the matching pair.
|
||||||
|
* @param context
|
||||||
|
* The user defined pointer which will be passed to the callback function.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||||
|
* match callback indicated that scanning should stop; other values on
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
hs_error_t HS_CDECL hs_char_pair_set_search(
|
||||||
|
const hs_char_pair_set_compiled_pattern_t *database, const char *data,
|
||||||
|
size_t length, match_event_handler onEvent,
|
||||||
|
void *context);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
@ -63,6 +63,21 @@ set(unit_hyperscan_SOURCES
|
|||||||
add_executable(unit-hyperscan ${unit_hyperscan_SOURCES})
|
add_executable(unit-hyperscan ${unit_hyperscan_SOURCES})
|
||||||
target_link_libraries(unit-hyperscan hs expressionutil)
|
target_link_libraries(unit-hyperscan hs expressionutil)
|
||||||
|
|
||||||
|
set(unit_direct_api_SOURCES
|
||||||
|
${gtest_SOURCES}
|
||||||
|
direct_API/char_pair_set.cpp
|
||||||
|
direct_API/char_set.cpp
|
||||||
|
direct_API/common.h
|
||||||
|
direct_API/long_literal.cpp
|
||||||
|
direct_API/main.cpp
|
||||||
|
direct_API/multi_literal.cpp
|
||||||
|
direct_API/short_literal.cpp
|
||||||
|
direct_API/single_char_pair.cpp
|
||||||
|
direct_API/single_char.cpp
|
||||||
|
)
|
||||||
|
add_executable(unit-direct-API ${unit_direct_api_SOURCES})
|
||||||
|
target_link_libraries(unit-direct-API hs)
|
||||||
|
|
||||||
if (NOT FAT_RUNTIME AND BUILD_STATIC_LIBS)
|
if (NOT FAT_RUNTIME AND BUILD_STATIC_LIBS)
|
||||||
set(BUILD_UNIT_INTERNAL TRUE)
|
set(BUILD_UNIT_INTERNAL TRUE)
|
||||||
set(unit_internal_SOURCES
|
set(unit_internal_SOURCES
|
||||||
|
378
unit/direct_API/char_pair_set.cpp
Normal file
378
unit/direct_API/char_pair_set.cpp
Normal file
@ -0,0 +1,378 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
|
|
||||||
|
/*
 * Declares, in the calling scope, the state shared by the char-pair-set tests:
 *   pair_count / character_array : the inputs handed to the compiler,
 *   database / compile_ret       : output and status of
 *                                  hs_compile_char_pair_set_search(),
 *   ret / buffer / context       : filled in later by SEARCH_CHAR_PAIR_SET.
 * The (void) casts keep tests that never search free of unused warnings.
 * Because it introduces declarations, it can be used only once per scope.
 */
#define COMPILE_CHAR_PAIR_SET(in_character_array, in_pair_count)               \
    const size_t pair_count = (in_pair_count);                                 \
    const char *character_array = (in_character_array);                        \
    hs_char_pair_set_compiled_pattern_t *database = nullptr;                   \
    hs_error_t compile_ret = hs_compile_char_pair_set_search(                  \
        character_array, pair_count, &database);                               \
    hs_error_t ret = 0;                                                        \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */     \
    const char *buffer = nullptr;                                              \
    (void)buffer;                                                              \
    context_t context = {};                                                    \
    (void) context;
|
||||||
|
|
||||||
|
/*
 * Runs hs_char_pair_set_search() over in_buffer and stores its status in `ret`.
 *
 * Expects `database`, `buffer`, `ret` and `context` to already exist in the
 * calling scope (COMPILE_CHAR_PAIR_SET declares them).
 *
 *   in_buffer, in_buffer_len : data to scan and its length in bytes
 *   in_expected_match        : number of matches the callback should see
 *   in_expected_start_array  : parenthesised list of expected start offsets
 *   in_expected_id_array     : parenthesised list of expected pair ids
 *
 * The expected end offset of every match is its start offset + 2, a pair
 * being two bytes long.
 *
 * The expectation arrays live inside the macro's block, so after the macro
 * only the counters stored in `context` remain valid, not its array pointers.
 *
 * Wrapped in do { } while (0) so that the invocation behaves as a single
 * statement; callers must terminate it with ';'.
 */
#define SEARCH_CHAR_PAIR_SET(in_buffer, in_buffer_len, in_expected_match,      \
                             in_expected_start_array, in_expected_id_array)    \
    do {                                                                       \
        buffer = (in_buffer);                                                  \
        const size_t buffer_len = (in_buffer_len);                             \
        const size_t expected_match = (in_expected_match);                     \
        size_t expected_start_array[expected_match] =                          \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_end_array[expected_match] =                            \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_id_array[expected_match] =                             \
            BRACED_INIT_LIST in_expected_id_array;                             \
        for (size_t i = 0; i < expected_match; i++) {                          \
            expected_end_array[i] += 2;                                        \
        }                                                                      \
        context.expected_start_array = expected_start_array;                   \
        context.expected_end_array = expected_end_array;                       \
        context.expected_id_array = expected_id_array;                         \
        context.array_size = expected_match;                                   \
        context.number_matched = 0;                                            \
        context.number_wrong = 0;                                              \
                                                                               \
        ret = hs_char_pair_set_search(database, buffer, buffer_len, callback,  \
                                      &context);                               \
    } while (0)
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_char_pair_set_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Freeing a null pattern: the test passes as long as the call returns.
TEST(char_pair_set_free, nullptr) {
    hs_char_pair_set_compiled_pattern_t *null_pattern = nullptr;
    hs_free_char_pair_set_pattern(null_pattern);
}
|
||||||
|
|
||||||
|
// Hands a noodTable-sized buffer obtained from test_malloc() to
// hs_free_char_pair_set_pattern() as if it were a compiled pattern, then
// checks the leak-tracking macros report clean memory.
TEST(char_pair_set_free, general) {
    SETUP_MEM_LEAK_TEST();

    noodTable *raw_table =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    hs_char_pair_set_compiled_pattern_t *pattern =
        reinterpret_cast<hs_char_pair_set_compiled_pattern_t *>(raw_table);

    hs_free_char_pair_set_pattern(pattern);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_char_pair_set_search
|
||||||
|
single pair
|
||||||
|
multiple pair
|
||||||
|
pair duplicate
|
||||||
|
valid pair including null char
|
||||||
|
|
||||||
|
empty char array
|
||||||
|
nullptr char array
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiles a set made of the single pair "aB" (first pair of PAIR_SET_ABCD).
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, single_pair) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_single_pair");
    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a set made of the two pairs "aB" and "cd".
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, two_pairs) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_two_pairs");
    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a set in which the same pair ("aB") appears twice.
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, duplicate) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_duplicate");
    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a single pair whose second byte is the NUL character ("a\0").
// COMPILE_CHAR_PAIR_SET already marks `ret` and `buffer` as used, so no extra
// (void) casts are needed here.
TEST(char_pair_set_compile, null_char) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1);
    EXPECT_COMPILE_SUCCESS("test_compile_char_pair_set_null_char");
    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: compiling with a pair count of zero is expected to abort with a
// message matching "called with an empty set".
TEST(char_pair_set_compile, no_expression) {
    const char *character_array = PAIR_SET_ABCD;
    const size_t pair_count = 0;
    hs_char_pair_set_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_char_pair_set_search(character_array, pair_count,
                                                 &database),
                 "called with an empty set");
}
|
||||||
|
|
||||||
|
// Death test: a null character array is expected to abort with a message
// matching "called with nullptr".
TEST(char_pair_set_compile, nullptr_char_array) {
    hs_char_pair_set_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_char_pair_set_search(nullptr, 1, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// Death test: a null output pointer is expected to abort with a message
// matching "called with nullptr".
TEST(char_pair_set_compile, nullptr_database) {
    const char *character_array = PAIR_SET_ABCD;
    const size_t pair_count = 2;

    EXPECT_DEATH(
        hs_compile_char_pair_set_search(character_array, pair_count, nullptr),
        "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_char_pair_set_search
|
||||||
|
general pattern
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (cross over vector)
|
||||||
|
match at end
|
||||||
|
match past end
|
||||||
|
match null char
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
match a pair duplicate
|
||||||
|
match several patterns in the same search
|
||||||
|
match when there are more pairs than fit in a vector
|
||||||
|
buffer containing null char
|
||||||
|
pattern with null char
|
||||||
|
general pattern
|
||||||
|
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// "aB" is the first two bytes of EXPR_NOISE_0: one match (id 0) at offset 0.
TEST(char_pair_set_search, start) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_start");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// "aB" sits at offset 5 of EXPR_NOISE_5: one match (id 0) is expected there.
// The whole compile/search/free cycle runs under the memory-leak checker.
TEST(char_pair_set_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_general");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// EXPR_NOISE_5_15 holds "aB" at offsets 5 and 15; the second occurrence spans
// bytes 15 and 16, i.e. it straddles a 16-byte boundary.
TEST(char_pair_set_search, cross_vector) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_cross_vector");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// One match (id 0) is expected at offset 30, i.e. in the last two bytes of
// the EXPR_NOISE_LEN-byte buffer.
TEST(char_pair_set_search, end) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_end");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Same buffer as the `end` test but scanned one byte short, so the pair at
// offset 30 is cut in half and no match is expected.
TEST(char_pair_set_search, past_end) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_past_end");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// The compiled pair is "a\0"; EXPR_NOISE_5_NULL contains it at offset 5.
TEST(char_pair_set_search, null_char) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// EXPR_NOISE_5_15_BAD_CASE only contains "Ab" and "AB", never "aB": the
// test expects zero matches.
TEST(char_pair_set_search, bad_case) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_bad_case");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Reuses one compiled pattern for two consecutive searches over different
// buffers and checks each of them independently.
TEST(char_pair_set_search, several_search) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_several_search");

    // First scan: single occurrence of "aB" at offset 5.
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    // Second scan: occurrences at offsets 5 and 15.
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// The set holds the pair "aB" twice; the test expects a single match at
// offset 5, reported with id 0.
// The label strings now carry the same "test_" / "hs_" prefixes as every
// other test in this file.
TEST(char_pair_set_search, duplicate) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_AB_DUPLICATE, 2);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_duplicate");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Two pairs are compiled ("aB" -> id 0, "cd" -> id 1); EXPR_NOISE_5 contains
// "aB" at offset 5 and "cd" at offset 7.
TEST(char_pair_set_search, match_multiple) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_match_multiple");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 7), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Nine pairs are compiled; only the last one ("aB", id 8) occurs in
// EXPR_NOISE_5, at offset 5.
TEST(char_pair_set_search, last_of_long_pattern) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_LONG_PATTERN_AB, 9);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_last_of_long_pattern");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (8));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Both the compiled set ("a\0", "Bc") and the scanned buffer contain a NUL
// byte; the test expects the "a\0" pair (id 0) at offset 5.
// NOTE(review): EXPR_NOISE_5_NULL also contains "Bc" (the second compiled
// pair) at offset 7, yet only one match is listed. Compare with the
// match_multiple test, which expects a hit for each pair - confirm whether
// (5, 7) / (0, 1) is the intended expectation.
TEST(char_pair_set_search, null_char_buff_and_pattern) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_A_NULL_BC, 2);
    ASSERT_COMPILE_SUCCESS(
        "test_char_pair_set_search_null_char_buff_and_pattern");

    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Scans a buffer in which a NUL byte splits "a" from "B", so the "aB" pair
// cannot match.
// NOTE(review): two pairs are compiled ("aB", "cd") and EXPR_NOISE_5_NULL
// still contains "cd" at offset 8, yet zero matches are expected. Compare
// with the match_multiple test - confirm whether the pair count should be 1
// or the expectation should be (8) / (1).
TEST(char_pair_set_search, null_char_buff) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 2);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_null_char_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Scanning a zero-length buffer: no match is expected.
TEST(char_pair_set_search, empty_buff) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_CHAR_PAIR_SET("", 0, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_char_pair_set_search", character_array, buffer);

    hs_free_char_pair_set_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: searching with a null compiled pattern is expected to abort
// with a message matching "called with nullptr database".
// `context` is zero-initialised, as COMPILE_CHAR_PAIR_SET does, so that no
// indeterminate field is ever handed to the library.
TEST(char_pair_set_search, nullptr_pattern) {
    const hs_char_pair_set_compiled_pattern_t *database = nullptr;
    context_t context = {};
    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_CHAR_PAIR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Death test: searching a null buffer is expected to abort with a message
// matching "called with nullptr buffer".
TEST(char_pair_set_search, nullptr_buffer) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_CHAR_PAIR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr buffer");
}
|
||||||
|
|
||||||
|
// Death test: searching with a null callback is expected to abort with a
// message matching "called with nullptr callback".
// The context is populated by hand, mirroring what SEARCH_CHAR_PAIR_SET does,
// because that macro always passes the real callback.
TEST(char_pair_set_search, nullptr_callback) {
    COMPILE_CHAR_PAIR_SET(PAIR_SET_ABCD, 1);
    ASSERT_COMPILE_SUCCESS("test_char_pair_set_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    size_t expected_id_array[expected_match] = {0};
    // A pair is two bytes long: end offset = start offset + 2.
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += 2;
    }

    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.expected_id_array = expected_id_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_char_pair_set_search(database, buffer, buffer_len, nullptr,
                                    &context);
        },
        "called with nullptr callback");
}
|
||||||
|
|
||||||
|
#endif
|
333
unit/direct_API/char_set.cpp
Normal file
333
unit/direct_API/char_set.cpp
Normal file
@ -0,0 +1,333 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/*
 * Declares, in the calling scope, the state shared by the char-set tests:
 *   character_count / character_array : the inputs handed to the compiler,
 *   database / compile_ret            : output and status of
 *                                       hs_compile_char_set_search(),
 *   ret / buffer / context            : filled in later by SEARCH_CHAR_SET.
 * The (void) casts keep tests that never search free of unused warnings.
 * Because it introduces declarations, it can be used only once per scope.
 */
#define COMPILE_CHAR_SET(in_character_array, in_character_count)               \
    const size_t character_count = (in_character_count);                       \
    const char *character_array = (in_character_array);                        \
    hs_char_set_compiled_pattern_t *database = nullptr;                        \
    hs_error_t compile_ret = hs_compile_char_set_search(                       \
        character_array, character_count, &database);                          \
    hs_error_t ret = 0;                                                        \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */     \
    const char *buffer = nullptr;                                              \
    (void)buffer;                                                              \
    context_t context = {};                                                    \
    (void) context;
|
||||||
|
|
||||||
|
/*
 * Runs hs_char_set_search() over in_buffer and stores its status in `ret`.
 *
 * Expects `database`, `buffer`, `ret` and `context` to already exist in the
 * calling scope (COMPILE_CHAR_SET declares them).
 *
 *   in_buffer, in_buffer_len : data to scan and its length in bytes
 *   in_expected_match        : number of matches the callback should see
 *   in_expected_start_array  : parenthesised list of expected start offsets
 *   in_expected_id_array     : parenthesised list of expected character ids
 *
 * The expected end offset of every match is its start offset + 1, a match
 * being a single byte.
 *
 * The expectation arrays live inside the macro's block, so after the macro
 * only the counters stored in `context` remain valid, not its array pointers.
 *
 * Wrapped in do { } while (0) so that the invocation behaves as a single
 * statement; callers must terminate it with ';'.
 */
#define SEARCH_CHAR_SET(in_buffer, in_buffer_len, in_expected_match,           \
                        in_expected_start_array, in_expected_id_array)         \
    do {                                                                       \
        buffer = (in_buffer);                                                  \
        const size_t buffer_len = (in_buffer_len);                             \
        const size_t expected_match = (in_expected_match);                     \
        size_t expected_start_array[expected_match] =                          \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_end_array[expected_match] =                            \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_id_array[expected_match] =                             \
            BRACED_INIT_LIST in_expected_id_array;                             \
        for (size_t i = 0; i < expected_match; i++) {                          \
            expected_end_array[i] += 1;                                        \
        }                                                                      \
        context.expected_start_array = expected_start_array;                   \
        context.expected_end_array = expected_end_array;                       \
        context.expected_id_array = expected_id_array;                         \
        context.array_size = expected_match;                                   \
        context.number_matched = 0;                                            \
        context.number_wrong = 0;                                              \
                                                                               \
        ret = hs_char_set_search(database, buffer, buffer_len, callback,       \
                                 &context);                                    \
    } while (0)
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_char_set_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Freeing a null pattern: the test passes as long as the call returns.
TEST(char_set_free, nullptr) {
    hs_char_set_compiled_pattern_t *null_pattern = nullptr;
    hs_free_char_set_pattern(null_pattern);
}
|
||||||
|
|
||||||
|
// Hands a truffle_storage-sized buffer obtained from test_malloc() to
// hs_free_char_set_pattern() as if it were a compiled pattern, then checks
// the leak-tracking macros report clean memory.
TEST(char_set_free, general) {
    SETUP_MEM_LEAK_TEST();

    truffle_storage *raw_storage = reinterpret_cast<truffle_storage *>(
        test_malloc(sizeof(truffle_storage)));
    hs_char_set_compiled_pattern_t *pattern =
        reinterpret_cast<hs_char_set_compiled_pattern_t *>(raw_storage);

    hs_free_char_set_pattern(pattern);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_char_set_search
|
||||||
|
single char
|
||||||
|
same char twice
|
||||||
|
general (several different chars)
|
||||||
|
null char
|
||||||
|
|
||||||
|
empty char array
|
||||||
|
nullptr char array
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiles a set made of one character (the first byte of CHAR_SET_A, 'a').
TEST(char_set_compile, single_char) {
    COMPILE_CHAR_SET(CHAR_SET_A, 1);

    EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char");

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a set whose two entries are the same character ("aa", the first
// two bytes of CHAR_SET_A).
TEST(char_set_compile, single_char_twice) {
    COMPILE_CHAR_SET(CHAR_SET_A, 2);

    EXPECT_COMPILE_SUCCESS("test_compile_char_set_single_char_twice");

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a set of five distinct characters ("aBcde").
TEST(char_set_compile, general) {
    COMPILE_CHAR_SET(CHAR_SET_ABCDE, 5);

    EXPECT_COMPILE_SUCCESS("test_compile_char_set_general");

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiles a set whose only member is the NUL character.
TEST(char_set_compile, null_char) {
    COMPILE_CHAR_SET(CHAR_SET_NULL, 1);

    EXPECT_COMPILE_SUCCESS("test_compile_char_set_null_char");

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: compiling with a character count of zero is expected to abort
// with a message matching "called with an empty set".
TEST(char_set_compile, no_expression) {
    const char *character_array = CHAR_SET_AB;
    const size_t character_count = 0;
    hs_char_set_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_char_set_search(character_array, character_count,
                                            &database),
                 "called with an empty set");
}
|
||||||
|
|
||||||
|
// Death test: a null character array is expected to abort with a message
// matching "called with nullptr".
TEST(char_set_compile, nullptr_char_array) {
    hs_char_set_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_char_set_search(nullptr, 1, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// Death test: a null output pointer is expected to abort with a message
// matching "called with nullptr".
TEST(char_set_compile, nullptr_database) {
    const char *character_array = CHAR_SET_AB;
    const size_t character_count = 2;

    EXPECT_DEATH(
        hs_compile_char_set_search(character_array, character_count, nullptr),
        "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_char_set_search
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (last char of a vector)
|
||||||
|
match at end
|
||||||
|
match past end
|
||||||
|
match null char
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
match first char
|
||||||
|
match last char
|
||||||
|
buff size 0
|
||||||
|
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// EXPR_NOISE_0 begins with "aB": 'a' (id 0) at offset 0, 'B' (id 1) at 1.
TEST(char_set_search, start) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_start");

    SEARCH_CHAR_SET(EXPR_NOISE_0, EXPR_NOISE_LEN, 2, (0, 1), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// EXPR_NOISE_5 holds 'a' at offset 5 and 'B' at offset 6.
// The whole compile/search/free cycle runs under the memory-leak checker.
TEST(char_set_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_general");

    SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// EXPR_NOISE_5_15 holds "aB" at offsets 5-6 and 15-16, so matches fall on
// both sides of the 16-byte boundary.
TEST(char_set_search, cross_vector) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_cross_vector");

    SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16),
                    (0, 1, 0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Matches are expected in the last two bytes of the buffer: id 0 at offset
// 30 and id 1 at offset 31.
TEST(char_set_search, end) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_end");

    SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 2, (30, 31), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Same buffer as the `end` test but scanned one byte short: the match at
// offset 30 is still expected, the one at offset 31 is out of range.
TEST(char_set_search, past_end) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_past_end");

    SEARCH_CHAR_SET(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// The set contains only NUL; EXPR_NOISE_5_NULL has its single NUL byte at
// offset 6.
TEST(char_set_search, null_char) {
    COMPILE_CHAR_SET(CHAR_SET_NULL, 1);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_null_char");

    SEARCH_CHAR_SET(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// In EXPR_NOISE_5_15_BAD_CASE the case of most "aB" characters is flipped;
// the only byte still in the set is the 'B' (id 1) at offset 16.
TEST(char_set_search, bad_case) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_bad_case");

    SEARCH_CHAR_SET(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 1, (16), (1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Reuses one compiled pattern for two consecutive searches over different
// buffers and checks each of them independently.
TEST(char_set_search, several_search) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_several_search");

    // First scan: "aB" at offsets 5-6.
    SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 6), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    // Second scan: "aB" at offsets 5-6 and 15-16.
    SEARCH_CHAR_SET(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 4, (5, 6, 15, 16),
                    (0, 1, 0, 1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Only the first character of the set (id 0) is expected to match, once, at
// offset 5 of EXPR_UNIFORM_1_A. Note that the single character
// character_array[0], not the whole array, is passed to EXPECT_SEARCH_SUCCESS.
TEST(char_set_search, first_char) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_first_char");

    SEARCH_CHAR_SET(EXPR_UNIFORM_1_A, EXPR_UNIFORM_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[0], buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Only the last character of the set (id 1) is expected to match, once, at
// offset 5 of EXPR_UNIFORM_1_B. Note that the single character
// character_array[1], not the whole array, is passed to EXPECT_SEARCH_SUCCESS.
TEST(char_set_search, last_char) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_last_char");

    SEARCH_CHAR_SET(EXPR_UNIFORM_1_B, EXPR_UNIFORM_LEN, 1, (5), (1));
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array[1], buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
// Scanning a zero-length buffer: no match is expected.
TEST(char_set_search, empty_buff) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_CHAR_SET("", 0, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_char_set_search", character_array, buffer);

    hs_free_char_set_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: searching with a null compiled pattern is expected to abort
// with a message matching "called with nullptr database".
// `context` is zero-initialised, as COMPILE_CHAR_SET does, so that no
// indeterminate field is ever handed to the library.
TEST(char_set_search, nullptr_pattern) {
    const hs_char_set_compiled_pattern_t *database = nullptr;
    context_t context = {};
    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_CHAR_SET(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Death test: searching a null buffer is expected to abort with a message
// matching "called with nullptr buffer".
TEST(char_set_search, nullptr_buffer) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_CHAR_SET(nullptr, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr buffer");
}
|
||||||
|
|
||||||
|
// Death test: searching with a null callback is expected to abort with a
// message matching "called with nullptr callback".
// The context is populated by hand, mirroring what SEARCH_CHAR_SET does,
// because that macro always passes the real callback. The expected-id array
// is filled in as well so the context is set up exactly like the macro's.
TEST(char_set_search, nullptr_callback) {
    COMPILE_CHAR_SET(CHAR_SET_AB, 2);
    ASSERT_COMPILE_SUCCESS("test_char_set_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 2;
    size_t expected_start_array[expected_match] = {5, 6};
    size_t expected_end_array[expected_match] = {5, 6};
    size_t expected_id_array[expected_match] = {0, 1};
    // A match is one byte long: end offset = start offset + 1.
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += 1;
    }

    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.expected_id_array = expected_id_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_char_set_search(database, buffer, buffer_len, nullptr, &context);
        },
        "called with nullptr callback");
}
|
||||||
|
|
||||||
|
#endif
|
210
unit/direct_API/common.h
Normal file
210
unit/direct_API/common.h
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COMMON_H
|
||||||
|
#define COMMON_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include "hs_common.h"
|
||||||
|
#include "hs_compile.h"
|
||||||
|
#include "hs_runtime.h"
|
||||||
|
#include "hs_direct_search.h"
|
||||||
|
#include "hs_direct_search_types.h"
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Literal patterns of increasing length. The number in each name is the
// intended pattern length in bytes; the *_NULL variants embed a NUL byte.
#define PATTERN_0_CHAR      ""
#define PATTERN_1_CHAR      "a"
#define PATTERN_1_CHAR_NULL "\0"
#define PATTERN_2_CHAR      "aB"
#define PATTERN_2_WITH_NULL "a\0"
#define PATTERN_3_CHAR      "aBc"
#define PATTERN_5_CHAR      "aBcde"
#define PATTERN_5_WITH_NULL "a\0Bcd"
#define PATTERN_8_CHAR      "aBcdeoAbCD"[0] ? "aBcdeoAb" : "aBcdeoAb"
#define PATTERN_10_CHAR     "aBcdeoAbCD"
// NOTE(review): this literal is 26 characters long, not 25 - confirm whether
// callers pass an explicit length of 25 or the trailing 'q' is unintended.
#define PATTERN_25_CHAR     "aBcdeoAbCDumefnvqmuz,crhUq"
|
||||||
|
|
||||||
|
#define CHAR_SET_NULL "\0"
|
||||||
|
#define CHAR_SET_A "aaAA"
|
||||||
|
#define CHAR_SET_AB "aB"
|
||||||
|
#define CHAR_SET_ABCDE "aBcde"
|
||||||
|
|
||||||
|
#define PAIR_SET_ABCD "aBcd"
|
||||||
|
#define PAIR_SET_A_NULL_BC "a\0Bc"
|
||||||
|
#define PAIR_SET_AB_DUPLICATE "aBaB"
|
||||||
|
#define PAIR_SET_LONG_PATTERN_AB "u0u1u2u3u4u5u6u7aB"
|
||||||
|
|
||||||
|
// Brace-enclosed initialisers for arrays of literal patterns (plus one null
// array pointer). The numeric suffixes mirror the length of each pattern.
// PATTERN_ARRAY_CONTAIN_EMPTY_0 used to be defined twice with the same value;
// the redundant second definition has been dropped.
#define PATTERN_ARRAY_CONTAIN_EMPTY_0 {""}
#define PATTERN_ARRAY_SINGLE_CHAR_PAT_1 {"a"}
#define PATTERN_ARRAY_SINGLE_PAT_5 {"aBcde"}
#define PATTERN_ARRAY_GENERAL_5_5 {"aBcde","fghij"}
#define PATTERN_ARRAY_GENERAL_5_DUPLICATE {"aBcde","aBcde"}
#define PATTERN_ARRAY_LONG_10_10 {"aBcdeoAbCD","muz,crhUqu"}
#define PATTERN_ARRAY_CONTAIN_NULLPTR_5_0 {"aBcde",nullptr}
#define PATTERN_ARRAY_WITH_NULL_5_5 {"a\0Bcd","aBcde"}
#define PATTERN_ARRAY_OVERLAP_5_8 {"aBcde","cdeoAbCD"}
#define PATTERN_ARRAY_NULLPTR ((char**)nullptr)
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#define EXPR_NOISE_LEN 32
|
||||||
|
#define EXPR_NOISE "zmeh vnMezr,xbzumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_0 "aBcdeoAbCDr,xbzumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_5 "zmeh aBcdeoAbCDumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_5_NULL "zmeh a\0Bcdr,xbzumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_5_15 "zmeh aBcdeoAbCDaBcdeoAbCD,crhUqu"
|
||||||
|
#define EXPR_NOISE_5_15_BAD_CASE "zmeh AbcdeoAbCDABcdeoAbCD,crhUqu"
|
||||||
|
#define EXPR_NOISE_MIX "zmeh fgcder,xbzumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_PAT2_5 "zmeh fghijr,xbzumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_DUO_5_15 "zmeh aBcdeoAbCDfghijvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_SHORT_ONLY_5 "zmeh aBcdeoAbHHumefnvqmuz,crhUqu"
|
||||||
|
#define EXPR_NOISE_5_AB "zmeh aBMezr,xbzumefnvqmuz,crhUqu"
|
||||||
|
|
||||||
|
#define EXPR_NOISE_A_END_31 "zmeh vnMezr,xbzumefnvqmuz,crhUqa"
|
||||||
|
#define EXPR_NOISE_AB_END_30 "zmeh vnMezr,xbzumefnvqmuz,crhUaB"
|
||||||
|
#define EXPR_NOISE_ABCDE_END_27 "zmeh vnMezr,xbzumefnvqmuz,caBcde"
|
||||||
|
#define EXPR_NOISE_ABCDEOABCD_END_22 "zmeh vnMezr,xbzumefnvqaBcdeoAbCD"
|
||||||
|
|
||||||
|
#define EXPR_UNIFORM_LEN 32
|
||||||
|
#define EXPR_UNIFORM "uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu"
|
||||||
|
#define EXPR_UNIFORM_1_A "uuuuuauuuuuuuuuuuuuuuuuuuuuuuuuu"
|
||||||
|
#define EXPR_UNIFORM_1_B "uuuuuBuuuuuuuuuuuuuuuuuuuuuuuuuu"
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#define BRACED_INIT_LIST(...) {__VA_ARGS__}
|
||||||
|
|
||||||
|
/*
 * Non-fatal check that a compilation succeeded. Requires locals named
 * `compile_ret` and `database` in the calling scope: the former must equal
 * HS_SUCCESS and the latter must not be nullptr. func_name is only streamed
 * into the failure message.
 */
#define EXPECT_COMPILE_SUCCESS(func_name) \
    EXPECT_EQ(compile_ret, HS_SUCCESS) \
        << "Fail to build the pattern in " << (func_name) << "\n"; \
    EXPECT_NE(database, nullptr) \
        << "Compilation returned nullptr database " << (func_name) << "\n";
|
||||||
|
|
||||||
|
/*
 * Non-fatal check that a compilation was rejected: the local `compile_ret`
 * must differ from HS_SUCCESS. func_name is only streamed into the failure
 * message.
 */
#define EXPECT_COMPILE_FAILURE(func_name) \
    EXPECT_NE(compile_ret, HS_SUCCESS) \
        << "Pattern built fine when error was expected in " << (func_name) \
        << "\n";
|
||||||
|
|
||||||
|
/*
 * Fatal variant of the compile-success check: aborts the current test unless
 * the local `compile_ret` equals HS_SUCCESS and the local `database` is not
 * nullptr. func_name is only streamed into the failure message.
 */
#define ASSERT_COMPILE_SUCCESS(func_name) \
    ASSERT_EQ(compile_ret, HS_SUCCESS) \
        << "Fail to build the pattern in " << (func_name) << "\n"; \
    ASSERT_NE(database, nullptr) \
        << "Compilation returned nullptr database " << (func_name) << "\n";
|
||||||
|
|
||||||
|
/*
 * Fatal variant of the compile-failure check: aborts the current test if the
 * local `compile_ret` equals HS_SUCCESS. func_name is only streamed into the
 * failure message.
 */
#define ASSERT_COMPILE_FAILURE(func_name) \
    ASSERT_NE(compile_ret, HS_SUCCESS) \
        << "Pattern built fine when error was expected in " << (func_name) \
        << "\n";
|
||||||
|
|
||||||
|
/*
 * Non-fatal checks on the outcome of a search. Requires locals named `ret`
 * and `context` (a context_t filled in by callback()) in the calling scope:
 *   - `ret` must be HS_SUCCESS;
 *   - every expected match must have been reported
 *     (number_matched == array_size);
 *   - no unexpected match may have been reported (number_wrong == 0).
 * search_func_name, pattern and buffer are only streamed into the failure
 * messages.
 *
 * The last check used to be EXPECT_LE(0, context.number_wrong), which can
 * never fail because number_wrong is unsigned; it now compares against zero so
 * that unexpected matches actually fail the test.
 */
#define EXPECT_SEARCH_SUCCESS(search_func_name, pattern, buffer) \
    EXPECT_EQ(HS_SUCCESS, ret) \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
        << (buffer) << "\"\n Search failed"; \
    EXPECT_EQ(context.array_size, context.number_matched) \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
        << (buffer) << "\"\n Missed some matches.\n"; \
    EXPECT_EQ(0u, context.number_wrong) \
        << (search_func_name) << ", pattern: " << (pattern) << ", buffer: \"" \
        << (buffer) << "\"\n Unexpected matches.\n";
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/*
 * State shared between a test body and callback(): the matches the test
 * expects to see, plus the tallies callback() updates for every reported match.
 */
typedef struct callback_context {
    /* expected start offset of each match */
    size_t *expected_start_array;
    /* expected end offset of each match */
    size_t *expected_end_array;
    /* expected pattern id of each match */
    size_t *expected_id_array;
    /* number of entries in each of the three arrays above */
    size_t array_size;
    /* reported matches that agree with one of the expected entries */
    size_t number_matched;
    /* reported matches that agree with none of the expected entries */
    size_t number_wrong;
} context_t;
|
||||||
|
|
||||||
|
static
|
||||||
|
int callback(unsigned int id, unsigned long long start,
|
||||||
|
unsigned long long end_offset, unsigned int flags,
|
||||||
|
void *raw_context) {
|
||||||
|
(void)flags;
|
||||||
|
context_t *context = reinterpret_cast<context_t*>(raw_context);
|
||||||
|
bool matched = false;
|
||||||
|
// Check if the match is expected
|
||||||
|
for (size_t i = 0; i < context->array_size; i++) {
|
||||||
|
if (end_offset == context->expected_end_array[i] &&
|
||||||
|
start == context->expected_start_array[i] &&
|
||||||
|
id == context->expected_id_array[i]) {
|
||||||
|
matched = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Tally the right counter whether the match was expected or not
|
||||||
|
if (matched) {
|
||||||
|
context->number_matched += 1;
|
||||||
|
// printf("match at index %llu\n", end_offset);
|
||||||
|
} else {
|
||||||
|
context->number_wrong += 1;
|
||||||
|
// printf("unplanned match at index %llu\n", end_offset);
|
||||||
|
}
|
||||||
|
return CB_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unordered_set<void *> alloced_mem;
|
||||||
|
|
||||||
|
/*
 * Allocation hook used by the leak tests: forwards to malloc() and records the
 * returned pointer in alloced_mem so that test_free() can recognise it later.
 */
static void *test_malloc(size_t size) {
    void *const block = malloc(size);
    alloced_mem.insert(block);
    return block;
}
|
||||||
|
|
||||||
|
static void test_free(void *ptr) {
|
||||||
|
size_t erased_count = alloced_mem.erase(ptr);
|
||||||
|
if(erased_count == 1) {
|
||||||
|
free(ptr);
|
||||||
|
} else {
|
||||||
|
printf("all currently allocated memory:\n");
|
||||||
|
for (const void *elem : alloced_mem)
|
||||||
|
printf("%p ", elem);
|
||||||
|
printf("\nTrying to free: %p\n", ptr);
|
||||||
|
FAIL();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Install test_malloc/test_free as the allocator hooks so that allocations are
// tracked in alloced_mem.
#define SETUP_MEM_LEAK_TEST() hs_set_allocator(test_malloc, test_free);
// Pass nullptr for both hooks (presumably restores the library defaults --
// confirm against the hs_set_allocator documentation).
#define UNSET_MEM_LEAK_TEST() hs_set_allocator(nullptr, nullptr);
// Expect that no tracked allocation is outstanding, then empty the tracking
// set so that a failure here does not affect later checks.
#define EXPECT_MEMORY_CLEAN() \
    EXPECT_TRUE(alloced_mem.empty()); \
    alloced_mem.clear();
|
||||||
|
|
||||||
|
#endif // COMMON_H
|
394
unit/direct_API/long_literal.cpp
Normal file
394
unit/direct_API/long_literal.cpp
Normal file
@ -0,0 +1,394 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "fdr/fdr_internal.h"
|
||||||
|
|
||||||
|
/*
 * Compiles in_pattern (in_pattern_len bytes) with
 * hs_compile_long_literal_search() and declares, in the calling scope, the
 * locals used by the other helper macros: pattern, pattern_len, database,
 * compile_ret, ret, buffer and context.
 */
#define COMPILE_LONG_LITERAL(in_pattern, in_pattern_len) \
    size_t pattern_len = (in_pattern_len); \
    const char *pattern = (in_pattern); \
    hs_long_literal_compiled_pattern_t *database = nullptr; \
    hs_error_t compile_ret = \
        hs_compile_long_literal_search(pattern, pattern_len, &database); \
    hs_error_t ret = 0; \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
    const char *buffer = nullptr; \
    (void)buffer; \
    context_t context = {}; \
    (void) context;
|
||||||
|
|
||||||
|
/*
 * Runs hs_long_literal_search() on in_buffer (in_buffer_len bytes) with
 * callback() as the match handler and stores the result in `ret`.
 *
 * in_expected_start_array is a parenthesised list of the in_expected_match
 * offsets at which a match is expected to start. The expected end offsets are
 * derived by adding pattern_len, and every expected id is 0. The locals
 * database, pattern_len, buffer, ret and context must already be declared
 * (see COMPILE_LONG_LITERAL).
 */
#define SEARCH_LONG_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
                            in_expected_start_array) \
    { \
        buffer = (in_buffer); \
        const size_t buffer_len = (in_buffer_len); \
        const size_t expected_match = (in_expected_match); \
        size_t expected_start_array[expected_match] = \
            BRACED_INIT_LIST in_expected_start_array; \
        size_t expected_end_array[expected_match] = \
            BRACED_INIT_LIST in_expected_start_array; \
        size_t expected_id_array[expected_match]; \
        for (size_t i = 0; i < expected_match; i++) { \
            expected_end_array[i] += pattern_len; \
            expected_id_array[i] = 0; \
        } \
        context.expected_start_array = expected_start_array; \
        context.expected_end_array = expected_end_array; \
        context.expected_id_array = expected_id_array; \
        context.array_size = expected_match; \
        context.number_matched = 0; \
        context.number_wrong = 0; \
        \
        ret = hs_long_literal_search(database, buffer, buffer_len, callback, \
                                     &context); \
    }
|
||||||
|
|
||||||
|
static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
|
||||||
|
"changing the threshold for short/long literal require changing "
|
||||||
|
"the tests to still test the threshold behavior");
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_long_literal_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Freeing a null compiled pattern must be harmless: the test passes as long as
// the call returns.
TEST(long_literal_free, nullptr) {
    hs_long_literal_compiled_pattern_t *no_pattern = nullptr;
    hs_free_long_literal_pattern(no_pattern);
}
|
||||||
|
|
||||||
|
// A block obtained through the tracked allocator and handed to
// hs_free_long_literal_pattern() must leave no tracked allocation behind.
TEST(long_literal_free, general) {
    SETUP_MEM_LEAK_TEST();

    // Raw storage of the right size, allocated through the tracking hook.
    combined_fdr_database *raw_storage = static_cast<combined_fdr_database *>(
        test_malloc(sizeof(combined_fdr_database)));
    hs_long_literal_compiled_pattern_t *database =
        reinterpret_cast<hs_long_literal_compiled_pattern_t *>(raw_storage);

    hs_free_long_literal_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_long_literal_search
|
||||||
|
<=8 char
|
||||||
|
general (>8 char)
|
||||||
|
valid pattern including null char
|
||||||
|
|
||||||
|
empty expression
|
||||||
|
nullptr expression
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// A 5-byte pattern (below HS_SHORT_PATTERN_THRESHOLD) must compile.
TEST(long_literal_compile, short) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // Check the result before releasing it: the check inspects `database`,
    // which must not be read once it has been freed. The label now names this
    // test instead of the "general" one it was copied from.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_short");
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A 10-byte pattern (above HS_SHORT_PATTERN_THRESHOLD) must compile.
TEST(long_literal_compile, general) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    // Check the result before releasing it: the check inspects `database`,
    // which must not be read once it has been freed.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_general");
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A pattern with an embedded NUL byte must compile when its length is given
// explicitly.
TEST(long_literal_compile, null_char) {
    COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5);
    // Check the result before releasing it: the check inspects `database`,
    // which must not be read once it has been freed.
    EXPECT_COMPILE_SUCCESS("test_compile_long_literal_null_char");
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Compiling a zero-length pattern is expected to terminate the process with a
// message matching "called with an empty pattern".
TEST(long_literal_compile, empty_pattern) {
    hs_long_literal_compiled_pattern_t *database = nullptr;
    EXPECT_DEATH(
        hs_compile_long_literal_search(PATTERN_0_CHAR, 0, &database),
        "called with an empty pattern");
}
|
||||||
|
|
||||||
|
// Compiling a null pattern pointer is expected to terminate the process with a
// message matching "called with nullptr".
TEST(long_literal_compile, nullptr_pattern) {
    hs_long_literal_compiled_pattern_t *database = nullptr;
    EXPECT_DEATH(hs_compile_long_literal_search(nullptr, 5, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// Compiling with a null output pointer is expected to terminate the process
// with a message matching "called with nullptr".
TEST(long_literal_compile, nullptr_database) {
    EXPECT_DEATH(hs_compile_long_literal_search(PATTERN_5_CHAR, 5, nullptr),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_long_literal_search
|
||||||
|
short pattern
|
||||||
|
positive match
|
||||||
|
negative match
|
||||||
|
general pattern
|
||||||
|
general pattern but the buffer only has the short part of it
|
||||||
|
extra long pattern (vectorized confirm)
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (cross over vector)
|
||||||
|
match at end
|
||||||
|
match past end (a few char ok, then end, so missing some chars)
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
single char pattern
|
||||||
|
general match
|
||||||
|
match at end
|
||||||
|
no match
|
||||||
|
buffer containing null char
|
||||||
|
pattern with null char
|
||||||
|
general pattern (no null char searched for)
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// A 5-byte pattern (below HS_SHORT_PATTERN_THRESHOLD) is found once, starting
// at offset 5 of the buffer.
TEST(long_literal_search, short_positive) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // The label now names this test; it used to be a copy of the "general"
    // label, which made a compile failure here point at the wrong test.
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_positive");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A 5-byte pattern (below HS_SHORT_PATTERN_THRESHOLD) must not be reported in
// a buffer that does not contain it.
TEST(long_literal_search, short_negative) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    // The label now names this test; it used to be a copy of the "general"
    // label, which made a compile failure here point at the wrong test.
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_negative");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The buffer holds the first 8 bytes of the 10-byte pattern followed by
// different bytes, so no match may be reported.
TEST(long_literal_search, short_but_negative_long) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_short_but_negative_long");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The 10-byte pattern sits at the very beginning of the buffer (offset 0).
TEST(long_literal_search, start) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_start");
    SEARCH_LONG_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The 10-byte pattern is found once at offset 5. The test runs with the
// tracking allocator installed, so it also checks that compiling, searching
// and freeing leave no tracked allocation behind.
TEST(long_literal_search, general) {
    SETUP_MEM_LEAK_TEST();
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_general");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// A 25-byte pattern is found once, starting at offset 5 of the 32-byte buffer.
TEST(long_literal_search, extra_long) {
    COMPILE_LONG_LITERAL(PATTERN_25_CHAR, 25);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_extra_long");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Two back-to-back occurrences at offsets 5 and 15; the second one spans
// bytes 15..24 and therefore straddles byte 16 of the buffer.
TEST(long_literal_search, cross_vector) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_cross_vector");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The 10-byte pattern starts at offset 22, so it ends exactly on the last byte
// of the 32-byte buffer.
TEST(long_literal_search, end) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_end");
    SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN, 1, (22));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Same buffer as the "end" test, but searched with a length 3 bytes shorter:
// the pattern is cut off by the end of the searched range, so no match may be
// reported.
TEST(long_literal_search, past_end) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_past_end");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE_ABCDEOABCD_END_22, EXPR_NOISE_LEN - 3, 0,
                        ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The buffer only contains the pattern with different letter case, so no match
// may be reported.
TEST(long_literal_search, bad_case) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_bad_case");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// One compiled pattern is reused for two consecutive searches over different
// buffers; each search is checked on its own.
TEST(long_literal_search, several_search) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_several_search");

    // First buffer: a single occurrence at offset 5.
    SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);

    // Second buffer: occurrences at offsets 5 and 15.
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);

    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A one-byte pattern ("a") occurs at offsets 5 and 15 of the buffer.
TEST(long_literal_search, single_char) {
    COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The searched length is one byte shorter than the buffer, which makes the "a"
// at offset 30 the last searched byte.
TEST(long_literal_search, single_char_end) {
    COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_end");
    SEARCH_LONG_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A one-byte pattern ("a") must not be reported in a buffer that has no "a".
TEST(long_literal_search, single_char_no_match) {
    COMPILE_LONG_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_single_char_no_match");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Both the pattern and the buffer contain a NUL byte; the pattern is expected
// at offset 5.
TEST(long_literal_search, null_char_buff_and_pattern) {
    COMPILE_LONG_LITERAL(PATTERN_5_WITH_NULL, 5);
    ASSERT_COMPILE_SUCCESS(
        "test_long_literal_search_null_char_buff_and_pattern");
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The buffer contains a NUL byte but the pattern does not; the pattern is not
// present, so no match may be reported.
TEST(long_literal_search, null_char_buff) {
    COMPILE_LONG_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_null_char_buff");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Searching a zero-length buffer must succeed and report nothing.
TEST(long_literal_search, empty_buff) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_empty_buff");
    // cppcheck-suppress unsignedLessThanZero
    SEARCH_LONG_LITERAL("", 0, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_long_literal_search", pattern, buffer);
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Searching with a null compiled pattern is expected to terminate the process
// with a message matching "called with nullptr database".
TEST(long_literal_search, nullptr_pattern) {
    const hs_long_literal_compiled_pattern_t *database = nullptr;
    context_t context;
    EXPECT_DEATH(
        {
            // Locals that SEARCH_LONG_LITERAL expects to find in scope.
            const char *buffer;
            hs_error_t ret;
            size_t pattern_len = 5;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_LONG_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Searching a null buffer is expected to terminate the process with a message
// matching "called with nullptr buffer".
TEST(long_literal_search, nullptr_buffer) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_buffer");
    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_LONG_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");
    // The death statement runs in a child process; the pattern compiled above
    // still belongs to this process and used to be leaked.
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Searching with a null match callback is expected to terminate the process
// with a message matching "called with nullptr callback".
TEST(long_literal_search, nullptr_callback) {
    COMPILE_LONG_LITERAL(PATTERN_10_CHAR, 10);
    ASSERT_COMPILE_SUCCESS("test_long_literal_search_nullptr_callback");

    // Same setup as SEARCH_LONG_LITERAL, written out by hand because the macro
    // always passes callback() as the handler.
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += pattern_len;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_long_literal_search(database, buffer, buffer_len, nullptr,
                                   &context);
        },
        "called with nullptr callback");

    // The death statement runs in a child process; the pattern compiled above
    // still belongs to this process and used to be leaked.
    hs_free_long_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#endif
|
36
unit/direct_API/main.cpp
Normal file
36
unit/direct_API/main.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
// Driver: run all the tests (defined in other source files in this directory)
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
testing::InitGoogleTest(&argc, argv);
|
||||||
|
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
515
unit/direct_API/multi_literal.cpp
Normal file
515
unit/direct_API/multi_literal.cpp
Normal file
@ -0,0 +1,515 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "fdr/fdr_internal.h"
|
||||||
|
|
||||||
|
/*
 * Compiles a set of literals with hs_compile_multi_literal_search() and
 * declares, in the calling scope, the locals used by the other helper macros:
 * pattern_count, pattern_len, pattern_storage, pattern, database, compile_ret,
 * ret, buffer and context.
 *
 * in_pattern is a braced initialiser for an array of C strings and
 * in_pattern_len a parenthesised list holding the length of each of the
 * in_pattern_count patterns.
 */
#define COMPILE_MULTI_LITERAL(in_pattern, in_pattern_count, in_pattern_len) \
    const size_t pattern_count = (in_pattern_count); \
    size_t pattern_len[pattern_count] = BRACED_INIT_LIST in_pattern_len; \
    const char *pattern_storage[] = in_pattern; \
    const char **pattern = pattern_storage; \
    hs_multi_literal_compiled_pattern_t *database = nullptr; \
    hs_error_t compile_ret = hs_compile_multi_literal_search( \
        pattern, pattern_count, pattern_len, &database); \
    hs_error_t ret = 0; \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */ \
    const char *buffer = nullptr; \
    (void)buffer; \
    context_t context = {}; \
    (void) context;
|
||||||
|
|
||||||
|
/*
 * Runs hs_multi_literal_search() on in_buffer (in_buffer_len bytes) with
 * callback() as the match handler and stores the result in `ret`.
 *
 * in_expected_start_array and in_expected_id_array are parenthesised lists of
 * in_expected_match entries: the offset at which each match is expected to
 * start and the id expected for it. The expected end offsets are derived by
 * adding pattern_len[0], i.e. every expected match is assumed to be as long as
 * pattern 0. The locals database, pattern_len, buffer, ret and context must
 * already be declared (see COMPILE_MULTI_LITERAL).
 */
#define SEARCH_MULTI_LITERAL(in_buffer, in_buffer_len, in_expected_match, \
                             in_expected_start_array, in_expected_id_array) \
    { \
        buffer = (in_buffer); \
        const size_t buffer_len = (in_buffer_len); \
        const size_t expected_match = (in_expected_match); \
        size_t expected_start_array[expected_match] = \
            BRACED_INIT_LIST in_expected_start_array; \
        size_t expected_end_array[expected_match] = \
            BRACED_INIT_LIST in_expected_start_array; \
        size_t expected_id_array[expected_match] = \
            BRACED_INIT_LIST in_expected_id_array; \
        for (size_t i = 0; i < expected_match; i++) { \
            expected_end_array[i] += pattern_len[0]; \
        } \
        context.expected_start_array = expected_start_array; \
        context.expected_end_array = expected_end_array; \
        context.expected_id_array = expected_id_array; \
        context.array_size = expected_match; \
        context.number_matched = 0; \
        context.number_wrong = 0; \
        \
        ret = hs_multi_literal_search(database, buffer, buffer_len, callback, \
                                      &context); \
    }
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_multi_literal_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Passing a null database to hs_free_multi_literal_pattern() must not crash.
TEST(multi_literal_free, nullptr) {
    hs_multi_literal_compiled_pattern_t *null_database = nullptr;
    hs_free_multi_literal_pattern(null_database);
}
|
||||||
|
|
||||||
|
// Hands a block of sizeof(combined_fdr_database) bytes obtained from
// test_malloc() to hs_free_multi_literal_pattern(), then runs
// EXPECT_MEMORY_CLEAN() (presumably checking that the block was released).
TEST(multi_literal_free, general) {
    SETUP_MEM_LEAK_TEST();

    // The block is never initialised: only the free path is exercised.
    combined_fdr_database *raw_database =
        reinterpret_cast<combined_fdr_database *>(
            test_malloc(sizeof(combined_fdr_database)));
    hs_multi_literal_compiled_pattern_t *database =
        reinterpret_cast<hs_multi_literal_compiled_pattern_t *>(raw_database);

    hs_free_multi_literal_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_multi_literal_search
|
||||||
|
single expression
|
||||||
|
single char expression
|
||||||
|
general (several expressions)
|
||||||
|
pattern duplicate
|
||||||
|
valid pattern including null char
|
||||||
|
overlapping patterns (eg, "abba" and "bb")
|
||||||
|
|
||||||
|
no expressions
|
||||||
|
empty expression
|
||||||
|
nullptr expression array
|
||||||
|
one of the expressions is nullptr
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiling a pattern array holding one 5-byte pattern must succeed.
TEST(multi_literal_compile, single_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_PAT_5, 1, (5));
    EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_single_pattern");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling a pattern array holding one 1-byte pattern must succeed.
TEST(multi_literal_compile, single_pattern_single_char) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
    EXPECT_COMPILE_SUCCESS(
        "test_compile_multi_literal_single_pattern_single_char");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling two 5-byte patterns together must succeed.
TEST(multi_literal_compile, general) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_general");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling the "duplicate" pattern array (two 5-byte entries) must succeed.
TEST(multi_literal_compile, duplicate) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5));
    EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_duplicate");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling the "with null" pattern array (two 5-byte entries) must succeed;
// lengths are passed explicitly rather than derived from a terminator.
TEST(multi_literal_compile, with_null_char) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5));
    EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_with_null_char");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling the "overlap" pattern array (a 5-byte and an 8-byte pattern) must
// succeed.
TEST(multi_literal_compile, overlapping_patterns) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8));
    EXPECT_COMPILE_SUCCESS("test_compile_multi_literal_overlapping_patterns");

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// A pattern count of 0 must abort with "called with no pattern".
TEST(multi_literal_compile, no_expression) {
    const size_t pattern_count = 0;
    const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5;
    const char **pattern = pattern_storage;
    hs_multi_literal_compiled_pattern_t *database = nullptr;

    // NOTE: pattern_len is filled element by element because a braced
    // initializer such as {5, 5} would have its comma split the macro
    // argument.
    EXPECT_DEATH(
        {
            size_t pattern_len[2];
            pattern_len[0] = 5;
            pattern_len[1] = 5;
            hs_compile_multi_literal_search(pattern, pattern_count, pattern_len,
                                            &database);
        },
        "called with no pattern");
}
|
||||||
|
|
||||||
|
// A pattern whose declared length is 0 must abort with
// "called with an empty pattern".
TEST(multi_literal_compile, empty_expression) {
    const size_t pattern_count = 1;
    const size_t pattern_len[pattern_count] = {0};
    const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_EMPTY_0;
    const char **pattern = pattern_storage;
    hs_multi_literal_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
                                                 pattern_len, &database),
                 "called with an empty pattern");
}
|
||||||
|
|
||||||
|
// A null pattern array must abort with "called with nullptr".
TEST(multi_literal_compile, nullptr_pattern_array) {
    const size_t pattern_count = 1;
    const size_t pattern_len[pattern_count] = {5};
    const char **pattern = nullptr;
    hs_multi_literal_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
                                                 pattern_len, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// A pattern array built from PATTERN_ARRAY_CONTAIN_NULLPTR_5_0 must abort
// with "called with an empty pattern".
TEST(multi_literal_compile, nullptr_pattern_in_array) {
    const size_t pattern_count = 2;
    const size_t pattern_len[pattern_count] = {5, 5};
    const char *pattern_storage[] = PATTERN_ARRAY_CONTAIN_NULLPTR_5_0;
    const char **pattern = pattern_storage;
    hs_multi_literal_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
                                                 pattern_len, &database),
                 "called with an empty pattern");
}
|
||||||
|
|
||||||
|
// A null output pointer must abort with "called with nullptr".
TEST(multi_literal_compile, nullptr_database) {
    const size_t pattern_count = 2;
    const size_t pattern_len[pattern_count] = {5, 5};
    const char *pattern_storage[] = PATTERN_ARRAY_GENERAL_5_5;
    const char **pattern = pattern_storage;

    EXPECT_DEATH(hs_compile_multi_literal_search(pattern, pattern_count,
                                                 pattern_len, nullptr),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_multi_literal_search
|
||||||
|
general pattern
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (cross over vector)
|
||||||
|
match at end
|
||||||
|
match past end (a few char ok, then end, so missing some chars)
|
||||||
|
match long patterns
|
||||||
|
long pattern but the buffer only has the short part of it
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
match first pattern
|
||||||
|
match last pattern
|
||||||
|
match several patterns in the same search
|
||||||
|
match overlapping patterns
|
||||||
|
pattern mix (start with pattern A, finish with pattern B. Expect no
|
||||||
|
match)
|
||||||
|
match a pattern duplicate
|
||||||
|
single char pattern
|
||||||
|
general match
|
||||||
|
match at end
|
||||||
|
no match
|
||||||
|
buffer containing null char
|
||||||
|
pattern with null char
|
||||||
|
general pattern (no null char searched for)
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// One match of pattern id 0 is expected at index 0 of the buffer.
TEST(multi_literal_search, start) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_start");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// One match of pattern id 0 is expected at index 5; the whole
// compile/search/free cycle runs between the memory-leak test macros.
TEST(multi_literal_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_general");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// Two matches of pattern id 0 are expected, at indices 5 and 15.
TEST(multi_literal_search, cross_vector) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_cross_vector");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// One match of pattern id 0 is expected at index 27.
TEST(multi_literal_search, end) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_end");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Same buffer as the "end" test but with the length shortened by 3: no match
// is expected.
TEST(multi_literal_search, past_end) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_past_end");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// With two 10-byte patterns, pattern id 0 is expected at index 5 and pattern
// id 1 at index 22.
TEST(multi_literal_search, long_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_long_pattern");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 2, (5, 22), (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// With two 10-byte patterns, only pattern id 1 is expected, at index 22.
TEST(multi_literal_search, short_but_negative_long) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_LONG_10_10, 2, (10, 10));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_short_but_negative_long");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_SHORT_ONLY_5, EXPR_NOISE_LEN, 1, (22), (1));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// No match is expected in the "bad case" buffer.
TEST(multi_literal_search, bad_case) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_bad_case");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The same compiled database is searched twice, over two different buffers.
TEST(multi_literal_search, several_search) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_several_search");

    // First search: one match of pattern id 0 at index 5.
    SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    // Second search: matches of pattern id 0 at indices 5 and 15.
    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The first pattern (id 0) is expected to match once, at index 5.
TEST(multi_literal_search, first_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_first_pattern");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The last pattern (id 1) is expected to match once, at index 5.
TEST(multi_literal_search, last_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_last_pattern");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_PAT2_5, EXPR_NOISE_LEN, 1, (5), (1));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[1], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Both patterns are expected in one search: id 0 at index 5 and id 1 at
// index 15.
TEST(multi_literal_search, multi_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_multi_pattern");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_DUO_5_15, EXPR_NOISE_LEN, 2, (5, 15),
                         (0, 1));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Overlapping patterns of different lengths (5 and 8): pattern id 0 is
// expected at index 5 and pattern id 1 at index 7.
//
// SEARCH_MULTI_LITERAL is not used here because it derives every expected end
// from pattern_len[0]; this test needs the length of each pattern.
TEST(multi_literal_search, overlap) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_OVERLAP_5_8, 2, (5, 8));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_overlap");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 2;
    size_t expected_start_array[expected_match] = {5, 7};
    size_t expected_end_array[expected_match] = {5, 7};
    size_t expected_id_array[expected_match] = {0, 1};
    // Expected end = expected start + length of the pattern at that index.
    for (size_t idx = 0; idx < expected_match; idx++) {
        expected_end_array[idx] += pattern_len[idx];
    }

    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.expected_id_array = expected_id_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    ret = hs_multi_literal_search(database, buffer, buffer_len,
                                  callback, &context);

    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);
    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// No match is expected in the "mix" buffer.
TEST(multi_literal_search, pattern_mix) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_pattern_mix");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL(EXPR_NOISE_MIX, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// With the "duplicate" pattern array, a single match with id 0 is expected at
// index 5.
TEST(multi_literal_search, duplicate) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_DUPLICATE, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_duplicate");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A 1-byte pattern is expected to match twice, at indices 5 and 15.
TEST(multi_literal_search, single_char) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15), (0, 0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A 1-byte pattern is expected to match once, at index 30, when the buffer
// length is EXPR_NOISE_LEN - 1.
TEST(multi_literal_search, single_char_end) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_end");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30),
                         (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// A 1-byte pattern is expected not to match anywhere in EXPR_NOISE.
TEST(multi_literal_search, single_char_no_match) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_SINGLE_CHAR_PAT_1, 1, (1));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_single_char_no_match");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// With the "with null" pattern array, one match of pattern id 0 is expected
// at index 5 of the EXPR_NOISE_5_NULL buffer.
TEST(multi_literal_search, null_char_buff_and_pattern) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_WITH_NULL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS(
        "test_multi_literal_search_null_char_buff_and_pattern");

    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5), (0));
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// With the general pattern array, no match is expected in the
// EXPR_NOISE_5_NULL buffer.
TEST(multi_literal_search, null_char_buff) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_null_char_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Searching a zero-length buffer is expected to report no match.
TEST(multi_literal_search, empty_buff) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_MULTI_LITERAL("", 0, 0, (), ());
    EXPECT_SEARCH_SUCCESS("hs_multi_literal_search", pattern[0], buffer);

    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Searching with a null database must abort with
// "called with nullptr database".
//
// `buffer`, `ret` and `pattern_len` are declared inside the death-test
// statement only because SEARCH_MULTI_LITERAL refers to them.
TEST(multi_literal_search, nullptr_pattern) {
    const hs_multi_literal_compiled_pattern_t *database = nullptr;
    context_t context;

    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            size_t pattern_len[2];
            pattern_len[0] = 5;
            pattern_len[1] = 5;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_MULTI_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Searching a null buffer must abort with "called with nullptr buffer".
TEST(multi_literal_search, nullptr_buffer) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_MULTI_LITERAL(nullptr, EXPR_NOISE_LEN, 0, (), ());
        },
        "called with nullptr buffer");

    // The death-test statement runs in a child process, so the database
    // compiled above is still owned by this process and must be released.
    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Searching with a null callback must abort with
// "called with nullptr callback".
TEST(multi_literal_search, nullptr_callback) {
    COMPILE_MULTI_LITERAL(PATTERN_ARRAY_GENERAL_5_5, 2, (5, 5));
    ASSERT_COMPILE_SUCCESS("test_multi_literal_search_nullptr_callback");

    // The context is filled in by hand because SEARCH_MULTI_LITERAL always
    // passes `callback` to the search.
    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += pattern_len[0];
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_multi_literal_search(database, buffer, buffer_len, nullptr,
                                    &context);
        },
        "called with nullptr callback");

    // The death-test statement runs in a child process, so the database
    // compiled above is still owned by this process and must be released.
    hs_free_multi_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#endif
|
377
unit/direct_API/short_literal.cpp
Normal file
377
unit/direct_API/short_literal.cpp
Normal file
@ -0,0 +1,377 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "direct_API/common.h"
|
||||||
|
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
|
|
||||||
|
// Declares the locals shared by the short-literal tests and compiles
// in_pattern (in_pattern_len bytes) with hs_compile_short_literal_search().
//
// After expansion the enclosing scope holds: `pattern`, `pattern_len`,
// `database` (the compile output), `compile_ret` (the compile status), and
// `ret`, `buffer` and `context`, which SEARCH_SHORT_LITERAL fills in later.
// The (void) casts silence "unused" diagnostics in tests that never search.
#define COMPILE_SHORT_LITERAL(in_pattern, in_pattern_len)                      \
    size_t pattern_len = (in_pattern_len);                                     \
    const char *pattern = (in_pattern);                                        \
    hs_short_literal_compiled_pattern_t *database = nullptr;                   \
    hs_error_t compile_ret =                                                   \
        hs_compile_short_literal_search(pattern, pattern_len, &database);      \
    hs_error_t ret = 0;                                                        \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */     \
    const char *buffer = nullptr;                                              \
    (void)buffer;                                                              \
    context_t context = {};                                                    \
    (void) context;
|
||||||
|
|
||||||
|
// Runs a single hs_short_literal_search() call and primes `context` with the
// matches the callback is expected to see.
//
// in_expected_start_array is a parenthesised list, e.g. (5, 15), holding the
// index of the start of each expected match. The expected end index is the
// start index plus pattern_len, and every expected id is 0.
//
// `buffer`, `pattern_len`, `context`, `database` and `ret` must already be
// declared in the enclosing scope (COMPILE_SHORT_LITERAL declares them); the
// arrays created here only live for the duration of the braced block.
#define SEARCH_SHORT_LITERAL(in_buffer, in_buffer_len, in_expected_match,      \
                             in_expected_start_array)                          \
    {                                                                          \
        buffer = (in_buffer);                                                  \
        const size_t buffer_len = (in_buffer_len);                             \
        const size_t expected_match = (in_expected_match);                     \
        size_t expected_start_array[expected_match] =                          \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_end_array[expected_match] =                            \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_id_array[expected_match];                              \
        for (size_t i = 0; i < expected_match; i++) {                          \
            expected_end_array[i] += pattern_len;                              \
            expected_id_array[i] = 0;                                          \
        }                                                                      \
        context.expected_start_array = expected_start_array;                   \
        context.expected_end_array = expected_end_array;                       \
        context.expected_id_array = expected_id_array;                         \
        context.array_size = expected_match;                                   \
        context.number_matched = 0;                                            \
        context.number_wrong = 0;                                              \
                                                                               \
        ret = hs_short_literal_search(database, buffer, buffer_len, callback,  \
                                      &context);                               \
    }
|
||||||
|
|
||||||
|
// The max_length (8 bytes) and too_long (10 bytes) compile tests below are
// written around a threshold of 8; fail the build if the threshold moves so
// the tests get revisited.
static_assert(HS_SHORT_PATTERN_THRESHOLD == 8,
              "changing the threshold for short/long literal require changing "
              "the tests to still test the threshold behavior");
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_short_literal_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Passing a null database to hs_free_short_literal_pattern() must not crash.
TEST(short_literal_free, nullptr) {
    hs_short_literal_compiled_pattern_t *null_database = nullptr;
    hs_free_short_literal_pattern(null_database);
}
|
||||||
|
|
||||||
|
// Hands a block of sizeof(noodTable) bytes obtained from test_malloc() to
// hs_free_short_literal_pattern(), then runs EXPECT_MEMORY_CLEAN()
// (presumably checking that the block was released).
TEST(short_literal_free, general) {
    SETUP_MEM_LEAK_TEST();

    // The block is never initialised: only the free path is exercised.
    noodTable *raw_database =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    hs_short_literal_compiled_pattern_t *database =
        reinterpret_cast<hs_short_literal_compiled_pattern_t *>(raw_database);

    hs_free_short_literal_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_short_literal_search
|
||||||
|
single char
|
||||||
|
general
|
||||||
|
8 char
|
||||||
|
>8 char
|
||||||
|
valid pattern including null char
|
||||||
|
empty expression
|
||||||
|
nullptr expression
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiling a 1-byte pattern must succeed.
TEST(short_literal_compile, single_char) {
    COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
    EXPECT_COMPILE_SUCCESS("test_compile_short_literal_single_char");

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling a 5-byte pattern must succeed.
TEST(short_literal_compile, general) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    EXPECT_COMPILE_SUCCESS("test_compile_short_literal_general");

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling an 8-byte pattern (the asserted HS_SHORT_PATTERN_THRESHOLD) must
// succeed.
TEST(short_literal_compile, max_length) {
    COMPILE_SHORT_LITERAL(PATTERN_8_CHAR, 8);
    EXPECT_COMPILE_SUCCESS("test_compile_short_literal_max_len");

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling a 10-byte pattern must be rejected. The free call is still
// issued on whatever the failed compile left in `database`.
TEST(short_literal_compile, too_long) {
    COMPILE_SHORT_LITERAL(PATTERN_10_CHAR, 10);
    EXPECT_COMPILE_FAILURE("test_compile_short_literal_too_long");

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling the 5-byte "with null" pattern must succeed; the length is passed
// explicitly rather than derived from a terminator.
TEST(short_literal_compile, null_char) {
    COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5);
    EXPECT_COMPILE_SUCCESS("test_compile_short_literal_null_char");

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// A pattern length of 0 must abort with "called with an empty pattern".
TEST(short_literal_compile, empty_pattern) {
    hs_short_literal_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(
        hs_compile_short_literal_search(PATTERN_0_CHAR, 0, &database),
        "called with an empty pattern");
}
|
||||||
|
|
||||||
|
// A null pattern pointer must abort with "called with nullptr".
TEST(short_literal_compile, nullptr_pattern) {
    hs_short_literal_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_short_literal_search(nullptr, 5, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// A null output pointer must abort with "called with nullptr".
TEST(short_literal_compile, nullptr_database) {
    EXPECT_DEATH(hs_compile_short_literal_search(PATTERN_5_CHAR, 5, nullptr),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_short_literal_search
|
||||||
|
general pattern
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (noodle cross over vector)
|
||||||
|
match at end
|
||||||
|
match the full pattern, not just the first pair
|
||||||
|
match past end (2 char ok, then end, so missing some chars)
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
single char pattern
|
||||||
|
general match
|
||||||
|
match at end
|
||||||
|
no match
|
||||||
|
buffer containing null char
|
||||||
|
pattern with null char
|
||||||
|
general pattern
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// One match is expected at index 0 of the buffer.
TEST(short_literal_search, start) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_start");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// One match is expected at index 5; the whole compile/search/free cycle runs
// between the memory-leak test macros.
TEST(short_literal_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_general");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// Two matches are expected, at indices 5 and 15.
TEST(short_literal_search, cross_vector) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_cross_vector");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// One match is expected at index 27.
TEST(short_literal_search, end) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_end");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN, 1, (27));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Same buffer as the "end" test but with the length shortened by 3: no match
// is expected.
TEST(short_literal_search, past_end) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_past_end");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL(EXPR_NOISE_ABCDE_END_27, EXPR_NOISE_LEN - 3, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// No match is expected in the EXPR_NOISE_5_AB buffer.
TEST(short_literal_search, short_no_match) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_short_no_match");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_AB, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// No match is expected in the "bad case" buffer.
TEST(short_literal_search, bad_case) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_bad_case");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// The same compiled database is searched twice, over two different buffers.
TEST(short_literal_search, several_search) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_several_search");

    // First search: one match at index 5.
    SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    // Second search: matches at indices 5 and 15.
    // cppcheck-suppress redundantAssignment
    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Short-literal API used with a pattern of length 1: matches are expected at
// offsets 5 and 15 of the fixture.
TEST(short_literal_search, single_char) {
    COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Length-1 pattern with the searched length reduced by one byte: a single
// match is expected at offset 30 (presumably the last byte still inside the
// searched range -- depends on EXPR_NOISE_LEN).
TEST(short_literal_search, single_char_end) {
    COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_end");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 1, (30));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Length-1 pattern searched in the plain noise fixture: no match expected.
TEST(short_literal_search, single_char_no_match) {
    COMPILE_SHORT_LITERAL(PATTERN_1_CHAR, 1);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_single_char_no_match");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL(EXPR_NOISE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Both the pattern and the buffer contain a NUL byte; the explicit length 5
// is passed to the compile macro and one match is expected at offset 5.
TEST(short_literal_search, null_char_buff_and_pattern) {
    COMPILE_SHORT_LITERAL(PATTERN_5_WITH_NULL, 5);
    ASSERT_COMPILE_SUCCESS(
        "test_short_literal_search_null_char_buff_and_pattern");

    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Regular 5-byte pattern searched in the buffer that contains a NUL byte:
// no match is expected.
TEST(short_literal_search, null_char_buff) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_null_char_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Zero-length buffer: the search is expected to succeed and report no match.
TEST(short_literal_search, empty_buff) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SHORT_LITERAL("", 0, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_short_literal_search", pattern, buffer);

    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: searching with a null compiled pattern must abort with the
// "called with nullptr database" message. Nothing is compiled here, so the
// locals that SEARCH_SHORT_LITERAL refers to are declared by hand.
TEST(short_literal_search, nullptr_pattern) {
    const hs_short_literal_compiled_pattern_t *database = nullptr;
    context_t context;

    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            size_t pattern_len = 5;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SHORT_LITERAL(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Death test: searching a null buffer must abort with the
// "called with nullptr buffer" message.
TEST(short_literal_search, nullptr_buffer) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SHORT_LITERAL(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
// Death test: passing a null match callback must abort with the
// "called with nullptr callback" message. The search macro cannot be used
// because it always passes `callback`, so the context is set up by hand.
TEST(short_literal_search, nullptr_callback) {
    COMPILE_SHORT_LITERAL(PATTERN_5_CHAR, 5);
    ASSERT_COMPILE_SUCCESS("test_short_literal_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    // End offset = start offset + pattern length.
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += pattern_len;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_short_literal_search(database, buffer, buffer_len, nullptr,
                                    &context);
        },
        "called with nullptr callback");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_short_literal_pattern(database);
}
|
||||||
|
|
||||||
|
#endif
|
293
unit/direct_API/single_char.cpp
Normal file
293
unit/direct_API/single_char.cpp
Normal file
@ -0,0 +1,293 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
|
|
||||||
|
// Test prologue: compiles the first byte of `in_pattern` into `database` and
// declares the locals (`pattern`, `compile_ret`, `ret`, `buffer`, `context`)
// that the rest of a test body refers to. `compile_ret` is presumably checked
// by the *_COMPILE_SUCCESS macros (defined outside this file).
// Deliberately not wrapped in a block: the declarations must stay visible to
// the statements that follow the macro invocation.
#define COMPILE_SINGLE_CHAR(in_pattern)                                        \
    hs_single_char_compiled_pattern_t *database = nullptr;                     \
    const char pattern = *(in_pattern);                                        \
    hs_error_t compile_ret = hs_compile_single_char_search(pattern, &database);\
    const char *buffer = nullptr;                                              \
    (void)buffer;                                                              \
    context_t context = {};                                                    \
    (void) context;                                                            \
    hs_error_t ret = 0;                                                        \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */
|
||||||
|
|
||||||
|
// Runs hs_single_char_search() over `in_buffer` and stores the status in the
// enclosing `ret`. `in_expected_start_array` is a parenthesised list holding
// the start index of every expected match; the expected end index is derived
// as start + 1 and every expected id is 0. The expectations are published
// through the enclosing `context` before the search is started.
// NOTE(review): the expectation arrays live only inside the macro's block, so
// `context` keeps pointers to them after the block ends -- they must not be
// dereferenced once the macro has finished.
#define SEARCH_SINGLE_CHAR(in_buffer, in_buffer_len, in_expected_match,        \
                           in_expected_start_array)                            \
    {                                                                          \
        const size_t expected_match = (in_expected_match);                     \
        const size_t buffer_len = (in_buffer_len);                             \
        buffer = (in_buffer);                                                  \
        size_t expected_id_array[expected_match];                              \
        size_t expected_end_array[expected_match] =                            \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_start_array[expected_match] =                          \
            BRACED_INIT_LIST in_expected_start_array;                          \
        for (size_t i = 0; i < expected_match; i++) {                          \
            expected_id_array[i] = 0;                                          \
            expected_end_array[i] += 1;                                        \
        }                                                                      \
        context.number_wrong = 0;                                              \
        context.number_matched = 0;                                            \
        context.array_size = expected_match;                                   \
        context.expected_id_array = expected_id_array;                         \
        context.expected_end_array = expected_end_array;                       \
        context.expected_start_array = expected_start_array;                   \
                                                                               \
        ret = hs_single_char_search(database, buffer, buffer_len, callback,    \
                                    &context);                                 \
    }
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_single_char_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Freeing a null compiled pattern must be accepted without crashing.
TEST(single_char_free, nullptr) {
    hs_single_char_compiled_pattern_t *database = nullptr;

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Allocates a truffle_storage-sized buffer through the test allocator, hands
// it to the free function disguised as a compiled pattern and checks that no
// memory is left allocated afterwards.
TEST(single_char_free, general) {
    SETUP_MEM_LEAK_TEST();

    truffle_storage *raw_storage = reinterpret_cast<truffle_storage *>(
        test_malloc(sizeof(truffle_storage)));
    hs_single_char_compiled_pattern_t *database =
        reinterpret_cast<hs_single_char_compiled_pattern_t*>(raw_storage);

    hs_free_single_char_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_single_char_search
|
||||||
|
general (1 char)
|
||||||
|
null char pattern
|
||||||
|
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiling an ordinary single character must succeed.
TEST(single_char_compile, general) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR)
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_general")

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling the NUL character as the pattern must succeed. The original test
// compiled PATTERN_1_CHAR, which duplicated the "general" case and never
// exercised the null-char pattern it is named after.
TEST(single_char_compile, null_char) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL)
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_null_char")

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: a null output pointer must abort with a message containing
// "called with nullptr".
TEST(single_char_compile, nullptr_database) {
    EXPECT_DEATH(hs_compile_single_char_search(*PATTERN_1_CHAR, nullptr),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_single_char_search
|
||||||
|
general pattern
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match vector end
|
||||||
|
match at buffer end
|
||||||
|
match past end
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
buffer containing null char
|
||||||
|
null char pattern
|
||||||
|
general pattern
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// One match expected at the very first byte of the buffer (offset 0).
TEST(single_char_search, start) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_start");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Full compile / search / free cycle with one match expected at offset 5,
// run under the leak checker so that the cycle is verified to release all
// memory it allocated.
TEST(single_char_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_general");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// Two matches expected, at offsets 5 and 15; the one at 15 is the
// "match vector end" case of the test plan (presumably the last lane of a
// 16-byte vector).
TEST(single_char_search, end_vector) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_end_vector");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// One match expected at offset 31, the "match at buffer end" case of the
// test plan.
TEST(single_char_search, end) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_end");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN, 1, (31));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Same fixture as the "end" test, but the searched length is one byte
// shorter, which leaves the character at offset 31 outside the searched
// range: no match expected.
TEST(single_char_search, past_end) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_past_end");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR(EXPR_NOISE_A_END_31, EXPR_NOISE_LEN - 1, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// The search must be case sensitive: the "bad case" fixture is expected to
// produce zero matches.
TEST(single_char_search, bad_case) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_bad_case");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Reuses one compiled pattern for two consecutive searches over different
// buffers: first a single match at offset 5, then matches at 5 and 15.
TEST(single_char_search, several_search) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_several_search");

    // First search.
    SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    // Second search with the same database.
    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// The NUL character itself is the pattern; in the buffer that embeds a NUL
// byte one match is expected at offset 6.
TEST(single_char_search, null_char_buff_and_pattern) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR_NULL);
    ASSERT_COMPILE_SUCCESS(
        "test_single_char_search_null_char_buff_and_pattern");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (6));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Regular single-character pattern searched in the buffer that embeds a NUL
// byte: one match is still expected at offset 5.
TEST(single_char_search, null_char_buff) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_null_char_buff");

    SEARCH_SINGLE_CHAR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Zero-length buffer: the search is expected to succeed and report no match.
TEST(single_char_search, empty_buff) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR("", 0, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_search", pattern, buffer);

    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: searching with a null compiled pattern must abort with the
// "called with nullptr database" message. Nothing is compiled here, so the
// locals that SEARCH_SINGLE_CHAR refers to are declared by hand.
TEST(single_char_search, nullptr_pattern) {
    const hs_single_char_compiled_pattern_t *database = nullptr;
    context_t context;

    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Death test: searching a null buffer must abort with the
// "called with nullptr buffer" message.
TEST(single_char_search, nullptr_buffer) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
// Death test: passing a null match callback must abort with the
// "called with nullptr callback" message. The search macro cannot be used
// because it always passes `callback`, so the context is set up by hand.
TEST(single_char_search, nullptr_callback) {
    COMPILE_SINGLE_CHAR(PATTERN_1_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    // End offset = start offset + 1 (single-character match).
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += 1;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_single_char_search(database, buffer, buffer_len, nullptr,
                                  &context);
        },
        "called with nullptr callback");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_single_char_pattern(database);
}
|
||||||
|
|
||||||
|
#endif
|
303
unit/direct_API/single_char_pair.cpp
Normal file
303
unit/direct_API/single_char_pair.cpp
Normal file
@ -0,0 +1,303 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024-2025, Arm ltd
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "hwlm/noodle_internal.h"
|
||||||
|
|
||||||
|
// Test prologue: compiles `in_pattern` into `database` and declares the
// locals (`pattern`, `compile_ret`, `ret`, `buffer`, `context`) that the rest
// of a test body refers to. `compile_ret` is presumably checked by the
// *_COMPILE_SUCCESS macros (defined outside this file).
// Deliberately not wrapped in a block: the declarations must stay visible to
// the statements that follow the macro invocation.
#define COMPILE_SINGLE_CHAR_PAIR(in_pattern)                                   \
    hs_single_char_pair_compiled_pattern_t *database = nullptr;                \
    const char *pattern = (in_pattern);                                        \
    hs_error_t compile_ret =                                                   \
        hs_compile_single_char_pair_search(pattern, &database);                \
    const char *buffer = nullptr;                                              \
    (void)buffer;                                                              \
    context_t context = {};                                                    \
    (void) context;                                                            \
    hs_error_t ret = 0;                                                        \
    (void)ret; /* suppress a cppcheck warning when SEARCH is not called */
|
||||||
|
|
||||||
|
// Runs hs_single_char_pair_search() over `in_buffer` and stores the status in
// the enclosing `ret`. `in_expected_start_array` is a parenthesised list
// holding the start index of every expected match; the expected end index is
// derived as start + 2 and every expected id is 0. The expectations are
// published through the enclosing `context` before the search is started.
// NOTE(review): the expectation arrays live only inside the macro's block, so
// `context` keeps pointers to them after the block ends -- they must not be
// dereferenced once the macro has finished.
#define SEARCH_SINGLE_CHAR_PAIR(in_buffer, in_buffer_len, in_expected_match,   \
                                in_expected_start_array)                       \
    {                                                                          \
        const size_t expected_match = (in_expected_match);                     \
        const size_t buffer_len = (in_buffer_len);                             \
        buffer = (in_buffer);                                                  \
        size_t expected_id_array[expected_match];                              \
        size_t expected_end_array[expected_match] =                            \
            BRACED_INIT_LIST in_expected_start_array;                          \
        size_t expected_start_array[expected_match] =                          \
            BRACED_INIT_LIST in_expected_start_array;                          \
        for (size_t i = 0; i < expected_match; i++) {                          \
            expected_id_array[i] = 0;                                          \
            expected_end_array[i] += 2;                                        \
        }                                                                      \
        context.number_wrong = 0;                                              \
        context.number_matched = 0;                                            \
        context.array_size = expected_match;                                   \
        context.expected_id_array = expected_id_array;                         \
        context.expected_end_array = expected_end_array;                       \
        context.expected_start_array = expected_start_array;                   \
                                                                               \
        ret = hs_single_char_pair_search(database, buffer, buffer_len,         \
                                         callback, &context);                  \
    }
|
||||||
|
|
||||||
|
// ------------------------free tests-------------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_free_single_char_pair_pattern
|
||||||
|
nullptr
|
||||||
|
general
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Freeing a null compiled pattern must be accepted without crashing.
TEST(single_char_pair_free, nullptr) {
    hs_single_char_pair_compiled_pattern_t *database = nullptr;

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Allocates a noodTable-sized buffer through the test allocator, hands it to
// the free function disguised as a compiled pattern and checks that no
// memory is left allocated afterwards.
TEST(single_char_pair_free, general) {
    SETUP_MEM_LEAK_TEST();

    noodTable *raw_table =
        reinterpret_cast<noodTable *>(test_malloc(sizeof(noodTable)));
    hs_single_char_pair_compiled_pattern_t *database =
        reinterpret_cast<hs_single_char_pair_compiled_pattern_t*>(
            raw_table);

    hs_free_single_char_pair_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// ------------------------compile tests----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_compile_single_char_pair_search
|
||||||
|
general (2 char)
|
||||||
|
valid pattern including null char
|
||||||
|
|
||||||
|
nullptr expression
|
||||||
|
nullptr output
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Compiling an ordinary two-character pattern must succeed.
TEST(single_char_pair_compile, general) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR)
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_general")

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Compiling a two-character pattern that contains a NUL byte must succeed.
TEST(single_char_pair_compile, with_null_char) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL)
    EXPECT_COMPILE_SUCCESS("test_compile_single_char_pair_with_null_char")

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: a null pattern pointer must abort with a message containing
// "called with nullptr".
TEST(single_char_pair_compile, nullptr_pattern) {
    hs_single_char_pair_compiled_pattern_t *database = nullptr;

    EXPECT_DEATH(hs_compile_single_char_pair_search(nullptr, &database),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
// Death test: a null output pointer must abort with a message containing
// "called with nullptr". A valid two-character pattern is passed (like in the
// other tests of this file) so that the null output pointer is the only
// invalid argument of the call.
TEST(single_char_pair_compile, nullptr_database) {
    EXPECT_DEATH(hs_compile_single_char_pair_search(PATTERN_2_CHAR, nullptr),
                 "called with nullptr");
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------------search tests-----------------------------------------
|
||||||
|
|
||||||
|
/*
|
||||||
|
hs_single_char_pair_search
|
||||||
|
general pattern
|
||||||
|
match at start
|
||||||
|
match middle (general)
|
||||||
|
match index 15 (cross over vector)
|
||||||
|
match at end
|
||||||
|
match past end (1 char ok, then end, so one char is missing)
|
||||||
|
bad caseness
|
||||||
|
search several times
|
||||||
|
buffer containing null char
|
||||||
|
pattern with null char
|
||||||
|
general pattern
|
||||||
|
buff size 0
|
||||||
|
nullptr pattern
|
||||||
|
nullptr buffer
|
||||||
|
nullptr callback
|
||||||
|
*/
|
||||||
|
|
||||||
|
// One match expected at the very beginning of the buffer (offset 0).
TEST(single_char_pair_search, start) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_start");

    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_0, EXPR_NOISE_LEN, 1, (0));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Full compile / search / free cycle with one match expected at offset 5,
// run under the leak checker so that the cycle is verified to release all
// memory it allocated.
TEST(single_char_pair_search, general) {
    SETUP_MEM_LEAK_TEST();

    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_general");

    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);

    EXPECT_MEMORY_CLEAN();
    UNSET_MEM_LEAK_TEST();
}
|
||||||
|
|
||||||
|
// Two matches expected, at offsets 5 and 15; the one at 15 is the
// "match index 15 (cross over vector)" case of the test plan.
TEST(single_char_pair_search, cross_vector) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_cross_vector");

    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// One match expected at offset 30, the "match at end" case of the test plan.
TEST(single_char_pair_search, end) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_end");

    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN, 1, (30));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Same fixture as the "end" test, but the searched length is one byte
// shorter, so only the first character of the pair lies inside the searched
// range: no match expected.
TEST(single_char_pair_search, past_end) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_past_end");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_AB_END_30, EXPR_NOISE_LEN - 1, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// The search must be case sensitive: the "bad case" fixture is expected to
// produce zero matches.
TEST(single_char_pair_search, bad_case) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_bad_case");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15_BAD_CASE, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Reuses one compiled pattern for two consecutive searches over different
// buffers: first a single match at offset 5, then matches at 5 and 15.
TEST(single_char_pair_search, several_search) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_several_search");

    // First search.
    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    // Second search with the same database.
    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_15, EXPR_NOISE_LEN, 2, (5, 15));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Both the pattern and the buffer contain a NUL byte; one match is expected
// at offset 5.
TEST(single_char_pair_search, null_char_buff_and_pattern) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_WITH_NULL);
    ASSERT_COMPILE_SUCCESS(
        "test_single_char_pair_search_null_char_buff_and_pattern");

    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 1, (5));
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Regular two-character pattern searched in the buffer that embeds a NUL
// byte: no match is expected.
TEST(single_char_pair_search, null_char_buff) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_null_char_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5_NULL, EXPR_NOISE_LEN, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Zero-length buffer: the search is expected to succeed and report no match.
TEST(single_char_pair_search, empty_buff) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_empty_buff");

    // cppcheck-suppress unsignedLessThanZero
    SEARCH_SINGLE_CHAR_PAIR("", 0, 0, ());
    EXPECT_SEARCH_SUCCESS("hs_single_char_pair_search", pattern, buffer);

    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
#if !defined(RELEASE_BUILD)
|
||||||
|
// test asserts
|
||||||
|
|
||||||
|
// Death test: searching with a null compiled pattern must abort with the
// "called with nullptr database" message. Nothing is compiled here, so the
// locals that SEARCH_SINGLE_CHAR_PAIR refers to are declared by hand.
TEST(single_char_pair_search, nullptr_pattern) {
    const hs_single_char_pair_compiled_pattern_t *database = nullptr;
    context_t context;

    EXPECT_DEATH(
        {
            const char *buffer;
            hs_error_t ret;
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR_PAIR(EXPR_NOISE_5, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr database");
}
|
||||||
|
|
||||||
|
// Death test: searching a null buffer must abort with the
// "called with nullptr buffer" message.
TEST(single_char_pair_search, nullptr_buffer) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_buffer");

    EXPECT_DEATH(
        {
            // cppcheck-suppress unsignedLessThanZero
            // cppcheck-suppress unreadVariable
            SEARCH_SINGLE_CHAR_PAIR(nullptr, EXPR_NOISE_LEN, 0, ());
        },
        "called with nullptr buffer");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
// Death test: passing a null match callback must abort with the
// "called with nullptr callback" message. The search macro cannot be used
// because it always passes `callback`, so the context is set up by hand.
TEST(single_char_pair_search, nullptr_callback) {
    COMPILE_SINGLE_CHAR_PAIR(PATTERN_2_CHAR);
    ASSERT_COMPILE_SUCCESS("test_single_char_pair_search_nullptr_callback");

    buffer = EXPR_NOISE_5;
    const size_t buffer_len = EXPR_NOISE_LEN;
    const size_t expected_match = 1;
    size_t expected_start_array[expected_match] = {5};
    size_t expected_end_array[expected_match] = {5};
    // End offset = start offset + 2 (two-character match).
    for (size_t i = 0; i < expected_match; i++) {
        expected_end_array[i] += 2;
    }
    context.expected_start_array = expected_start_array;
    context.expected_end_array = expected_end_array;
    context.array_size = expected_match;
    context.number_matched = 0;
    context.number_wrong = 0;

    EXPECT_DEATH(
        {
            hs_single_char_pair_search(database, buffer, buffer_len, nullptr,
                                       &context);
        },
        "called with nullptr callback");

    // Only the death-test child aborts; the parent process still owns the
    // compiled pattern and has to release it to avoid a leak.
    hs_free_single_char_pair_pattern(database);
}
|
||||||
|
|
||||||
|
#endif
|
Loading…
x
Reference in New Issue
Block a user