mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch 'github_develop' into github_master
This commit is contained in:
commit
c00683d739
23
CHANGELOG.md
23
CHANGELOG.md
@ -2,6 +2,29 @@
|
|||||||
|
|
||||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [5.3.0] 2020-05-15
|
||||||
|
- Improvement on literal matcher "Teddy" performance, including support for
|
||||||
|
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
|
||||||
|
VBMI).
|
||||||
|
- Improvement on single-byte/two-byte matching performance, including support
|
||||||
|
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
|
||||||
|
- hsbench: add hyphen support for -T option.
|
||||||
|
- tools/fuzz: add test scripts for synthetic pattern generation.
|
||||||
|
- Bugfix for acceleration path analysis in LimEx NFA.
|
||||||
|
- Bugfix for duplicate matches for Small-write engine.
|
||||||
|
- Bugfix for UTF8 checking problem for hscollider.
|
||||||
|
- Bugfix for issue #205: avoid crash of `hs_compile_lit_multi()` with clang and
|
||||||
|
ASAN.
|
||||||
|
- Bugfix for issue #211: fix error in `db_check_platform()` function.
|
||||||
|
- Bugfix for issue #217: fix cmake parsing issue of CPU arch for non-English
|
||||||
|
locale.
|
||||||
|
- Bugfix for issue #228: avoid undefined behavior when calling `close()` after
|
||||||
|
`fdopendir()` in `loadExpressions()`.
|
||||||
|
- Bugfix for issue #239: fix hyperscan compile issue under gcc-10.
|
||||||
|
- Add VLAN packets processing capability in pcap analysis script. (#214)
|
||||||
|
- Avoid extra convert instruction for "Noodle". (#221)
|
||||||
|
- Add Hyperscan version marcro in `hs.h`. (#222)
|
||||||
|
|
||||||
## [5.2.1] 2019-10-13
|
## [5.2.1] 2019-10-13
|
||||||
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
|
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
|
||||||
release mode.
|
release mode.
|
||||||
|
@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
|
|||||||
project (hyperscan C CXX)
|
project (hyperscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 5)
|
set (HS_MAJOR_VERSION 5)
|
||||||
set (HS_MINOR_VERSION 2)
|
set (HS_MINOR_VERSION 3)
|
||||||
set (HS_PATCH_VERSION 1)
|
set (HS_PATCH_VERSION 0)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
@ -187,9 +187,9 @@ else()
|
|||||||
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
|
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||||
OUTPUT_VARIABLE _GCC_OUTPUT)
|
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||||
string(FIND "${_GCC_OUTPUT}" "Known" POS)
|
string(FIND "${_GCC_OUTPUT}" "march" POS)
|
||||||
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
|
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
||||||
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
||||||
GNUCC_ARCH "${_GCC_OUTPUT}")
|
GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||||
|
|
||||||
# test the parsed flag
|
# test the parsed flag
|
||||||
@ -326,7 +326,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|||||||
set (FAT_RUNTIME_REQUISITES TRUE)
|
set (FAT_RUNTIME_REQUISITES TRUE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
|
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include (${CMAKE_MODULE_PATH}/arch.cmake)
|
include (${CMAKE_MODULE_PATH}/arch.cmake)
|
||||||
@ -340,7 +340,7 @@ if (NOT WIN32)
|
|||||||
set(C_FLAGS_TO_CHECK
|
set(C_FLAGS_TO_CHECK
|
||||||
# Variable length arrays are way bad, most especially at run time
|
# Variable length arrays are way bad, most especially at run time
|
||||||
"-Wvla"
|
"-Wvla"
|
||||||
# Pointer arith on void pointers is doing it wong.
|
# Pointer arith on void pointers is doing it wrong.
|
||||||
"-Wpointer-arith"
|
"-Wpointer-arith"
|
||||||
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes
|
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes
|
||||||
"-Wstrict-prototypes"
|
"-Wstrict-prototypes"
|
||||||
@ -383,7 +383,7 @@ if (CC_PAREN_EQUALITY)
|
|||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# clang compains about unused const vars in our Ragel-generated code.
|
# clang complains about unused const vars in our Ragel-generated code.
|
||||||
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
|
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
|
||||||
if (CXX_UNUSED_CONST_VAR)
|
if (CXX_UNUSED_CONST_VAR)
|
||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
||||||
@ -418,6 +418,12 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
|
|||||||
# gcc5 complains about this
|
# gcc5 complains about this
|
||||||
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
||||||
|
|
||||||
|
# gcc 10 complains about this
|
||||||
|
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
|
||||||
|
if(CC_STRINGOP_OVERFLOW)
|
||||||
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
|
||||||
|
endif()
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||||
|
@ -58,6 +58,18 @@ int main(){
|
|||||||
(void)_mm512_abs_epi8(z);
|
(void)_mm512_abs_epi8(z);
|
||||||
}" HAVE_AVX512)
|
}" HAVE_AVX512)
|
||||||
|
|
||||||
|
# and now for AVX512VBMI
|
||||||
|
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||||
|
#if !defined(__AVX512VBMI__)
|
||||||
|
#error no avx512vbmi
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(){
|
||||||
|
__m512i a = _mm512_set1_epi8(0xFF);
|
||||||
|
__m512i idx = _mm512_set_epi64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
|
||||||
|
(void)_mm512_permutexvar_epi8(idx, a);
|
||||||
|
}" HAVE_AVX512VBMI)
|
||||||
|
|
||||||
if (FAT_RUNTIME)
|
if (FAT_RUNTIME)
|
||||||
if (NOT HAVE_SSSE3)
|
if (NOT HAVE_SSSE3)
|
||||||
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
|
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
|
||||||
|
@ -55,7 +55,7 @@ Hyperscan provides support for targeting a database at a particular CPU
|
|||||||
platform; see :ref:`instr_specialization` for details.
|
platform; see :ref:`instr_specialization` for details.
|
||||||
|
|
||||||
=====================
|
=====================
|
||||||
Compile Pure Literals
|
Compile Pure Literals
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
Pure literal is a special case of regular expression. A character sequence is
|
Pure literal is a special case of regular expression. A character sequence is
|
||||||
@ -75,12 +75,12 @@ characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
|
|||||||
While in pure literal case, all these meta characters lost extra meanings
|
While in pure literal case, all these meta characters lost extra meanings
|
||||||
expect for that they are just common ASCII codes.
|
expect for that they are just common ASCII codes.
|
||||||
|
|
||||||
Hyperscan is initially designed to process common regualr expressions. It is
|
Hyperscan is initially designed to process common regular expressions. It is
|
||||||
hence embedded with a complex parser to do comprehensive regular grammer
|
hence embedded with a complex parser to do comprehensive regular grammer
|
||||||
interpretion. Particularly, the identification of above meta characters is the
|
interpretion. Particularly, the identification of above meta characters is the
|
||||||
basic step for the interpretion of far more complex regular grammers.
|
basic step for the interpretion of far more complex regular grammers.
|
||||||
|
|
||||||
However in real cases, patterns may not always be regualr expressions. They
|
However in real cases, patterns may not always be regular expressions. They
|
||||||
could just be pure literals. Problem will come if the pure literals contain
|
could just be pure literals. Problem will come if the pure literals contain
|
||||||
regular meta characters. Supposing fed directly into traditional Hyperscan
|
regular meta characters. Supposing fed directly into traditional Hyperscan
|
||||||
compile API, all these meta characters will be interpreted in predefined ways,
|
compile API, all these meta characters will be interpreted in predefined ways,
|
||||||
@ -98,7 +98,7 @@ In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns
|
|||||||
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
|
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
|
||||||
pattern database. All of the supplied patterns will be scanned for
|
pattern database. All of the supplied patterns will be scanned for
|
||||||
concurrently at scan time, with user-supplied identifiers returned when they
|
concurrently at scan time, with user-supplied identifiers returned when they
|
||||||
match.
|
match.
|
||||||
|
|
||||||
These 2 APIs are designed for use cases where all patterns contained in the
|
These 2 APIs are designed for use cases where all patterns contained in the
|
||||||
target rule set are pure literals. Users can pass the initial pure literal
|
target rule set are pure literals. Users can pass the initial pure literal
|
||||||
@ -110,8 +110,8 @@ Hyperscan needs to locate the end position of the input expression via clearly
|
|||||||
knowing each literal's length, not by simply identifying character ``\0`` of a
|
knowing each literal's length, not by simply identifying character ``\0`` of a
|
||||||
string.
|
string.
|
||||||
|
|
||||||
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
|
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
|
||||||
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
|
:c:member:`HS_FLAG_SOM_LEFTMOST`.
|
||||||
|
|
||||||
.. note:: We don't support literal compilation API with :ref:`extparam`. And
|
.. note:: We don't support literal compilation API with :ref:`extparam`. And
|
||||||
for runtime implementation, traditional runtime APIs can still be
|
for runtime implementation, traditional runtime APIs can still be
|
||||||
|
@ -260,7 +260,7 @@ instead of potentially executing illegal instructions. The API function
|
|||||||
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
||||||
the current platform is supported by Hyperscan.
|
the current platform is supported by Hyperscan.
|
||||||
|
|
||||||
At of this release, the variants of the runtime that are built, and the CPU
|
As of this release, the variants of the runtime that are built, and the CPU
|
||||||
capability that is required, are the following:
|
capability that is required, are the following:
|
||||||
|
|
||||||
+----------+-------------------------------+---------------------------+
|
+----------+-------------------------------+---------------------------+
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -125,7 +125,7 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
|
|||||||
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
|
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
|
||||||
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
|
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
|
||||||
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
|
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
|
||||||
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
|
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET/MULTILINE
|
||||||
|
|
||||||
if (flags & ~HS_FLAG_ALL) {
|
if (flags & ~HS_FLAG_ALL) {
|
||||||
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
|
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
|
||||||
@ -402,19 +402,18 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that our pattern isn't too long (in characters).
|
// Ensure that our pattern isn't too long (in characters).
|
||||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
if (expLength > cc.grey.limitPatternLength) {
|
||||||
throw CompileError("Pattern length exceeds limit.");
|
throw CompileError("Pattern length exceeds limit.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// filter out flags not supported by pure literal API.
|
// filter out flags not supported by pure literal API.
|
||||||
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
|
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
|
||||||
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
|
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
|
||||||
HS_FLAG_QUIET;
|
HS_FLAG_QUIET | HS_FLAG_MULTILINE;
|
||||||
|
|
||||||
if (flags & not_supported) {
|
if (flags & not_supported) {
|
||||||
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
|
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_SINGLEMATCH and "
|
||||||
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
|
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
|
||||||
"supported in literal API.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This expression must be a pure literal, we can build ue2_literal
|
// This expression must be a pure literal, we can build ue2_literal
|
||||||
|
@ -114,8 +114,8 @@ hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
|
|||||||
static
|
static
|
||||||
hs_error_t db_check_platform(const u64a p) {
|
hs_error_t db_check_platform(const u64a p) {
|
||||||
if (p != hs_current_platform
|
if (p != hs_current_platform
|
||||||
&& p != hs_current_platform_no_avx2
|
&& p != (hs_current_platform | hs_current_platform_no_avx2)
|
||||||
&& p != hs_current_platform_no_avx512) {
|
&& p != (hs_current_platform | hs_current_platform_no_avx512)) {
|
||||||
return HS_DB_PLATFORM_ERROR;
|
return HS_DB_PLATFORM_ERROR;
|
||||||
}
|
}
|
||||||
// passed all checks
|
// passed all checks
|
||||||
|
305
src/fdr/teddy.c
305
src/fdr/teddy.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -74,6 +74,30 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
|||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
|
||||||
|
|
||||||
|
#define CONF_CHUNK_64(chunk, bucket, off, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(chunk != ones_u64a)) { \
|
||||||
|
chunk = ~chunk; \
|
||||||
|
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||||
|
&control, &last_match); \
|
||||||
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define CONF_CHUNK_32(chunk, bucket, off, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(chunk != ones_u32)) { \
|
||||||
|
chunk = ~chunk; \
|
||||||
|
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||||
|
&control, &last_match); \
|
||||||
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
|
#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
|
||||||
do { \
|
do { \
|
||||||
if (unlikely(chunk != ones_u64a)) { \
|
if (unlikely(chunk != ones_u64a)) { \
|
||||||
@ -94,7 +118,284 @@ do { \
|
|||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#if defined(HAVE_AVX512) // AVX512 reinforced teddy
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
|
||||||
|
|
||||||
|
#ifdef ARCH_64_BIT
|
||||||
|
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(diff512(var, ones512()))) { \
|
||||||
|
m128 p128_0 = extract128from512(var, 0); \
|
||||||
|
m128 p128_1 = extract128from512(var, 1); \
|
||||||
|
m128 p128_2 = extract128from512(var, 2); \
|
||||||
|
m128 p128_3 = extract128from512(var, 3); \
|
||||||
|
u64a part1 = movq(p128_0); \
|
||||||
|
u64a part2 = movq(rshiftbyte_m128(p128_0, 8)); \
|
||||||
|
u64a part3 = movq(p128_1); \
|
||||||
|
u64a part4 = movq(rshiftbyte_m128(p128_1, 8)); \
|
||||||
|
u64a part5 = movq(p128_2); \
|
||||||
|
u64a part6 = movq(rshiftbyte_m128(p128_2, 8)); \
|
||||||
|
u64a part7 = movq(p128_3); \
|
||||||
|
u64a part8 = movq(rshiftbyte_m128(p128_3, 8)); \
|
||||||
|
CONF_CHUNK_64(part1, bucket, offset, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part2, bucket, offset + 8, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part3, bucket, offset + 16, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part4, bucket, offset + 24, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part5, bucket, offset + 32, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part6, bucket, offset + 40, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part7, bucket, offset + 48, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_64(part8, bucket, offset + 56, reason, pt, conf_fn); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
#else
|
||||||
|
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(diff512(var, ones512()))) { \
|
||||||
|
m128 p128_0 = extract128from512(var, 0); \
|
||||||
|
m128 p128_1 = extract128from512(var, 1); \
|
||||||
|
m128 p128_2 = extract128from512(var, 2); \
|
||||||
|
m128 p128_3 = extract128from512(var, 3); \
|
||||||
|
u32 part1 = movd(p128_0); \
|
||||||
|
u32 part2 = movd(rshiftbyte_m128(p128_0, 4)); \
|
||||||
|
u32 part3 = movd(rshiftbyte_m128(p128_0, 8)); \
|
||||||
|
u32 part4 = movd(rshiftbyte_m128(p128_0, 12)); \
|
||||||
|
u32 part5 = movd(p128_1); \
|
||||||
|
u32 part6 = movd(rshiftbyte_m128(p128_1, 4)); \
|
||||||
|
u32 part7 = movd(rshiftbyte_m128(p128_1, 8)); \
|
||||||
|
u32 part8 = movd(rshiftbyte_m128(p128_1, 12)); \
|
||||||
|
u32 part9 = movd(p128_2); \
|
||||||
|
u32 part10 = movd(rshiftbyte_m128(p128_2, 4)); \
|
||||||
|
u32 part11 = movd(rshiftbyte_m128(p128_2, 8)); \
|
||||||
|
u32 part12 = movd(rshiftbyte_m128(p128_2, 12)); \
|
||||||
|
u32 part13 = movd(p128_3); \
|
||||||
|
u32 part14 = movd(rshiftbyte_m128(p128_3, 4)); \
|
||||||
|
u32 part15 = movd(rshiftbyte_m128(p128_3, 8)); \
|
||||||
|
u32 part16 = movd(rshiftbyte_m128(p128_3, 12)); \
|
||||||
|
CONF_CHUNK_32(part1, bucket, offset, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part2, bucket, offset + 4, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part3, bucket, offset + 8, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part4, bucket, offset + 12, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part5, bucket, offset + 16, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part6, bucket, offset + 20, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part7, bucket, offset + 24, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part8, bucket, offset + 28, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part9, bucket, offset + 32, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part10, bucket, offset + 36, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part11, bucket, offset + 40, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part12, bucket, offset + 44, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part13, bucket, offset + 48, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part14, bucket, offset + 52, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part15, bucket, offset + 56, reason, pt, conf_fn); \
|
||||||
|
CONF_CHUNK_32(part16, bucket, offset + 60, reason, pt, conf_fn); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define PREP_SHUF_MASK \
|
||||||
|
m512 lo = and512(val, *lo_mask); \
|
||||||
|
m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||||
|
m512 shuf_or_b0 = or512(pshufb_m512(dup_mask[0], lo), \
|
||||||
|
pshufb_m512(dup_mask[1], hi));
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||||
|
m512 shuf_or_b1 = or512(pshufb_m512(dup_mask[2], lo), \
|
||||||
|
pshufb_m512(dup_mask[3], hi));
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||||
|
m512 shuf_or_b2 = or512(pshufb_m512(dup_mask[4], lo), \
|
||||||
|
pshufb_m512(dup_mask[5], hi));
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_PSHUFB_OR_M4 \
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||||
|
m512 shuf_or_b3 = or512(pshufb_m512(dup_mask[6], lo), \
|
||||||
|
pshufb_m512(dup_mask[7], hi));
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SL1_MASK 0xfffffffffffffffeULL
|
||||||
|
#define TEDDY_VBMI_SL2_MASK 0xfffffffffffffffcULL
|
||||||
|
#define TEDDY_VBMI_SL3_MASK 0xfffffffffffffff8ULL
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SHIFT_M1
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SHIFT_M2 \
|
||||||
|
TEDDY_VBMI_SHIFT_M1 \
|
||||||
|
m512 sl1 = maskz_vpermb512(TEDDY_VBMI_SL1_MASK, sl_msk[0], shuf_or_b1);
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SHIFT_M3 \
|
||||||
|
TEDDY_VBMI_SHIFT_M2 \
|
||||||
|
m512 sl2 = maskz_vpermb512(TEDDY_VBMI_SL2_MASK, sl_msk[1], shuf_or_b2);
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SHIFT_M4 \
|
||||||
|
TEDDY_VBMI_SHIFT_M3 \
|
||||||
|
m512 sl3 = maskz_vpermb512(TEDDY_VBMI_SL3_MASK, sl_msk[2], shuf_or_b3);
|
||||||
|
|
||||||
|
#define SHIFT_OR_M1 \
|
||||||
|
shuf_or_b0
|
||||||
|
|
||||||
|
#define SHIFT_OR_M2 \
|
||||||
|
or512(sl1, SHIFT_OR_M1)
|
||||||
|
|
||||||
|
#define SHIFT_OR_M3 \
|
||||||
|
or512(sl2, SHIFT_OR_M2)
|
||||||
|
|
||||||
|
#define SHIFT_OR_M4 \
|
||||||
|
or512(sl3, SHIFT_OR_M3)
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
|
UNUSED const m512 *sl_msk, const m512 val) {
|
||||||
|
PREP_SHUF_MASK;
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M1;
|
||||||
|
TEDDY_VBMI_SHIFT_M1;
|
||||||
|
return SHIFT_OR_M1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
|
const m512 *sl_msk, const m512 val) {
|
||||||
|
PREP_SHUF_MASK;
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M2;
|
||||||
|
TEDDY_VBMI_SHIFT_M2;
|
||||||
|
return SHIFT_OR_M2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
|
const m512 *sl_msk, const m512 val) {
|
||||||
|
PREP_SHUF_MASK;
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M3;
|
||||||
|
TEDDY_VBMI_SHIFT_M3;
|
||||||
|
return SHIFT_OR_M3;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
|
const m512 *sl_msk, const m512 val) {
|
||||||
|
PREP_SHUF_MASK;
|
||||||
|
TEDDY_VBMI_PSHUFB_OR_M4;
|
||||||
|
TEDDY_VBMI_SHIFT_M4;
|
||||||
|
return SHIFT_OR_M4;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define PREP_CONF_FN(val, n) \
|
||||||
|
prep_conf_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
|
||||||
|
|
||||||
|
const u8 ALIGN_DIRECTIVE p_sh_mask_arr[80] = {
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||||
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||||
|
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||||||
|
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
|
||||||
|
};
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_SL1_POS 15
|
||||||
|
#define TEDDY_VBMI_SL2_POS 14
|
||||||
|
#define TEDDY_VBMI_SL3_POS 13
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M1
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||||
|
TEDDY_VBMI_LOAD_SHIFT_MASK_M1 \
|
||||||
|
sl_msk[0] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL1_POS);
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||||
|
TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||||
|
sl_msk[1] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL2_POS);
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M4 \
|
||||||
|
TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||||
|
sl_msk[2] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL3_POS);
|
||||||
|
|
||||||
|
#define PREPARE_MASKS_1 \
|
||||||
|
dup_mask[0] = set4x128(maskBase[0]); \
|
||||||
|
dup_mask[1] = set4x128(maskBase[1]);
|
||||||
|
|
||||||
|
#define PREPARE_MASKS_2 \
|
||||||
|
PREPARE_MASKS_1 \
|
||||||
|
dup_mask[2] = set4x128(maskBase[2]); \
|
||||||
|
dup_mask[3] = set4x128(maskBase[3]);
|
||||||
|
|
||||||
|
#define PREPARE_MASKS_3 \
|
||||||
|
PREPARE_MASKS_2 \
|
||||||
|
dup_mask[4] = set4x128(maskBase[4]); \
|
||||||
|
dup_mask[5] = set4x128(maskBase[5]);
|
||||||
|
|
||||||
|
#define PREPARE_MASKS_4 \
|
||||||
|
PREPARE_MASKS_3 \
|
||||||
|
dup_mask[6] = set4x128(maskBase[6]); \
|
||||||
|
dup_mask[7] = set4x128(maskBase[7]);
|
||||||
|
|
||||||
|
#define PREPARE_MASKS(n) \
|
||||||
|
m512 lo_mask = set64x8(0xf); \
|
||||||
|
m512 dup_mask[n * 2]; \
|
||||||
|
m512 sl_msk[n - 1]; \
|
||||||
|
PREPARE_MASKS_##n \
|
||||||
|
TEDDY_VBMI_LOAD_SHIFT_MASK_M##n
|
||||||
|
|
||||||
|
#define TEDDY_VBMI_CONF_MASK_HEAD (0xffffffffffffffffULL >> n_sh)
|
||||||
|
#define TEDDY_VBMI_CONF_MASK_FULL (0xffffffffffffffffULL << n_sh)
|
||||||
|
#define TEDDY_VBMI_CONF_MASK_VAR(n) (0xffffffffffffffffULL >> (64 - n) << overlap)
|
||||||
|
#define TEDDY_VBMI_LOAD_MASK_PATCH (0xffffffffffffffffULL >> (64 - n_sh))
|
||||||
|
|
||||||
|
#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \
|
||||||
|
do { \
|
||||||
|
const u8 *buf_end = a->buf + a->len; \
|
||||||
|
const u8 *ptr = a->buf + a->start_offset; \
|
||||||
|
u32 floodBackoff = FLOOD_BACKOFF_START; \
|
||||||
|
const u8 *tryFloodDetect = a->firstFloodDetect; \
|
||||||
|
u32 last_match = ones_u32; \
|
||||||
|
const struct Teddy *teddy = (const struct Teddy *)fdr; \
|
||||||
|
const size_t iterBytes = 64; \
|
||||||
|
u32 n_sh = n_msk - 1; \
|
||||||
|
const size_t loopBytes = 64 - n_sh; \
|
||||||
|
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \
|
||||||
|
a->buf, a->len, a->start_offset); \
|
||||||
|
\
|
||||||
|
const m128 *maskBase = getMaskBase(teddy); \
|
||||||
|
PREPARE_MASKS(n_msk); \
|
||||||
|
const u32 *confBase = getConfBase(teddy); \
|
||||||
|
\
|
||||||
|
u64a k = TEDDY_VBMI_CONF_MASK_FULL; \
|
||||||
|
m512 p_mask = set_mask_m512(~k); \
|
||||||
|
u32 overlap = 0; \
|
||||||
|
u64a patch = 0; \
|
||||||
|
if (likely(ptr + loopBytes <= buf_end)) { \
|
||||||
|
m512 p_mask0 = set_mask_m512(~TEDDY_VBMI_CONF_MASK_HEAD); \
|
||||||
|
m512 r_0 = PREP_CONF_FN(loadu512(ptr), n_msk); \
|
||||||
|
r_0 = or512(r_0, p_mask0); \
|
||||||
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr, conf_fn); \
|
||||||
|
ptr += loopBytes; \
|
||||||
|
overlap = n_sh; \
|
||||||
|
patch = TEDDY_VBMI_LOAD_MASK_PATCH; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
for (; ptr + loopBytes <= buf_end; ptr += loopBytes) { \
|
||||||
|
__builtin_prefetch(ptr - n_sh + (64 * 2)); \
|
||||||
|
CHECK_FLOOD; \
|
||||||
|
m512 r_0 = PREP_CONF_FN(loadu512(ptr - n_sh), n_msk); \
|
||||||
|
r_0 = or512(r_0, p_mask); \
|
||||||
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, ptr - n_sh, conf_fn); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
assert(ptr + loopBytes > buf_end); \
|
||||||
|
if (ptr < buf_end) { \
|
||||||
|
u32 left = (u32)(buf_end - ptr); \
|
||||||
|
u64a k1 = TEDDY_VBMI_CONF_MASK_VAR(left); \
|
||||||
|
m512 p_mask1 = set_mask_m512(~k1); \
|
||||||
|
m512 val_0 = loadu_maskz_m512(k1 | patch, ptr - overlap); \
|
||||||
|
m512 r_0 = PREP_CONF_FN(val_0, n_msk); \
|
||||||
|
r_0 = or512(r_0, p_mask1); \
|
||||||
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr - overlap, conf_fn); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
return HWLM_SUCCESS; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#elif defined(HAVE_AVX512) // AVX512 reinforced teddy
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
|
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -134,7 +134,7 @@ const m256 *getMaskBase_fat(const struct Teddy *teddy) {
|
|||||||
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512)
|
#if defined(HAVE_AVX512_REVERT) // revert to AVX2 Fat Teddy
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) {
|
const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -383,12 +383,16 @@ m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
|
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
|
||||||
CautionReason reason) {
|
UNUSED CautionReason reason) {
|
||||||
u64a confVal = 0;
|
u64a confVal = 0;
|
||||||
const u8 *buf = a->buf;
|
const u8 *buf = a->buf;
|
||||||
size_t len = a->len;
|
size_t len = a->len;
|
||||||
const u8 *confirm_loc = ptr + byte - 7;
|
const u8 *confirm_loc = ptr + byte - 7;
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
if (likely(confirm_loc >= buf)) {
|
||||||
|
#else
|
||||||
if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
||||||
|
#endif
|
||||||
confVal = lv_u64a(confirm_loc, buf, buf + len);
|
confVal = lv_u64a(confirm_loc, buf, buf + len);
|
||||||
} else { // r == VECTORING, confirm_loc < buf
|
} else { // r == VECTORING, confirm_loc < buf
|
||||||
u64a histBytes = a->histBytes;
|
u64a histBytes = a->histBytes;
|
||||||
|
8
src/hs.h
8
src/hs.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,6 +39,12 @@
|
|||||||
* the individual component headers for documentation.
|
* the individual component headers for documentation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* The current Hyperscan version information. */
|
||||||
|
|
||||||
|
#define HS_MAJOR 5
|
||||||
|
#define HS_MINOR 3
|
||||||
|
#define HS_PATCH 0
|
||||||
|
|
||||||
#include "hs_compile.h"
|
#include "hs_compile.h"
|
||||||
#include "hs_runtime.h"
|
#include "hs_runtime.h"
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -563,7 +563,6 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
|
|||||||
* be used by ORing them together. Compared to @ref hs_compile(), fewer
|
* be used by ORing them together. Compared to @ref hs_compile(), fewer
|
||||||
* valid values are provided:
|
* valid values are provided:
|
||||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
|
||||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||||
* expression per stream.
|
* expression per stream.
|
||||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||||
@ -637,7 +636,6 @@ hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
|
|||||||
* in place of an array will set the flags value for all patterns to zero.
|
* in place of an array will set the flags value for all patterns to zero.
|
||||||
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
|
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
|
||||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
|
||||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||||
* expression per stream.
|
* expression per stream.
|
||||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||||
@ -985,8 +983,8 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
* offset when a match is reported for this expression. (By default, no start
|
* offset when a match is reported for this expression. (By default, no start
|
||||||
* of match is returned.)
|
* of match is returned.)
|
||||||
*
|
*
|
||||||
* Enabling this behaviour may reduce performance and increase stream state
|
* For all the 3 modes, enabling this behaviour may reduce performance. And
|
||||||
* requirements in streaming mode.
|
* particularly, it may increase stream state requirements in streaming mode.
|
||||||
*/
|
*/
|
||||||
#define HS_FLAG_SOM_LEFTMOST 256
|
#define HS_FLAG_SOM_LEFTMOST 256
|
||||||
|
|
||||||
|
@ -210,7 +210,7 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
|
|||||||
const u8 *d = buf + start, *e = buf + end;
|
const u8 *d = buf + start, *e = buf + end;
|
||||||
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
||||||
assert(d < e);
|
assert(d < e);
|
||||||
u8 lastz0 = 0;
|
u32 lastz0 = 0;
|
||||||
|
|
||||||
for (; d < e; d += 32) {
|
for (; d < e; d += 32) {
|
||||||
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
|
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
|
||||||
|
@ -214,7 +214,7 @@ static
|
|||||||
bool double_byte_ok(const AccelScheme &info) {
|
bool double_byte_ok(const AccelScheme &info) {
|
||||||
return !info.double_byte.empty() &&
|
return !info.double_byte.empty() &&
|
||||||
info.double_cr.count() < info.double_byte.size() &&
|
info.double_cr.count() < info.double_byte.size() &&
|
||||||
info.double_cr.count() <= 2 && !info.double_byte.empty();
|
info.double_cr.count() <= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -46,7 +46,20 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
|
|
||||||
|
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||||
|
|
||||||
// Handle small scans.
|
// Handle small scans.
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? vermMiniNocase(chars, buf, buf_end, 0)
|
||||||
|
: vermMini(chars, buf, buf_end, 0);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (buf_end - buf < VERM_BOUNDARY) {
|
if (buf_end - buf < VERM_BOUNDARY) {
|
||||||
for (; buf < buf_end; buf++) {
|
for (; buf < buf_end; buf++) {
|
||||||
char cur = (char)*buf;
|
char cur = (char)*buf;
|
||||||
@ -59,8 +72,8 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
|
||||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
@ -99,7 +112,20 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
|
|
||||||
|
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||||
|
|
||||||
// Handle small scans.
|
// Handle small scans.
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? vermMiniNocase(chars, buf, buf_end, 1)
|
||||||
|
: vermMini(chars, buf, buf_end, 1);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (buf_end - buf < VERM_BOUNDARY) {
|
if (buf_end - buf < VERM_BOUNDARY) {
|
||||||
for (; buf < buf_end; buf++) {
|
for (; buf < buf_end; buf++) {
|
||||||
char cur = (char)*buf;
|
char cur = (char)*buf;
|
||||||
@ -112,8 +138,8 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
|
||||||
size_t min = (size_t)buf % VERM_BOUNDARY;
|
size_t min = (size_t)buf % VERM_BOUNDARY;
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
@ -149,12 +175,32 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
|||||||
DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
|
||||||
|
|
||||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
|
||||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? dvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||||
|
: dvermMini(chars1, chars2, buf, buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for partial match at end */
|
||||||
|
u8 mask = nocase ? CASE_CLEAR : 0xff;
|
||||||
|
if ((buf_end[-1] & mask) == (u8)c1) {
|
||||||
|
DEBUG_PRINTF("partial!!!\n");
|
||||||
|
return buf_end - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||||
|
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
// unaligned load, then skip buf forward to the next aligned address.
|
// unaligned load, then skip buf forward to the next aligned address.
|
||||||
@ -205,14 +251,32 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
|||||||
DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
|
DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
|
||||||
"over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
|
"over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
|
||||||
|
|
||||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
|
||||||
VERM_TYPE chars1 = VERM_SET_FN(c1);
|
VERM_TYPE chars1 = VERM_SET_FN(c1);
|
||||||
VERM_TYPE chars2 = VERM_SET_FN(c2);
|
VERM_TYPE chars2 = VERM_SET_FN(c2);
|
||||||
VERM_TYPE mask1 = VERM_SET_FN(m1);
|
VERM_TYPE mask1 = VERM_SET_FN(m1);
|
||||||
VERM_TYPE mask2 = VERM_SET_FN(m2);
|
VERM_TYPE mask2 = VERM_SET_FN(m2);
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf,
|
||||||
|
buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for partial match at end */
|
||||||
|
if ((buf_end[-1] & m1) == (u8)c1) {
|
||||||
|
DEBUG_PRINTF("partial!!!\n");
|
||||||
|
return buf_end - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||||
|
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
// unaligned load, then skip buf forward to the next aligned address.
|
// unaligned load, then skip buf forward to the next aligned address.
|
||||||
@ -244,6 +308,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
|||||||
|
|
||||||
/* check for partial match at end */
|
/* check for partial match at end */
|
||||||
if ((buf_end[-1] & m1) == (u8)c1) {
|
if ((buf_end[-1] & m1) == (u8)c1) {
|
||||||
|
DEBUG_PRINTF("partial!!!\n");
|
||||||
return buf_end - 1;
|
return buf_end - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -259,7 +324,20 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
|
|
||||||
|
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||||
|
|
||||||
// Handle small scans.
|
// Handle small scans.
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? rvermMiniNocase(chars, buf, buf_end, 0)
|
||||||
|
: rvermMini(chars, buf, buf_end, 0);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
return buf - 1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (buf_end - buf < VERM_BOUNDARY) {
|
if (buf_end - buf < VERM_BOUNDARY) {
|
||||||
for (buf_end--; buf_end >= buf; buf_end--) {
|
for (buf_end--; buf_end >= buf; buf_end--) {
|
||||||
char cur = (char)*buf_end;
|
char cur = (char)*buf_end;
|
||||||
@ -272,26 +350,22 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
return buf_end;
|
return buf_end;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
|
||||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||||
|
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
// unaligned load, then skip buf backward to the next aligned address.
|
// unaligned load, then skip buf backward to the next aligned address.
|
||||||
// There's some small overlap here, but we don't mind scanning it twice
|
// There's some small overlap here, but we don't mind scanning it twice
|
||||||
// if we can do it quickly, do we?
|
// if we can do it quickly, do we?
|
||||||
if (nocase) {
|
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
|
||||||
const u8 *ptr =
|
buf_end - VERM_BOUNDARY,
|
||||||
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0);
|
0)
|
||||||
if (ptr) {
|
: rvermUnalign(chars, buf_end - VERM_BOUNDARY,
|
||||||
return ptr;
|
0);
|
||||||
}
|
|
||||||
} else {
|
if (ptr) {
|
||||||
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
|
return ptr;
|
||||||
if (ptr) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buf_end -= min;
|
buf_end -= min;
|
||||||
@ -322,7 +396,20 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
|
|
||||||
|
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||||
|
|
||||||
// Handle small scans.
|
// Handle small scans.
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? rvermMiniNocase(chars, buf, buf_end, 1)
|
||||||
|
: rvermMini(chars, buf, buf_end, 1);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
return buf - 1;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (buf_end - buf < VERM_BOUNDARY) {
|
if (buf_end - buf < VERM_BOUNDARY) {
|
||||||
for (buf_end--; buf_end >= buf; buf_end--) {
|
for (buf_end--; buf_end >= buf; buf_end--) {
|
||||||
char cur = (char)*buf_end;
|
char cur = (char)*buf_end;
|
||||||
@ -335,26 +422,22 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
return buf_end;
|
return buf_end;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
|
||||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||||
|
|
||||||
if (min) {
|
if (min) {
|
||||||
// Input isn't aligned, so we need to run one iteration with an
|
// Input isn't aligned, so we need to run one iteration with an
|
||||||
// unaligned load, then skip buf backward to the next aligned address.
|
// unaligned load, then skip buf backward to the next aligned address.
|
||||||
// There's some small overlap here, but we don't mind scanning it twice
|
// There's some small overlap here, but we don't mind scanning it twice
|
||||||
// if we can do it quickly, do we?
|
// if we can do it quickly, do we?
|
||||||
if (nocase) {
|
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
|
||||||
const u8 *ptr =
|
buf_end - VERM_BOUNDARY,
|
||||||
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1);
|
1)
|
||||||
if (ptr) {
|
: rvermUnalign(chars, buf_end - VERM_BOUNDARY,
|
||||||
return ptr;
|
1);
|
||||||
}
|
|
||||||
} else {
|
if (ptr) {
|
||||||
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
|
return ptr;
|
||||||
if (ptr) {
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buf_end -= min;
|
buf_end -= min;
|
||||||
@ -383,24 +466,36 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
|||||||
DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
|
||||||
|
|
||||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
|
||||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||||
|
const u8 *ptr = nocase
|
||||||
|
? rdvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||||
|
: rdvermMini(chars1, chars2, buf, buf_end);
|
||||||
|
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for partial match at end ???
|
||||||
|
return buf - 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||||
|
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||||
if (min) {
|
if (min) {
|
||||||
// input not aligned, so we need to run one iteration with an unaligned
|
// input not aligned, so we need to run one iteration with an unaligned
|
||||||
// load, then skip buf forward to the next aligned address. There's
|
// load, then skip buf forward to the next aligned address. There's
|
||||||
// some small overlap here, but we don't mind scanning it twice if we
|
// some small overlap here, but we don't mind scanning it twice if we
|
||||||
// can do it quickly, do we?
|
// can do it quickly, do we?
|
||||||
const u8 *ptr;
|
const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
|
||||||
if (nocase) {
|
buf_end - VERM_BOUNDARY)
|
||||||
ptr = rdvermPreconditionNocase(chars1, chars2,
|
: rdvermPrecondition(chars1, chars2,
|
||||||
buf_end - VERM_BOUNDARY);
|
buf_end - VERM_BOUNDARY);
|
||||||
} else {
|
|
||||||
ptr = rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
return ptr;
|
return ptr;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,6 +32,8 @@
|
|||||||
* (users should include vermicelli.h)
|
* (users should include vermicelli.h)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if !defined(HAVE_AVX512)
|
||||||
|
|
||||||
#define VERM_BOUNDARY 16
|
#define VERM_BOUNDARY 16
|
||||||
#define VERM_TYPE m128
|
#define VERM_TYPE m128
|
||||||
#define VERM_SET_FN set16x8
|
#define VERM_SET_FN set16x8
|
||||||
@ -391,3 +393,497 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
|||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else // HAVE_AVX512
|
||||||
|
|
||||||
|
#define VERM_BOUNDARY 64
|
||||||
|
#define VERM_TYPE m512
|
||||||
|
#define VERM_SET_FN set64x8
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & mask;
|
||||||
|
}
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return buf + ctz64(z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||||
|
char negate) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars, v);
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & mask;
|
||||||
|
}
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return buf + ctz64(z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||||
|
char negate) {
|
||||||
|
assert((size_t)buf % 64 == 0);
|
||||||
|
for (; buf + 63 < buf_end; buf += 64) {
|
||||||
|
m512 data = load512(buf);
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf,
|
||||||
|
const u8 *buf_end, char negate) {
|
||||||
|
assert((size_t)buf % 64 == 0);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
|
||||||
|
for (; buf + 63 < buf_end; buf += 64) {
|
||||||
|
m512 data = load512(buf);
|
||||||
|
u64a z = eq512mask(chars, and512(casemask, data));
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) {
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return buf + ctz64(z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) {
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
u64a z = eq512mask(chars, and512(casemask, data));
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return buf + ctz64(z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf,
|
||||||
|
const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||||
|
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
|
||||||
|
const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||||
|
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
m512 v1 = and512(data, mask1);
|
||||||
|
m512 v2 = and512(data, mask2);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||||
|
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
for (; buf + 64 < buf_end; buf += 64) {
|
||||||
|
m512 data = load512(buf);
|
||||||
|
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||||
|
if (buf[63] == c1 && buf[64] == c2) {
|
||||||
|
z |= (1ULL << 63);
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert((size_t)buf % 64 == 0);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
|
||||||
|
for (; buf + 64 < buf_end; buf += 64) {
|
||||||
|
m512 data = load512(buf);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||||
|
if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) {
|
||||||
|
z |= (1ULL << 63);
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2,
|
||||||
|
m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1,
|
||||||
|
u8 m2, const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert((size_t)buf % 64 == 0);
|
||||||
|
|
||||||
|
for (; buf + 64 < buf_end; buf += 64) {
|
||||||
|
m512 data = load512(buf);
|
||||||
|
m512 v1 = and512(data, mask1);
|
||||||
|
m512 v2 = and512(data, mask2);
|
||||||
|
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||||
|
|
||||||
|
if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) {
|
||||||
|
z |= (1ULL << 63);
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||||
|
|
||||||
|
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
|
||||||
|
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||||
|
|
||||||
|
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2,
|
||||||
|
m512 mask1, m512 mask2, const u8 *buf) {
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
m512 v1 = and512(data, mask1);
|
||||||
|
m512 v2 = and512(data, mask2);
|
||||||
|
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||||
|
|
||||||
|
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||||
|
if (unlikely(z)) {
|
||||||
|
u64a pos = ctz64(z);
|
||||||
|
return buf + pos;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *lastMatchOffset(const u8 *buf_end, u64a z) {
|
||||||
|
assert(z);
|
||||||
|
return buf_end - 64 + 63 - clz64(z);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & mask;
|
||||||
|
}
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||||
|
char negate) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars, v);
|
||||||
|
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & mask;
|
||||||
|
}
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||||
|
char negate) {
|
||||||
|
assert((size_t)buf_end % 64 == 0);
|
||||||
|
for (; buf + 63 < buf_end; buf_end -= 64) {
|
||||||
|
m512 data = load512(buf_end - 64);
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf_end, z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf,
|
||||||
|
const u8 *buf_end, char negate) {
|
||||||
|
assert((size_t)buf_end % 64 == 0);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
|
||||||
|
for (; buf + 63 < buf_end; buf_end -= 64) {
|
||||||
|
m512 data = load512(buf_end - 64);
|
||||||
|
u64a z = eq512mask(chars, and512(casemask, data));
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf_end, z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) {
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
u64a z = eq512mask(chars, data);
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) {
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 data = loadu512(buf); // unaligned
|
||||||
|
u64a z = eq512mask(chars, and512(casemask, data));
|
||||||
|
if (negate) {
|
||||||
|
z = ~z & ~0ULL;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf,
|
||||||
|
const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||||
|
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
|
||||||
|
const u8 *buf_end) {
|
||||||
|
uintptr_t len = buf_end - buf;
|
||||||
|
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||||
|
m512 data = loadu_maskz_m512(mask, buf);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
|
||||||
|
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||||
|
|
||||||
|
z &= mask;
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert((size_t)buf_end % 64 == 0);
|
||||||
|
|
||||||
|
for (; buf + 64 < buf_end; buf_end -= 64) {
|
||||||
|
m512 data = load512(buf_end - 64);
|
||||||
|
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||||
|
if (buf_end[-65] == c1 && buf_end[-64] == c2) {
|
||||||
|
z |= 1;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf_end, z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert((size_t)buf_end % 64 == 0);
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
|
||||||
|
for (; buf + 64 < buf_end; buf_end -= 64) {
|
||||||
|
m512 data = load512(buf_end - 64);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||||
|
if ((buf_end[-65] & CASE_CLEAR) == c1
|
||||||
|
&& (buf_end[-64] & CASE_CLEAR) == c2) {
|
||||||
|
z |= 1;
|
||||||
|
}
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf_end, z);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
|
||||||
|
m512 data = loadu512(buf);
|
||||||
|
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||||
|
|
||||||
|
// no fixup of the boundary required - the aligned run will pick it up
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns NULL if not found
|
||||||
|
static really_inline
|
||||||
|
const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
|
||||||
|
// due to laziness, nonalphas and nocase having interesting behaviour
|
||||||
|
m512 casemask = set64x8(CASE_CLEAR);
|
||||||
|
m512 data = loadu512(buf);
|
||||||
|
m512 v = and512(casemask, data);
|
||||||
|
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||||
|
// no fixup of the boundary required - the aligned run will pick it up
|
||||||
|
if (unlikely(z)) {
|
||||||
|
return lastMatchOffset(buf + 64, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAVE_AVX512
|
||||||
|
@ -205,7 +205,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
|
|||||||
|
|
||||||
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
|
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
|
||||||
|
|
||||||
if (!searchForward(g, reach, colours, s, w)) {
|
if (!searchForward(g, reach, colours, succ_v, w)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ void findPaths(const NGHolder &g, NFAVertex v,
|
|||||||
/* path has looped back to one of the active+boring acceleration
|
/* path has looped back to one of the active+boring acceleration
|
||||||
* states. We can ignore this path if we have sufficient back-
|
* states. We can ignore this path if we have sufficient back-
|
||||||
* off. */
|
* off. */
|
||||||
paths->push_back({CharReach()});
|
paths->push_back({cr});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "rose_build_impl.h"
|
#include "rose_build_impl.h"
|
||||||
#include "nfa/castlecompile.h"
|
#include "nfa/castlecompile.h"
|
||||||
#include "nfagraph/ng_repeat.h"
|
#include "nfagraph/ng_repeat.h"
|
||||||
|
#include "smallwrite/smallwrite_build.h"
|
||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
#include "util/boundary_reports.h"
|
#include "util/boundary_reports.h"
|
||||||
#include "util/make_unique.h"
|
#include "util/make_unique.h"
|
||||||
@ -159,6 +160,10 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const auto &report_id : build.smwr.all_reports()) {
|
||||||
|
live_reports.insert(report_id);
|
||||||
|
}
|
||||||
|
|
||||||
// Collect live reports from boundary reports.
|
// Collect live reports from boundary reports.
|
||||||
insert(&live_reports, build.boundary.report_at_0);
|
insert(&live_reports, build.boundary.report_at_0);
|
||||||
insert(&live_reports, build.boundary.report_at_0_eod);
|
insert(&live_reports, build.boundary.report_at_0_eod);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -57,6 +57,10 @@
|
|||||||
#define HAVE_AVX512
|
#define HAVE_AVX512
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX512VBMI__)
|
||||||
|
#define HAVE_AVX512VBMI
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
|
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
|
||||||
*/
|
*/
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -150,6 +150,14 @@ static really_inline u32 movd(const m128 in) {
|
|||||||
return _mm_cvtsi128_si32(in);
|
return _mm_cvtsi128_si32(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
static really_inline u32 movd512(const m512 in) {
|
||||||
|
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
||||||
|
// so we use 2-step convertions to work around.
|
||||||
|
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline u64a movq(const m128 in) {
|
static really_inline u64a movq(const m128 in) {
|
||||||
#if defined(ARCH_X86_64)
|
#if defined(ARCH_X86_64)
|
||||||
return _mm_cvtsi128_si64(in);
|
return _mm_cvtsi128_si64(in);
|
||||||
@ -318,6 +326,12 @@ static really_inline
|
|||||||
m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) {
|
m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) {
|
||||||
return _mm512_maskz_shuffle_epi8(k, a, b);
|
return _mm512_maskz_shuffle_epi8(k, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define vpermb512(idx, a) _mm512_permutexvar_epi8(idx, a)
|
||||||
|
#define maskz_vpermb512(k, idx, a) _mm512_maskz_permutexvar_epi8(k, idx, a)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
|
45
tools/fuzz/aristocrats.py
Executable file
45
tools/fuzz/aristocrats.py
Executable file
@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from random import choice,randint
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
def generateRandomOptions():
|
||||||
|
if options.hybrid:
|
||||||
|
allflags = "smiH8W"
|
||||||
|
else:
|
||||||
|
# Maintain an ordering for consistency.
|
||||||
|
allflags = "smiHV8WLP"
|
||||||
|
flags = ""
|
||||||
|
for f in allflags:
|
||||||
|
flags += choice(['', f])
|
||||||
|
return flags
|
||||||
|
|
||||||
|
parser = OptionParser()
|
||||||
|
parser.add_option("-d", "--depth",
|
||||||
|
action="store", type="int", dest="depth", default=200,
|
||||||
|
help="Depth of generation (akin to maximum length)")
|
||||||
|
parser.add_option("-c", "--count",
|
||||||
|
action="store", type="int", dest="count", default=1000,
|
||||||
|
help="Number of expressions to generate")
|
||||||
|
parser.add_option("-f", "--full",
|
||||||
|
action="store_true", dest="full", default=False,
|
||||||
|
help="Use a full character set including unprintables")
|
||||||
|
parser.add_option("-H", "--hybrid",
|
||||||
|
action="store_true", dest="hybrid",
|
||||||
|
help="Generate random flags for hybrid mode")
|
||||||
|
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
if len(args) != 0:
|
||||||
|
parser.error("incorrect number of arguments")
|
||||||
|
|
||||||
|
if (options.full):
|
||||||
|
crange = range(0,256)
|
||||||
|
crange.remove(ord('\n'))
|
||||||
|
else:
|
||||||
|
crange = range(32, 127)
|
||||||
|
|
||||||
|
for i in xrange(0, options.count):
|
||||||
|
len = randint(1, options.depth)
|
||||||
|
s = [ chr(choice(crange)) for x in xrange(len) ]
|
||||||
|
line = str(i) + ":/" + "".join(s) + "/" + generateRandomOptions()
|
||||||
|
print line
|
39
tools/fuzz/completocrats.py
Executable file
39
tools/fuzz/completocrats.py
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from itertools import *
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"
|
||||||
|
|
||||||
|
parser = OptionParser()
|
||||||
|
parser.add_option("-d", "--depth",
|
||||||
|
action="store", type="int", dest="depth", default=200,
|
||||||
|
help="Depth of generation (akin to maximum length)")
|
||||||
|
|
||||||
|
parser.add_option("-f", "--full",
|
||||||
|
action="store_true", dest="full", default=False,
|
||||||
|
help="Use a full character set including unprintables")
|
||||||
|
|
||||||
|
parser.add_option("-l", "--limited",
|
||||||
|
action="store_true", dest="limited", default=False,
|
||||||
|
help="Use a very limited character set: just " + LIMITED_ALPHABET)
|
||||||
|
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
if len(args) != 0:
|
||||||
|
parser.error("incorrect number of arguments")
|
||||||
|
|
||||||
|
if (options.full):
|
||||||
|
crange = range(0,256)
|
||||||
|
crange.remove(ord('\n'))
|
||||||
|
elif (options.limited):
|
||||||
|
crange = [ ord(c) for c in LIMITED_ALPHABET ]
|
||||||
|
else:
|
||||||
|
crange = range(32, 127)
|
||||||
|
|
||||||
|
srange = [ chr(c) for c in crange ]
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
for x in product(srange, repeat = options.depth):
|
||||||
|
line = str(i) + ":/" + "".join(x) + "/"
|
||||||
|
print line
|
||||||
|
i += 1
|
259
tools/fuzz/heuristocrats.py
Executable file
259
tools/fuzz/heuristocrats.py
Executable file
@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from optparse import OptionParser
|
||||||
|
from random import *
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# return a random non-degenerate (ie not [10]) partition of nChildren
|
||||||
|
def chooseLeafWidth(nChildren):
|
||||||
|
width = randint(1, 5)
|
||||||
|
width = min(width, nChildren-1)
|
||||||
|
s = sample(range(1, nChildren), width)
|
||||||
|
s.sort()
|
||||||
|
s = [0] + s + [nChildren]
|
||||||
|
v = [ s[i+1] - s[i] for i in range(0, len(s)-1) if s[i+1] != s[i] ]
|
||||||
|
return v
|
||||||
|
|
||||||
|
def generateConcat(nChildren, atTopIgnored):
|
||||||
|
v = [ generateRE(w, atTop = False) for w in chooseLeafWidth(nChildren) ]
|
||||||
|
v = [ r for r in v if r != '' ]
|
||||||
|
return string.join(v, "")
|
||||||
|
|
||||||
|
def makeGroup(s):
|
||||||
|
# Parenthesise either in normal parens or a non-capturing group.
|
||||||
|
if randint(0, 1) == 0:
|
||||||
|
return "(" + s + ")"
|
||||||
|
else:
|
||||||
|
return "(?:" + s + ")"
|
||||||
|
|
||||||
|
def generateAlt(nChildren, atTop):
|
||||||
|
v = [ generateRE(w, [generateAlt], atTop) for w in chooseLeafWidth(nChildren) ]
|
||||||
|
v = [ r for r in v if r != '' ]
|
||||||
|
s = string.join(v, "|")
|
||||||
|
if len(v) == 1:
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return makeGroup(s)
|
||||||
|
|
||||||
|
def generateQuant(nChildren, atTopIgnored):
|
||||||
|
lo = int(round(expovariate(0.2)))
|
||||||
|
hi = lo + int(round(expovariate(0.2)))
|
||||||
|
q = choice(["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)])
|
||||||
|
r = generateRE(nChildren, [generateQuant], atTop = False)
|
||||||
|
if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
|
||||||
|
return r + q
|
||||||
|
else:
|
||||||
|
return makeGroup(r) + q
|
||||||
|
|
||||||
|
def generateChar(nChildren, atTop = False):
|
||||||
|
return chr(choice(alphabet))
|
||||||
|
|
||||||
|
def generateNocaseChar(nChildren, atTop = False):
|
||||||
|
'Either generate an uppercase char from the alphabet or a nocase class [Aa]'
|
||||||
|
c = generateChar(nChildren, atTop)
|
||||||
|
if random() < 0.5:
|
||||||
|
return c.upper()
|
||||||
|
else:
|
||||||
|
return '[' + c.upper() + c.lower() + ']'
|
||||||
|
|
||||||
|
def generateDot(nChildren, atTop = False):
|
||||||
|
return "."
|
||||||
|
|
||||||
|
def generateBoundary(nChildren, atTop = False):
|
||||||
|
# \b, \B in parens so that we can repeat them and still be accepted by
|
||||||
|
# libpcre
|
||||||
|
return makeGroup('\\' + choice('bB'))
|
||||||
|
|
||||||
|
def generateCharClass(nChildren, atTop = False):
|
||||||
|
s = ""
|
||||||
|
if random() < 0.2:
|
||||||
|
s = "^"
|
||||||
|
nChars = randint(1,4)
|
||||||
|
else:
|
||||||
|
nChars = randint(2,4)
|
||||||
|
|
||||||
|
for i in xrange(nChars):
|
||||||
|
s += generateChar(1)
|
||||||
|
return "[" + s + "]"
|
||||||
|
|
||||||
|
def generateOptionsFlags(nChildren, atTop = False):
|
||||||
|
allflags = "smix"
|
||||||
|
pos_flags = sample(allflags, randint(1, len(allflags)))
|
||||||
|
neg_flags = sample(allflags, randint(1, len(allflags)))
|
||||||
|
s = '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'
|
||||||
|
return s
|
||||||
|
|
||||||
|
def generateLogicalId(nChildren, atTop = False):
|
||||||
|
return str(randint(0, options.count))
|
||||||
|
|
||||||
|
def makeLogicalGroup(s):
|
||||||
|
return "(" + s + ")"
|
||||||
|
|
||||||
|
def generateLogicalNot(nChildren, atTop):
|
||||||
|
r = generateCombination(nChildren, [generateLogicalNot], atTop = False)
|
||||||
|
return "!" + makeLogicalGroup(r)
|
||||||
|
|
||||||
|
def generateLogicalAnd(nChildren, atTop):
|
||||||
|
v = [ generateCombination(w, [generateLogicalAnd], atTop = False) for w in chooseLeafWidth(nChildren) ]
|
||||||
|
v = [ r for r in v if r != '' ]
|
||||||
|
s = string.join(v, "&")
|
||||||
|
if len(v) == 1:
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return makeLogicalGroup(s)
|
||||||
|
|
||||||
|
def generateLogicalOr(nChildren, atTop):
|
||||||
|
v = [ generateCombination(w, [generateLogicalOr], atTop = False) for w in chooseLeafWidth(nChildren) ]
|
||||||
|
v = [ r for r in v if r != '' ]
|
||||||
|
s = string.join(v, "|")
|
||||||
|
if len(v) == 1:
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return makeLogicalGroup(s)
|
||||||
|
|
||||||
|
weightsTree = [
|
||||||
|
(generateConcat, 10),
|
||||||
|
(generateAlt, 3),
|
||||||
|
(generateQuant, 2),
|
||||||
|
]
|
||||||
|
|
||||||
|
weightsLeaf = [
|
||||||
|
(generateChar, 30),
|
||||||
|
(generateCharClass, 5),
|
||||||
|
(generateDot, 5),
|
||||||
|
(generateNocaseChar, 2),
|
||||||
|
(generateBoundary, 1),
|
||||||
|
(generateOptionsFlags, 1)
|
||||||
|
]
|
||||||
|
|
||||||
|
weightsLogicalTree = [
|
||||||
|
(generateLogicalNot, 1),
|
||||||
|
(generateLogicalAnd, 5),
|
||||||
|
(generateLogicalOr, 5),
|
||||||
|
]
|
||||||
|
|
||||||
|
weightsLogicalLeaf = [
|
||||||
|
(generateLogicalId, 1),
|
||||||
|
]
|
||||||
|
|
||||||
|
def genChoices(weighted):
|
||||||
|
r = []
|
||||||
|
for (f, w) in weighted:
|
||||||
|
r = r + [f] * w
|
||||||
|
return r
|
||||||
|
|
||||||
|
choicesTree = genChoices(weightsTree)
|
||||||
|
choicesLeaf = genChoices(weightsLeaf)
|
||||||
|
choicesLogicalTree = genChoices(weightsLogicalTree)
|
||||||
|
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
|
||||||
|
|
||||||
|
weightsAnchor = [
|
||||||
|
("\\A%s\\Z", 1),
|
||||||
|
("\\A%s\\z", 1),
|
||||||
|
("\\A%s", 4),
|
||||||
|
("%s\\Z", 2),
|
||||||
|
("%s\\z", 2),
|
||||||
|
("^%s$", 1),
|
||||||
|
("^%s", 4),
|
||||||
|
("%s$", 2),
|
||||||
|
("%s", 25)
|
||||||
|
]
|
||||||
|
choicesAnchor = genChoices(weightsAnchor)
|
||||||
|
|
||||||
|
def generateRE(nChildren, suppressList = [], atTop = False):
|
||||||
|
if atTop:
|
||||||
|
anchorSubstituteString = choice(choicesAnchor)
|
||||||
|
else:
|
||||||
|
anchorSubstituteString = "%s"
|
||||||
|
|
||||||
|
nChildren -= 1
|
||||||
|
if nChildren == 0:
|
||||||
|
res = choice(choicesLeaf)(nChildren, atTop)
|
||||||
|
else:
|
||||||
|
c = [ ch for ch in choicesTree if ch not in suppressList ]
|
||||||
|
res = choice(c)(nChildren, atTop)
|
||||||
|
|
||||||
|
return anchorSubstituteString % res
|
||||||
|
|
||||||
|
def generateCombination(nChildren, suppressList = [], atTop = False):
|
||||||
|
nChildren -= 1
|
||||||
|
if nChildren == 0:
|
||||||
|
res = choice(choicesLogicalLeaf)(nChildren, atTop)
|
||||||
|
else:
|
||||||
|
c = [ ch for ch in choicesLogicalTree if ch not in suppressList ]
|
||||||
|
res = choice(c)(nChildren, atTop)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def generateRandomOptions():
|
||||||
|
if options.hybrid:
|
||||||
|
allflags = "smiH8W"
|
||||||
|
else:
|
||||||
|
# Maintain an ordering for consistency.
|
||||||
|
allflags = "smiHV8WLP"
|
||||||
|
flags = ""
|
||||||
|
for f in allflags:
|
||||||
|
flags += choice(['', f])
|
||||||
|
if options.logical:
|
||||||
|
flags += choice(['', 'Q'])
|
||||||
|
return flags
|
||||||
|
|
||||||
|
def generateRandomExtParam(depth, extparam):
|
||||||
|
if not extparam:
|
||||||
|
return ""
|
||||||
|
params = []
|
||||||
|
if choice((False, True)):
|
||||||
|
params.append("min_length=%u" % randint(1, depth))
|
||||||
|
if choice((False, True)):
|
||||||
|
params.append("min_offset=%u" % randint(1, depth))
|
||||||
|
if choice((False, True)):
|
||||||
|
params.append("max_offset=%u" % randint(1, depth*3))
|
||||||
|
if choice((False, True)):
|
||||||
|
dist = randint(1, 3)
|
||||||
|
if choice((False, True)):
|
||||||
|
params.append("edit_distance=%u" % dist)
|
||||||
|
else:
|
||||||
|
params.append("hamming_distance=%u" % dist)
|
||||||
|
if params:
|
||||||
|
return "{" + ",".join(params) + "}"
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
parser = OptionParser()
|
||||||
|
parser.add_option("-d", "--depth",
|
||||||
|
action="store", type="int", dest="depth", default=200,
|
||||||
|
help="Depth of generation (akin to maximum length)")
|
||||||
|
parser.add_option("-c", "--count",
|
||||||
|
action="store", type="int", dest="count", default=1000,
|
||||||
|
help="Number of expressions to generate")
|
||||||
|
parser.add_option("-a", "--alphabet",
|
||||||
|
action="store", type="int", dest="alphabet", default=26,
|
||||||
|
help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
|
||||||
|
parser.add_option("-i", "--nocase",
|
||||||
|
action="store_true", dest="nocase",
|
||||||
|
help="Use a caseless alphabet for character generation")
|
||||||
|
parser.add_option("-x", "--extparam",
|
||||||
|
action="store_true", dest="extparam",
|
||||||
|
help="Generate random extended parameters")
|
||||||
|
parser.add_option("-l", "--logical",
|
||||||
|
action="store_true", dest="logical",
|
||||||
|
help="Generate logical combination expressions")
|
||||||
|
parser.add_option("-H", "--hybrid",
|
||||||
|
action="store_true", dest="hybrid",
|
||||||
|
help="Generate random flags for hybrid mode")
|
||||||
|
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
if len(args) != 0:
|
||||||
|
parser.error("incorrect number of arguments")
|
||||||
|
|
||||||
|
alphabet = range(ord('a'), ord('a') + options.alphabet)
|
||||||
|
if options.nocase:
|
||||||
|
alphabet += range(ord('A'), ord('A') + options.alphabet)
|
||||||
|
|
||||||
|
for i in xrange(0, options.count):
|
||||||
|
print "%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop = True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam))
|
||||||
|
|
||||||
|
if options.logical:
|
||||||
|
for i in xrange(options.count, options.count + 3000):
|
||||||
|
print "%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop = True))
|
9
tools/fuzz/limited_dict.txt
Normal file
9
tools/fuzz/limited_dict.txt
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
hatstand
|
||||||
|
teakettle
|
||||||
|
badgerbrush
|
||||||
|
mnemosyne
|
||||||
|
rapscallion
|
||||||
|
acerbic
|
||||||
|
blackhat
|
||||||
|
rufous
|
||||||
|
echolalia
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2019, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -207,7 +207,9 @@ void usage(const char *error) {
|
|||||||
printf(" -P Benchmark using PCRE (if supported).\n");
|
printf(" -P Benchmark using PCRE (if supported).\n");
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) || defined(_WIN32)
|
#if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) || defined(_WIN32)
|
||||||
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
|
printf(" -T CPU,CPU,... or -T CPU-CPU\n");
|
||||||
|
printf(" Benchmark with threads on specified CPUs or CPU"
|
||||||
|
" range.\n");
|
||||||
#endif
|
#endif
|
||||||
printf(" -i DIR Don't compile, load from files in DIR"
|
printf(" -i DIR Don't compile, load from files in DIR"
|
||||||
" instead.\n");
|
" instead.\n");
|
||||||
@ -354,7 +356,8 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
|||||||
case 'T':
|
case 'T':
|
||||||
if (!strToList(optarg, threadCores)) {
|
if (!strToList(optarg, threadCores)) {
|
||||||
usage("Couldn't parse argument to -T flag, should be"
|
usage("Couldn't parse argument to -T flag, should be"
|
||||||
" a list of positive integers.");
|
" a list of positive integers or 2 integers"
|
||||||
|
" connected with hyphen.");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -216,8 +216,9 @@ def enchunk_pcap(pcapFN, sqliteFN):
|
|||||||
#
|
#
|
||||||
# Read in the contents of the pcap file, adding stream segments as found
|
# Read in the contents of the pcap file, adding stream segments as found
|
||||||
#
|
#
|
||||||
pkt_cnt = 0;
|
pkt_cnt = 0
|
||||||
ip_pkt_cnt = 0;
|
ip_pkt_cnt = 0
|
||||||
|
ip_pkt_off = 0
|
||||||
unsupported_ip_protocol_cnt = 0
|
unsupported_ip_protocol_cnt = 0
|
||||||
pcap_ref = pcap.pcap(pcapFN)
|
pcap_ref = pcap.pcap(pcapFN)
|
||||||
done = False
|
done = False
|
||||||
@ -231,16 +232,24 @@ def enchunk_pcap(pcapFN, sqliteFN):
|
|||||||
pkt_cnt += 1
|
pkt_cnt += 1
|
||||||
|
|
||||||
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
|
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
|
||||||
if linkLayerType != ETHERTYPE_IP:
|
#
|
||||||
#
|
# We're only interested in IP packets
|
||||||
# We're only interested in IP packets
|
#
|
||||||
#
|
if linkLayerType == ETHERTYPE_VLAN:
|
||||||
|
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff + 2):(pcap_ref.dloff + 4)])[0]
|
||||||
|
if linkLayerType != ETHERTYPE_IP:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
ip_pkt_off = pcap_ref.dloff + 4
|
||||||
|
elif linkLayerType == ETHERTYPE_IP:
|
||||||
|
ip_pkt_off = pcap_ref.dloff
|
||||||
|
else:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ip_pkt_cnt += 1
|
ip_pkt_cnt += 1
|
||||||
|
|
||||||
ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0]
|
ip_pkt_total_len = struct.unpack('!H', packet[ip_pkt_off + 2: ip_pkt_off + 4])[0]
|
||||||
ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len]
|
ip_pkt = packet[ip_pkt_off:ip_pkt_off + ip_pkt_total_len]
|
||||||
pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
|
pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
|
||||||
|
|
||||||
if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
|
if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
|
||||||
|
@ -241,6 +241,13 @@ void addCallout(string &re) {
|
|||||||
re.append("\\E)(?C)");
|
re.append("\\E)(?C)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool isUtf8(const CompiledPcre &compiled) {
|
||||||
|
unsigned long int options = 0;
|
||||||
|
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
||||||
|
return options & PCRE_UTF8;
|
||||||
|
}
|
||||||
|
|
||||||
unique_ptr<CompiledPcre>
|
unique_ptr<CompiledPcre>
|
||||||
GroundTruth::compile(unsigned id, bool no_callouts) {
|
GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||||
bool highlander = false;
|
bool highlander = false;
|
||||||
@ -380,6 +387,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
|||||||
throw PcreCompileFailure(oss.str());
|
throw PcreCompileFailure(oss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
compiled->utf8 |= isUtf8(*compiled);
|
||||||
|
|
||||||
return compiled;
|
return compiled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -451,13 +460,6 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
bool isUtf8(const CompiledPcre &compiled) {
|
|
||||||
unsigned long int options = 0;
|
|
||||||
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
|
||||||
return options & PCRE_UTF8;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
|
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
|
||||||
assert(ret > 0);
|
assert(ret > 0);
|
||||||
|
@ -40,12 +40,12 @@ using namespace std;
|
|||||||
using namespace ue2;
|
using namespace ue2;
|
||||||
|
|
||||||
struct SimpleV {
|
struct SimpleV {
|
||||||
size_t index;
|
size_t index = 0;
|
||||||
string test_v = "SimpleV";
|
string test_v = "SimpleV";
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SimpleE {
|
struct SimpleE {
|
||||||
size_t index;
|
size_t index = 0;
|
||||||
string test_e = "SimpleE";
|
string test_e = "SimpleE";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -146,9 +146,8 @@ bool isIgnorable(const std::string &f) {
|
|||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||||
// Is our input path a file or a directory?
|
// Is our input path a file or a directory?
|
||||||
int fd = open(inPath.c_str(), O_RDONLY);
|
|
||||||
struct stat st;
|
struct stat st;
|
||||||
if (fstat(fd, &st) != 0) {
|
if (stat(inPath.c_str(), &st) != 0) {
|
||||||
cerr << "Can't stat path: '" << inPath << "'" << endl;
|
cerr << "Can't stat path: '" << inPath << "'" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -161,7 +160,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
} else if (S_ISDIR(st.st_mode)) {
|
} else if (S_ISDIR(st.st_mode)) {
|
||||||
DIR *d = fdopendir(fd);
|
DIR *d = opendir(inPath.c_str());
|
||||||
if (d == nullptr) {
|
if (d == nullptr) {
|
||||||
cerr << "Can't open directory: '" << inPath << "'" << endl;
|
cerr << "Can't open directory: '" << inPath << "'" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
@ -192,10 +191,11 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
|||||||
}
|
}
|
||||||
(void)closedir(d);
|
(void)closedir(d);
|
||||||
} else {
|
} else {
|
||||||
cerr << "Can't stat path: '" << inPath << "'" << endl;
|
cerr << "Unsupported file type "
|
||||||
|
<< hex << showbase << (st.st_mode & S_IFMT)
|
||||||
|
<< " for path: '" << inPath << "'" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
(void)close(fd);
|
|
||||||
}
|
}
|
||||||
#else // windows TODO: improve
|
#else // windows TODO: improve
|
||||||
void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -54,8 +54,8 @@ inline bool fromString(const std::string &s, T& val)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// read in a comma-separated set of values: very simple impl, not for
|
// read in a comma-separated or hyphen-connected set of values: very simple
|
||||||
// external consumption
|
// impl, not for external consumption
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline bool strToList(const std::string &s, std::vector<T>& out)
|
inline bool strToList(const std::string &s, std::vector<T>& out)
|
||||||
{
|
{
|
||||||
@ -68,7 +68,17 @@ inline bool strToList(const std::string &s, std::vector<T>& out)
|
|||||||
}
|
}
|
||||||
|
|
||||||
out.push_back(val);
|
out.push_back(val);
|
||||||
} while (i.get(c) && c == ',');
|
|
||||||
|
i.get(c);
|
||||||
|
if (c == '-') {
|
||||||
|
T val_end;
|
||||||
|
i >> val_end;
|
||||||
|
while (val < val_end) {
|
||||||
|
out.push_back(++val);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (c == ',');
|
||||||
|
|
||||||
return !out.empty();
|
return !out.empty();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user