Merge branch 'github_develop' into github_master

This commit is contained in:
Hong, Yang A 2020-05-25 14:30:57 +00:00
commit c00683d739
31 changed files with 1453 additions and 114 deletions

View File

@ -2,6 +2,29 @@
This is a list of notable changes to Hyperscan, in reverse chronological order. This is a list of notable changes to Hyperscan, in reverse chronological order.
## [5.3.0] 2020-05-15
- Improvement on literal matcher "Teddy" performance, including support for
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
VBMI).
- Improvement on single-byte/two-byte matching performance, including support
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
- hsbench: add hyphen support for -T option.
- tools/fuzz: add test scripts for synthetic pattern generation.
- Bugfix for acceleration path analysis in LimEx NFA.
- Bugfix for duplicate matches for Small-write engine.
- Bugfix for UTF8 checking problem for hscollider.
- Bugfix for issue #205: avoid crash of `hs_compile_lit_multi()` with clang and
ASAN.
- Bugfix for issue #211: fix error in `db_check_platform()` function.
- Bugfix for issue #217: fix cmake parsing issue of CPU arch for non-English
locale.
- Bugfix for issue #228: avoid undefined behavior when calling `close()` after
`fdopendir()` in `loadExpressions()`.
- Bugfix for issue #239: fix hyperscan compile issue under gcc-10.
- Add VLAN packets processing capability in pcap analysis script. (#214)
- Avoid extra convert instruction for "Noodle". (#221)
- Add Hyperscan version marcro in `hs.h`. (#222)
## [5.2.1] 2019-10-13 ## [5.2.1] 2019-10-13
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in - Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
release mode. release mode.

View File

@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
project (hyperscan C CXX) project (hyperscan C CXX)
set (HS_MAJOR_VERSION 5) set (HS_MAJOR_VERSION 5)
set (HS_MINOR_VERSION 2) set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 1) set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@ -187,9 +187,9 @@ else()
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT) OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS) string(FIND "${_GCC_OUTPUT}" "march" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT) string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}") GNUCC_ARCH "${_GCC_OUTPUT}")
# test the parsed flag # test the parsed flag
@ -326,7 +326,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
set (FAT_RUNTIME_REQUISITES TRUE) set (FAT_RUNTIME_REQUISITES TRUE)
endif() endif()
endif() endif()
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF) CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif () endif ()
include (${CMAKE_MODULE_PATH}/arch.cmake) include (${CMAKE_MODULE_PATH}/arch.cmake)
@ -340,7 +340,7 @@ if (NOT WIN32)
set(C_FLAGS_TO_CHECK set(C_FLAGS_TO_CHECK
# Variable length arrays are way bad, most especially at run time # Variable length arrays are way bad, most especially at run time
"-Wvla" "-Wvla"
# Pointer arith on void pointers is doing it wong. # Pointer arith on void pointers is doing it wrong.
"-Wpointer-arith" "-Wpointer-arith"
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes # Build our C code with -Wstrict-prototypes -Wmissing-prototypes
"-Wstrict-prototypes" "-Wstrict-prototypes"
@ -383,7 +383,7 @@ if (CC_PAREN_EQUALITY)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
endif() endif()
# clang compains about unused const vars in our Ragel-generated code. # clang complains about unused const vars in our Ragel-generated code.
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR) CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
if (CXX_UNUSED_CONST_VAR) if (CXX_UNUSED_CONST_VAR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
@ -418,6 +418,12 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
# gcc5 complains about this # gcc5 complains about this
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
# gcc 10 complains about this
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
if(CC_STRINGOP_OVERFLOW)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
endif()
endif() endif()
include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) include_directories(SYSTEM ${Boost_INCLUDE_DIRS})

View File

@ -58,6 +58,18 @@ int main(){
(void)_mm512_abs_epi8(z); (void)_mm512_abs_epi8(z);
}" HAVE_AVX512) }" HAVE_AVX512)
# and now for AVX512VBMI
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
#if !defined(__AVX512VBMI__)
#error no avx512vbmi
#endif
int main(){
__m512i a = _mm512_set1_epi8(0xFF);
__m512i idx = _mm512_set_epi64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
(void)_mm512_permutexvar_epi8(idx, a);
}" HAVE_AVX512VBMI)
if (FAT_RUNTIME) if (FAT_RUNTIME)
if (NOT HAVE_SSSE3) if (NOT HAVE_SSSE3)
message(FATAL_ERROR "SSSE3 support required to build fat runtime") message(FATAL_ERROR "SSSE3 support required to build fat runtime")

View File

@ -55,7 +55,7 @@ Hyperscan provides support for targeting a database at a particular CPU
platform; see :ref:`instr_specialization` for details. platform; see :ref:`instr_specialization` for details.
===================== =====================
Compile Pure Literals Compile Pure Literals
===================== =====================
Pure literal is a special case of regular expression. A character sequence is Pure literal is a special case of regular expression. A character sequence is
@ -75,12 +75,12 @@ characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
While in pure literal case, all these meta characters lost extra meanings While in pure literal case, all these meta characters lost extra meanings
expect for that they are just common ASCII codes. expect for that they are just common ASCII codes.
Hyperscan is initially designed to process common regualr expressions. It is Hyperscan is initially designed to process common regular expressions. It is
hence embedded with a complex parser to do comprehensive regular grammer hence embedded with a complex parser to do comprehensive regular grammer
interpretion. Particularly, the identification of above meta characters is the interpretion. Particularly, the identification of above meta characters is the
basic step for the interpretion of far more complex regular grammers. basic step for the interpretion of far more complex regular grammers.
However in real cases, patterns may not always be regualr expressions. They However in real cases, patterns may not always be regular expressions. They
could just be pure literals. Problem will come if the pure literals contain could just be pure literals. Problem will come if the pure literals contain
regular meta characters. Supposing fed directly into traditional Hyperscan regular meta characters. Supposing fed directly into traditional Hyperscan
compile API, all these meta characters will be interpreted in predefined ways, compile API, all these meta characters will be interpreted in predefined ways,
@ -98,7 +98,7 @@ In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns
#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a #. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a
pattern database. All of the supplied patterns will be scanned for pattern database. All of the supplied patterns will be scanned for
concurrently at scan time, with user-supplied identifiers returned when they concurrently at scan time, with user-supplied identifiers returned when they
match. match.
These 2 APIs are designed for use cases where all patterns contained in the These 2 APIs are designed for use cases where all patterns contained in the
target rule set are pure literals. Users can pass the initial pure literal target rule set are pure literals. Users can pass the initial pure literal
@ -110,8 +110,8 @@ Hyperscan needs to locate the end position of the input expression via clearly
knowing each literal's length, not by simply identifying character ``\0`` of a knowing each literal's length, not by simply identifying character ``\0`` of a
string. string.
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`, Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`. :c:member:`HS_FLAG_SOM_LEFTMOST`.
.. note:: We don't support literal compilation API with :ref:`extparam`. And .. note:: We don't support literal compilation API with :ref:`extparam`. And
for runtime implementation, traditional runtime APIs can still be for runtime implementation, traditional runtime APIs can still be

View File

@ -260,7 +260,7 @@ instead of potentially executing illegal instructions. The API function
:c:func:`hs_valid_platform` can be used by application writers to determine if :c:func:`hs_valid_platform` can be used by application writers to determine if
the current platform is supported by Hyperscan. the current platform is supported by Hyperscan.
At of this release, the variants of the runtime that are built, and the CPU As of this release, the variants of the runtime that are built, and the CPU
capability that is required, are the following: capability that is required, are the following:
+----------+-------------------------------+---------------------------+ +----------+-------------------------------+---------------------------+

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2019, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -125,7 +125,7 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false, : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) { SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
// For pure literal expression, below 'HS_FLAG_'s are unuseful: // For pure literal expression, below 'HS_FLAG_'s are unuseful:
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET // DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET/MULTILINE
if (flags & ~HS_FLAG_ALL) { if (flags & ~HS_FLAG_ALL) {
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags); DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
@ -402,19 +402,18 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
} }
// Ensure that our pattern isn't too long (in characters). // Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) { if (expLength > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit."); throw CompileError("Pattern length exceeds limit.");
} }
// filter out flags not supported by pure literal API. // filter out flags not supported by pure literal API.
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION | HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
HS_FLAG_QUIET; HS_FLAG_QUIET | HS_FLAG_MULTILINE;
if (flags & not_supported) { if (flags & not_supported) {
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, " throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_SINGLEMATCH and "
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are " "HS_FLAG_SOM_LEFTMOST are supported in literal API.");
"supported in literal API.");
} }
// This expression must be a pure literal, we can build ue2_literal // This expression must be a pure literal, we can build ue2_literal

View File

@ -114,8 +114,8 @@ hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
static static
hs_error_t db_check_platform(const u64a p) { hs_error_t db_check_platform(const u64a p) {
if (p != hs_current_platform if (p != hs_current_platform
&& p != hs_current_platform_no_avx2 && p != (hs_current_platform | hs_current_platform_no_avx2)
&& p != hs_current_platform_no_avx512) { && p != (hs_current_platform | hs_current_platform_no_avx512)) {
return HS_DB_PLATFORM_ERROR; return HS_DB_PLATFORM_ERROR;
} }
// passed all checks // passed all checks

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -74,6 +74,30 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
}; };
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
#define CONF_CHUNK_64(chunk, bucket, off, reason, pt, conf_fn) \
do { \
if (unlikely(chunk != ones_u64a)) { \
chunk = ~chunk; \
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} while(0)
#define CONF_CHUNK_32(chunk, bucket, off, reason, pt, conf_fn) \
do { \
if (unlikely(chunk != ones_u32)) { \
chunk = ~chunk; \
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} while(0)
#else
#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \ #define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
do { \ do { \
if (unlikely(chunk != ones_u64a)) { \ if (unlikely(chunk != ones_u64a)) { \
@ -94,7 +118,284 @@ do { \
} \ } \
} while(0) } while(0)
#if defined(HAVE_AVX512) // AVX512 reinforced teddy #endif
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
#ifdef ARCH_64_BIT
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
do { \
if (unlikely(diff512(var, ones512()))) { \
m128 p128_0 = extract128from512(var, 0); \
m128 p128_1 = extract128from512(var, 1); \
m128 p128_2 = extract128from512(var, 2); \
m128 p128_3 = extract128from512(var, 3); \
u64a part1 = movq(p128_0); \
u64a part2 = movq(rshiftbyte_m128(p128_0, 8)); \
u64a part3 = movq(p128_1); \
u64a part4 = movq(rshiftbyte_m128(p128_1, 8)); \
u64a part5 = movq(p128_2); \
u64a part6 = movq(rshiftbyte_m128(p128_2, 8)); \
u64a part7 = movq(p128_3); \
u64a part8 = movq(rshiftbyte_m128(p128_3, 8)); \
CONF_CHUNK_64(part1, bucket, offset, reason, pt, conf_fn); \
CONF_CHUNK_64(part2, bucket, offset + 8, reason, pt, conf_fn); \
CONF_CHUNK_64(part3, bucket, offset + 16, reason, pt, conf_fn); \
CONF_CHUNK_64(part4, bucket, offset + 24, reason, pt, conf_fn); \
CONF_CHUNK_64(part5, bucket, offset + 32, reason, pt, conf_fn); \
CONF_CHUNK_64(part6, bucket, offset + 40, reason, pt, conf_fn); \
CONF_CHUNK_64(part7, bucket, offset + 48, reason, pt, conf_fn); \
CONF_CHUNK_64(part8, bucket, offset + 56, reason, pt, conf_fn); \
} \
} while(0)
#else
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
do { \
if (unlikely(diff512(var, ones512()))) { \
m128 p128_0 = extract128from512(var, 0); \
m128 p128_1 = extract128from512(var, 1); \
m128 p128_2 = extract128from512(var, 2); \
m128 p128_3 = extract128from512(var, 3); \
u32 part1 = movd(p128_0); \
u32 part2 = movd(rshiftbyte_m128(p128_0, 4)); \
u32 part3 = movd(rshiftbyte_m128(p128_0, 8)); \
u32 part4 = movd(rshiftbyte_m128(p128_0, 12)); \
u32 part5 = movd(p128_1); \
u32 part6 = movd(rshiftbyte_m128(p128_1, 4)); \
u32 part7 = movd(rshiftbyte_m128(p128_1, 8)); \
u32 part8 = movd(rshiftbyte_m128(p128_1, 12)); \
u32 part9 = movd(p128_2); \
u32 part10 = movd(rshiftbyte_m128(p128_2, 4)); \
u32 part11 = movd(rshiftbyte_m128(p128_2, 8)); \
u32 part12 = movd(rshiftbyte_m128(p128_2, 12)); \
u32 part13 = movd(p128_3); \
u32 part14 = movd(rshiftbyte_m128(p128_3, 4)); \
u32 part15 = movd(rshiftbyte_m128(p128_3, 8)); \
u32 part16 = movd(rshiftbyte_m128(p128_3, 12)); \
CONF_CHUNK_32(part1, bucket, offset, reason, pt, conf_fn); \
CONF_CHUNK_32(part2, bucket, offset + 4, reason, pt, conf_fn); \
CONF_CHUNK_32(part3, bucket, offset + 8, reason, pt, conf_fn); \
CONF_CHUNK_32(part4, bucket, offset + 12, reason, pt, conf_fn); \
CONF_CHUNK_32(part5, bucket, offset + 16, reason, pt, conf_fn); \
CONF_CHUNK_32(part6, bucket, offset + 20, reason, pt, conf_fn); \
CONF_CHUNK_32(part7, bucket, offset + 24, reason, pt, conf_fn); \
CONF_CHUNK_32(part8, bucket, offset + 28, reason, pt, conf_fn); \
CONF_CHUNK_32(part9, bucket, offset + 32, reason, pt, conf_fn); \
CONF_CHUNK_32(part10, bucket, offset + 36, reason, pt, conf_fn); \
CONF_CHUNK_32(part11, bucket, offset + 40, reason, pt, conf_fn); \
CONF_CHUNK_32(part12, bucket, offset + 44, reason, pt, conf_fn); \
CONF_CHUNK_32(part13, bucket, offset + 48, reason, pt, conf_fn); \
CONF_CHUNK_32(part14, bucket, offset + 52, reason, pt, conf_fn); \
CONF_CHUNK_32(part15, bucket, offset + 56, reason, pt, conf_fn); \
CONF_CHUNK_32(part16, bucket, offset + 60, reason, pt, conf_fn); \
} \
} while(0)
#endif
#define PREP_SHUF_MASK \
m512 lo = and512(val, *lo_mask); \
m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
#define TEDDY_VBMI_PSHUFB_OR_M1 \
m512 shuf_or_b0 = or512(pshufb_m512(dup_mask[0], lo), \
pshufb_m512(dup_mask[1], hi));
#define TEDDY_VBMI_PSHUFB_OR_M2 \
TEDDY_VBMI_PSHUFB_OR_M1 \
m512 shuf_or_b1 = or512(pshufb_m512(dup_mask[2], lo), \
pshufb_m512(dup_mask[3], hi));
#define TEDDY_VBMI_PSHUFB_OR_M3 \
TEDDY_VBMI_PSHUFB_OR_M2 \
m512 shuf_or_b2 = or512(pshufb_m512(dup_mask[4], lo), \
pshufb_m512(dup_mask[5], hi));
#define TEDDY_VBMI_PSHUFB_OR_M4 \
TEDDY_VBMI_PSHUFB_OR_M3 \
m512 shuf_or_b3 = or512(pshufb_m512(dup_mask[6], lo), \
pshufb_m512(dup_mask[7], hi));
#define TEDDY_VBMI_SL1_MASK 0xfffffffffffffffeULL
#define TEDDY_VBMI_SL2_MASK 0xfffffffffffffffcULL
#define TEDDY_VBMI_SL3_MASK 0xfffffffffffffff8ULL
#define TEDDY_VBMI_SHIFT_M1
#define TEDDY_VBMI_SHIFT_M2 \
TEDDY_VBMI_SHIFT_M1 \
m512 sl1 = maskz_vpermb512(TEDDY_VBMI_SL1_MASK, sl_msk[0], shuf_or_b1);
#define TEDDY_VBMI_SHIFT_M3 \
TEDDY_VBMI_SHIFT_M2 \
m512 sl2 = maskz_vpermb512(TEDDY_VBMI_SL2_MASK, sl_msk[1], shuf_or_b2);
#define TEDDY_VBMI_SHIFT_M4 \
TEDDY_VBMI_SHIFT_M3 \
m512 sl3 = maskz_vpermb512(TEDDY_VBMI_SL3_MASK, sl_msk[2], shuf_or_b3);
#define SHIFT_OR_M1 \
shuf_or_b0
#define SHIFT_OR_M2 \
or512(sl1, SHIFT_OR_M1)
#define SHIFT_OR_M3 \
or512(sl2, SHIFT_OR_M2)
#define SHIFT_OR_M4 \
or512(sl3, SHIFT_OR_M3)
static really_inline
m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
UNUSED const m512 *sl_msk, const m512 val) {
PREP_SHUF_MASK;
TEDDY_VBMI_PSHUFB_OR_M1;
TEDDY_VBMI_SHIFT_M1;
return SHIFT_OR_M1;
}
static really_inline
m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
const m512 *sl_msk, const m512 val) {
PREP_SHUF_MASK;
TEDDY_VBMI_PSHUFB_OR_M2;
TEDDY_VBMI_SHIFT_M2;
return SHIFT_OR_M2;
}
static really_inline
m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
const m512 *sl_msk, const m512 val) {
PREP_SHUF_MASK;
TEDDY_VBMI_PSHUFB_OR_M3;
TEDDY_VBMI_SHIFT_M3;
return SHIFT_OR_M3;
}
static really_inline
m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
const m512 *sl_msk, const m512 val) {
PREP_SHUF_MASK;
TEDDY_VBMI_PSHUFB_OR_M4;
TEDDY_VBMI_SHIFT_M4;
return SHIFT_OR_M4;
}
#define PREP_CONF_FN(val, n) \
prep_conf_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
const u8 ALIGN_DIRECTIVE p_sh_mask_arr[80] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
};
#define TEDDY_VBMI_SL1_POS 15
#define TEDDY_VBMI_SL2_POS 14
#define TEDDY_VBMI_SL3_POS 13
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M1
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
TEDDY_VBMI_LOAD_SHIFT_MASK_M1 \
sl_msk[0] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL1_POS);
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
sl_msk[1] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL2_POS);
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M4 \
TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
sl_msk[2] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL3_POS);
#define PREPARE_MASKS_1 \
dup_mask[0] = set4x128(maskBase[0]); \
dup_mask[1] = set4x128(maskBase[1]);
#define PREPARE_MASKS_2 \
PREPARE_MASKS_1 \
dup_mask[2] = set4x128(maskBase[2]); \
dup_mask[3] = set4x128(maskBase[3]);
#define PREPARE_MASKS_3 \
PREPARE_MASKS_2 \
dup_mask[4] = set4x128(maskBase[4]); \
dup_mask[5] = set4x128(maskBase[5]);
#define PREPARE_MASKS_4 \
PREPARE_MASKS_3 \
dup_mask[6] = set4x128(maskBase[6]); \
dup_mask[7] = set4x128(maskBase[7]);
#define PREPARE_MASKS(n) \
m512 lo_mask = set64x8(0xf); \
m512 dup_mask[n * 2]; \
m512 sl_msk[n - 1]; \
PREPARE_MASKS_##n \
TEDDY_VBMI_LOAD_SHIFT_MASK_M##n
#define TEDDY_VBMI_CONF_MASK_HEAD (0xffffffffffffffffULL >> n_sh)
#define TEDDY_VBMI_CONF_MASK_FULL (0xffffffffffffffffULL << n_sh)
#define TEDDY_VBMI_CONF_MASK_VAR(n) (0xffffffffffffffffULL >> (64 - n) << overlap)
#define TEDDY_VBMI_LOAD_MASK_PATCH (0xffffffffffffffffULL >> (64 - n_sh))
#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \
do { \
const u8 *buf_end = a->buf + a->len; \
const u8 *ptr = a->buf + a->start_offset; \
u32 floodBackoff = FLOOD_BACKOFF_START; \
const u8 *tryFloodDetect = a->firstFloodDetect; \
u32 last_match = ones_u32; \
const struct Teddy *teddy = (const struct Teddy *)fdr; \
const size_t iterBytes = 64; \
u32 n_sh = n_msk - 1; \
const size_t loopBytes = 64 - n_sh; \
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \
a->buf, a->len, a->start_offset); \
\
const m128 *maskBase = getMaskBase(teddy); \
PREPARE_MASKS(n_msk); \
const u32 *confBase = getConfBase(teddy); \
\
u64a k = TEDDY_VBMI_CONF_MASK_FULL; \
m512 p_mask = set_mask_m512(~k); \
u32 overlap = 0; \
u64a patch = 0; \
if (likely(ptr + loopBytes <= buf_end)) { \
m512 p_mask0 = set_mask_m512(~TEDDY_VBMI_CONF_MASK_HEAD); \
m512 r_0 = PREP_CONF_FN(loadu512(ptr), n_msk); \
r_0 = or512(r_0, p_mask0); \
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr, conf_fn); \
ptr += loopBytes; \
overlap = n_sh; \
patch = TEDDY_VBMI_LOAD_MASK_PATCH; \
} \
\
for (; ptr + loopBytes <= buf_end; ptr += loopBytes) { \
__builtin_prefetch(ptr - n_sh + (64 * 2)); \
CHECK_FLOOD; \
m512 r_0 = PREP_CONF_FN(loadu512(ptr - n_sh), n_msk); \
r_0 = or512(r_0, p_mask); \
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, ptr - n_sh, conf_fn); \
} \
\
assert(ptr + loopBytes > buf_end); \
if (ptr < buf_end) { \
u32 left = (u32)(buf_end - ptr); \
u64a k1 = TEDDY_VBMI_CONF_MASK_VAR(left); \
m512 p_mask1 = set_mask_m512(~k1); \
m512 val_0 = loadu_maskz_m512(k1 | patch, ptr - overlap); \
m512 r_0 = PREP_CONF_FN(val_0, n_msk); \
r_0 = or512(r_0, p_mask1); \
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr - overlap, conf_fn); \
} \
\
return HWLM_SUCCESS; \
} while(0)
#elif defined(HAVE_AVX512) // AVX512 reinforced teddy
#ifdef ARCH_64_BIT #ifdef ARCH_64_BIT
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ #define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -134,7 +134,7 @@ const m256 *getMaskBase_fat(const struct Teddy *teddy) {
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
} }
#if defined(HAVE_AVX512) #if defined(HAVE_AVX512_REVERT) // revert to AVX2 Fat Teddy
static really_inline static really_inline
const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) { const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2017, Intel Corporation * Copyright (c) 2016-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -383,12 +383,16 @@ m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset,
static really_inline static really_inline
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte, u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
CautionReason reason) { UNUSED CautionReason reason) {
u64a confVal = 0; u64a confVal = 0;
const u8 *buf = a->buf; const u8 *buf = a->buf;
size_t len = a->len; size_t len = a->len;
const u8 *confirm_loc = ptr + byte - 7; const u8 *confirm_loc = ptr + byte - 7;
#if defined(HAVE_AVX512VBMI)
if (likely(confirm_loc >= buf)) {
#else
if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) { if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) {
#endif
confVal = lv_u64a(confirm_loc, buf, buf + len); confVal = lv_u64a(confirm_loc, buf, buf + len);
} else { // r == VECTORING, confirm_loc < buf } else { // r == VECTORING, confirm_loc < buf
u64a histBytes = a->histBytes; u64a histBytes = a->histBytes;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -39,6 +39,12 @@
* the individual component headers for documentation. * the individual component headers for documentation.
*/ */
/* The current Hyperscan version information. */
#define HS_MAJOR 5
#define HS_MINOR 3
#define HS_PATCH 0
#include "hs_compile.h" #include "hs_compile.h"
#include "hs_runtime.h" #include "hs_runtime.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2019, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -563,7 +563,6 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
* be used by ORing them together. Compared to @ref hs_compile(), fewer * be used by ORing them together. Compared to @ref hs_compile(), fewer
* valid values are provided: * valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively. * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream. * expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
@ -637,7 +636,6 @@ hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
* in place of an array will set the flags value for all patterns to zero. * in place of an array will set the flags value for all patterns to zero.
* Compared to @ref hs_compile_multi(), fewer valid values are provided: * Compared to @ref hs_compile_multi(), fewer valid values are provided:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively. * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream. * expression per stream.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
@ -985,8 +983,8 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
* offset when a match is reported for this expression. (By default, no start * offset when a match is reported for this expression. (By default, no start
* of match is returned.) * of match is returned.)
* *
* Enabling this behaviour may reduce performance and increase stream state * For all the 3 modes, enabling this behaviour may reduce performance. And
* requirements in streaming mode. * particularly, it may increase stream state requirements in streaming mode.
*/ */
#define HS_FLAG_SOM_LEFTMOST 256 #define HS_FLAG_SOM_LEFTMOST 256

View File

@ -210,7 +210,7 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
const u8 *d = buf + start, *e = buf + end; const u8 *d = buf + start, *e = buf + end;
DEBUG_PRINTF("start %zu end %zu \n", start, end); DEBUG_PRINTF("start %zu end %zu \n", start, end);
assert(d < e); assert(d < e);
u8 lastz0 = 0; u32 lastz0 = 0;
for (; d < e; d += 32) { for (; d < e; d += 32) {
m256 v = noCase ? and256(load256(d), caseMask) : load256(d); m256 v = noCase ? and256(load256(d), caseMask) : load256(d);

View File

@ -214,7 +214,7 @@ static
bool double_byte_ok(const AccelScheme &info) { bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty() && return !info.double_byte.empty() &&
info.double_cr.count() < info.double_byte.size() && info.double_cr.count() < info.double_byte.size() &&
info.double_cr.count() <= 2 && !info.double_byte.empty(); info.double_cr.count() <= 2;
} }
static static

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -46,7 +46,20 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
// Handle small scans. // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? vermMiniNocase(chars, buf, buf_end, 0)
: vermMini(chars, buf, buf_end, 0);
if (ptr) {
return ptr;
}
return buf_end;
}
#else
if (buf_end - buf < VERM_BOUNDARY) { if (buf_end - buf < VERM_BOUNDARY) {
for (; buf < buf_end; buf++) { for (; buf < buf_end; buf++) {
char cur = (char)*buf; char cur = (char)*buf;
@ -59,8 +72,8 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
} }
return buf; return buf;
} }
#endif
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
@ -99,7 +112,20 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
// Handle small scans. // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? vermMiniNocase(chars, buf, buf_end, 1)
: vermMini(chars, buf, buf_end, 1);
if (ptr) {
return ptr;
}
return buf_end;
}
#else
if (buf_end - buf < VERM_BOUNDARY) { if (buf_end - buf < VERM_BOUNDARY) {
for (; buf < buf_end; buf++) { for (; buf < buf_end; buf++) {
char cur = (char)*buf; char cur = (char)*buf;
@ -112,8 +138,8 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
} }
return buf; return buf;
} }
#endif
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
size_t min = (size_t)buf % VERM_BOUNDARY; size_t min = (size_t)buf % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
@ -149,12 +175,32 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
assert((buf_end - buf) >= VERM_BOUNDARY);
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? dvermMiniNocase(chars1, chars2, buf, buf_end)
: dvermMini(chars1, chars2, buf, buf_end);
if (ptr) {
return ptr;
}
/* check for partial match at end */
u8 mask = nocase ? CASE_CLEAR : 0xff;
if ((buf_end[-1] & mask) == (u8)c1) {
DEBUG_PRINTF("partial!!!\n");
return buf_end - 1;
}
return buf_end;
}
#endif
assert((buf_end - buf) >= VERM_BOUNDARY);
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf forward to the next aligned address. // unaligned load, then skip buf forward to the next aligned address.
@ -205,14 +251,32 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) " DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
"over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf)); "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
assert((buf_end - buf) >= VERM_BOUNDARY);
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
VERM_TYPE chars1 = VERM_SET_FN(c1); VERM_TYPE chars1 = VERM_SET_FN(c1);
VERM_TYPE chars2 = VERM_SET_FN(c2); VERM_TYPE chars2 = VERM_SET_FN(c2);
VERM_TYPE mask1 = VERM_SET_FN(m1); VERM_TYPE mask1 = VERM_SET_FN(m1);
VERM_TYPE mask2 = VERM_SET_FN(m2); VERM_TYPE mask2 = VERM_SET_FN(m2);
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf,
buf_end);
if (ptr) {
return ptr;
}
/* check for partial match at end */
if ((buf_end[-1] & m1) == (u8)c1) {
DEBUG_PRINTF("partial!!!\n");
return buf_end - 1;
}
return buf_end;
}
#endif
assert((buf_end - buf) >= VERM_BOUNDARY);
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf forward to the next aligned address. // unaligned load, then skip buf forward to the next aligned address.
@ -244,6 +308,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
/* check for partial match at end */ /* check for partial match at end */
if ((buf_end[-1] & m1) == (u8)c1) { if ((buf_end[-1] & m1) == (u8)c1) {
DEBUG_PRINTF("partial!!!\n");
return buf_end - 1; return buf_end - 1;
} }
@ -259,7 +324,20 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
// Handle small scans. // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? rvermMiniNocase(chars, buf, buf_end, 0)
: rvermMini(chars, buf, buf_end, 0);
if (ptr) {
return ptr;
}
return buf - 1;
}
#else
if (buf_end - buf < VERM_BOUNDARY) { if (buf_end - buf < VERM_BOUNDARY) {
for (buf_end--; buf_end >= buf; buf_end--) { for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end; char cur = (char)*buf_end;
@ -272,26 +350,22 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
} }
return buf_end; return buf_end;
} }
#endif
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
size_t min = (size_t)buf_end % VERM_BOUNDARY; size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf backward to the next aligned address. // unaligned load, then skip buf backward to the next aligned address.
// There's some small overlap here, but we don't mind scanning it twice // There's some small overlap here, but we don't mind scanning it twice
// if we can do it quickly, do we? // if we can do it quickly, do we?
if (nocase) { const u8 *ptr = nocase ? rvermUnalignNocase(chars,
const u8 *ptr = buf_end - VERM_BOUNDARY,
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0); 0)
if (ptr) { : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
return ptr; 0);
}
} else { if (ptr) {
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 0); return ptr;
if (ptr) {
return ptr;
}
} }
buf_end -= min; buf_end -= min;
@ -322,7 +396,20 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
// Handle small scans. // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? rvermMiniNocase(chars, buf, buf_end, 1)
: rvermMini(chars, buf, buf_end, 1);
if (ptr) {
return ptr;
}
return buf - 1;
}
#else
if (buf_end - buf < VERM_BOUNDARY) { if (buf_end - buf < VERM_BOUNDARY) {
for (buf_end--; buf_end >= buf; buf_end--) { for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end; char cur = (char)*buf_end;
@ -335,26 +422,22 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
} }
return buf_end; return buf_end;
} }
#endif
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
size_t min = (size_t)buf_end % VERM_BOUNDARY; size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) { if (min) {
// Input isn't aligned, so we need to run one iteration with an // Input isn't aligned, so we need to run one iteration with an
// unaligned load, then skip buf backward to the next aligned address. // unaligned load, then skip buf backward to the next aligned address.
// There's some small overlap here, but we don't mind scanning it twice // There's some small overlap here, but we don't mind scanning it twice
// if we can do it quickly, do we? // if we can do it quickly, do we?
if (nocase) { const u8 *ptr = nocase ? rvermUnalignNocase(chars,
const u8 *ptr = buf_end - VERM_BOUNDARY,
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1); 1)
if (ptr) { : rvermUnalign(chars, buf_end - VERM_BOUNDARY,
return ptr; 1);
}
} else { if (ptr) {
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 1); return ptr;
if (ptr) {
return ptr;
}
} }
buf_end -= min; buf_end -= min;
@ -383,24 +466,36 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf)); nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
assert(buf < buf_end); assert(buf < buf_end);
assert((buf_end - buf) >= VERM_BOUNDARY);
size_t min = (size_t)buf_end % VERM_BOUNDARY;
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
? rdvermMiniNocase(chars1, chars2, buf, buf_end)
: rdvermMini(chars1, chars2, buf, buf_end);
if (ptr) {
return ptr;
}
// check for partial match at end ???
return buf - 1;
}
#endif
assert((buf_end - buf) >= VERM_BOUNDARY);
size_t min = (size_t)buf_end % VERM_BOUNDARY;
if (min) { if (min) {
// input not aligned, so we need to run one iteration with an unaligned // input not aligned, so we need to run one iteration with an unaligned
// load, then skip buf forward to the next aligned address. There's // load, then skip buf forward to the next aligned address. There's
// some small overlap here, but we don't mind scanning it twice if we // some small overlap here, but we don't mind scanning it twice if we
// can do it quickly, do we? // can do it quickly, do we?
const u8 *ptr; const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
if (nocase) { buf_end - VERM_BOUNDARY)
ptr = rdvermPreconditionNocase(chars1, chars2, : rdvermPrecondition(chars1, chars2,
buf_end - VERM_BOUNDARY); buf_end - VERM_BOUNDARY);
} else {
ptr = rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
}
if (ptr) { if (ptr) {
return ptr; return ptr;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -32,6 +32,8 @@
* (users should include vermicelli.h) * (users should include vermicelli.h)
*/ */
#if !defined(HAVE_AVX512)
#define VERM_BOUNDARY 16 #define VERM_BOUNDARY 16
#define VERM_TYPE m128 #define VERM_TYPE m128
#define VERM_SET_FN set16x8 #define VERM_SET_FN set16x8
@ -391,3 +393,497 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
return NULL; return NULL;
} }
#else // HAVE_AVX512
#define VERM_BOUNDARY 64
#define VERM_TYPE m512
#define VERM_SET_FN set64x8
/* Single-character vermicelli over a short (<= 64 byte) region using a
 * masked AVX-512 load, so no memory past buf_end is touched.
 * Returns a pointer to the first matching byte (or first NON-matching byte
 * when 'negate' is set), or NULL if there is none. */
static really_inline
const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len); /* low 'len' lanes enabled */
    m512 data = loadu_maskz_m512(mask, buf); /* disabled lanes read as zero */
    u64a z = eq512mask(chars, data); /* bit i set <=> buf[i] matches */
    if (negate) {
        z = ~z & mask; /* look for bytes that do NOT match */
    }
    z &= mask; /* drop lanes beyond the buffer */
    if (unlikely(z)) {
        return buf + ctz64(z); /* lowest set bit == earliest offset */
    }
    return NULL;
}

/* Caseless variant of vermMini: input bytes are case-cleared before the
 * comparison ('chars' is already uppercased by the caller). */
static really_inline
const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
                         char negate) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    m512 casemask = set64x8(CASE_CLEAR);
    m512 v = and512(casemask, data); /* fold case before comparing */
    u64a z = eq512mask(chars, v);
    if (negate) {
        z = ~z & mask;
    }
    z &= mask;
    if (unlikely(z)) {
        return buf + ctz64(z);
    }
    return NULL;
}
/* Forward scan over whole 64-byte aligned blocks; stops before a partial
 * tail block (the caller handles the remainder).  Returns the first match
 * (or non-match when 'negate' is set), or NULL if the region is clean. */
static really_inline
const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
                            char negate) {
    assert((size_t)buf % 64 == 0);
    for (; buf + 63 < buf_end; buf += 64) { /* full 64-byte blocks only */
        m512 data = load512(buf);
        u64a z = eq512mask(chars, data);
        if (negate) {
            z = ~z & ~0ULL; /* '& ~0ULL' is a no-op, kept for symmetry with
                             * the masked mini variants */
        }
        if (unlikely(z)) {
            u64a pos = ctz64(z);
            return buf + pos;
        }
    }
    return NULL;
}

/* Caseless variant of vermSearchAligned. */
static really_inline
const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf,
                                  const u8 *buf_end, char negate) {
    assert((size_t)buf % 64 == 0);
    m512 casemask = set64x8(CASE_CLEAR);
    for (; buf + 63 < buf_end; buf += 64) {
        m512 data = load512(buf);
        u64a z = eq512mask(chars, and512(casemask, data));
        if (negate) {
            z = ~z & ~0ULL;
        }
        if (unlikely(z)) {
            u64a pos = ctz64(z);
            return buf + pos;
        }
    }
    return NULL;
}
// returns NULL if not found
/* Single unaligned 64-byte probe; used by the callers for the unaligned
 * head of a buffer before the aligned main loop takes over. */
static really_inline
const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) {
    m512 data = loadu512(buf); // unaligned
    u64a z = eq512mask(chars, data);
    if (negate) {
        z = ~z & ~0ULL; // no-op mask, kept for symmetry with masked variants
    }
    if (unlikely(z)) {
        return buf + ctz64(z);
    }
    return NULL;
}

// returns NULL if not found
/* Caseless variant of vermUnalign. */
static really_inline
const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) {
    m512 casemask = set64x8(CASE_CLEAR);
    m512 data = loadu512(buf); // unaligned
    u64a z = eq512mask(chars, and512(casemask, data));
    if (negate) {
        z = ~z & ~0ULL;
    }
    if (unlikely(z)) {
        return buf + ctz64(z);
    }
    return NULL;
}
/* Double-character vermicelli (find c1 immediately followed by c2) over a
 * short (<= 64 byte) region via a masked load.  Shifting the second
 * compare mask right by one aligns each c2 hit with the byte before it, so
 * a set bit in z marks the c1 of a c1c2 pair.  Returns the pair start, or
 * NULL. */
static really_inline
const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf,
                    const u8 *buf_end) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
    z &= mask;
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}

/* Caseless variant of dvermMini. */
static really_inline
const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
                          const u8 *buf_end) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    m512 casemask = set64x8(CASE_CLEAR);
    m512 v = and512(casemask, data);
    u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
    z &= mask;
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}

/* Masked variant of dvermMini: each input byte is ANDed with mask1/mask2
 * before comparison against chars1/chars2. */
static really_inline
const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2,
                          const u8 *buf, const u8 *buf_end) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    m512 v1 = and512(data, mask1);
    m512 v2 = and512(data, mask2);
    u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
    z &= mask;
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}
/* Aligned forward scan for the pair c1c2.  The loop bound 'buf + 64 <
 * buf_end' ensures buf[64] is readable; the pair straddling each block
 * boundary (last byte of this block, first byte of the next) is patched in
 * via the scalar check on buf[63]/buf[64]. */
static really_inline
const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
                             const u8 *buf, const u8 *buf_end) {
    for (; buf + 64 < buf_end; buf += 64) {
        m512 data = load512(buf);
        u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
        if (buf[63] == c1 && buf[64] == c2) {
            z |= (1ULL << 63); /* pair straddling the block boundary */
        }
        if (unlikely(z)) {
            u64a pos = ctz64(z);
            return buf + pos;
        }
    }
    return NULL;
}

/* Caseless variant of dvermSearchAligned; the scalar boundary checks
 * compare against c1/c2 after CASE_CLEAR, so c1/c2 are expected to be
 * case-cleared already. */
static really_inline
const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
                                   const u8 *buf, const u8 *buf_end) {
    assert((size_t)buf % 64 == 0);
    m512 casemask = set64x8(CASE_CLEAR);
    for (; buf + 64 < buf_end; buf += 64) {
        m512 data = load512(buf);
        m512 v = and512(casemask, data);
        u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
        if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) {
            z |= (1ULL << 63);
        }
        if (unlikely(z)) {
            u64a pos = ctz64(z);
            return buf + pos;
        }
    }
    return NULL;
}

/* Masked variant of dvermSearchAligned: bytes are ANDed with mask1/mask2
 * (vector) and m1/m2 (scalar boundary check) before comparison. */
static really_inline
const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2,
                                   m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1,
                                   u8 m2, const u8 *buf, const u8 *buf_end) {
    assert((size_t)buf % 64 == 0);
    for (; buf + 64 < buf_end; buf += 64) {
        m512 data = load512(buf);
        m512 v1 = and512(data, mask1);
        m512 v2 = and512(data, mask2);
        u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
        if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) {
            z |= (1ULL << 63);
        }
        if (unlikely(z)) {
            u64a pos = ctz64(z);
            return buf + pos;
        }
    }
    return NULL;
}
// returns NULL if not found
/* Unaligned 64-byte probe for the pair c1c2 (unaligned head handling). */
static really_inline
const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
    m512 data = loadu512(buf); // unaligned
    u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
    /* no fixup of the boundary required - the aligned run will pick it up */
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}

// returns NULL if not found
/* Caseless variant of dvermPrecondition. */
static really_inline
const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
    /* due to laziness, nonalphas and nocase having interesting behaviour */
    m512 casemask = set64x8(CASE_CLEAR);
    m512 data = loadu512(buf); // unaligned
    m512 v = and512(casemask, data);
    u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
    /* no fixup of the boundary required - the aligned run will pick it up */
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}

// returns NULL if not found
/* Masked variant of dvermPrecondition. */
static really_inline
const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2,
                                  m512 mask1, m512 mask2, const u8 *buf) {
    m512 data = loadu512(buf); // unaligned
    m512 v1 = and512(data, mask1);
    m512 v2 = and512(data, mask2);
    u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
    /* no fixup of the boundary required - the aligned run will pick it up */
    if (unlikely(z)) {
        u64a pos = ctz64(z);
        return buf + pos;
    }
    return NULL;
}
/* Map the highest set bit of z to its address within the 64-byte window
 * ending at buf_end (bit 63 corresponds to buf_end - 1). */
static really_inline
const u8 *lastMatchOffset(const u8 *buf_end, u64a z) {
    assert(z);
    return buf_end - 64 + 63 - clz64(z);
}

/* Reverse single-character scan over a short (<= 64 byte) region via a
 * masked load; returns the LAST matching (or, with 'negate', non-matching)
 * byte, or NULL. */
static really_inline
const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    u64a z = eq512mask(chars, data);
    if (negate) {
        z = ~z & mask;
    }
    z &= mask;
    if (unlikely(z)) {
        /* the window is buf..buf+63, so pass buf + 64 as its end */
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}

/* Caseless variant of rvermMini. */
static really_inline
const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
                          char negate) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    m512 casemask = set64x8(CASE_CLEAR);
    m512 v = and512(casemask, data);
    u64a z = eq512mask(chars, v);
    if (negate) {
        z = ~z & mask;
    }
    z &= mask;
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}
/* Reverse scan over aligned 64-byte blocks, walking buf_end downwards;
 * returns the last match in the aligned region, or NULL. */
static really_inline
const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
                             char negate) {
    assert((size_t)buf_end % 64 == 0);
    for (; buf + 63 < buf_end; buf_end -= 64) {
        m512 data = load512(buf_end - 64);
        u64a z = eq512mask(chars, data);
        if (negate) {
            z = ~z & ~0ULL; /* no-op mask, kept for symmetry */
        }
        if (unlikely(z)) {
            return lastMatchOffset(buf_end, z);
        }
    }
    return NULL;
}

/* Caseless variant of rvermSearchAligned. */
static really_inline
const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf,
                                   const u8 *buf_end, char negate) {
    assert((size_t)buf_end % 64 == 0);
    m512 casemask = set64x8(CASE_CLEAR);
    for (; buf + 63 < buf_end; buf_end -= 64) {
        m512 data = load512(buf_end - 64);
        u64a z = eq512mask(chars, and512(casemask, data));
        if (negate) {
            z = ~z & ~0ULL;
        }
        if (unlikely(z)) {
            return lastMatchOffset(buf_end, z);
        }
    }
    return NULL;
}
// returns NULL if not found
/* Reverse unaligned 64-byte probe; returns the last match in the window. */
static really_inline
const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) {
    m512 data = loadu512(buf); // unaligned
    u64a z = eq512mask(chars, data);
    if (negate) {
        z = ~z & ~0ULL; // no-op mask, kept for symmetry
    }
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}

// returns NULL if not found
/* Caseless variant of rvermUnalign. */
static really_inline
const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) {
    m512 casemask = set64x8(CASE_CLEAR);
    m512 data = loadu512(buf); // unaligned
    u64a z = eq512mask(chars, and512(casemask, data));
    if (negate) {
        z = ~z & ~0ULL;
    }
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}
/* Reverse double-character scan (pair c1c2) over a short region.  Unlike
 * the forward variants, the FIRST character's hits are shifted left so a
 * set bit marks the c2 of a pair; the highest such bit is the rightmost
 * pair. */
static really_inline
const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf,
                     const u8 *buf_end) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
    z &= mask;
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}

/* Caseless variant of rdvermMini. */
static really_inline
const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
                           const u8 *buf_end) {
    uintptr_t len = buf_end - buf;
    __mmask64 mask = (~0ULL) >> (64 - len);
    m512 data = loadu_maskz_m512(mask, buf);
    m512 casemask = set64x8(CASE_CLEAR);
    m512 v = and512(casemask, data);
    u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
    z &= mask;
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}
/* Aligned reverse scan for the pair c1c2, walking buf_end downwards.  The
 * pair straddling each lower block boundary is patched in via the scalar
 * check on buf_end[-65]/buf_end[-64].  NOTE: unlike the forward search
 * variants, this returns the (adjusted) buf_end rather than NULL when no
 * match is found in the scanned region. */
static really_inline
const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
                              const u8 *buf, const u8 *buf_end) {
    assert((size_t)buf_end % 64 == 0);
    for (; buf + 64 < buf_end; buf_end -= 64) {
        m512 data = load512(buf_end - 64);
        u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
        if (buf_end[-65] == c1 && buf_end[-64] == c2) {
            z |= 1; /* pair straddling the block boundary */
        }
        if (unlikely(z)) {
            return lastMatchOffset(buf_end, z);
        }
    }
    return buf_end;
}

/* Caseless variant of rdvermSearchAligned; the scalar boundary checks
 * compare against c1/c2 after CASE_CLEAR. */
static really_inline
const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
                                    const u8 *buf, const u8 *buf_end) {
    assert((size_t)buf_end % 64 == 0);
    m512 casemask = set64x8(CASE_CLEAR);
    for (; buf + 64 < buf_end; buf_end -= 64) {
        m512 data = load512(buf_end - 64);
        m512 v = and512(casemask, data);
        u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
        if ((buf_end[-65] & CASE_CLEAR) == c1
            && (buf_end[-64] & CASE_CLEAR) == c2) {
            z |= 1;
        }
        if (unlikely(z)) {
            return lastMatchOffset(buf_end, z);
        }
    }
    return buf_end;
}
// returns NULL if not found
/* Reverse unaligned 64-byte probe for the pair c1c2. */
static really_inline
const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
    m512 data = loadu512(buf);
    u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
    // no fixup of the boundary required - the aligned run will pick it up
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}

// returns NULL if not found
/* Caseless variant of rdvermPrecondition. */
static really_inline
const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
    // due to laziness, nonalphas and nocase having interesting behaviour
    m512 casemask = set64x8(CASE_CLEAR);
    m512 data = loadu512(buf);
    m512 v = and512(casemask, data);
    u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
    // no fixup of the boundary required - the aligned run will pick it up
    if (unlikely(z)) {
        return lastMatchOffset(buf + 64, z);
    }
    return NULL;
}
#endif // HAVE_AVX512

View File

@ -205,7 +205,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
DEBUG_PRINTF(" - checking w %zu\n", g[w].index); DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
if (!searchForward(g, reach, colours, s, w)) { if (!searchForward(g, reach, colours, succ_v, w)) {
continue; continue;
} }

View File

@ -170,7 +170,7 @@ void findPaths(const NGHolder &g, NFAVertex v,
/* path has looped back to one of the active+boring acceleration /* path has looped back to one of the active+boring acceleration
* states. We can ignore this path if we have sufficient back- * states. We can ignore this path if we have sufficient back-
* off. */ * off. */
paths->push_back({CharReach()}); paths->push_back({cr});
continue; continue;
} }

View File

@ -29,6 +29,7 @@
#include "rose_build_impl.h" #include "rose_build_impl.h"
#include "nfa/castlecompile.h" #include "nfa/castlecompile.h"
#include "nfagraph/ng_repeat.h" #include "nfagraph/ng_repeat.h"
#include "smallwrite/smallwrite_build.h"
#include "util/compile_context.h" #include "util/compile_context.h"
#include "util/boundary_reports.h" #include "util/boundary_reports.h"
#include "util/make_unique.h" #include "util/make_unique.h"
@ -159,6 +160,10 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in)
} }
} }
for (const auto &report_id : build.smwr.all_reports()) {
live_reports.insert(report_id);
}
// Collect live reports from boundary reports. // Collect live reports from boundary reports.
insert(&live_reports, build.boundary.report_at_0); insert(&live_reports, build.boundary.report_at_0);
insert(&live_reports, build.boundary.report_at_0_eod); insert(&live_reports, build.boundary.report_at_0_eod);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, Intel Corporation * Copyright (c) 2017-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -57,6 +57,10 @@
#define HAVE_AVX512 #define HAVE_AVX512
#endif #endif
#if defined(__AVX512VBMI__)
#define HAVE_AVX512VBMI
#endif
/* /*
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
*/ */

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2017, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -150,6 +150,14 @@ static really_inline u32 movd(const m128 in) {
return _mm_cvtsi128_si32(in); return _mm_cvtsi128_si32(in);
} }
#if defined(HAVE_AVX512)
static really_inline u32 movd512(const m512 in) {
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
// so we use 2-step convertions to work around.
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
}
#endif
static really_inline u64a movq(const m128 in) { static really_inline u64a movq(const m128 in) {
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
return _mm_cvtsi128_si64(in); return _mm_cvtsi128_si64(in);
@ -318,6 +326,12 @@ static really_inline
m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) { m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) {
return _mm512_maskz_shuffle_epi8(k, a, b); return _mm512_maskz_shuffle_epi8(k, a, b);
} }
#if defined(HAVE_AVX512VBMI)
#define vpermb512(idx, a) _mm512_permutexvar_epi8(idx, a)
#define maskz_vpermb512(k, idx, a) _mm512_maskz_permutexvar_epi8(k, idx, a)
#endif
#endif #endif
static really_inline static really_inline

45
tools/fuzz/aristocrats.py Executable file
View File

@ -0,0 +1,45 @@
#!/usr/bin/env python
# Generate random pattern lines for fuzzing.  Python 2 script: relies on
# xrange, the print statement, and range() returning a list (for .remove()).

from random import choice,randint
from optparse import OptionParser

def generateRandomOptions():
    """Return a random subset of the supported pattern flags, in a fixed order."""
    if options.hybrid:
        # Hybrid mode uses a smaller flag set.
        allflags = "smiH8W"
    else:
        # Maintain an ordering for consistency.
        allflags = "smiHV8WLP"
    flags = ""
    for f in allflags:
        # Each flag is independently included or omitted.
        flags += choice(['', f])
    return flags

parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-c", "--count",
                  action="store", type="int", dest="count", default=1000,
                  help="Number of expressions to generate")
parser.add_option("-f", "--full",
                  action="store_true", dest="full", default=False,
                  help="Use a full character set including unprintables")
parser.add_option("-H", "--hybrid",
                  action="store_true", dest="hybrid",
                  help="Generate random flags for hybrid mode")
(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

if (options.full):
    # All byte values except newline (the output format is line-oriented).
    crange = range(0,256)
    crange.remove(ord('\n'))
else:
    # Printable ASCII only.
    crange = range(32, 127)

# Emit one expression per line: "<id>:/<random chars>/<flags>".
for i in xrange(0, options.count):
    # NOTE(review): 'len' shadows the builtin here; harmless in this loop
    # since the builtin is not used again below.
    len = randint(1, options.depth)
    s = [ chr(choice(crange)) for x in xrange(len) ]
    line = str(i) + ":/" + "".join(s) + "/" + generateRandomOptions()
    print line

39
tools/fuzz/completocrats.py Executable file
View File

@ -0,0 +1,39 @@
#!/usr/bin/env python
# Exhaustively enumerate all expressions of a fixed length over a chosen
# character set.  Python 2 script: uses the print statement and a
# list-returning range() (for .remove()).

from itertools import *
from optparse import OptionParser

# Small alphabet that still exercises common regex metacharacters.
LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"

parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-f", "--full",
                  action="store_true", dest="full", default=False,
                  help="Use a full character set including unprintables")
parser.add_option("-l", "--limited",
                  action="store_true", dest="limited", default=False,
                  help="Use a very limited character set: just " + LIMITED_ALPHABET)
(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

if (options.full):
    # All byte values except newline (the output format is line-oriented).
    crange = range(0,256)
    crange.remove(ord('\n'))
elif (options.limited):
    crange = [ ord(c) for c in LIMITED_ALPHABET ]
else:
    # Printable ASCII only.
    crange = range(32, 127)
srange = [ chr(c) for c in crange ]

# Emit every string of length 'depth' as "<id>:/<expression>/".
i = 0
for x in product(srange, repeat = options.depth):
    line = str(i) + ":/" + "".join(x) + "/"
    print line
    i += 1

259
tools/fuzz/heuristocrats.py Executable file
View File

@ -0,0 +1,259 @@
#!/usr/bin/env python
from optparse import OptionParser
from random import *
import string
import sys
def chooseLeafWidth(nChildren):
    """Randomly partition nChildren into a list of positive widths.

    Picks up to 5 random cut points in [1, nChildren) and returns the
    gap sizes between consecutive cuts, dropping degenerate zero-width
    parts (so a result like [1, 0] never occurs).
    """
    cut_count = min(randint(1, 5), nChildren - 1)
    cuts = sorted(sample(range(1, nChildren), cut_count))
    bounds = [0] + cuts + [nChildren]
    widths = []
    for lo, hi in zip(bounds, bounds[1:]):
        if hi != lo:
            widths.append(hi - lo)
    return widths
def generateConcat(nChildren, atTopIgnored):
    # Concatenate sub-expressions whose widths randomly partition nChildren.
    # NOTE(review): string.join is Python 2 only; generateRE is defined
    # elsewhere in this script.
    v = [ generateRE(w, atTop = False) for w in chooseLeafWidth(nChildren) ]
    v = [ r for r in v if r != '' ]
    return string.join(v, "")
def makeGroup(s):
    """Wrap s in a group, choosing capturing or non-capturing at random."""
    if randint(0, 1):
        return "(?:" + s + ")"
    return "(" + s + ")"
def generateAlt(nChildren, atTop):
    # Build an alternation "a|b|..." from randomly sized sub-expressions;
    # a single surviving branch is returned bare, otherwise grouped.
    v = [ generateRE(w, [generateAlt], atTop) for w in chooseLeafWidth(nChildren) ]
    v = [ r for r in v if r != '' ]
    s = string.join(v, "|")
    if len(v) == 1:
        return s
    else:
        return makeGroup(s)

def generateQuant(nChildren, atTopIgnored):
    # Apply a random quantifier (*, ?, +, {n}, {n,}, {n,m}) to a
    # sub-expression; bounds come from an exponential distribution so small
    # values dominate.
    lo = int(round(expovariate(0.2)))
    hi = lo + int(round(expovariate(0.2)))
    q = choice(["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)])
    r = generateRE(nChildren, [generateQuant], atTop = False)
    if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
        # Single char, or not parenthesised at both edges: quantify directly.
        return r + q
    else:
        return makeGroup(r) + q
def generateChar(nChildren, atTop = False):
return chr(choice(alphabet))
def generateNocaseChar(nChildren, atTop = False):
'Either generate an uppercase char from the alphabet or a nocase class [Aa]'
c = generateChar(nChildren, atTop)
if random() < 0.5:
return c.upper()
else:
return '[' + c.upper() + c.lower() + ']'
def generateDot(nChildren, atTop = False):
return "."
def generateBoundary(nChildren, atTop = False):
# \b, \B in parens so that we can repeat them and still be accepted by
# libpcre
return makeGroup('\\' + choice('bB'))
def generateCharClass(nChildren, atTop = False):
    """Generate a character class like [abc], sometimes negated ([^abc]).

    Fix: iterate with range instead of xrange — xrange does not exist in
    Python 3, and range behaves identically for this loop on Python 2.
    """
    s = ""
    if random() < 0.2:
        # Negated class; allow as few as one member.
        s = "^"
        nChars = randint(1,4)
    else:
        nChars = randint(2,4)
    for i in range(nChars):
        s += generateChar(1)
    return "[" + s + "]"
def generateOptionsFlags(nChildren, atTop = False):
    """Emit an inline option-setting group such as (?si-mx)."""
    all_flags = "smix"
    enabled = sample(all_flags, randint(1, len(all_flags)))
    disabled = sample(all_flags, randint(1, len(all_flags)))
    return '(?' + ''.join(enabled) + '-' + ''.join(disabled) + ')'
def generateLogicalId(nChildren, atTop = False):
    # Reference a random pattern id in [0, options.count] as a decimal string.
    ident = randint(0, options.count)
    return str(ident)
def makeLogicalGroup(s):
    """Parenthesise a logical sub-combination."""
    return "(%s)" % s
def generateLogicalNot(nChildren, atTop):
    """Negate a random sub-combination; parens keep precedence explicit."""
    sub = generateCombination(nChildren, [generateLogicalNot], atTop = False)
    return "!" + makeLogicalGroup(sub)
def generateLogicalAnd(nChildren, atTop):
    """Generate a logical AND ('&') combination of sub-combinations.

    Fix: use str.join instead of string.join — the string module's join()
    was removed in Python 3, so the original raised AttributeError there.
    """
    v = [ generateCombination(w, [generateLogicalAnd], atTop = False) for w in chooseLeafWidth(nChildren) ]
    v = [ r for r in v if r != '' ]
    s = "&".join(v)
    if len(v) == 1:
        # A single operand needs no grouping parens.
        return s
    else:
        return makeLogicalGroup(s)
def generateLogicalOr(nChildren, atTop):
    """Generate a logical OR ('|') combination of sub-combinations.

    Fix: use str.join instead of string.join — the string module's join()
    was removed in Python 3, so the original raised AttributeError there.
    """
    v = [ generateCombination(w, [generateLogicalOr], atTop = False) for w in chooseLeafWidth(nChildren) ]
    v = [ r for r in v if r != '' ]
    s = "|".join(v)
    if len(v) == 1:
        # A single operand needs no grouping parens.
        return s
    else:
        return makeLogicalGroup(s)
# Weighted generator tables: each entry is (generator function, relative
# weight). Higher weight means the generator is picked more often once the
# tables are flattened by genChoices() below.

# Interior regex nodes: concatenation, alternation, quantification.
weightsTree = [
    (generateConcat, 10),
    (generateAlt, 3),
    (generateQuant, 2),
]
# Leaf regex nodes: literal chars, classes, dot, assertions, flag groups.
weightsLeaf = [
    (generateChar, 30),
    (generateCharClass, 5),
    (generateDot, 5),
    (generateNocaseChar, 2),
    (generateBoundary, 1),
    (generateOptionsFlags, 1)
]
# Interior nodes for logical combination expressions (!, &, |).
weightsLogicalTree = [
    (generateLogicalNot, 1),
    (generateLogicalAnd, 5),
    (generateLogicalOr, 5),
]
# Leaves for logical combinations: references to pattern ids.
weightsLogicalLeaf = [
    (generateLogicalId, 1),
]
def genChoices(weighted):
    """Flatten (item, weight) pairs into a list with each item repeated
    weight times, preserving input order — suitable for random.choice()."""
    return [f for (f, w) in weighted for _ in range(w)]
# Flattened choice lists (each generator repeated by its weight) for use
# with random.choice().
choicesTree = genChoices(weightsTree)
choicesLeaf = genChoices(weightsLeaf)
choicesLogicalTree = genChoices(weightsLogicalTree)
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
# Weighted anchoring templates applied at the top level of a pattern;
# '%s' is replaced by the generated expression body. Mixes PCRE \A/\Z/\z
# anchors, ^/$ anchors, and (most often) no anchoring at all.
weightsAnchor = [
    ("\\A%s\\Z", 1),
    ("\\A%s\\z", 1),
    ("\\A%s", 4),
    ("%s\\Z", 2),
    ("%s\\z", 2),
    ("^%s$", 1),
    ("^%s", 4),
    ("%s$", 2),
    ("%s", 25)
]
choicesAnchor = genChoices(weightsAnchor)
def generateRE(nChildren, suppressList = [], atTop = False):
    """Recursively generate a random regex of roughly nChildren nodes.

    suppressList lists generators that must not be chosen at this level
    (prevents degenerate nesting); at the top level the result may be
    wrapped in a randomly chosen anchoring template.
    """
    template = choice(choicesAnchor) if atTop else "%s"
    nChildren -= 1
    if nChildren == 0:
        body = choice(choicesLeaf)(nChildren, atTop)
    else:
        allowed = [g for g in choicesTree if g not in suppressList]
        body = choice(allowed)(nChildren, atTop)
    return template % body
def generateCombination(nChildren, suppressList = [], atTop = False):
    """Recursively generate a random logical-combination expression.

    Analogue of generateRE for logical combinations; no anchoring is
    applied at the top level.
    """
    nChildren -= 1
    if nChildren == 0:
        return choice(choicesLogicalLeaf)(nChildren, atTop)
    allowed = [g for g in choicesLogicalTree if g not in suppressList]
    return choice(allowed)(nChildren, atTop)
def generateRandomOptions():
    """Build a random compile-flag string from the global options.

    Each candidate flag is independently included or omitted; hybrid mode
    uses a reduced pool, and logical mode may append the 'Q' flag.
    """
    if options.hybrid:
        flag_pool = "smiH8W"
    else:
        # Maintain an ordering for consistency.
        flag_pool = "smiHV8WLP"
    picked = [choice(['', f]) for f in flag_pool]
    if options.logical:
        picked.append(choice(['', 'Q']))
    return ''.join(picked)
def generateRandomExtParam(depth, extparam):
    """Optionally build an extended-parameter suffix like {min_length=3,...}.

    Returns the empty string when extparam is falsy or when no parameter
    happens to be selected.
    """
    if not extparam:
        return ""
    parts = []
    if choice((False, True)):
        parts.append("min_length=%u" % randint(1, depth))
    if choice((False, True)):
        parts.append("min_offset=%u" % randint(1, depth))
    if choice((False, True)):
        parts.append("max_offset=%u" % randint(1, depth * 3))
    if choice((False, True)):
        dist = randint(1, 3)
        key = "edit_distance" if choice((False, True)) else "hamming_distance"
        parts.append("%s=%u" % (key, dist))
    if not parts:
        return ""
    return "{%s}" % ",".join(parts)
# ---- command line & main loop ------------------------------------------
# Fixes for Python 3 compatibility (behaviour unchanged on Python 2):
#   * print statements replaced with print() of a single pre-formatted
#     string (also valid as a parenthesised print statement on Python 2),
#   * xrange replaced with range,
#   * alphabet materialised with list() so '+=' works on Python 3, where
#     range() is a lazy sequence.
parser = OptionParser()
parser.add_option("-d", "--depth",
                  action="store", type="int", dest="depth", default=200,
                  help="Depth of generation (akin to maximum length)")
parser.add_option("-c", "--count",
                  action="store", type="int", dest="count", default=1000,
                  help="Number of expressions to generate")
parser.add_option("-a", "--alphabet",
                  action="store", type="int", dest="alphabet", default=26,
                  help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
parser.add_option("-i", "--nocase",
                  action="store_true", dest="nocase",
                  help="Use a caseless alphabet for character generation")
parser.add_option("-x", "--extparam",
                  action="store_true", dest="extparam",
                  help="Generate random extended parameters")
parser.add_option("-l", "--logical",
                  action="store_true", dest="logical",
                  help="Generate logical combination expressions")
parser.add_option("-H", "--hybrid",
                  action="store_true", dest="hybrid",
                  help="Generate random flags for hybrid mode")
(options, args) = parser.parse_args()
if len(args) != 0:
    parser.error("incorrect number of arguments")

# Alphabet of candidate code points, lowercase first, uppercase appended
# in caseless mode.
alphabet = list(range(ord('a'), ord('a') + options.alphabet))
if options.nocase:
    alphabet += list(range(ord('A'), ord('A') + options.alphabet))

# Emit one 'id:/pattern/flags{extparams}' line per generated expression.
for i in range(0, options.count):
    print("%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop = True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam)))
# Optionally follow with logical-combination expressions ('C' flag).
if options.logical:
    for i in range(options.count, options.count + 3000):
        print("%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop = True)))

View File

@ -0,0 +1,9 @@
hatstand
teakettle
badgerbrush
mnemosyne
rapscallion
acerbic
blackhat
rufous
echolalia

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2019, Intel Corporation * Copyright (c) 2016-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -207,7 +207,9 @@ void usage(const char *error) {
printf(" -P Benchmark using PCRE (if supported).\n"); printf(" -P Benchmark using PCRE (if supported).\n");
#endif #endif
#if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) || defined(_WIN32) #if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) || defined(_WIN32)
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n"); printf(" -T CPU,CPU,... or -T CPU-CPU\n");
printf(" Benchmark with threads on specified CPUs or CPU"
" range.\n");
#endif #endif
printf(" -i DIR Don't compile, load from files in DIR" printf(" -i DIR Don't compile, load from files in DIR"
" instead.\n"); " instead.\n");
@ -354,7 +356,8 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
case 'T': case 'T':
if (!strToList(optarg, threadCores)) { if (!strToList(optarg, threadCores)) {
usage("Couldn't parse argument to -T flag, should be" usage("Couldn't parse argument to -T flag, should be"
" a list of positive integers."); " a list of positive integers or 2 integers"
" connected with hyphen.");
exit(1); exit(1);
} }
break; break;

View File

@ -216,8 +216,9 @@ def enchunk_pcap(pcapFN, sqliteFN):
# #
# Read in the contents of the pcap file, adding stream segments as found # Read in the contents of the pcap file, adding stream segments as found
# #
pkt_cnt = 0; pkt_cnt = 0
ip_pkt_cnt = 0; ip_pkt_cnt = 0
ip_pkt_off = 0
unsupported_ip_protocol_cnt = 0 unsupported_ip_protocol_cnt = 0
pcap_ref = pcap.pcap(pcapFN) pcap_ref = pcap.pcap(pcapFN)
done = False done = False
@ -231,16 +232,24 @@ def enchunk_pcap(pcapFN, sqliteFN):
pkt_cnt += 1 pkt_cnt += 1
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0] linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
if linkLayerType != ETHERTYPE_IP: #
# # We're only interested in IP packets
# We're only interested in IP packets #
# if linkLayerType == ETHERTYPE_VLAN:
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff + 2):(pcap_ref.dloff + 4)])[0]
if linkLayerType != ETHERTYPE_IP:
continue
else:
ip_pkt_off = pcap_ref.dloff + 4
elif linkLayerType == ETHERTYPE_IP:
ip_pkt_off = pcap_ref.dloff
else:
continue continue
ip_pkt_cnt += 1 ip_pkt_cnt += 1
ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0] ip_pkt_total_len = struct.unpack('!H', packet[ip_pkt_off + 2: ip_pkt_off + 4])[0]
ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len] ip_pkt = packet[ip_pkt_off:ip_pkt_off + ip_pkt_total_len]
pkt_protocol = struct.unpack('B', ip_pkt[9])[0] pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP): if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):

View File

@ -241,6 +241,13 @@ void addCallout(string &re) {
re.append("\\E)(?C)"); re.append("\\E)(?C)");
} }
static
bool isUtf8(const CompiledPcre &compiled) {
unsigned long int options = 0;
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
return options & PCRE_UTF8;
}
unique_ptr<CompiledPcre> unique_ptr<CompiledPcre>
GroundTruth::compile(unsigned id, bool no_callouts) { GroundTruth::compile(unsigned id, bool no_callouts) {
bool highlander = false; bool highlander = false;
@ -380,6 +387,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
throw PcreCompileFailure(oss.str()); throw PcreCompileFailure(oss.str());
} }
compiled->utf8 |= isUtf8(*compiled);
return compiled; return compiled;
} }
@ -451,13 +460,6 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
return ret; return ret;
} }
static
bool isUtf8(const CompiledPcre &compiled) {
unsigned long int options = 0;
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
return options & PCRE_UTF8;
}
static static
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) { CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
assert(ret > 0); assert(ret > 0);

View File

@ -40,12 +40,12 @@ using namespace std;
using namespace ue2; using namespace ue2;
struct SimpleV { struct SimpleV {
size_t index; size_t index = 0;
string test_v = "SimpleV"; string test_v = "SimpleV";
}; };
struct SimpleE { struct SimpleE {
size_t index; size_t index = 0;
string test_e = "SimpleE"; string test_e = "SimpleE";
}; };

View File

@ -146,9 +146,8 @@ bool isIgnorable(const std::string &f) {
#ifndef _WIN32 #ifndef _WIN32
void loadExpressions(const string &inPath, ExpressionMap &exprMap) { void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
// Is our input path a file or a directory? // Is our input path a file or a directory?
int fd = open(inPath.c_str(), O_RDONLY);
struct stat st; struct stat st;
if (fstat(fd, &st) != 0) { if (stat(inPath.c_str(), &st) != 0) {
cerr << "Can't stat path: '" << inPath << "'" << endl; cerr << "Can't stat path: '" << inPath << "'" << endl;
exit(1); exit(1);
} }
@ -161,7 +160,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
exit(1); exit(1);
} }
} else if (S_ISDIR(st.st_mode)) { } else if (S_ISDIR(st.st_mode)) {
DIR *d = fdopendir(fd); DIR *d = opendir(inPath.c_str());
if (d == nullptr) { if (d == nullptr) {
cerr << "Can't open directory: '" << inPath << "'" << endl; cerr << "Can't open directory: '" << inPath << "'" << endl;
exit(1); exit(1);
@ -192,10 +191,11 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
} }
(void)closedir(d); (void)closedir(d);
} else { } else {
cerr << "Can't stat path: '" << inPath << "'" << endl; cerr << "Unsupported file type "
<< hex << showbase << (st.st_mode & S_IFMT)
<< " for path: '" << inPath << "'" << endl;
exit(1); exit(1);
} }
(void)close(fd);
} }
#else // windows TODO: improve #else // windows TODO: improve
void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) { void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2019, Intel Corporation * Copyright (c) 2015-2020, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -54,8 +54,8 @@ inline bool fromString(const std::string &s, T& val)
return true; return true;
} }
// read in a comma-separated set of values: very simple impl, not for // read in a comma-separated or hyphen-connected set of values: very simple
// external consumption // impl, not for external consumption
template<typename T> template<typename T>
inline bool strToList(const std::string &s, std::vector<T>& out) inline bool strToList(const std::string &s, std::vector<T>& out)
{ {
@ -68,7 +68,17 @@ inline bool strToList(const std::string &s, std::vector<T>& out)
} }
out.push_back(val); out.push_back(val);
} while (i.get(c) && c == ',');
i.get(c);
if (c == '-') {
T val_end;
i >> val_end;
while (val < val_end) {
out.push_back(++val);
}
break;
}
} while (c == ',');
return !out.empty(); return !out.empty();
} }