mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch 'github_develop' into github_master
This commit is contained in:
commit
c00683d739
23
CHANGELOG.md
23
CHANGELOG.md
@ -2,6 +2,29 @@
|
||||
|
||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||
|
||||
## [5.3.0] 2020-05-15
|
||||
- Improvement on literal matcher "Teddy" performance, including support for
|
||||
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
|
||||
VBMI).
|
||||
- Improvement on single-byte/two-byte matching performance, including support
|
||||
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
|
||||
- hsbench: add hyphen support for -T option.
|
||||
- tools/fuzz: add test scripts for synthetic pattern generation.
|
||||
- Bugfix for acceleration path analysis in LimEx NFA.
|
||||
- Bugfix for duplicate matches for Small-write engine.
|
||||
- Bugfix for UTF8 checking problem for hscollider.
|
||||
- Bugfix for issue #205: avoid crash of `hs_compile_lit_multi()` with clang and
|
||||
ASAN.
|
||||
- Bugfix for issue #211: fix error in `db_check_platform()` function.
|
||||
- Bugfix for issue #217: fix cmake parsing issue of CPU arch for non-English
|
||||
locale.
|
||||
- Bugfix for issue #228: avoid undefined behavior when calling `close()` after
|
||||
`fdopendir()` in `loadExpressions()`.
|
||||
- Bugfix for issue #239: fix hyperscan compile issue under gcc-10.
|
||||
- Add VLAN packets processing capability in pcap analysis script. (#214)
|
||||
- Avoid extra convert instruction for "Noodle". (#221)
|
||||
- Add Hyperscan version marcro in `hs.h`. (#222)
|
||||
|
||||
## [5.2.1] 2019-10-13
|
||||
- Bugfix for issue #186: fix compile issue when `BUILD_SHARED_LIBS` is on in
|
||||
release mode.
|
||||
|
@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
|
||||
project (hyperscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 5)
|
||||
set (HS_MINOR_VERSION 2)
|
||||
set (HS_PATCH_VERSION 1)
|
||||
set (HS_MINOR_VERSION 3)
|
||||
set (HS_PATCH_VERSION 0)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
@ -187,9 +187,9 @@ else()
|
||||
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||
string(FIND "${_GCC_OUTPUT}" "Known" POS)
|
||||
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
|
||||
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
||||
string(FIND "${_GCC_OUTPUT}" "march" POS)
|
||||
string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
|
||||
string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
||||
GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||
|
||||
# test the parsed flag
|
||||
@ -326,7 +326,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
set (FAT_RUNTIME_REQUISITES TRUE)
|
||||
endif()
|
||||
endif()
|
||||
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
|
||||
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
|
||||
endif ()
|
||||
|
||||
include (${CMAKE_MODULE_PATH}/arch.cmake)
|
||||
@ -340,7 +340,7 @@ if (NOT WIN32)
|
||||
set(C_FLAGS_TO_CHECK
|
||||
# Variable length arrays are way bad, most especially at run time
|
||||
"-Wvla"
|
||||
# Pointer arith on void pointers is doing it wong.
|
||||
# Pointer arith on void pointers is doing it wrong.
|
||||
"-Wpointer-arith"
|
||||
# Build our C code with -Wstrict-prototypes -Wmissing-prototypes
|
||||
"-Wstrict-prototypes"
|
||||
@ -383,7 +383,7 @@ if (CC_PAREN_EQUALITY)
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
|
||||
endif()
|
||||
|
||||
# clang compains about unused const vars in our Ragel-generated code.
|
||||
# clang complains about unused const vars in our Ragel-generated code.
|
||||
CHECK_CXX_COMPILER_FLAG("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
|
||||
if (CXX_UNUSED_CONST_VAR)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
||||
@ -418,6 +418,12 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
|
||||
# gcc5 complains about this
|
||||
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
||||
|
||||
# gcc 10 complains about this
|
||||
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
|
||||
if(CC_STRINGOP_OVERFLOW)
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||
|
@ -58,6 +58,18 @@ int main(){
|
||||
(void)_mm512_abs_epi8(z);
|
||||
}" HAVE_AVX512)
|
||||
|
||||
# and now for AVX512VBMI
|
||||
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||
#if !defined(__AVX512VBMI__)
|
||||
#error no avx512vbmi
|
||||
#endif
|
||||
|
||||
int main(){
|
||||
__m512i a = _mm512_set1_epi8(0xFF);
|
||||
__m512i idx = _mm512_set_epi64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL);
|
||||
(void)_mm512_permutexvar_epi8(idx, a);
|
||||
}" HAVE_AVX512VBMI)
|
||||
|
||||
if (FAT_RUNTIME)
|
||||
if (NOT HAVE_SSSE3)
|
||||
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
|
||||
|
@ -75,12 +75,12 @@ characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
|
||||
While in pure literal case, all these meta characters lost extra meanings
|
||||
expect for that they are just common ASCII codes.
|
||||
|
||||
Hyperscan is initially designed to process common regualr expressions. It is
|
||||
Hyperscan is initially designed to process common regular expressions. It is
|
||||
hence embedded with a complex parser to do comprehensive regular grammer
|
||||
interpretion. Particularly, the identification of above meta characters is the
|
||||
basic step for the interpretion of far more complex regular grammers.
|
||||
|
||||
However in real cases, patterns may not always be regualr expressions. They
|
||||
However in real cases, patterns may not always be regular expressions. They
|
||||
could just be pure literals. Problem will come if the pure literals contain
|
||||
regular meta characters. Supposing fed directly into traditional Hyperscan
|
||||
compile API, all these meta characters will be interpreted in predefined ways,
|
||||
@ -110,8 +110,8 @@ Hyperscan needs to locate the end position of the input expression via clearly
|
||||
knowing each literal's length, not by simply identifying character ``\0`` of a
|
||||
string.
|
||||
|
||||
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`,
|
||||
:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`.
|
||||
Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_SINGLEMATCH`,
|
||||
:c:member:`HS_FLAG_SOM_LEFTMOST`.
|
||||
|
||||
.. note:: We don't support literal compilation API with :ref:`extparam`. And
|
||||
for runtime implementation, traditional runtime APIs can still be
|
||||
|
@ -260,7 +260,7 @@ instead of potentially executing illegal instructions. The API function
|
||||
:c:func:`hs_valid_platform` can be used by application writers to determine if
|
||||
the current platform is supported by Hyperscan.
|
||||
|
||||
At of this release, the variants of the runtime that are built, and the CPU
|
||||
As of this release, the variants of the runtime that are built, and the CPU
|
||||
capability that is required, are the following:
|
||||
|
||||
+----------+-------------------------------+---------------------------+
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -125,7 +125,7 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
|
||||
: expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false,
|
||||
SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) {
|
||||
// For pure literal expression, below 'HS_FLAG_'s are unuseful:
|
||||
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET
|
||||
// DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET/MULTILINE
|
||||
|
||||
if (flags & ~HS_FLAG_ALL) {
|
||||
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
|
||||
@ -402,19 +402,18 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
|
||||
}
|
||||
|
||||
// Ensure that our pattern isn't too long (in characters).
|
||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
||||
if (expLength > cc.grey.limitPatternLength) {
|
||||
throw CompileError("Pattern length exceeds limit.");
|
||||
}
|
||||
|
||||
// filter out flags not supported by pure literal API.
|
||||
u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 |
|
||||
HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION |
|
||||
HS_FLAG_QUIET;
|
||||
HS_FLAG_QUIET | HS_FLAG_MULTILINE;
|
||||
|
||||
if (flags & not_supported) {
|
||||
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, "
|
||||
"HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are "
|
||||
"supported in literal API.");
|
||||
throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_SINGLEMATCH and "
|
||||
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
|
||||
}
|
||||
|
||||
// This expression must be a pure literal, we can build ue2_literal
|
||||
|
@ -114,8 +114,8 @@ hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||
static
|
||||
hs_error_t db_check_platform(const u64a p) {
|
||||
if (p != hs_current_platform
|
||||
&& p != hs_current_platform_no_avx2
|
||||
&& p != hs_current_platform_no_avx512) {
|
||||
&& p != (hs_current_platform | hs_current_platform_no_avx2)
|
||||
&& p != (hs_current_platform | hs_current_platform_no_avx512)) {
|
||||
return HS_DB_PLATFORM_ERROR;
|
||||
}
|
||||
// passed all checks
|
||||
|
305
src/fdr/teddy.c
305
src/fdr/teddy.c
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -74,6 +74,30 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
};
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
|
||||
|
||||
#define CONF_CHUNK_64(chunk, bucket, off, reason, pt, conf_fn) \
|
||||
do { \
|
||||
if (unlikely(chunk != ones_u64a)) { \
|
||||
chunk = ~chunk; \
|
||||
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define CONF_CHUNK_32(chunk, bucket, off, reason, pt, conf_fn) \
|
||||
do { \
|
||||
if (unlikely(chunk != ones_u32)) { \
|
||||
chunk = ~chunk; \
|
||||
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
|
||||
#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
|
||||
do { \
|
||||
if (unlikely(chunk != ones_u64a)) { \
|
||||
@ -94,7 +118,284 @@ do { \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#if defined(HAVE_AVX512) // AVX512 reinforced teddy
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
|
||||
|
||||
#ifdef ARCH_64_BIT
|
||||
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||
do { \
|
||||
if (unlikely(diff512(var, ones512()))) { \
|
||||
m128 p128_0 = extract128from512(var, 0); \
|
||||
m128 p128_1 = extract128from512(var, 1); \
|
||||
m128 p128_2 = extract128from512(var, 2); \
|
||||
m128 p128_3 = extract128from512(var, 3); \
|
||||
u64a part1 = movq(p128_0); \
|
||||
u64a part2 = movq(rshiftbyte_m128(p128_0, 8)); \
|
||||
u64a part3 = movq(p128_1); \
|
||||
u64a part4 = movq(rshiftbyte_m128(p128_1, 8)); \
|
||||
u64a part5 = movq(p128_2); \
|
||||
u64a part6 = movq(rshiftbyte_m128(p128_2, 8)); \
|
||||
u64a part7 = movq(p128_3); \
|
||||
u64a part8 = movq(rshiftbyte_m128(p128_3, 8)); \
|
||||
CONF_CHUNK_64(part1, bucket, offset, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part2, bucket, offset + 8, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part3, bucket, offset + 16, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part4, bucket, offset + 24, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part5, bucket, offset + 32, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part6, bucket, offset + 40, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part7, bucket, offset + 48, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_64(part8, bucket, offset + 56, reason, pt, conf_fn); \
|
||||
} \
|
||||
} while(0)
|
||||
#else
|
||||
#define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||
do { \
|
||||
if (unlikely(diff512(var, ones512()))) { \
|
||||
m128 p128_0 = extract128from512(var, 0); \
|
||||
m128 p128_1 = extract128from512(var, 1); \
|
||||
m128 p128_2 = extract128from512(var, 2); \
|
||||
m128 p128_3 = extract128from512(var, 3); \
|
||||
u32 part1 = movd(p128_0); \
|
||||
u32 part2 = movd(rshiftbyte_m128(p128_0, 4)); \
|
||||
u32 part3 = movd(rshiftbyte_m128(p128_0, 8)); \
|
||||
u32 part4 = movd(rshiftbyte_m128(p128_0, 12)); \
|
||||
u32 part5 = movd(p128_1); \
|
||||
u32 part6 = movd(rshiftbyte_m128(p128_1, 4)); \
|
||||
u32 part7 = movd(rshiftbyte_m128(p128_1, 8)); \
|
||||
u32 part8 = movd(rshiftbyte_m128(p128_1, 12)); \
|
||||
u32 part9 = movd(p128_2); \
|
||||
u32 part10 = movd(rshiftbyte_m128(p128_2, 4)); \
|
||||
u32 part11 = movd(rshiftbyte_m128(p128_2, 8)); \
|
||||
u32 part12 = movd(rshiftbyte_m128(p128_2, 12)); \
|
||||
u32 part13 = movd(p128_3); \
|
||||
u32 part14 = movd(rshiftbyte_m128(p128_3, 4)); \
|
||||
u32 part15 = movd(rshiftbyte_m128(p128_3, 8)); \
|
||||
u32 part16 = movd(rshiftbyte_m128(p128_3, 12)); \
|
||||
CONF_CHUNK_32(part1, bucket, offset, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part2, bucket, offset + 4, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part3, bucket, offset + 8, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part4, bucket, offset + 12, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part5, bucket, offset + 16, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part6, bucket, offset + 20, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part7, bucket, offset + 24, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part8, bucket, offset + 28, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part9, bucket, offset + 32, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part10, bucket, offset + 36, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part11, bucket, offset + 40, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part12, bucket, offset + 44, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part13, bucket, offset + 48, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part14, bucket, offset + 52, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part15, bucket, offset + 56, reason, pt, conf_fn); \
|
||||
CONF_CHUNK_32(part16, bucket, offset + 60, reason, pt, conf_fn); \
|
||||
} \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
#define PREP_SHUF_MASK \
|
||||
m512 lo = and512(val, *lo_mask); \
|
||||
m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
|
||||
|
||||
#define TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||
m512 shuf_or_b0 = or512(pshufb_m512(dup_mask[0], lo), \
|
||||
pshufb_m512(dup_mask[1], hi));
|
||||
|
||||
#define TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||
TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||
m512 shuf_or_b1 = or512(pshufb_m512(dup_mask[2], lo), \
|
||||
pshufb_m512(dup_mask[3], hi));
|
||||
|
||||
#define TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||
TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||
m512 shuf_or_b2 = or512(pshufb_m512(dup_mask[4], lo), \
|
||||
pshufb_m512(dup_mask[5], hi));
|
||||
|
||||
#define TEDDY_VBMI_PSHUFB_OR_M4 \
|
||||
TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||
m512 shuf_or_b3 = or512(pshufb_m512(dup_mask[6], lo), \
|
||||
pshufb_m512(dup_mask[7], hi));
|
||||
|
||||
#define TEDDY_VBMI_SL1_MASK 0xfffffffffffffffeULL
|
||||
#define TEDDY_VBMI_SL2_MASK 0xfffffffffffffffcULL
|
||||
#define TEDDY_VBMI_SL3_MASK 0xfffffffffffffff8ULL
|
||||
|
||||
#define TEDDY_VBMI_SHIFT_M1
|
||||
|
||||
#define TEDDY_VBMI_SHIFT_M2 \
|
||||
TEDDY_VBMI_SHIFT_M1 \
|
||||
m512 sl1 = maskz_vpermb512(TEDDY_VBMI_SL1_MASK, sl_msk[0], shuf_or_b1);
|
||||
|
||||
#define TEDDY_VBMI_SHIFT_M3 \
|
||||
TEDDY_VBMI_SHIFT_M2 \
|
||||
m512 sl2 = maskz_vpermb512(TEDDY_VBMI_SL2_MASK, sl_msk[1], shuf_or_b2);
|
||||
|
||||
#define TEDDY_VBMI_SHIFT_M4 \
|
||||
TEDDY_VBMI_SHIFT_M3 \
|
||||
m512 sl3 = maskz_vpermb512(TEDDY_VBMI_SL3_MASK, sl_msk[2], shuf_or_b3);
|
||||
|
||||
#define SHIFT_OR_M1 \
|
||||
shuf_or_b0
|
||||
|
||||
#define SHIFT_OR_M2 \
|
||||
or512(sl1, SHIFT_OR_M1)
|
||||
|
||||
#define SHIFT_OR_M3 \
|
||||
or512(sl2, SHIFT_OR_M2)
|
||||
|
||||
#define SHIFT_OR_M4 \
|
||||
or512(sl3, SHIFT_OR_M3)
|
||||
|
||||
static really_inline
|
||||
m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
|
||||
UNUSED const m512 *sl_msk, const m512 val) {
|
||||
PREP_SHUF_MASK;
|
||||
TEDDY_VBMI_PSHUFB_OR_M1;
|
||||
TEDDY_VBMI_SHIFT_M1;
|
||||
return SHIFT_OR_M1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
|
||||
const m512 *sl_msk, const m512 val) {
|
||||
PREP_SHUF_MASK;
|
||||
TEDDY_VBMI_PSHUFB_OR_M2;
|
||||
TEDDY_VBMI_SHIFT_M2;
|
||||
return SHIFT_OR_M2;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
|
||||
const m512 *sl_msk, const m512 val) {
|
||||
PREP_SHUF_MASK;
|
||||
TEDDY_VBMI_PSHUFB_OR_M3;
|
||||
TEDDY_VBMI_SHIFT_M3;
|
||||
return SHIFT_OR_M3;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
||||
const m512 *sl_msk, const m512 val) {
|
||||
PREP_SHUF_MASK;
|
||||
TEDDY_VBMI_PSHUFB_OR_M4;
|
||||
TEDDY_VBMI_SHIFT_M4;
|
||||
return SHIFT_OR_M4;
|
||||
}
|
||||
|
||||
#define PREP_CONF_FN(val, n) \
|
||||
prep_conf_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
|
||||
|
||||
const u8 ALIGN_DIRECTIVE p_sh_mask_arr[80] = {
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
|
||||
};
|
||||
|
||||
#define TEDDY_VBMI_SL1_POS 15
|
||||
#define TEDDY_VBMI_SL2_POS 14
|
||||
#define TEDDY_VBMI_SL3_POS 13
|
||||
|
||||
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M1
|
||||
|
||||
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||
TEDDY_VBMI_LOAD_SHIFT_MASK_M1 \
|
||||
sl_msk[0] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL1_POS);
|
||||
|
||||
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||
TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||
sl_msk[1] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL2_POS);
|
||||
|
||||
#define TEDDY_VBMI_LOAD_SHIFT_MASK_M4 \
|
||||
TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||
sl_msk[2] = loadu512(p_sh_mask_arr + TEDDY_VBMI_SL3_POS);
|
||||
|
||||
#define PREPARE_MASKS_1 \
|
||||
dup_mask[0] = set4x128(maskBase[0]); \
|
||||
dup_mask[1] = set4x128(maskBase[1]);
|
||||
|
||||
#define PREPARE_MASKS_2 \
|
||||
PREPARE_MASKS_1 \
|
||||
dup_mask[2] = set4x128(maskBase[2]); \
|
||||
dup_mask[3] = set4x128(maskBase[3]);
|
||||
|
||||
#define PREPARE_MASKS_3 \
|
||||
PREPARE_MASKS_2 \
|
||||
dup_mask[4] = set4x128(maskBase[4]); \
|
||||
dup_mask[5] = set4x128(maskBase[5]);
|
||||
|
||||
#define PREPARE_MASKS_4 \
|
||||
PREPARE_MASKS_3 \
|
||||
dup_mask[6] = set4x128(maskBase[6]); \
|
||||
dup_mask[7] = set4x128(maskBase[7]);
|
||||
|
||||
#define PREPARE_MASKS(n) \
|
||||
m512 lo_mask = set64x8(0xf); \
|
||||
m512 dup_mask[n * 2]; \
|
||||
m512 sl_msk[n - 1]; \
|
||||
PREPARE_MASKS_##n \
|
||||
TEDDY_VBMI_LOAD_SHIFT_MASK_M##n
|
||||
|
||||
#define TEDDY_VBMI_CONF_MASK_HEAD (0xffffffffffffffffULL >> n_sh)
|
||||
#define TEDDY_VBMI_CONF_MASK_FULL (0xffffffffffffffffULL << n_sh)
|
||||
#define TEDDY_VBMI_CONF_MASK_VAR(n) (0xffffffffffffffffULL >> (64 - n) << overlap)
|
||||
#define TEDDY_VBMI_LOAD_MASK_PATCH (0xffffffffffffffffULL >> (64 - n_sh))
|
||||
|
||||
#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \
|
||||
do { \
|
||||
const u8 *buf_end = a->buf + a->len; \
|
||||
const u8 *ptr = a->buf + a->start_offset; \
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START; \
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect; \
|
||||
u32 last_match = ones_u32; \
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr; \
|
||||
const size_t iterBytes = 64; \
|
||||
u32 n_sh = n_msk - 1; \
|
||||
const size_t loopBytes = 64 - n_sh; \
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \
|
||||
a->buf, a->len, a->start_offset); \
|
||||
\
|
||||
const m128 *maskBase = getMaskBase(teddy); \
|
||||
PREPARE_MASKS(n_msk); \
|
||||
const u32 *confBase = getConfBase(teddy); \
|
||||
\
|
||||
u64a k = TEDDY_VBMI_CONF_MASK_FULL; \
|
||||
m512 p_mask = set_mask_m512(~k); \
|
||||
u32 overlap = 0; \
|
||||
u64a patch = 0; \
|
||||
if (likely(ptr + loopBytes <= buf_end)) { \
|
||||
m512 p_mask0 = set_mask_m512(~TEDDY_VBMI_CONF_MASK_HEAD); \
|
||||
m512 r_0 = PREP_CONF_FN(loadu512(ptr), n_msk); \
|
||||
r_0 = or512(r_0, p_mask0); \
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr, conf_fn); \
|
||||
ptr += loopBytes; \
|
||||
overlap = n_sh; \
|
||||
patch = TEDDY_VBMI_LOAD_MASK_PATCH; \
|
||||
} \
|
||||
\
|
||||
for (; ptr + loopBytes <= buf_end; ptr += loopBytes) { \
|
||||
__builtin_prefetch(ptr - n_sh + (64 * 2)); \
|
||||
CHECK_FLOOD; \
|
||||
m512 r_0 = PREP_CONF_FN(loadu512(ptr - n_sh), n_msk); \
|
||||
r_0 = or512(r_0, p_mask); \
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, ptr - n_sh, conf_fn); \
|
||||
} \
|
||||
\
|
||||
assert(ptr + loopBytes > buf_end); \
|
||||
if (ptr < buf_end) { \
|
||||
u32 left = (u32)(buf_end - ptr); \
|
||||
u64a k1 = TEDDY_VBMI_CONF_MASK_VAR(left); \
|
||||
m512 p_mask1 = set_mask_m512(~k1); \
|
||||
m512 val_0 = loadu_maskz_m512(k1 | patch, ptr - overlap); \
|
||||
m512 r_0 = PREP_CONF_FN(val_0, n_msk); \
|
||||
r_0 = or512(r_0, p_mask1); \
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, ptr - overlap, conf_fn); \
|
||||
} \
|
||||
\
|
||||
return HWLM_SUCCESS; \
|
||||
} while(0)
|
||||
|
||||
#elif defined(HAVE_AVX512) // AVX512 reinforced teddy
|
||||
|
||||
#ifdef ARCH_64_BIT
|
||||
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -134,7 +134,7 @@ const m256 *getMaskBase_fat(const struct Teddy *teddy) {
|
||||
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
#if defined(HAVE_AVX512_REVERT) // revert to AVX2 Fat Teddy
|
||||
|
||||
static really_inline
|
||||
const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -383,12 +383,16 @@ m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset,
|
||||
|
||||
static really_inline
|
||||
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
|
||||
CautionReason reason) {
|
||||
UNUSED CautionReason reason) {
|
||||
u64a confVal = 0;
|
||||
const u8 *buf = a->buf;
|
||||
size_t len = a->len;
|
||||
const u8 *confirm_loc = ptr + byte - 7;
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
if (likely(confirm_loc >= buf)) {
|
||||
#else
|
||||
if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
||||
#endif
|
||||
confVal = lv_u64a(confirm_loc, buf, buf + len);
|
||||
} else { // r == VECTORING, confirm_loc < buf
|
||||
u64a histBytes = a->histBytes;
|
||||
|
8
src/hs.h
8
src/hs.h
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -39,6 +39,12 @@
|
||||
* the individual component headers for documentation.
|
||||
*/
|
||||
|
||||
/* The current Hyperscan version information. */
|
||||
|
||||
#define HS_MAJOR 5
|
||||
#define HS_MINOR 3
|
||||
#define HS_PATCH 0
|
||||
|
||||
#include "hs_compile.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -563,7 +563,6 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
|
||||
* be used by ORing them together. Compared to @ref hs_compile(), fewer
|
||||
* valid values are provided:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
@ -637,7 +636,6 @@ hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Compared to @ref hs_compile_multi(), fewer valid values are provided:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
@ -985,8 +983,8 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
||||
* offset when a match is reported for this expression. (By default, no start
|
||||
* of match is returned.)
|
||||
*
|
||||
* Enabling this behaviour may reduce performance and increase stream state
|
||||
* requirements in streaming mode.
|
||||
* For all the 3 modes, enabling this behaviour may reduce performance. And
|
||||
* particularly, it may increase stream state requirements in streaming mode.
|
||||
*/
|
||||
#define HS_FLAG_SOM_LEFTMOST 256
|
||||
|
||||
|
@ -210,7 +210,7 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
||||
assert(d < e);
|
||||
u8 lastz0 = 0;
|
||||
u32 lastz0 = 0;
|
||||
|
||||
for (; d < e; d += 32) {
|
||||
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
|
||||
|
@ -214,7 +214,7 @@ static
|
||||
bool double_byte_ok(const AccelScheme &info) {
|
||||
return !info.double_byte.empty() &&
|
||||
info.double_cr.count() < info.double_byte.size() &&
|
||||
info.double_cr.count() <= 2 && !info.double_byte.empty();
|
||||
info.double_cr.count() <= 2;
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -46,7 +46,20 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
|
||||
// Handle small scans.
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? vermMiniNocase(chars, buf, buf_end, 0)
|
||||
: vermMini(chars, buf, buf_end, 0);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
#else
|
||||
if (buf_end - buf < VERM_BOUNDARY) {
|
||||
for (; buf < buf_end; buf++) {
|
||||
char cur = (char)*buf;
|
||||
@ -59,8 +72,8 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
#endif
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
@ -99,7 +112,20 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
|
||||
// Handle small scans.
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? vermMiniNocase(chars, buf, buf_end, 1)
|
||||
: vermMini(chars, buf, buf_end, 1);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
#else
|
||||
if (buf_end - buf < VERM_BOUNDARY) {
|
||||
for (; buf < buf_end; buf++) {
|
||||
char cur = (char)*buf;
|
||||
@ -112,8 +138,8 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
#endif
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
size_t min = (size_t)buf % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
@ -149,12 +175,32 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
||||
DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? dvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||
: dvermMini(chars1, chars2, buf, buf_end);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* check for partial match at end */
|
||||
u8 mask = nocase ? CASE_CLEAR : 0xff;
|
||||
if ((buf_end[-1] & mask) == (u8)c1) {
|
||||
DEBUG_PRINTF("partial!!!\n");
|
||||
return buf_end - 1;
|
||||
}
|
||||
|
||||
return buf_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
// unaligned load, then skip buf forward to the next aligned address.
|
||||
@ -205,14 +251,32 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
||||
DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
|
||||
"over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
VERM_TYPE chars1 = VERM_SET_FN(c1);
|
||||
VERM_TYPE chars2 = VERM_SET_FN(c2);
|
||||
VERM_TYPE mask1 = VERM_SET_FN(m1);
|
||||
VERM_TYPE mask2 = VERM_SET_FN(m2);
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf,
|
||||
buf_end);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* check for partial match at end */
|
||||
if ((buf_end[-1] & m1) == (u8)c1) {
|
||||
DEBUG_PRINTF("partial!!!\n");
|
||||
return buf_end - 1;
|
||||
}
|
||||
|
||||
return buf_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
// unaligned load, then skip buf forward to the next aligned address.
|
||||
@ -244,6 +308,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
||||
|
||||
/* check for partial match at end */
|
||||
if ((buf_end[-1] & m1) == (u8)c1) {
|
||||
DEBUG_PRINTF("partial!!!\n");
|
||||
return buf_end - 1;
|
||||
}
|
||||
|
||||
@ -259,7 +324,20 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
|
||||
// Handle small scans.
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? rvermMiniNocase(chars, buf, buf_end, 0)
|
||||
: rvermMini(chars, buf, buf_end, 0);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
return buf - 1;
|
||||
}
|
||||
#else
|
||||
if (buf_end - buf < VERM_BOUNDARY) {
|
||||
for (buf_end--; buf_end >= buf; buf_end--) {
|
||||
char cur = (char)*buf_end;
|
||||
@ -272,27 +350,23 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
// unaligned load, then skip buf backward to the next aligned address.
|
||||
// There's some small overlap here, but we don't mind scanning it twice
|
||||
// if we can do it quickly, do we?
|
||||
if (nocase) {
|
||||
const u8 *ptr =
|
||||
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0);
|
||||
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
|
||||
buf_end - VERM_BOUNDARY,
|
||||
0)
|
||||
: rvermUnalign(chars, buf_end - VERM_BOUNDARY,
|
||||
0);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
} else {
|
||||
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
buf_end -= min;
|
||||
if (buf >= buf_end) {
|
||||
@ -322,7 +396,20 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
|
||||
// Handle small scans.
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? rvermMiniNocase(chars, buf, buf_end, 1)
|
||||
: rvermMini(chars, buf, buf_end, 1);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
return buf - 1;
|
||||
}
|
||||
#else
|
||||
if (buf_end - buf < VERM_BOUNDARY) {
|
||||
for (buf_end--; buf_end >= buf; buf_end--) {
|
||||
char cur = (char)*buf_end;
|
||||
@ -335,27 +422,23 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
|
||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
// unaligned load, then skip buf backward to the next aligned address.
|
||||
// There's some small overlap here, but we don't mind scanning it twice
|
||||
// if we can do it quickly, do we?
|
||||
if (nocase) {
|
||||
const u8 *ptr =
|
||||
rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1);
|
||||
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
|
||||
buf_end - VERM_BOUNDARY,
|
||||
1)
|
||||
: rvermUnalign(chars, buf_end - VERM_BOUNDARY,
|
||||
1);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
} else {
|
||||
const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
buf_end -= min;
|
||||
if (buf >= buf_end) {
|
||||
@ -383,24 +466,36 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
||||
DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
|
||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? rdvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||
: rdvermMini(chars1, chars2, buf, buf_end);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// check for partial match at end ???
|
||||
return buf - 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// input not aligned, so we need to run one iteration with an unaligned
|
||||
// load, then skip buf forward to the next aligned address. There's
|
||||
// some small overlap here, but we don't mind scanning it twice if we
|
||||
// can do it quickly, do we?
|
||||
const u8 *ptr;
|
||||
if (nocase) {
|
||||
ptr = rdvermPreconditionNocase(chars1, chars2,
|
||||
const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
|
||||
buf_end - VERM_BOUNDARY)
|
||||
: rdvermPrecondition(chars1, chars2,
|
||||
buf_end - VERM_BOUNDARY);
|
||||
} else {
|
||||
ptr = rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
|
||||
}
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,6 +32,8 @@
|
||||
* (users should include vermicelli.h)
|
||||
*/
|
||||
|
||||
#if !defined(HAVE_AVX512)
|
||||
|
||||
#define VERM_BOUNDARY 16
|
||||
#define VERM_TYPE m128
|
||||
#define VERM_SET_FN set16x8
|
||||
@ -391,3 +393,497 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#else // HAVE_AVX512
|
||||
|
||||
#define VERM_BOUNDARY 64
|
||||
#define VERM_TYPE m512
|
||||
#define VERM_SET_FN set64x8
|
||||
|
||||
static really_inline
|
||||
const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
|
||||
u64a z = eq512mask(chars, data);
|
||||
|
||||
if (negate) {
|
||||
z = ~z & mask;
|
||||
}
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return buf + ctz64(z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||
char negate) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 v = and512(casemask, data);
|
||||
|
||||
u64a z = eq512mask(chars, v);
|
||||
|
||||
if (negate) {
|
||||
z = ~z & mask;
|
||||
}
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return buf + ctz64(z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||
char negate) {
|
||||
assert((size_t)buf % 64 == 0);
|
||||
for (; buf + 63 < buf_end; buf += 64) {
|
||||
m512 data = load512(buf);
|
||||
u64a z = eq512mask(chars, data);
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf,
|
||||
const u8 *buf_end, char negate) {
|
||||
assert((size_t)buf % 64 == 0);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 63 < buf_end; buf += 64) {
|
||||
m512 data = load512(buf);
|
||||
u64a z = eq512mask(chars, and512(casemask, data));
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) {
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
u64a z = eq512mask(chars, data);
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return buf + ctz64(z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) {
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
u64a z = eq512mask(chars, and512(casemask, data));
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return buf + ctz64(z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
|
||||
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 v = and512(casemask, data);
|
||||
|
||||
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
m512 v1 = and512(data, mask1);
|
||||
m512 v2 = and512(data, mask2);
|
||||
|
||||
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
for (; buf + 64 < buf_end; buf += 64) {
|
||||
m512 data = load512(buf);
|
||||
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||
if (buf[63] == c1 && buf[64] == c2) {
|
||||
z |= (1ULL << 63);
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf % 64 == 0);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 64 < buf_end; buf += 64) {
|
||||
m512 data = load512(buf);
|
||||
m512 v = and512(casemask, data);
|
||||
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||
if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) {
|
||||
z |= (1ULL << 63);
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2,
|
||||
m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1,
|
||||
u8 m2, const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf % 64 == 0);
|
||||
|
||||
for (; buf + 64 < buf_end; buf += 64) {
|
||||
m512 data = load512(buf);
|
||||
m512 v1 = and512(data, mask1);
|
||||
m512 v2 = and512(data, mask2);
|
||||
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||
|
||||
if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) {
|
||||
z |= (1ULL << 63);
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1);
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
|
||||
/* due to laziness, nonalphas and nocase having interesting behaviour */
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
m512 v = and512(casemask, data);
|
||||
u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1);
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2,
|
||||
m512 mask1, m512 mask2, const u8 *buf) {
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
m512 v1 = and512(data, mask1);
|
||||
m512 v2 = and512(data, mask2);
|
||||
u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1);
|
||||
|
||||
/* no fixup of the boundary required - the aligned run will pick it up */
|
||||
if (unlikely(z)) {
|
||||
u64a pos = ctz64(z);
|
||||
return buf + pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *lastMatchOffset(const u8 *buf_end, u64a z) {
|
||||
assert(z);
|
||||
return buf_end - 64 + 63 - clz64(z);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
|
||||
u64a z = eq512mask(chars, data);
|
||||
|
||||
if (negate) {
|
||||
z = ~z & mask;
|
||||
}
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||
char negate) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 v = and512(casemask, data);
|
||||
|
||||
u64a z = eq512mask(chars, v);
|
||||
|
||||
if (negate) {
|
||||
z = ~z & mask;
|
||||
}
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end,
|
||||
char negate) {
|
||||
assert((size_t)buf_end % 64 == 0);
|
||||
for (; buf + 63 < buf_end; buf_end -= 64) {
|
||||
m512 data = load512(buf_end - 64);
|
||||
u64a z = eq512mask(chars, data);
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf,
|
||||
const u8 *buf_end, char negate) {
|
||||
assert((size_t)buf_end % 64 == 0);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 63 < buf_end; buf_end -= 64) {
|
||||
m512 data = load512(buf_end - 64);
|
||||
u64a z = eq512mask(chars, and512(casemask, data));
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) {
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
u64a z = eq512mask(chars, data);
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) {
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 data = loadu512(buf); // unaligned
|
||||
u64a z = eq512mask(chars, and512(casemask, data));
|
||||
if (negate) {
|
||||
z = ~z & ~0ULL;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
|
||||
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
uintptr_t len = buf_end - buf;
|
||||
__mmask64 mask = (~0ULL) >> (64 - len);
|
||||
m512 data = loadu_maskz_m512(mask, buf);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 v = and512(casemask, data);
|
||||
|
||||
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||
|
||||
z &= mask;
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf_end % 64 == 0);
|
||||
|
||||
for (; buf + 64 < buf_end; buf_end -= 64) {
|
||||
m512 data = load512(buf_end - 64);
|
||||
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||
if (buf_end[-65] == c1 && buf_end[-64] == c2) {
|
||||
z |= 1;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
assert((size_t)buf_end % 64 == 0);
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
|
||||
for (; buf + 64 < buf_end; buf_end -= 64) {
|
||||
m512 data = load512(buf_end - 64);
|
||||
m512 v = and512(casemask, data);
|
||||
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||
if ((buf_end[-65] & CASE_CLEAR) == c1
|
||||
&& (buf_end[-64] & CASE_CLEAR) == c2) {
|
||||
z |= 1;
|
||||
}
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf_end, z);
|
||||
}
|
||||
}
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) {
|
||||
m512 data = loadu512(buf);
|
||||
u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1);
|
||||
|
||||
// no fixup of the boundary required - the aligned run will pick it up
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// returns NULL if not found
|
||||
static really_inline
|
||||
const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) {
|
||||
// due to laziness, nonalphas and nocase having interesting behaviour
|
||||
m512 casemask = set64x8(CASE_CLEAR);
|
||||
m512 data = loadu512(buf);
|
||||
m512 v = and512(casemask, data);
|
||||
u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1);
|
||||
// no fixup of the boundary required - the aligned run will pick it up
|
||||
if (unlikely(z)) {
|
||||
return lastMatchOffset(buf + 64, z);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif // HAVE_AVX512
|
||||
|
@ -205,7 +205,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
|
||||
|
||||
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
|
||||
|
||||
if (!searchForward(g, reach, colours, s, w)) {
|
||||
if (!searchForward(g, reach, colours, succ_v, w)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -170,7 +170,7 @@ void findPaths(const NGHolder &g, NFAVertex v,
|
||||
/* path has looped back to one of the active+boring acceleration
|
||||
* states. We can ignore this path if we have sufficient back-
|
||||
* off. */
|
||||
paths->push_back({CharReach()});
|
||||
paths->push_back({cr});
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "rose_build_impl.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "nfagraph/ng_repeat.h"
|
||||
#include "smallwrite/smallwrite_build.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/boundary_reports.h"
|
||||
#include "util/make_unique.h"
|
||||
@ -159,6 +160,10 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in)
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &report_id : build.smwr.all_reports()) {
|
||||
live_reports.insert(report_id);
|
||||
}
|
||||
|
||||
// Collect live reports from boundary reports.
|
||||
insert(&live_reports, build.boundary.report_at_0);
|
||||
insert(&live_reports, build.boundary.report_at_0_eod);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -57,6 +57,10 @@
|
||||
#define HAVE_AVX512
|
||||
#endif
|
||||
|
||||
#if defined(__AVX512VBMI__)
|
||||
#define HAVE_AVX512VBMI
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
|
||||
*/
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -150,6 +150,14 @@ static really_inline u32 movd(const m128 in) {
|
||||
return _mm_cvtsi128_si32(in);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
static really_inline u32 movd512(const m512 in) {
|
||||
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
||||
// so we use 2-step convertions to work around.
|
||||
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
||||
}
|
||||
#endif
|
||||
|
||||
static really_inline u64a movq(const m128 in) {
|
||||
#if defined(ARCH_X86_64)
|
||||
return _mm_cvtsi128_si64(in);
|
||||
@ -318,6 +326,12 @@ static really_inline
|
||||
m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) {
|
||||
return _mm512_maskz_shuffle_epi8(k, a, b);
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX512VBMI)
|
||||
#define vpermb512(idx, a) _mm512_permutexvar_epi8(idx, a)
|
||||
#define maskz_vpermb512(k, idx, a) _mm512_maskz_permutexvar_epi8(k, idx, a)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
|
45
tools/fuzz/aristocrats.py
Executable file
45
tools/fuzz/aristocrats.py
Executable file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from random import choice,randint
|
||||
from optparse import OptionParser
|
||||
|
||||
def generateRandomOptions():
|
||||
if options.hybrid:
|
||||
allflags = "smiH8W"
|
||||
else:
|
||||
# Maintain an ordering for consistency.
|
||||
allflags = "smiHV8WLP"
|
||||
flags = ""
|
||||
for f in allflags:
|
||||
flags += choice(['', f])
|
||||
return flags
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-d", "--depth",
|
||||
action="store", type="int", dest="depth", default=200,
|
||||
help="Depth of generation (akin to maximum length)")
|
||||
parser.add_option("-c", "--count",
|
||||
action="store", type="int", dest="count", default=1000,
|
||||
help="Number of expressions to generate")
|
||||
parser.add_option("-f", "--full",
|
||||
action="store_true", dest="full", default=False,
|
||||
help="Use a full character set including unprintables")
|
||||
parser.add_option("-H", "--hybrid",
|
||||
action="store_true", dest="hybrid",
|
||||
help="Generate random flags for hybrid mode")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
if len(args) != 0:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
||||
if (options.full):
|
||||
crange = range(0,256)
|
||||
crange.remove(ord('\n'))
|
||||
else:
|
||||
crange = range(32, 127)
|
||||
|
||||
for i in xrange(0, options.count):
|
||||
len = randint(1, options.depth)
|
||||
s = [ chr(choice(crange)) for x in xrange(len) ]
|
||||
line = str(i) + ":/" + "".join(s) + "/" + generateRandomOptions()
|
||||
print line
|
39
tools/fuzz/completocrats.py
Executable file
39
tools/fuzz/completocrats.py
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from itertools import *
|
||||
from optparse import OptionParser
|
||||
|
||||
LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option("-d", "--depth",
|
||||
action="store", type="int", dest="depth", default=200,
|
||||
help="Depth of generation (akin to maximum length)")
|
||||
|
||||
parser.add_option("-f", "--full",
|
||||
action="store_true", dest="full", default=False,
|
||||
help="Use a full character set including unprintables")
|
||||
|
||||
parser.add_option("-l", "--limited",
|
||||
action="store_true", dest="limited", default=False,
|
||||
help="Use a very limited character set: just " + LIMITED_ALPHABET)
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
if len(args) != 0:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
||||
if (options.full):
|
||||
crange = range(0,256)
|
||||
crange.remove(ord('\n'))
|
||||
elif (options.limited):
|
||||
crange = [ ord(c) for c in LIMITED_ALPHABET ]
|
||||
else:
|
||||
crange = range(32, 127)
|
||||
|
||||
srange = [ chr(c) for c in crange ]
|
||||
|
||||
i = 0
|
||||
for x in product(srange, repeat = options.depth):
|
||||
line = str(i) + ":/" + "".join(x) + "/"
|
||||
print line
|
||||
i += 1
|
259
tools/fuzz/heuristocrats.py
Executable file
259
tools/fuzz/heuristocrats.py
Executable file
@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from optparse import OptionParser
|
||||
from random import *
|
||||
import string
|
||||
import sys
|
||||
|
||||
# return a random non-degenerate (ie not [10]) partition of nChildren
|
||||
def chooseLeafWidth(nChildren):
|
||||
width = randint(1, 5)
|
||||
width = min(width, nChildren-1)
|
||||
s = sample(range(1, nChildren), width)
|
||||
s.sort()
|
||||
s = [0] + s + [nChildren]
|
||||
v = [ s[i+1] - s[i] for i in range(0, len(s)-1) if s[i+1] != s[i] ]
|
||||
return v
|
||||
|
||||
def generateConcat(nChildren, atTopIgnored):
|
||||
v = [ generateRE(w, atTop = False) for w in chooseLeafWidth(nChildren) ]
|
||||
v = [ r for r in v if r != '' ]
|
||||
return string.join(v, "")
|
||||
|
||||
def makeGroup(s):
|
||||
# Parenthesise either in normal parens or a non-capturing group.
|
||||
if randint(0, 1) == 0:
|
||||
return "(" + s + ")"
|
||||
else:
|
||||
return "(?:" + s + ")"
|
||||
|
||||
def generateAlt(nChildren, atTop):
|
||||
v = [ generateRE(w, [generateAlt], atTop) for w in chooseLeafWidth(nChildren) ]
|
||||
v = [ r for r in v if r != '' ]
|
||||
s = string.join(v, "|")
|
||||
if len(v) == 1:
|
||||
return s
|
||||
else:
|
||||
return makeGroup(s)
|
||||
|
||||
def generateQuant(nChildren, atTopIgnored):
|
||||
lo = int(round(expovariate(0.2)))
|
||||
hi = lo + int(round(expovariate(0.2)))
|
||||
q = choice(["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)])
|
||||
r = generateRE(nChildren, [generateQuant], atTop = False)
|
||||
if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):
|
||||
return r + q
|
||||
else:
|
||||
return makeGroup(r) + q
|
||||
|
||||
def generateChar(nChildren, atTop = False):
|
||||
return chr(choice(alphabet))
|
||||
|
||||
def generateNocaseChar(nChildren, atTop = False):
|
||||
'Either generate an uppercase char from the alphabet or a nocase class [Aa]'
|
||||
c = generateChar(nChildren, atTop)
|
||||
if random() < 0.5:
|
||||
return c.upper()
|
||||
else:
|
||||
return '[' + c.upper() + c.lower() + ']'
|
||||
|
||||
def generateDot(nChildren, atTop = False):
|
||||
return "."
|
||||
|
||||
def generateBoundary(nChildren, atTop = False):
|
||||
# \b, \B in parens so that we can repeat them and still be accepted by
|
||||
# libpcre
|
||||
return makeGroup('\\' + choice('bB'))
|
||||
|
||||
def generateCharClass(nChildren, atTop = False):
|
||||
s = ""
|
||||
if random() < 0.2:
|
||||
s = "^"
|
||||
nChars = randint(1,4)
|
||||
else:
|
||||
nChars = randint(2,4)
|
||||
|
||||
for i in xrange(nChars):
|
||||
s += generateChar(1)
|
||||
return "[" + s + "]"
|
||||
|
||||
def generateOptionsFlags(nChildren, atTop = False):
|
||||
allflags = "smix"
|
||||
pos_flags = sample(allflags, randint(1, len(allflags)))
|
||||
neg_flags = sample(allflags, randint(1, len(allflags)))
|
||||
s = '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'
|
||||
return s
|
||||
|
||||
def generateLogicalId(nChildren, atTop = False):
|
||||
return str(randint(0, options.count))
|
||||
|
||||
def makeLogicalGroup(s):
|
||||
return "(" + s + ")"
|
||||
|
||||
def generateLogicalNot(nChildren, atTop):
|
||||
r = generateCombination(nChildren, [generateLogicalNot], atTop = False)
|
||||
return "!" + makeLogicalGroup(r)
|
||||
|
||||
def generateLogicalAnd(nChildren, atTop):
    """Conjoin several randomly generated logical sub-expressions with '&'.

    Empty sub-expressions are dropped. A single surviving operand is
    returned bare; otherwise the conjunction is parenthesised.
    """
    parts = [generateCombination(w, [generateLogicalAnd], atTop = False)
             for w in chooseLeafWidth(nChildren)]
    parts = [p for p in parts if p != '']
    # str.join rather than the long-deprecated string.join() (the string
    # module function was removed in Python 3).
    joined = "&".join(parts)
    if len(parts) == 1:
        return joined
    return makeLogicalGroup(joined)
def generateLogicalOr(nChildren, atTop):
    """Disjoin several randomly generated logical sub-expressions with '|'.

    Empty sub-expressions are dropped. A single surviving operand is
    returned bare; otherwise the disjunction is parenthesised.
    """
    parts = [generateCombination(w, [generateLogicalOr], atTop = False)
             for w in chooseLeafWidth(nChildren)]
    parts = [p for p in parts if p != '']
    # str.join rather than the long-deprecated string.join() (the string
    # module function was removed in Python 3).
    joined = "|".join(parts)
    if len(parts) == 1:
        return joined
    return makeLogicalGroup(joined)
# Weighted generator pools: (generator function, relative weight) pairs.
# Interior nodes of a generated regex tree.
weightsTree = [
    (generateConcat, 10),
    (generateAlt, 3),
    (generateQuant, 2),
]

# Leaf nodes of a generated regex tree.
weightsLeaf = [
    (generateChar, 30),
    (generateCharClass, 5),
    (generateDot, 5),
    (generateNocaseChar, 2),
    (generateBoundary, 1),
    (generateOptionsFlags, 1)
]

# Interior nodes of a generated logical-combination tree.
weightsLogicalTree = [
    (generateLogicalNot, 1),
    (generateLogicalAnd, 5),
    (generateLogicalOr, 5),
]

# Leaf nodes of a generated logical-combination tree.
weightsLogicalLeaf = [
    (generateLogicalId, 1),
]
def genChoices(weighted):
    """Expand (item, weight) pairs into a flat list in which each item
    appears 'weight' times, suitable for uniform random.choice().

    Uses a single comprehension; the previous repeated list concatenation
    (r = r + [f] * w) rebuilt the list on every pair, i.e. O(n^2) in the
    total weight.
    """
    return [f for (f, w) in weighted for _ in range(w)]
# Pre-expanded pools for uniform random.choice() over the weighted tables.
choicesTree = genChoices(weightsTree)
choicesLeaf = genChoices(weightsLeaf)
choicesLogicalTree = genChoices(weightsLogicalTree)
choicesLogicalLeaf = genChoices(weightsLogicalLeaf)

# Anchoring templates applied to top-level expressions; '%s' is replaced
# by the generated expression body.
weightsAnchor = [
    ("\\A%s\\Z", 1),
    ("\\A%s\\z", 1),
    ("\\A%s", 4),
    ("%s\\Z", 2),
    ("%s\\z", 2),
    ("^%s$", 1),
    ("^%s", 4),
    ("%s$", 2),
    ("%s", 25)
]

choicesAnchor = genChoices(weightsAnchor)
def generateRE(nChildren, suppressList = [], atTop = False):
    """Recursively generate a random regular expression of roughly
    nChildren nodes; a top-level expression may be randomly anchored."""
    template = choice(choicesAnchor) if atTop else "%s"

    nChildren -= 1
    if nChildren == 0:
        # Budget exhausted: emit a leaf.
        body = choice(choicesLeaf)(nChildren, atTop)
    else:
        # Interior node, excluding any generators the caller suppressed.
        pool = [g for g in choicesTree if g not in suppressList]
        body = choice(pool)(nChildren, atTop)

    return template % body
def generateCombination(nChildren, suppressList = [], atTop = False):
    """Recursively generate a random logical-combination expression of
    roughly nChildren nodes."""
    nChildren -= 1
    if nChildren == 0:
        # Budget exhausted: emit a leaf (an expression id).
        return choice(choicesLogicalLeaf)(nChildren, atTop)
    # Interior node, excluding any generators the caller suppressed.
    pool = [g for g in choicesLogicalTree if g not in suppressList]
    return choice(pool)(nChildren, atTop)
def generateRandomOptions():
    """Build a random compile-flag string; each candidate flag is included
    with probability 1/2. Flag order is fixed for output consistency."""
    if options.hybrid:
        allflags = "smiH8W"
    else:
        # Maintain an ordering for consistency.
        allflags = "smiHV8WLP"
    flags = ''.join(choice(['', f]) for f in allflags)
    if options.logical:
        # Combination flag only makes sense in logical mode.
        flags += choice(['', 'Q'])
    return flags
def generateRandomExtParam(depth, extparam):
    """Optionally build an extended-parameter suffix such as
    {min_length=3,edit_distance=1}.

    Returns '' when extparam is falsy or when no parameter is selected.
    """
    if not extparam:
        return ""
    params = []
    if choice((False, True)):
        params.append("min_length=%u" % randint(1, depth))
    if choice((False, True)):
        params.append("min_offset=%u" % randint(1, depth))
    if choice((False, True)):
        params.append("max_offset=%u" % randint(1, depth * 3))
    if choice((False, True)):
        # Pick one of the two mutually exclusive distance parameters.
        dist = randint(1, 3)
        if choice((False, True)):
            params.append("edit_distance=%u" % dist)
        else:
            params.append("hamming_distance=%u" % dist)
    if not params:
        return ""
    return "{" + ",".join(params) + "}"
parser = OptionParser()
|
||||
parser.add_option("-d", "--depth",
|
||||
action="store", type="int", dest="depth", default=200,
|
||||
help="Depth of generation (akin to maximum length)")
|
||||
parser.add_option("-c", "--count",
|
||||
action="store", type="int", dest="count", default=1000,
|
||||
help="Number of expressions to generate")
|
||||
parser.add_option("-a", "--alphabet",
|
||||
action="store", type="int", dest="alphabet", default=26,
|
||||
help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
|
||||
parser.add_option("-i", "--nocase",
|
||||
action="store_true", dest="nocase",
|
||||
help="Use a caseless alphabet for character generation")
|
||||
parser.add_option("-x", "--extparam",
|
||||
action="store_true", dest="extparam",
|
||||
help="Generate random extended parameters")
|
||||
parser.add_option("-l", "--logical",
|
||||
action="store_true", dest="logical",
|
||||
help="Generate logical combination expressions")
|
||||
parser.add_option("-H", "--hybrid",
|
||||
action="store_true", dest="hybrid",
|
||||
help="Generate random flags for hybrid mode")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
if len(args) != 0:
|
||||
parser.error("incorrect number of arguments")
|
||||
|
||||
alphabet = range(ord('a'), ord('a') + options.alphabet)
|
||||
if options.nocase:
|
||||
alphabet += range(ord('A'), ord('A') + options.alphabet)
|
||||
|
||||
for i in xrange(0, options.count):
|
||||
print "%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop = True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam))
|
||||
|
||||
if options.logical:
|
||||
for i in xrange(options.count, options.count + 3000):
|
||||
print "%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop = True))
|
9
tools/fuzz/limited_dict.txt
Normal file
9
tools/fuzz/limited_dict.txt
Normal file
@ -0,0 +1,9 @@
|
||||
hatstand
|
||||
teakettle
|
||||
badgerbrush
|
||||
mnemosyne
|
||||
rapscallion
|
||||
acerbic
|
||||
blackhat
|
||||
rufous
|
||||
echolalia
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2019, Intel Corporation
|
||||
* Copyright (c) 2016-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -207,7 +207,9 @@ void usage(const char *error) {
|
||||
printf(" -P Benchmark using PCRE (if supported).\n");
|
||||
#endif
|
||||
#if defined(HAVE_DECL_PTHREAD_SETAFFINITY_NP) || defined(_WIN32)
|
||||
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
|
||||
printf(" -T CPU,CPU,... or -T CPU-CPU\n");
|
||||
printf(" Benchmark with threads on specified CPUs or CPU"
|
||||
" range.\n");
|
||||
#endif
|
||||
printf(" -i DIR Don't compile, load from files in DIR"
|
||||
" instead.\n");
|
||||
@ -354,7 +356,8 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
case 'T':
|
||||
if (!strToList(optarg, threadCores)) {
|
||||
usage("Couldn't parse argument to -T flag, should be"
|
||||
" a list of positive integers.");
|
||||
" a list of positive integers or 2 integers"
|
||||
" connected with hyphen.");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
|
@ -216,8 +216,9 @@ def enchunk_pcap(pcapFN, sqliteFN):
|
||||
#
|
||||
# Read in the contents of the pcap file, adding stream segments as found
|
||||
#
|
||||
pkt_cnt = 0;
|
||||
ip_pkt_cnt = 0;
|
||||
pkt_cnt = 0
|
||||
ip_pkt_cnt = 0
|
||||
ip_pkt_off = 0
|
||||
unsupported_ip_protocol_cnt = 0
|
||||
pcap_ref = pcap.pcap(pcapFN)
|
||||
done = False
|
||||
@ -231,16 +232,24 @@ def enchunk_pcap(pcapFN, sqliteFN):
|
||||
pkt_cnt += 1
|
||||
|
||||
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
|
||||
if linkLayerType != ETHERTYPE_IP:
|
||||
#
|
||||
# We're only interested in IP packets
|
||||
#
|
||||
if linkLayerType == ETHERTYPE_VLAN:
|
||||
linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff + 2):(pcap_ref.dloff + 4)])[0]
|
||||
if linkLayerType != ETHERTYPE_IP:
|
||||
continue
|
||||
else:
|
||||
ip_pkt_off = pcap_ref.dloff + 4
|
||||
elif linkLayerType == ETHERTYPE_IP:
|
||||
ip_pkt_off = pcap_ref.dloff
|
||||
else:
|
||||
continue
|
||||
|
||||
ip_pkt_cnt += 1
|
||||
|
||||
ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0]
|
||||
ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len]
|
||||
ip_pkt_total_len = struct.unpack('!H', packet[ip_pkt_off + 2: ip_pkt_off + 4])[0]
|
||||
ip_pkt = packet[ip_pkt_off:ip_pkt_off + ip_pkt_total_len]
|
||||
pkt_protocol = struct.unpack('B', ip_pkt[9])[0]
|
||||
|
||||
if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
|
||||
|
@ -241,6 +241,13 @@ void addCallout(string &re) {
|
||||
re.append("\\E)(?C)");
|
||||
}
|
||||
|
||||
static
|
||||
bool isUtf8(const CompiledPcre &compiled) {
|
||||
unsigned long int options = 0;
|
||||
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
||||
return options & PCRE_UTF8;
|
||||
}
|
||||
|
||||
unique_ptr<CompiledPcre>
|
||||
GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
bool highlander = false;
|
||||
@ -380,6 +387,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
throw PcreCompileFailure(oss.str());
|
||||
}
|
||||
|
||||
compiled->utf8 |= isUtf8(*compiled);
|
||||
|
||||
return compiled;
|
||||
}
|
||||
|
||||
@ -451,13 +460,6 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
bool isUtf8(const CompiledPcre &compiled) {
|
||||
unsigned long int options = 0;
|
||||
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
||||
return options & PCRE_UTF8;
|
||||
}
|
||||
|
||||
static
|
||||
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
|
||||
assert(ret > 0);
|
||||
|
@ -40,12 +40,12 @@ using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
struct SimpleV {
|
||||
size_t index;
|
||||
size_t index = 0;
|
||||
string test_v = "SimpleV";
|
||||
};
|
||||
|
||||
struct SimpleE {
|
||||
size_t index;
|
||||
size_t index = 0;
|
||||
string test_e = "SimpleE";
|
||||
};
|
||||
|
||||
|
@ -146,9 +146,8 @@ bool isIgnorable(const std::string &f) {
|
||||
#ifndef _WIN32
|
||||
void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||
// Is our input path a file or a directory?
|
||||
int fd = open(inPath.c_str(), O_RDONLY);
|
||||
struct stat st;
|
||||
if (fstat(fd, &st) != 0) {
|
||||
if (stat(inPath.c_str(), &st) != 0) {
|
||||
cerr << "Can't stat path: '" << inPath << "'" << endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -161,7 +160,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||
exit(1);
|
||||
}
|
||||
} else if (S_ISDIR(st.st_mode)) {
|
||||
DIR *d = fdopendir(fd);
|
||||
DIR *d = opendir(inPath.c_str());
|
||||
if (d == nullptr) {
|
||||
cerr << "Can't open directory: '" << inPath << "'" << endl;
|
||||
exit(1);
|
||||
@ -192,10 +191,11 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||
}
|
||||
(void)closedir(d);
|
||||
} else {
|
||||
cerr << "Can't stat path: '" << inPath << "'" << endl;
|
||||
cerr << "Unsupported file type "
|
||||
<< hex << showbase << (st.st_mode & S_IFMT)
|
||||
<< " for path: '" << inPath << "'" << endl;
|
||||
exit(1);
|
||||
}
|
||||
(void)close(fd);
|
||||
}
|
||||
#else // windows TODO: improve
|
||||
void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, Intel Corporation
|
||||
* Copyright (c) 2015-2020, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -54,8 +54,8 @@ inline bool fromString(const std::string &s, T& val)
|
||||
return true;
|
||||
}
|
||||
|
||||
// read in a comma-separated set of values: very simple impl, not for
|
||||
// external consumption
|
||||
// read in a comma-separated or hyphen-connected set of values: very simple
|
||||
// impl, not for external consumption
|
||||
template<typename T>
|
||||
inline bool strToList(const std::string &s, std::vector<T>& out)
|
||||
{
|
||||
@ -68,7 +68,17 @@ inline bool strToList(const std::string &s, std::vector<T>& out)
|
||||
}
|
||||
|
||||
out.push_back(val);
|
||||
} while (i.get(c) && c == ',');
|
||||
|
||||
i.get(c);
|
||||
if (c == '-') {
|
||||
T val_end;
|
||||
i >> val_end;
|
||||
while (val < val_end) {
|
||||
out.push_back(++val);
|
||||
}
|
||||
break;
|
||||
}
|
||||
} while (c == ',');
|
||||
|
||||
return !out.empty();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user