mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Compare commits
64 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
bc3b191ab5 | ||
|
64731738a6 | ||
|
bc9a37c0ff | ||
|
ef16059c3e | ||
|
0bf86a7c15 | ||
|
c37166d52b | ||
|
f815639830 | ||
|
a775768988 | ||
|
2fbef65905 | ||
|
277fc40089 | ||
|
5aa4bd565f | ||
|
f47b69a01d | ||
|
9e254af71f | ||
|
c81293c696 | ||
|
c1539d32df | ||
|
44b5955ecd | ||
|
e1f4542e65 | ||
|
a3ba1ad369 | ||
|
676490427c | ||
|
838a04e66f | ||
|
f194a85d51 | ||
|
7bf5a9f5cd | ||
|
811f909d41 | ||
|
47bc68339f | ||
|
1baf340d1c | ||
|
9b4ba34c68 | ||
|
85019432f4 | ||
|
b386cbd20d | ||
|
b254a88c43 | ||
|
819da8df17 | ||
|
7f4a806118 | ||
|
0b246c801a | ||
|
9e17e8520f | ||
|
1ecb3aef8b | ||
|
62e35c910b | ||
|
95cd19c6f0 | ||
|
98daf283b1 | ||
|
e0c489f98f | ||
|
64a995bf44 | ||
|
433d2f386a | ||
|
76066b9ef2 | ||
|
66dc649197 | ||
|
d1ea4c762a | ||
|
2945c9bd20 | ||
|
20e69f6ad8 | ||
|
845ea5c9e3 | ||
|
b16c6200ee | ||
|
1a43a63218 | ||
|
04d3be487d | ||
|
5eab583df5 | ||
|
ddc247516c | ||
|
5326b3e688 | ||
|
0102f03c9c | ||
|
f06e19e6cb | ||
|
00b697bb3b | ||
|
007117146c | ||
|
1bd99d9318 | ||
|
0c4c149433 | ||
|
d8dc1ad685 | ||
|
27ab2e086d | ||
|
cf06d552f8 | ||
|
33cef12050 | ||
|
15f0ccd1b8 | ||
|
475ad00f53 |
49
CHANGELOG.md
49
CHANGELOG.md
@ -2,6 +2,55 @@
|
|||||||
|
|
||||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [5.4.2] 2023-04-19
|
||||||
|
- Roll back bugfix for github issue #350: Besides using scratch for
|
||||||
|
corresponding database, Hyperscan also allows user to use larger scratch
|
||||||
|
allocated for another database. Users can leverage this property to achieve
|
||||||
|
safe scratch usage in multi-database scenarios. Behaviors beyond these are
|
||||||
|
discouraged and results are undefined.
|
||||||
|
- Fix hsdump issue due to invalid nfa type.
|
||||||
|
|
||||||
|
## [5.4.1] 2023-02-20
|
||||||
|
- The Intel Hyperscan team is pleased to provide a bug fix release to our open source library.
|
||||||
|
Intel also maintains an upgraded version available through your Intel sales representative.
|
||||||
|
- Bugfix for issue #184: fix random char value of UTF-8.
|
||||||
|
- Bugfix for issue #291: bypass logical combination flag in hs_expression_info().
|
||||||
|
- Bugfix for issue #292: fix build error due to libc symbol parsing.
|
||||||
|
- Bugfix for issue #302/304: add empty string check for pure literal API.
|
||||||
|
- Bugfix for issue #303: fix unknown instruction error in pure literal API.
|
||||||
|
- Bugfix for issue #303: avoid memory leak in stream close stage.
|
||||||
|
- Bugfix for issue #305: fix assertion failure in DFA construction.
|
||||||
|
- Bugfix for issue #317: fix aligned allocator segmentation faults.
|
||||||
|
- Bugfix for issue #350: add quick validity check for scratch.
|
||||||
|
- Bugfix for issue #359: fix glibc-2.34 stack size issue.
|
||||||
|
- Bugfix for issue #360: fix SKIP flag issue in chimera.
|
||||||
|
- Bugfix for issue #362: fix one cotec check corner issue in UTF-8 validation.
|
||||||
|
- Fix other compile issues.
|
||||||
|
|
||||||
|
## [5.4.0] 2020-12-31
|
||||||
|
- Improvement on literal matcher "Fat Teddy" performance, including
|
||||||
|
support for Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R)
|
||||||
|
AVX-512 VBMI).
|
||||||
|
- Introduce a new 32-state shuffle-based DFA engine ("Sheng32"). This improves
|
||||||
|
scanning performance by leveraging AVX-512 VBMI.
|
||||||
|
- Introduce a new 64-state shuffle-based DFA engine ("Sheng64"). This improves
|
||||||
|
scanning performance by leveraging AVX-512 VBMI.
|
||||||
|
- Introduce a new shuffle-based hybrid DFA engine ("McSheng64"). This improves
|
||||||
|
scanning performance by leveraging AVX-512 VBMI.
|
||||||
|
- Improvement on exceptional state handling performance for LimEx NFA, including
|
||||||
|
support for AVX-512 VBMI.
|
||||||
|
- Improvement on lookaround performance with new models, including support for
|
||||||
|
AVX-512.
|
||||||
|
- Improvement on DFA state space efficiency.
|
||||||
|
- Optimization on decision of NFA/DFA generation.
|
||||||
|
- hsbench: add CSV dump support for hsbench.
|
||||||
|
- Bugfix for cmake error on Icelake under release mode.
|
||||||
|
- Bugfix in find_vertices_in_cycles() to avoid self-loop checking in SCC.
|
||||||
|
- Bugfix for issue #270: fix return value handling in chimera.
|
||||||
|
- Bugfix for issue #284: use correct free function in logical combination.
|
||||||
|
- Add BUILD_EXAMPLES cmake option to enable example code compilation. (#260)
|
||||||
|
- Some typo fixing. (#242, #259)
|
||||||
|
|
||||||
## [5.3.0] 2020-05-15
|
## [5.3.0] 2020-05-15
|
||||||
- Improvement on literal matcher "Teddy" performance, including support for
|
- Improvement on literal matcher "Teddy" performance, including support for
|
||||||
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
|
Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R) AVX-512
|
||||||
|
@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
|
|||||||
project (hyperscan C CXX)
|
project (hyperscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 5)
|
set (HS_MAJOR_VERSION 5)
|
||||||
set (HS_MINOR_VERSION 3)
|
set (HS_MINOR_VERSION 4)
|
||||||
set (HS_PATCH_VERSION 0)
|
set (HS_PATCH_VERSION 2)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
@ -133,6 +133,13 @@ CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in
|
|||||||
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
|
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
|
||||||
OFF)
|
OFF)
|
||||||
|
|
||||||
|
option(BUILD_AVX512VBMI "Experimental: support avx512vbmi in the fat runtime"
|
||||||
|
OFF)
|
||||||
|
|
||||||
|
if (BUILD_AVX512VBMI)
|
||||||
|
set(BUILD_AVX512 ON)
|
||||||
|
endif ()
|
||||||
|
|
||||||
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
||||||
|
|
||||||
# TODO: per platform config files?
|
# TODO: per platform config files?
|
||||||
@ -277,6 +284,7 @@ else()
|
|||||||
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
||||||
else ()
|
else ()
|
||||||
set(SKYLAKE_FLAG "-march=skylake-avx512")
|
set(SKYLAKE_FLAG "-march=skylake-avx512")
|
||||||
|
set(ICELAKE_FLAG "-march=icelake-server")
|
||||||
endif ()
|
endif ()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -389,6 +397,18 @@ if (CXX_UNUSED_CONST_VAR)
|
|||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# clang-14 complains about unused-but-set variable.
|
||||||
|
CHECK_CXX_COMPILER_FLAG("-Wunused-but-set-variable" CXX_UNUSED_BUT_SET_VAR)
|
||||||
|
if (CXX_UNUSED_BUT_SET_VAR)
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-but-set-variable")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# clang-14 complains about using bitwise operator instead of logical ones.
|
||||||
|
CHECK_CXX_COMPILER_FLAG("-Wbitwise-instead-of-logical" CXX_BITWISE_INSTEAD_OF_LOGICAL)
|
||||||
|
if (CXX_BITWISE_INSTEAD_OF_LOGICAL)
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-bitwise-instead-of-logical")
|
||||||
|
endif()
|
||||||
|
|
||||||
# gcc 6 complains about type attributes that get ignored, like alignment
|
# gcc 6 complains about type attributes that get ignored, like alignment
|
||||||
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
|
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
|
||||||
if (CXX_IGNORED_ATTR)
|
if (CXX_IGNORED_ATTR)
|
||||||
@ -420,8 +440,10 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
|||||||
|
|
||||||
# gcc 10 complains about this
|
# gcc 10 complains about this
|
||||||
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
|
CHECK_C_COMPILER_FLAG("-Wstringop-overflow" CC_STRINGOP_OVERFLOW)
|
||||||
if(CC_STRINGOP_OVERFLOW)
|
CHECK_CXX_COMPILER_FLAG("-Wstringop-overflow" CXX_STRINGOP_OVERFLOW)
|
||||||
|
if(CC_STRINGOP_OVERFLOW OR CXX_STRINGOP_OVERFLOW)
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-stringop-overflow")
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-stringop-overflow")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
@ -571,7 +593,7 @@ set (hs_exec_common_SRCS
|
|||||||
|
|
||||||
set (hs_exec_SRCS
|
set (hs_exec_SRCS
|
||||||
${hs_HEADERS}
|
${hs_HEADERS}
|
||||||
src/hs_version.h
|
src/hs_version.h.in
|
||||||
src/ue2common.h
|
src/ue2common.h
|
||||||
src/allocator.h
|
src/allocator.h
|
||||||
src/crc32.c
|
src/crc32.c
|
||||||
@ -728,7 +750,7 @@ SET (hs_compile_SRCS
|
|||||||
src/grey.h
|
src/grey.h
|
||||||
src/hs.cpp
|
src/hs.cpp
|
||||||
src/hs_internal.h
|
src/hs_internal.h
|
||||||
src/hs_version.h
|
src/hs_version.h.in
|
||||||
src/scratch.h
|
src/scratch.h
|
||||||
src/state.h
|
src/state.h
|
||||||
src/ue2common.h
|
src/ue2common.h
|
||||||
@ -1197,6 +1219,9 @@ else (FAT_RUNTIME)
|
|||||||
if (NOT BUILD_AVX512)
|
if (NOT BUILD_AVX512)
|
||||||
set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH")
|
set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH")
|
||||||
endif (NOT BUILD_AVX512)
|
endif (NOT BUILD_AVX512)
|
||||||
|
if (NOT BUILD_AVX512VBMI)
|
||||||
|
set (DISPATCHER_DEFINE "${DISPATCHER_DEFINE} -DDISABLE_AVX512VBMI_DISPATCH")
|
||||||
|
endif (NOT BUILD_AVX512VBMI)
|
||||||
set_source_files_properties(src/dispatcher.c PROPERTIES
|
set_source_files_properties(src/dispatcher.c PROPERTIES
|
||||||
COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}")
|
COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}")
|
||||||
|
|
||||||
@ -1229,6 +1254,14 @@ else (FAT_RUNTIME)
|
|||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
endif (BUILD_AVX512)
|
endif (BUILD_AVX512)
|
||||||
|
if (BUILD_AVX512VBMI)
|
||||||
|
add_library(hs_exec_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_avx512vbmi>)
|
||||||
|
set_target_properties(hs_exec_avx512vbmi PROPERTIES
|
||||||
|
COMPILE_FLAGS "${ICELAKE_FLAG}"
|
||||||
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
|
)
|
||||||
|
endif (BUILD_AVX512VBMI)
|
||||||
|
|
||||||
add_library(hs_exec_common OBJECT
|
add_library(hs_exec_common OBJECT
|
||||||
${hs_exec_common_SRCS}
|
${hs_exec_common_SRCS}
|
||||||
@ -1287,6 +1320,15 @@ else (FAT_RUNTIME)
|
|||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
endif (BUILD_AVX512)
|
endif (BUILD_AVX512)
|
||||||
|
if (BUILD_AVX512VBMI)
|
||||||
|
add_library(hs_exec_shared_avx512vbmi OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_avx512vbmi>)
|
||||||
|
set_target_properties(hs_exec_shared_avx512vbmi PROPERTIES
|
||||||
|
COMPILE_FLAGS "${ICELAKE_FLAG}"
|
||||||
|
POSITION_INDEPENDENT_CODE TRUE
|
||||||
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512vbmi ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
|
)
|
||||||
|
endif (BUILD_AVX512VBMI)
|
||||||
add_library(hs_exec_common_shared OBJECT
|
add_library(hs_exec_common_shared OBJECT
|
||||||
${hs_exec_common_SRCS}
|
${hs_exec_common_SRCS}
|
||||||
src/dispatcher.c
|
src/dispatcher.c
|
||||||
@ -1380,7 +1422,7 @@ if (NOT BUILD_STATIC_LIBS)
|
|||||||
add_library(hs ALIAS hs_shared)
|
add_library(hs ALIAS hs_shared)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
option(BUILD_EXAMPLES "Build Hyperscan example code (default TRUE)" TRUE)
|
||||||
if(NOT WIN32)
|
if(NOT WIN32 AND BUILD_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018, Intel Corporation
|
* Copyright (c) 2018-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -345,6 +345,16 @@ ch_error_t HS_CDECL ch_set_scratch_allocator(ch_alloc_t alloc_func,
|
|||||||
*/
|
*/
|
||||||
#define CH_SCRATCH_IN_USE (-10)
|
#define CH_SCRATCH_IN_USE (-10)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unexpected internal error from Hyperscan.
|
||||||
|
*
|
||||||
|
* This error indicates that there was unexpected matching behavior from
|
||||||
|
* Hyperscan. This could be related to invalid usage of scratch space or
|
||||||
|
* invalid memory operations by users.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define CH_UNKNOWN_HS_ERROR (-13)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returned when pcre_exec (called for some expressions internally from @ref
|
* Returned when pcre_exec (called for some expressions internally from @ref
|
||||||
* ch_scan) failed due to a fatal error.
|
* ch_scan) failed due to a fatal error.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018, Intel Corporation
|
* Copyright (c) 2018-2022, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
|
|||||||
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
|
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
|
||||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||||
pd->scanStart = hyctx->length;
|
pd->scanStart = hyctx->length;
|
||||||
|
if (top_id == id) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (top_id == id) {
|
if (top_id == id) {
|
||||||
@ -419,6 +423,7 @@ int HS_CDECL multiCallback(unsigned int id, unsigned long long from,
|
|||||||
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
DEBUG_PRINTF("user callback told us to skip this pattern\n");
|
||||||
pd->scanStart = hyctx->length;
|
pd->scanStart = hyctx->length;
|
||||||
ret = HS_SUCCESS;
|
ret = HS_SUCCESS;
|
||||||
|
hyctx->scratch->ret = ret;
|
||||||
} else if (ret == CH_FAIL_INTERNAL) {
|
} else if (ret == CH_FAIL_INTERNAL) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -590,11 +595,24 @@ ch_error_t ch_scan_i(const ch_database_t *hydb,
|
|||||||
|
|
||||||
if (!(db->flags & CHIMERA_FLAG_NO_MULTIMATCH)) {
|
if (!(db->flags & CHIMERA_FLAG_NO_MULTIMATCH)) {
|
||||||
ret = scanHyperscan(&hyctx, data, length);
|
ret = scanHyperscan(&hyctx, data, length);
|
||||||
if (ret != HS_SUCCESS && scratch->ret != CH_SUCCESS) {
|
// Errors from pcre scan.
|
||||||
DEBUG_PRINTF("Hyperscan returned error %d\n", scratch->ret);
|
if (scratch->ret == CH_CALLBACK_TERMINATE) {
|
||||||
|
DEBUG_PRINTF("Pcre terminates scan\n");
|
||||||
|
unmarkScratchInUse(scratch);
|
||||||
|
return CH_SCAN_TERMINATED;
|
||||||
|
} else if (scratch->ret != CH_SUCCESS) {
|
||||||
|
DEBUG_PRINTF("Pcre internal error\n");
|
||||||
unmarkScratchInUse(scratch);
|
unmarkScratchInUse(scratch);
|
||||||
return scratch->ret;
|
return scratch->ret;
|
||||||
}
|
}
|
||||||
|
// Errors from Hyperscan scan. Note Chimera could terminate
|
||||||
|
// Hyperscan callback on purpose so this is not counted as an error.
|
||||||
|
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||||
|
assert(scratch->ret == CH_SUCCESS);
|
||||||
|
DEBUG_PRINTF("Hyperscan returned error %d\n", ret);
|
||||||
|
unmarkScratchInUse(scratch);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("Flush priority queue\n");
|
DEBUG_PRINTF("Flush priority queue\n");
|
||||||
|
@ -17,10 +17,21 @@ if (BUILD_AVX512)
|
|||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (BUILD_AVX512VBMI)
|
||||||
|
CHECK_C_COMPILER_FLAG(${ICELAKE_FLAG} HAS_ARCH_ICELAKE)
|
||||||
|
if (NOT HAS_ARCH_ICELAKE)
|
||||||
|
message (FATAL_ERROR "AVX512VBMI not supported by compiler")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (FAT_RUNTIME)
|
if (FAT_RUNTIME)
|
||||||
# test the highest level microarch to make sure everything works
|
# test the highest level microarch to make sure everything works
|
||||||
if (BUILD_AVX512)
|
if (BUILD_AVX512)
|
||||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
|
if (BUILD_AVX512VBMI)
|
||||||
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ICELAKE_FLAG}")
|
||||||
|
else ()
|
||||||
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
|
||||||
|
endif (BUILD_AVX512VBMI)
|
||||||
else ()
|
else ()
|
||||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2")
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2")
|
||||||
endif ()
|
endif ()
|
||||||
@ -80,6 +91,9 @@ if (FAT_RUNTIME)
|
|||||||
if (BUILD_AVX512 AND NOT HAVE_AVX512)
|
if (BUILD_AVX512 AND NOT HAVE_AVX512)
|
||||||
message(FATAL_ERROR "AVX512 support requested but not supported")
|
message(FATAL_ERROR "AVX512 support requested but not supported")
|
||||||
endif ()
|
endif ()
|
||||||
|
if (BUILD_AVX512VBMI AND NOT HAVE_AVX512VBMI)
|
||||||
|
message(FATAL_ERROR "AVX512VBMI support requested but not supported")
|
||||||
|
endif ()
|
||||||
else (NOT FAT_RUNTIME)
|
else (NOT FAT_RUNTIME)
|
||||||
if (NOT HAVE_AVX2)
|
if (NOT HAVE_AVX2)
|
||||||
message(STATUS "Building without AVX2 support")
|
message(STATUS "Building without AVX2 support")
|
||||||
@ -87,6 +101,9 @@ else (NOT FAT_RUNTIME)
|
|||||||
if (NOT HAVE_AVX512)
|
if (NOT HAVE_AVX512)
|
||||||
message(STATUS "Building without AVX512 support")
|
message(STATUS "Building without AVX512 support")
|
||||||
endif ()
|
endif ()
|
||||||
|
if (NOT HAVE_AVX512VBMI)
|
||||||
|
message(STATUS "Building without AVX512VBMI support")
|
||||||
|
endif ()
|
||||||
if (NOT HAVE_SSSE3)
|
if (NOT HAVE_SSSE3)
|
||||||
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
|
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -17,7 +17,7 @@ KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX)
|
|||||||
LIBC_SO=$("$@" --print-file-name=libc.so.6)
|
LIBC_SO=$("$@" --print-file-name=libc.so.6)
|
||||||
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
||||||
# get all symbols from libc and turn them into patterns
|
# get all symbols from libc and turn them into patterns
|
||||||
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
|
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS}
|
||||||
# build the object
|
# build the object
|
||||||
"$@"
|
"$@"
|
||||||
# rename the symbols in the object
|
# rename the symbols in the object
|
||||||
|
@ -24,6 +24,9 @@
|
|||||||
/* Define if building AVX-512 in the fat runtime. */
|
/* Define if building AVX-512 in the fat runtime. */
|
||||||
#cmakedefine BUILD_AVX512
|
#cmakedefine BUILD_AVX512
|
||||||
|
|
||||||
|
/* Define if building AVX512VBMI in the fat runtime. */
|
||||||
|
#cmakedefine BUILD_AVX512VBMI
|
||||||
|
|
||||||
/* Define to 1 if `backtrace' works. */
|
/* Define to 1 if `backtrace' works. */
|
||||||
#cmakedefine HAVE_BACKTRACE
|
#cmakedefine HAVE_BACKTRACE
|
||||||
|
|
||||||
|
@ -212,7 +212,7 @@ space is required for that context.
|
|||||||
In the absence of recursive scanning, only one such space is required per thread
|
In the absence of recursive scanning, only one such space is required per thread
|
||||||
and can (and indeed should) be allocated before data scanning is to commence.
|
and can (and indeed should) be allocated before data scanning is to commence.
|
||||||
|
|
||||||
In a scenario where a set of expressions are compiled by a single "master"
|
In a scenario where a set of expressions are compiled by a single "main"
|
||||||
thread and data will be scanned by multiple "worker" threads, the convenience
|
thread and data will be scanned by multiple "worker" threads, the convenience
|
||||||
function :c:func:`ch_clone_scratch` allows multiple copies of an existing
|
function :c:func:`ch_clone_scratch` allows multiple copies of an existing
|
||||||
scratch space to be made for each thread (rather than forcing the caller to pass
|
scratch space to be made for each thread (rather than forcing the caller to pass
|
||||||
|
@ -64,21 +64,21 @@ interpreted independently. No syntax association happens between any adjacent
|
|||||||
characters.
|
characters.
|
||||||
|
|
||||||
For example, given an expression written as :regexp:`/bc?/`. We could say it is
|
For example, given an expression written as :regexp:`/bc?/`. We could say it is
|
||||||
a regluar expression, with the meaning that character ``b`` followed by nothing
|
a regular expression, with the meaning that character ``b`` followed by nothing
|
||||||
or by one character ``c``. On the other view, we could also say it is a pure
|
or by one character ``c``. On the other view, we could also say it is a pure
|
||||||
literal expression, with the meaning that this is a character sequence of 3-byte
|
literal expression, with the meaning that this is a character sequence of 3-byte
|
||||||
length, containing characters ``b``, ``c`` and ``?``. In regular case, the
|
length, containing characters ``b``, ``c`` and ``?``. In regular case, the
|
||||||
question mark character ``?`` has a particular syntax role called 0-1 quantifier,
|
question mark character ``?`` has a particular syntax role called 0-1 quantifier,
|
||||||
which has an syntax association with the character ahead of it. Similar
|
which has a syntax association with the character ahead of it. Similar
|
||||||
characters exist in regular grammer like ``[``, ``]``, ``(``, ``)``, ``{``,
|
characters exist in regular grammar like ``[``, ``]``, ``(``, ``)``, ``{``,
|
||||||
``}``, ``-``, ``*``, ``+``, ``\``, ``|``, ``/``, ``:``, ``^``, ``.``, ``$``.
|
``}``, ``-``, ``*``, ``+``, ``\``, ``|``, ``/``, ``:``, ``^``, ``.``, ``$``.
|
||||||
While in pure literal case, all these meta characters lose their extra meanings
|
While in pure literal case, all these meta characters lose their extra meanings
|
||||||
except that they are just common ASCII codes.
|
except that they are just common ASCII codes.
|
||||||
|
|
||||||
Hyperscan is initially designed to process common regular expressions. It is
|
Hyperscan is initially designed to process common regular expressions. It is
|
||||||
hence embedded with a complex parser to do comprehensive regular grammer
|
hence embedded with a complex parser to do comprehensive regular grammar
|
||||||
interpretion. Particularly, the identification of above meta characters is the
|
interpretation. Particularly, the identification of above meta characters is the
|
||||||
basic step for the interpretion of far more complex regular grammers.
|
basic step for the interpretation of far more complex regular grammars.
|
||||||
|
|
||||||
However in real cases, patterns may not always be regular expressions. They
|
However in real cases, patterns may not always be regular expressions. They
|
||||||
could just be pure literals. Problem will come if the pure literals contain
|
could just be pure literals. Problem will come if the pure literals contain
|
||||||
@ -165,7 +165,7 @@ The following regex constructs are supported by Hyperscan:
|
|||||||
:regexp:`{n,}` are supported with limitations.
|
:regexp:`{n,}` are supported with limitations.
|
||||||
|
|
||||||
* For arbitrary repeated sub-patterns: *n* and *m* should be either small
|
* For arbitrary repeated sub-patterns: *n* and *m* should be either small
|
||||||
or infinite, e.g. :regexp:`(a|b}{4}`, :regexp:`(ab?c?d){4,10}` or
|
or infinite, e.g. :regexp:`(a|b){4}`, :regexp:`(ab?c?d){4,10}` or
|
||||||
:regexp:`(ab(cd)*){6,}`.
|
:regexp:`(ab(cd)*){6,}`.
|
||||||
|
|
||||||
* For single-character width sub-patterns such as :regexp:`[^\\a]` or
|
* For single-character width sub-patterns such as :regexp:`[^\\a]` or
|
||||||
|
@ -263,17 +263,19 @@ the current platform is supported by Hyperscan.
|
|||||||
As of this release, the variants of the runtime that are built, and the CPU
|
As of this release, the variants of the runtime that are built, and the CPU
|
||||||
capability that is required, are the following:
|
capability that is required, are the following:
|
||||||
|
|
||||||
+----------+-------------------------------+---------------------------+
|
+--------------+---------------------------------+---------------------------+
|
||||||
| Variant | CPU Feature Flag(s) Required | gcc arch flag |
|
| Variant | CPU Feature Flag(s) Required | gcc arch flag |
|
||||||
+==========+===============================+===========================+
|
+==============+=================================+===========================+
|
||||||
| Core 2 | ``SSSE3`` | ``-march=core2`` |
|
| Core 2 | ``SSSE3`` | ``-march=core2`` |
|
||||||
+----------+-------------------------------+---------------------------+
|
+--------------+---------------------------------+---------------------------+
|
||||||
| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` |
|
| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` |
|
||||||
+----------+-------------------------------+---------------------------+
|
+--------------+---------------------------------+---------------------------+
|
||||||
| AVX 2 | ``AVX2`` | ``-march=core-avx2`` |
|
| AVX 2 | ``AVX2`` | ``-march=core-avx2`` |
|
||||||
+----------+-------------------------------+---------------------------+
|
+--------------+---------------------------------+---------------------------+
|
||||||
| AVX 512 | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` |
|
| AVX 512 | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` |
|
||||||
+----------+-------------------------------+---------------------------+
|
+--------------+---------------------------------+---------------------------+
|
||||||
|
| AVX 512 VBMI | ``AVX512VBMI`` (see note below) | ``-march=icelake-server`` |
|
||||||
|
+--------------+---------------------------------+---------------------------+
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
@ -287,6 +289,16 @@ capability that is required, are the following:
|
|||||||
|
|
||||||
cmake -DBUILD_AVX512=on <...>
|
cmake -DBUILD_AVX512=on <...>
|
||||||
|
|
||||||
|
Hyperscan v5.3 adds support for AVX512VBMI instructions - in particular the
|
||||||
|
``AVX512VBMI`` instruction set that was introduced on Intel "Icelake" Xeon
|
||||||
|
processors - however the AVX512VBMI runtime variant is **not** enabled by
|
||||||
|
default in fat runtime builds as not all toolchains support AVX512VBMI
|
||||||
|
instruction sets. To build an AVX512VBMI runtime, the CMake variable
|
||||||
|
``BUILD_AVX512VBMI`` must be enabled manually during configuration. For
|
||||||
|
example: ::
|
||||||
|
|
||||||
|
cmake -DBUILD_AVX512VBMI=on <...>
|
||||||
|
|
||||||
As the fat runtime requires compiler, libc, and binutils support, at this time
|
As the fat runtime requires compiler, libc, and binutils support, at this time
|
||||||
it will only be enabled for Linux builds where the compiler supports the
|
it will only be enabled for Linux builds where the compiler supports the
|
||||||
`indirect function "ifunc" function attribute
|
`indirect function "ifunc" function attribute
|
||||||
|
@ -178,7 +178,7 @@ space is required for that context.
|
|||||||
In the absence of recursive scanning, only one such space is required per thread
|
In the absence of recursive scanning, only one such space is required per thread
|
||||||
and can (and indeed should) be allocated before data scanning is to commence.
|
and can (and indeed should) be allocated before data scanning is to commence.
|
||||||
|
|
||||||
In a scenario where a set of expressions are compiled by a single "master"
|
In a scenario where a set of expressions are compiled by a single "main"
|
||||||
thread and data will be scanned by multiple "worker" threads, the convenience
|
thread and data will be scanned by multiple "worker" threads, the convenience
|
||||||
function :c:func:`hs_clone_scratch` allows multiple copies of an existing
|
function :c:func:`hs_clone_scratch` allows multiple copies of an existing
|
||||||
scratch space to be made for each thread (rather than forcing the caller to pass
|
scratch space to be made for each thread (rather than forcing the caller to pass
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -115,6 +115,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <climits>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -657,6 +658,10 @@ int main(int argc, char **argv) {
|
|||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
repeatCount = atoi(optarg);
|
repeatCount = atoi(optarg);
|
||||||
|
if (repeatCount <= 0 || repeatCount > UINT_MAX) {
|
||||||
|
cerr << "Invalid repeatCount." << endl;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -51,6 +51,7 @@
|
|||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <climits>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -489,6 +490,10 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// Streaming mode scans.
|
// Streaming mode scans.
|
||||||
double secsStreamingScan = 0.0, secsStreamingOpenClose = 0.0;
|
double secsStreamingScan = 0.0, secsStreamingOpenClose = 0.0;
|
||||||
|
if (repeatCount <= 0 || repeatCount > UINT_MAX) {
|
||||||
|
cerr << "Invalid repeatCount." << endl;
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
for (unsigned int i = 0; i < repeatCount; i++) {
|
for (unsigned int i = 0; i < repeatCount; i++) {
|
||||||
// Open streams.
|
// Open streams.
|
||||||
clock.start();
|
clock.start();
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -57,6 +57,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <hs.h>
|
#include <hs.h>
|
||||||
|
|
||||||
@ -152,6 +153,15 @@ int main(int argc, char *argv[]) {
|
|||||||
char *pattern = argv[1];
|
char *pattern = argv[1];
|
||||||
char *inputFN = argv[2];
|
char *inputFN = argv[2];
|
||||||
|
|
||||||
|
if (access(inputFN, F_OK) != 0) {
|
||||||
|
fprintf(stderr, "ERROR: file doesn't exist.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (access(inputFN, R_OK) != 0) {
|
||||||
|
fprintf(stderr, "ERROR: can't be read.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/* First, we attempt to compile the pattern provided on the command line.
|
/* First, we attempt to compile the pattern provided on the command line.
|
||||||
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will
|
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will
|
||||||
* match newline characters. The compiler will analyse the given pattern and
|
* match newline characters. The compiler will analyse the given pattern and
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2020, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that our pattern isn't too long (in characters).
|
// Ensure that our pattern isn't too long (in characters).
|
||||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
size_t maxlen = cc.grey.limitPatternLength + 1;
|
||||||
|
if (strnlen(expression, maxlen) >= maxlen) {
|
||||||
throw CompileError("Pattern length exceeds limit.");
|
throw CompileError("Pattern length exceeds limit.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -416,6 +417,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
|
|||||||
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
|
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(expression, "")) {
|
||||||
|
throw CompileError("Pure literal API doesn't support empty string.");
|
||||||
|
}
|
||||||
|
|
||||||
// This expression must be a pure literal, we can build ue2_literal
|
// This expression must be a pure literal, we can build ue2_literal
|
||||||
// directly based on expression text.
|
// directly based on expression text.
|
||||||
ParsedLitExpression ple(index, expression, expLength, flags, id);
|
ParsedLitExpression ple(index, expression, expLength, flags, id);
|
||||||
@ -458,6 +463,9 @@ platform_t target_to_platform(const target_t &target_info) {
|
|||||||
if (!target_info.has_avx512()) {
|
if (!target_info.has_avx512()) {
|
||||||
p |= HS_PLATFORM_NOAVX512;
|
p |= HS_PLATFORM_NOAVX512;
|
||||||
}
|
}
|
||||||
|
if (!target_info.has_avx512vbmi()) {
|
||||||
|
p |= HS_PLATFORM_NOAVX512VBMI;
|
||||||
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -115,7 +115,8 @@ static
|
|||||||
hs_error_t db_check_platform(const u64a p) {
|
hs_error_t db_check_platform(const u64a p) {
|
||||||
if (p != hs_current_platform
|
if (p != hs_current_platform
|
||||||
&& p != (hs_current_platform | hs_current_platform_no_avx2)
|
&& p != (hs_current_platform | hs_current_platform_no_avx2)
|
||||||
&& p != (hs_current_platform | hs_current_platform_no_avx512)) {
|
&& p != (hs_current_platform | hs_current_platform_no_avx512)
|
||||||
|
&& p != (hs_current_platform | hs_current_platform_no_avx512vbmi)) {
|
||||||
return HS_DB_PLATFORM_ERROR;
|
return HS_DB_PLATFORM_ERROR;
|
||||||
}
|
}
|
||||||
// passed all checks
|
// passed all checks
|
||||||
@ -370,9 +371,11 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
|||||||
u8 minor = (version >> 16) & 0xff;
|
u8 minor = (version >> 16) & 0xff;
|
||||||
u8 major = (version >> 24) & 0xff;
|
u8 major = (version >> 24) & 0xff;
|
||||||
|
|
||||||
const char *features = (plat & HS_PLATFORM_NOAVX512)
|
const char *features = (plat & HS_PLATFORM_NOAVX512VBMI)
|
||||||
? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2"
|
? (plat & HS_PLATFORM_NOAVX512)
|
||||||
: "AVX512";
|
? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2"
|
||||||
|
: "AVX512"
|
||||||
|
: "AVX512VBMI";
|
||||||
|
|
||||||
const char *mode = NULL;
|
const char *mode = NULL;
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -55,6 +55,7 @@ extern "C"
|
|||||||
|
|
||||||
#define HS_PLATFORM_NOAVX2 (4<<13)
|
#define HS_PLATFORM_NOAVX2 (4<<13)
|
||||||
#define HS_PLATFORM_NOAVX512 (8<<13)
|
#define HS_PLATFORM_NOAVX512 (8<<13)
|
||||||
|
#define HS_PLATFORM_NOAVX512VBMI (0x10<<13)
|
||||||
|
|
||||||
/** \brief Platform features bitmask. */
|
/** \brief Platform features bitmask. */
|
||||||
typedef u64a platform_t;
|
typedef u64a platform_t;
|
||||||
@ -66,6 +67,9 @@ const platform_t hs_current_platform = {
|
|||||||
#endif
|
#endif
|
||||||
#if !defined(HAVE_AVX512)
|
#if !defined(HAVE_AVX512)
|
||||||
HS_PLATFORM_NOAVX512 |
|
HS_PLATFORM_NOAVX512 |
|
||||||
|
#endif
|
||||||
|
#if !defined(HAVE_AVX512VBMI)
|
||||||
|
HS_PLATFORM_NOAVX512VBMI |
|
||||||
#endif
|
#endif
|
||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
@ -74,12 +78,20 @@ static UNUSED
|
|||||||
const platform_t hs_current_platform_no_avx2 = {
|
const platform_t hs_current_platform_no_avx2 = {
|
||||||
HS_PLATFORM_NOAVX2 |
|
HS_PLATFORM_NOAVX2 |
|
||||||
HS_PLATFORM_NOAVX512 |
|
HS_PLATFORM_NOAVX512 |
|
||||||
|
HS_PLATFORM_NOAVX512VBMI |
|
||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
|
|
||||||
static UNUSED
|
static UNUSED
|
||||||
const platform_t hs_current_platform_no_avx512 = {
|
const platform_t hs_current_platform_no_avx512 = {
|
||||||
HS_PLATFORM_NOAVX512 |
|
HS_PLATFORM_NOAVX512 |
|
||||||
|
HS_PLATFORM_NOAVX512VBMI |
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
|
||||||
|
static UNUSED
|
||||||
|
const platform_t hs_current_platform_no_avx512vbmi = {
|
||||||
|
HS_PLATFORM_NOAVX512VBMI |
|
||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,8 +38,14 @@
|
|||||||
#define check_avx512() (0)
|
#define check_avx512() (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(DISABLE_AVX512VBMI_DISPATCH)
|
||||||
|
#define avx512vbmi_ disabled_
|
||||||
|
#define check_avx512vbmi() (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CREATE_DISPATCH(RTYPE, NAME, ...) \
|
#define CREATE_DISPATCH(RTYPE, NAME, ...) \
|
||||||
/* create defns */ \
|
/* create defns */ \
|
||||||
|
RTYPE JOIN(avx512vbmi_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \
|
||||||
@ -52,6 +58,9 @@
|
|||||||
\
|
\
|
||||||
/* resolver */ \
|
/* resolver */ \
|
||||||
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
|
||||||
|
if (check_avx512vbmi()) { \
|
||||||
|
return JOIN(avx512vbmi_, NAME); \
|
||||||
|
} \
|
||||||
if (check_avx512()) { \
|
if (check_avx512()) { \
|
||||||
return JOIN(avx512_, NAME); \
|
return JOIN(avx512_, NAME); \
|
||||||
} \
|
} \
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -107,6 +107,25 @@ void dumpTeddyReinforced(const u8 *rmsk, const u32 num_tables, FILE *f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpTeddyDupMasks(const u8 *dmsk, u32 numMasks, FILE *f) {
|
||||||
|
// dump nibble masks
|
||||||
|
u32 maskWidth = 2;
|
||||||
|
fprintf(f, " dup nibble masks:\n");
|
||||||
|
for (u32 i = 0; i < numMasks * 2; i++) {
|
||||||
|
fprintf(f, " -%d%s: ", 1 + i / 2, (i % 2) ? "hi" : "lo");
|
||||||
|
for (u32 j = 0; j < 16 * maskWidth * 2; j++) {
|
||||||
|
u8 val = dmsk[i * 16 * maskWidth * 2 + j];
|
||||||
|
for (u32 k = 0; k < 8; k++) {
|
||||||
|
fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0");
|
||||||
|
}
|
||||||
|
fprintf(f, " ");
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpTeddyMasks(const u8 *baseMsk, u32 numMasks, u32 maskWidth, FILE *f) {
|
void dumpTeddyMasks(const u8 *baseMsk, u32 numMasks, u32 maskWidth, FILE *f) {
|
||||||
// dump nibble masks
|
// dump nibble masks
|
||||||
@ -146,12 +165,17 @@ void dumpTeddy(const Teddy *teddy, FILE *f) {
|
|||||||
|
|
||||||
u32 maskWidth = des->getNumBuckets() / 8;
|
u32 maskWidth = des->getNumBuckets() / 8;
|
||||||
size_t headerSize = sizeof(Teddy);
|
size_t headerSize = sizeof(Teddy);
|
||||||
size_t maskLen = des->numMasks * 16 * 2 * maskWidth;
|
|
||||||
const u8 *teddy_base = (const u8 *)teddy;
|
const u8 *teddy_base = (const u8 *)teddy;
|
||||||
const u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
|
const u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
|
||||||
const u8 *rmsk = baseMsk + ROUNDUP_CL(maskLen);
|
|
||||||
dumpTeddyMasks(baseMsk, des->numMasks, maskWidth, f);
|
dumpTeddyMasks(baseMsk, des->numMasks, maskWidth, f);
|
||||||
dumpTeddyReinforced(rmsk, maskWidth, f);
|
size_t maskLen = des->numMasks * 16 * 2 * maskWidth;
|
||||||
|
const u8 *rdmsk = baseMsk + ROUNDUP_CL(maskLen);
|
||||||
|
if (maskWidth == 1) { // reinforcement table in Teddy
|
||||||
|
dumpTeddyReinforced(rdmsk, maskWidth, f);
|
||||||
|
} else { // dup nibble mask table in Fat Teddy
|
||||||
|
assert(maskWidth == 2);
|
||||||
|
dumpTeddyDupMasks(rdmsk, des->numMasks, f);
|
||||||
|
}
|
||||||
dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f);
|
dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,14 +284,6 @@ m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
|||||||
#define PREP_CONF_FN(val, n) \
|
#define PREP_CONF_FN(val, n) \
|
||||||
prep_conf_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
|
prep_conf_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
|
||||||
|
|
||||||
const u8 ALIGN_DIRECTIVE p_sh_mask_arr[80] = {
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
|
||||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
|
||||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
|
||||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
|
|
||||||
};
|
|
||||||
|
|
||||||
#define TEDDY_VBMI_SL1_POS 15
|
#define TEDDY_VBMI_SL1_POS 15
|
||||||
#define TEDDY_VBMI_SL2_POS 14
|
#define TEDDY_VBMI_SL2_POS 14
|
||||||
#define TEDDY_VBMI_SL3_POS 13
|
#define TEDDY_VBMI_SL3_POS 13
|
||||||
|
@ -109,6 +109,36 @@ const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
|||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI) // VBMI strong fat teddy
|
||||||
|
|
||||||
|
#define CONF_FAT_CHUNK_64(chunk, bucket, off, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(chunk != ones_u64a)) { \
|
||||||
|
chunk = ~chunk; \
|
||||||
|
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||||
|
&control, &last_match); \
|
||||||
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define CONF_FAT_CHUNK_32(chunk, bucket, off, reason, pt, conf_fn) \
|
||||||
|
do { \
|
||||||
|
if (unlikely(chunk != ones_u32)) { \
|
||||||
|
chunk = ~chunk; \
|
||||||
|
conf_fn(&chunk, bucket, off, confBase, reason, a, pt, \
|
||||||
|
&control, &last_match); \
|
||||||
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const m512 *getDupMaskBase(const struct Teddy *teddy, u8 numMask) {
|
||||||
|
return (const m512 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))
|
||||||
|
+ ROUNDUP_CL(2 * numMask * sizeof(m256)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
#define CONF_FAT_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
|
#define CONF_FAT_CHUNK_64(chunk, bucket, off, reason, conf_fn) \
|
||||||
do { \
|
do { \
|
||||||
if (unlikely(chunk != ones_u64a)) { \
|
if (unlikely(chunk != ones_u64a)) { \
|
||||||
@ -134,203 +164,200 @@ const m256 *getMaskBase_fat(const struct Teddy *teddy) {
|
|||||||
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512_REVERT) // revert to AVX2 Fat Teddy
|
#endif
|
||||||
|
|
||||||
static really_inline
|
#if defined(HAVE_AVX512VBMI) // VBMI strong fat teddy
|
||||||
const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) {
|
|
||||||
return (const u64a *)((const u8 *)getMaskBase_fat(teddy)
|
const u8 ALIGN_AVX_DIRECTIVE p_mask_interleave[64] = {
|
||||||
+ ROUNDUP_CL(2 * numMask * sizeof(m256)));
|
0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
|
||||||
}
|
8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
|
||||||
|
16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55,
|
||||||
|
24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||||
do { \
|
do { \
|
||||||
if (unlikely(diff512(var, ones512()))) { \
|
if (unlikely(diff512(var, ones512()))) { \
|
||||||
m512 swap = swap256in512(var); \
|
m512 msk_interleave = load512(p_mask_interleave); \
|
||||||
m512 r = interleave512lo(var, swap); \
|
m512 r = vpermb512(msk_interleave, var); \
|
||||||
m128 r0 = extract128from512(r, 0); \
|
m128 r0 = extract128from512(r, 0); \
|
||||||
m128 r1 = extract128from512(r, 1); \
|
m128 r1 = extract128from512(r, 1); \
|
||||||
|
m128 r2 = extract128from512(r, 2); \
|
||||||
|
m128 r3 = extract128from512(r, 3); \
|
||||||
u64a part1 = movq(r0); \
|
u64a part1 = movq(r0); \
|
||||||
u64a part2 = extract64from128(r0, 1); \
|
u64a part2 = extract64from128(r0, 1); \
|
||||||
u64a part5 = movq(r1); \
|
u64a part3 = movq(r1); \
|
||||||
u64a part6 = extract64from128(r1, 1); \
|
u64a part4 = extract64from128(r1, 1); \
|
||||||
r = interleave512hi(var, swap); \
|
u64a part5 = movq(r2); \
|
||||||
r0 = extract128from512(r, 0); \
|
u64a part6 = extract64from128(r2, 1); \
|
||||||
r1 = extract128from512(r, 1); \
|
u64a part7 = movq(r3); \
|
||||||
u64a part3 = movq(r0); \
|
u64a part8 = extract64from128(r3, 1); \
|
||||||
u64a part4 = extract64from128(r0, 1); \
|
CONF_FAT_CHUNK_64(part1, bucket, offset, reason, pt, conf_fn); \
|
||||||
u64a part7 = movq(r1); \
|
CONF_FAT_CHUNK_64(part2, bucket, offset + 4, reason, pt, conf_fn); \
|
||||||
u64a part8 = extract64from128(r1, 1); \
|
CONF_FAT_CHUNK_64(part3, bucket, offset + 8, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part1, bucket, offset, reason, conf_fn); \
|
CONF_FAT_CHUNK_64(part4, bucket, offset + 12, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part2, bucket, offset + 4, reason, conf_fn); \
|
CONF_FAT_CHUNK_64(part5, bucket, offset + 16, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part3, bucket, offset + 8, reason, conf_fn); \
|
CONF_FAT_CHUNK_64(part6, bucket, offset + 20, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part4, bucket, offset + 12, reason, conf_fn); \
|
CONF_FAT_CHUNK_64(part7, bucket, offset + 24, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part5, bucket, offset + 16, reason, conf_fn); \
|
CONF_FAT_CHUNK_64(part8, bucket, offset + 28, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_64(part6, bucket, offset + 20, reason, conf_fn); \
|
|
||||||
CONF_FAT_CHUNK_64(part7, bucket, offset + 24, reason, conf_fn); \
|
|
||||||
CONF_FAT_CHUNK_64(part8, bucket, offset + 28, reason, conf_fn); \
|
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
#else
|
#else
|
||||||
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, pt, conf_fn) \
|
||||||
do { \
|
do { \
|
||||||
if (unlikely(diff512(var, ones512()))) { \
|
if (unlikely(diff512(var, ones512()))) { \
|
||||||
m512 swap = swap256in512(var); \
|
m512 msk_interleave = load512(p_mask_interleave); \
|
||||||
m512 r = interleave512lo(var, swap); \
|
m512 r = vpermb512(msk_interleave, var); \
|
||||||
m128 r0 = extract128from512(r, 0); \
|
m128 r0 = extract128from512(r, 0); \
|
||||||
m128 r1 = extract128from512(r, 1); \
|
m128 r1 = extract128from512(r, 1); \
|
||||||
|
m128 r2 = extract128from512(r, 2); \
|
||||||
|
m128 r3 = extract128from512(r, 3); \
|
||||||
u32 part1 = movd(r0); \
|
u32 part1 = movd(r0); \
|
||||||
u32 part2 = extract32from128(r0, 1); \
|
u32 part2 = extract32from128(r0, 1); \
|
||||||
u32 part3 = extract32from128(r0, 2); \
|
u32 part3 = extract32from128(r0, 2); \
|
||||||
u32 part4 = extract32from128(r0, 3); \
|
u32 part4 = extract32from128(r0, 3); \
|
||||||
u32 part9 = movd(r1); \
|
u32 part5 = movd(r1); \
|
||||||
u32 part10 = extract32from128(r1, 1); \
|
u32 part6 = extract32from128(r1, 1); \
|
||||||
u32 part11 = extract32from128(r1, 2); \
|
u32 part7 = extract32from128(r1, 2); \
|
||||||
u32 part12 = extract32from128(r1, 3); \
|
u32 part8 = extract32from128(r1, 3); \
|
||||||
r = interleave512hi(var, swap); \
|
u32 part9 = movd(r2); \
|
||||||
r0 = extract128from512(r, 0); \
|
u32 part10 = extract32from128(r2, 1); \
|
||||||
r1 = extract128from512(r, 1); \
|
u32 part11 = extract32from128(r2, 2); \
|
||||||
u32 part5 = movd(r0); \
|
u32 part12 = extract32from128(r2, 3); \
|
||||||
u32 part6 = extract32from128(r0, 1); \
|
u32 part13 = movd(r3); \
|
||||||
u32 part7 = extract32from128(r0, 2); \
|
u32 part14 = extract32from128(r3, 1); \
|
||||||
u32 part8 = extract32from128(r0, 3); \
|
u32 part15 = extract32from128(r3, 2); \
|
||||||
u32 part13 = movd(r1); \
|
u32 part16 = extract32from128(r3, 3); \
|
||||||
u32 part14 = extract32from128(r1, 1); \
|
CONF_FAT_CHUNK_32(part1, bucket, offset, reason, pt, conf_fn); \
|
||||||
u32 part15 = extract32from128(r1, 2); \
|
CONF_FAT_CHUNK_32(part2, bucket, offset + 2, reason, pt, conf_fn); \
|
||||||
u32 part16 = extract32from128(r1, 3); \
|
CONF_FAT_CHUNK_32(part3, bucket, offset + 4, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part1, bucket, offset, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part4, bucket, offset + 6, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part2, bucket, offset + 2, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part5, bucket, offset + 8, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part3, bucket, offset + 4, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part6, bucket, offset + 10, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part4, bucket, offset + 6, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part7, bucket, offset + 12, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part5, bucket, offset + 8, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part8, bucket, offset + 14, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part6, bucket, offset + 10, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part9, bucket, offset + 16, reason, pt, conf_fn); \
|
||||||
CONF_FAT_CHUNK_32(part7, bucket, offset + 12, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part10, bucket, offset + 18, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part8, bucket, offset + 14, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part11, bucket, offset + 20, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part9, bucket, offset + 16, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part12, bucket, offset + 22, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part10, bucket, offset + 18, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part13, bucket, offset + 24, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part11, bucket, offset + 20, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part14, bucket, offset + 26, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part12, bucket, offset + 22, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part15, bucket, offset + 28, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part13, bucket, offset + 24, reason, conf_fn); \
|
CONF_FAT_CHUNK_32(part16, bucket, offset + 30, reason, pt, conf_fn);\
|
||||||
CONF_FAT_CHUNK_32(part14, bucket, offset + 26, reason, conf_fn); \
|
|
||||||
CONF_FAT_CHUNK_32(part15, bucket, offset + 28, reason, conf_fn); \
|
|
||||||
CONF_FAT_CHUNK_32(part16, bucket, offset + 30, reason, conf_fn); \
|
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
#define PREP_FAT_SHUF_MASK \
|
||||||
m512 vectoredLoad2x256(m512 *p_mask, const u8 *ptr, const size_t start_offset,
|
|
||||||
const u8 *lo, const u8 *hi,
|
|
||||||
const u8 *buf_history, size_t len_history,
|
|
||||||
const u32 nMasks) {
|
|
||||||
m256 p_mask256;
|
|
||||||
m512 ret = set2x256(vectoredLoad256(&p_mask256, ptr, start_offset, lo, hi,
|
|
||||||
buf_history, len_history, nMasks));
|
|
||||||
*p_mask = set2x256(p_mask256);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val) \
|
|
||||||
m512 lo = and512(val, *lo_mask); \
|
m512 lo = and512(val, *lo_mask); \
|
||||||
m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
|
m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
|
||||||
|
|
||||||
#define PREP_FAT_SHUF_MASK \
|
#define FAT_TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||||
PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(set2x256(load256(ptr))); \
|
m512 shuf_or_b0 = or512(pshufb_m512(dup_mask[0], lo), \
|
||||||
*c_16 = *(ptr + 15); \
|
pshufb_m512(dup_mask[1], hi));
|
||||||
m512 r_msk = set512_64(0ULL, r_msk_base_hi[*c_16], \
|
|
||||||
0ULL, r_msk_base_hi[*c_0], \
|
|
||||||
0ULL, r_msk_base_lo[*c_16], \
|
|
||||||
0ULL, r_msk_base_lo[*c_0]); \
|
|
||||||
*c_0 = *(ptr + 31)
|
|
||||||
|
|
||||||
#define FAT_SHIFT_OR_M1 \
|
#define FAT_TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||||
or512(pshufb_m512(dup_mask[0], lo), pshufb_m512(dup_mask[1], hi))
|
FAT_TEDDY_VBMI_PSHUFB_OR_M1 \
|
||||||
|
m512 shuf_or_b1 = or512(pshufb_m512(dup_mask[2], lo), \
|
||||||
|
pshufb_m512(dup_mask[3], hi));
|
||||||
|
|
||||||
#define FAT_SHIFT_OR_M2 \
|
#define FAT_TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||||
or512(lshift128_m512(or512(pshufb_m512(dup_mask[2], lo), \
|
FAT_TEDDY_VBMI_PSHUFB_OR_M2 \
|
||||||
pshufb_m512(dup_mask[3], hi)), \
|
m512 shuf_or_b2 = or512(pshufb_m512(dup_mask[4], lo), \
|
||||||
1), FAT_SHIFT_OR_M1)
|
pshufb_m512(dup_mask[5], hi));
|
||||||
|
|
||||||
#define FAT_SHIFT_OR_M3 \
|
#define FAT_TEDDY_VBMI_PSHUFB_OR_M4 \
|
||||||
or512(lshift128_m512(or512(pshufb_m512(dup_mask[4], lo), \
|
FAT_TEDDY_VBMI_PSHUFB_OR_M3 \
|
||||||
pshufb_m512(dup_mask[5], hi)), \
|
m512 shuf_or_b3 = or512(pshufb_m512(dup_mask[6], lo), \
|
||||||
2), FAT_SHIFT_OR_M2)
|
pshufb_m512(dup_mask[7], hi));
|
||||||
|
|
||||||
#define FAT_SHIFT_OR_M4 \
|
#define FAT_TEDDY_VBMI_SL1_MASK 0xfffffffefffffffeULL
|
||||||
or512(lshift128_m512(or512(pshufb_m512(dup_mask[6], lo), \
|
#define FAT_TEDDY_VBMI_SL2_MASK 0xfffffffcfffffffcULL
|
||||||
pshufb_m512(dup_mask[7], hi)), \
|
#define FAT_TEDDY_VBMI_SL3_MASK 0xfffffff8fffffff8ULL
|
||||||
3), FAT_SHIFT_OR_M3)
|
|
||||||
|
#define FAT_TEDDY_VBMI_SHIFT_M1
|
||||||
|
|
||||||
|
#define FAT_TEDDY_VBMI_SHIFT_M2 \
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M1 \
|
||||||
|
m512 sl1 = maskz_vpermb512(FAT_TEDDY_VBMI_SL1_MASK, sl_msk[0], shuf_or_b1);
|
||||||
|
|
||||||
|
#define FAT_TEDDY_VBMI_SHIFT_M3 \
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M2 \
|
||||||
|
m512 sl2 = maskz_vpermb512(FAT_TEDDY_VBMI_SL2_MASK, sl_msk[1], shuf_or_b2);
|
||||||
|
|
||||||
|
#define FAT_TEDDY_VBMI_SHIFT_M4 \
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M3 \
|
||||||
|
m512 sl3 = maskz_vpermb512(FAT_TEDDY_VBMI_SL3_MASK, sl_msk[2], shuf_or_b3);
|
||||||
|
|
||||||
|
#define FAT_SHIFT_OR_M1 \
|
||||||
|
shuf_or_b0
|
||||||
|
|
||||||
|
#define FAT_SHIFT_OR_M2 \
|
||||||
|
or512(sl1, FAT_SHIFT_OR_M1)
|
||||||
|
|
||||||
|
#define FAT_SHIFT_OR_M3 \
|
||||||
|
or512(sl2, FAT_SHIFT_OR_M2)
|
||||||
|
|
||||||
|
#define FAT_SHIFT_OR_M4 \
|
||||||
|
or512(sl3, FAT_SHIFT_OR_M3)
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 prep_conf_fat_teddy_no_reinforcement_m1(const m512 *lo_mask,
|
m512 prep_conf_fat_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
const m512 *dup_mask,
|
UNUSED const m512 *sl_msk, const m512 val) {
|
||||||
const m512 val) {
|
PREP_FAT_SHUF_MASK;
|
||||||
PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val);
|
FAT_TEDDY_VBMI_PSHUFB_OR_M1;
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M1;
|
||||||
return FAT_SHIFT_OR_M1;
|
return FAT_SHIFT_OR_M1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 prep_conf_fat_teddy_no_reinforcement_m2(const m512 *lo_mask,
|
m512 prep_conf_fat_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
const m512 *dup_mask,
|
const m512 *sl_msk, const m512 val) {
|
||||||
const m512 val) {
|
PREP_FAT_SHUF_MASK;
|
||||||
PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val);
|
FAT_TEDDY_VBMI_PSHUFB_OR_M2;
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M2;
|
||||||
return FAT_SHIFT_OR_M2;
|
return FAT_SHIFT_OR_M2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 prep_conf_fat_teddy_no_reinforcement_m3(const m512 *lo_mask,
|
m512 prep_conf_fat_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
const m512 *dup_mask,
|
const m512 *sl_msk, const m512 val) {
|
||||||
const m512 val) {
|
PREP_FAT_SHUF_MASK;
|
||||||
PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val);
|
FAT_TEDDY_VBMI_PSHUFB_OR_M3;
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M3;
|
||||||
return FAT_SHIFT_OR_M3;
|
return FAT_SHIFT_OR_M3;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 prep_conf_fat_teddy_no_reinforcement_m4(const m512 *lo_mask,
|
m512 prep_conf_fat_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
||||||
const m512 *dup_mask,
|
const m512 *sl_msk, const m512 val) {
|
||||||
const m512 val) {
|
PREP_FAT_SHUF_MASK;
|
||||||
PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val);
|
FAT_TEDDY_VBMI_PSHUFB_OR_M4;
|
||||||
|
FAT_TEDDY_VBMI_SHIFT_M4;
|
||||||
return FAT_SHIFT_OR_M4;
|
return FAT_SHIFT_OR_M4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
#define PREP_CONF_FAT_FN(val, n) \
|
||||||
m512 prep_conf_fat_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
|
prep_conf_fat_teddy_m##n(&lo_mask, dup_mask, sl_msk, val)
|
||||||
const u8 *ptr, const u64a *r_msk_base_lo,
|
|
||||||
const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) {
|
|
||||||
PREP_FAT_SHUF_MASK;
|
|
||||||
return or512(FAT_SHIFT_OR_M1, r_msk);
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
#define FAT_TEDDY_VBMI_SL1_POS 15
|
||||||
m512 prep_conf_fat_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
|
#define FAT_TEDDY_VBMI_SL2_POS 14
|
||||||
const u8 *ptr, const u64a *r_msk_base_lo,
|
#define FAT_TEDDY_VBMI_SL3_POS 13
|
||||||
const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) {
|
|
||||||
PREP_FAT_SHUF_MASK;
|
|
||||||
return or512(FAT_SHIFT_OR_M2, r_msk);
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
#define FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M1
|
||||||
m512 prep_conf_fat_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
|
|
||||||
const u8 *ptr, const u64a *r_msk_base_lo,
|
|
||||||
const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) {
|
|
||||||
PREP_FAT_SHUF_MASK;
|
|
||||||
return or512(FAT_SHIFT_OR_M3, r_msk);
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
#define FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||||
m512 prep_conf_fat_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M1 \
|
||||||
const u8 *ptr, const u64a *r_msk_base_lo,
|
sl_msk[0] = loadu512(p_sh_mask_arr + FAT_TEDDY_VBMI_SL1_POS);
|
||||||
const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) {
|
|
||||||
PREP_FAT_SHUF_MASK;
|
|
||||||
return or512(FAT_SHIFT_OR_M4, r_msk);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PREP_CONF_FAT_FN_NO_REINFORCEMENT(val, n) \
|
#define FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||||
prep_conf_fat_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val)
|
FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M2 \
|
||||||
|
sl_msk[1] = loadu512(p_sh_mask_arr + FAT_TEDDY_VBMI_SL2_POS);
|
||||||
|
|
||||||
#define PREP_CONF_FAT_FN(ptr, n) \
|
#define FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M4 \
|
||||||
prep_conf_fat_teddy_m##n(&lo_mask, dup_mask, ptr, \
|
FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M3 \
|
||||||
r_msk_base_lo, r_msk_base_hi, &c_0, &c_16)
|
sl_msk[2] = loadu512(p_sh_mask_arr + FAT_TEDDY_VBMI_SL3_POS);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In FAT teddy, it needs 2 bytes to represent result of each position,
|
* In FAT teddy, it needs 2 bytes to represent result of each position,
|
||||||
@ -355,31 +382,15 @@ m512 prep_conf_fat_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
|
|||||||
* then do pshufb_m512(AABB, XYXY).
|
* then do pshufb_m512(AABB, XYXY).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define DUP_FAT_MASK(a) mask_set2x256(set2x256(swap128in256(a)), 0xC3, a)
|
|
||||||
|
|
||||||
#define PREPARE_FAT_MASKS_1 \
|
|
||||||
dup_mask[0] = DUP_FAT_MASK(maskBase[0]); \
|
|
||||||
dup_mask[1] = DUP_FAT_MASK(maskBase[1]);
|
|
||||||
|
|
||||||
#define PREPARE_FAT_MASKS_2 \
|
|
||||||
PREPARE_FAT_MASKS_1 \
|
|
||||||
dup_mask[2] = DUP_FAT_MASK(maskBase[2]); \
|
|
||||||
dup_mask[3] = DUP_FAT_MASK(maskBase[3]);
|
|
||||||
|
|
||||||
#define PREPARE_FAT_MASKS_3 \
|
|
||||||
PREPARE_FAT_MASKS_2 \
|
|
||||||
dup_mask[4] = DUP_FAT_MASK(maskBase[4]); \
|
|
||||||
dup_mask[5] = DUP_FAT_MASK(maskBase[5]);
|
|
||||||
|
|
||||||
#define PREPARE_FAT_MASKS_4 \
|
|
||||||
PREPARE_FAT_MASKS_3 \
|
|
||||||
dup_mask[6] = DUP_FAT_MASK(maskBase[6]); \
|
|
||||||
dup_mask[7] = DUP_FAT_MASK(maskBase[7]);
|
|
||||||
|
|
||||||
#define PREPARE_FAT_MASKS(n) \
|
#define PREPARE_FAT_MASKS(n) \
|
||||||
m512 lo_mask = set64x8(0xf); \
|
m512 lo_mask = set64x8(0xf); \
|
||||||
m512 dup_mask[n * 2]; \
|
m512 sl_msk[n - 1]; \
|
||||||
PREPARE_FAT_MASKS_##n
|
FAT_TEDDY_VBMI_LOAD_SHIFT_MASK_M##n
|
||||||
|
|
||||||
|
#define FAT_TEDDY_VBMI_CONF_MASK_HEAD (0xffffffffULL >> n_sh)
|
||||||
|
#define FAT_TEDDY_VBMI_CONF_MASK_FULL ((0xffffffffULL << n_sh) & 0xffffffffULL)
|
||||||
|
#define FAT_TEDDY_VBMI_CONF_MASK_VAR(n) (0xffffffffULL >> (32 - n) << overlap)
|
||||||
|
#define FAT_TEDDY_VBMI_LOAD_MASK_PATCH (0xffffffffULL >> (32 - n_sh))
|
||||||
|
|
||||||
#define FDR_EXEC_FAT_TEDDY(fdr, a, control, n_msk, conf_fn) \
|
#define FDR_EXEC_FAT_TEDDY(fdr, a, control, n_msk, conf_fn) \
|
||||||
do { \
|
do { \
|
||||||
@ -389,67 +400,53 @@ do { \
|
|||||||
const u8 *tryFloodDetect = a->firstFloodDetect; \
|
const u8 *tryFloodDetect = a->firstFloodDetect; \
|
||||||
u32 last_match = ones_u32; \
|
u32 last_match = ones_u32; \
|
||||||
const struct Teddy *teddy = (const struct Teddy *)fdr; \
|
const struct Teddy *teddy = (const struct Teddy *)fdr; \
|
||||||
const size_t iterBytes = 64; \
|
const size_t iterBytes = 32; \
|
||||||
|
u32 n_sh = n_msk - 1; \
|
||||||
|
const size_t loopBytes = 32 - n_sh; \
|
||||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \
|
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \
|
||||||
a->buf, a->len, a->start_offset); \
|
a->buf, a->len, a->start_offset); \
|
||||||
\
|
\
|
||||||
const m256 *maskBase = getMaskBase_fat(teddy); \
|
const m512 *dup_mask = getDupMaskBase(teddy, n_msk); \
|
||||||
PREPARE_FAT_MASKS(n_msk); \
|
PREPARE_FAT_MASKS(n_msk); \
|
||||||
const u32 *confBase = getConfBase(teddy); \
|
const u32 *confBase = getConfBase(teddy); \
|
||||||
\
|
\
|
||||||
const u64a *r_msk_base_lo = getReinforcedMaskBase_fat(teddy, n_msk); \
|
u64a k = FAT_TEDDY_VBMI_CONF_MASK_FULL; \
|
||||||
const u64a *r_msk_base_hi = r_msk_base_lo + (N_CHARS + 1); \
|
m512 p_mask = set_mask_m512(~((k << 32) | k)); \
|
||||||
u32 c_0 = 0x100; \
|
u32 overlap = 0; \
|
||||||
u32 c_16 = 0x100; \
|
u64a patch = 0; \
|
||||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 32); \
|
if (likely(ptr + loopBytes <= buf_end)) { \
|
||||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \
|
u64a k0 = FAT_TEDDY_VBMI_CONF_MASK_HEAD; \
|
||||||
if (ptr < mainStart) { \
|
m512 p_mask0 = set_mask_m512(~((k0 << 32) | k0)); \
|
||||||
ptr = mainStart - 32; \
|
m512 r_0 = PREP_CONF_FAT_FN(set2x256(loadu256(ptr)), n_msk); \
|
||||||
m512 p_mask; \
|
r_0 = or512(r_0, p_mask0); \
|
||||||
m512 val_0 = vectoredLoad2x256(&p_mask, ptr, a->start_offset, \
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, ptr, conf_fn); \
|
||||||
a->buf, buf_end, \
|
ptr += loopBytes; \
|
||||||
a->buf_history, a->len_history, n_msk); \
|
overlap = n_sh; \
|
||||||
m512 r_0 = PREP_CONF_FAT_FN_NO_REINFORCEMENT(val_0, n_msk); \
|
patch = FAT_TEDDY_VBMI_LOAD_MASK_PATCH; \
|
||||||
r_0 = or512(r_0, p_mask); \
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \
|
|
||||||
ptr += 32; \
|
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
if (ptr + 32 <= buf_end) { \
|
for (; ptr + loopBytes <= buf_end; ptr += loopBytes) { \
|
||||||
m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \
|
|
||||||
ptr += 32; \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { \
|
|
||||||
__builtin_prefetch(ptr + (iterBytes * 4)); \
|
|
||||||
CHECK_FLOOD; \
|
CHECK_FLOOD; \
|
||||||
m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \
|
m512 r_0 = PREP_CONF_FAT_FN(set2x256(loadu256(ptr - n_sh)), n_msk); \
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \
|
|
||||||
m512 r_1 = PREP_CONF_FAT_FN(ptr + 32, n_msk); \
|
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 32, NOT_CAUTIOUS, conf_fn); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
if (ptr + 32 <= buf_end) { \
|
|
||||||
m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \
|
|
||||||
ptr += 32; \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
assert(ptr + 32 > buf_end); \
|
|
||||||
if (ptr < buf_end) { \
|
|
||||||
m512 p_mask; \
|
|
||||||
m512 val_0 = vectoredLoad2x256(&p_mask, ptr, 0, ptr, buf_end, \
|
|
||||||
a->buf_history, a->len_history, n_msk); \
|
|
||||||
m512 r_0 = PREP_CONF_FAT_FN_NO_REINFORCEMENT(val_0, n_msk); \
|
|
||||||
r_0 = or512(r_0, p_mask); \
|
r_0 = or512(r_0, p_mask); \
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, ptr - n_sh, conf_fn); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
assert(ptr + loopBytes > buf_end); \
|
||||||
|
if (ptr < buf_end) { \
|
||||||
|
u32 left = (u32)(buf_end - ptr); \
|
||||||
|
u64a k1 = FAT_TEDDY_VBMI_CONF_MASK_VAR(left); \
|
||||||
|
m512 p_mask1 = set_mask_m512(~((k1 << 32) | k1)); \
|
||||||
|
m512 val_0 = set2x256(loadu_maskz_m256(k1 | patch, ptr - overlap)); \
|
||||||
|
m512 r_0 = PREP_CONF_FAT_FN(val_0, n_msk); \
|
||||||
|
r_0 = or512(r_0, p_mask1); \
|
||||||
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, ptr - overlap, conf_fn); \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
return HWLM_SUCCESS; \
|
return HWLM_SUCCESS; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#else // HAVE_AVX512
|
#else // !HAVE_AVX512VBMI, AVX2 normal fat teddy
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
||||||
@ -659,7 +656,7 @@ do { \
|
|||||||
return HWLM_SUCCESS; \
|
return HWLM_SUCCESS; \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#endif // HAVE_AVX512
|
#endif // HAVE_AVX512VBMI
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a,
|
const struct FDR_Runtime_Args *a,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -353,6 +353,89 @@ void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillDupNibbleMasks(const map<BucketIndex,
|
||||||
|
vector<LiteralIndex>> &bucketToLits,
|
||||||
|
const vector<hwlmLiteral> &lits,
|
||||||
|
u32 numMasks, size_t maskLen,
|
||||||
|
u8 *baseMsk) {
|
||||||
|
u32 maskWidth = 2;
|
||||||
|
memset(baseMsk, 0xff, maskLen);
|
||||||
|
|
||||||
|
for (const auto &b2l : bucketToLits) {
|
||||||
|
const u32 &bucket_id = b2l.first;
|
||||||
|
const vector<LiteralIndex> &ids = b2l.second;
|
||||||
|
const u8 bmsk = 1U << (bucket_id % 8);
|
||||||
|
|
||||||
|
for (const LiteralIndex &lit_id : ids) {
|
||||||
|
const hwlmLiteral &l = lits[lit_id];
|
||||||
|
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
|
||||||
|
const u32 sz = verify_u32(l.s.size());
|
||||||
|
|
||||||
|
// fill in masks
|
||||||
|
for (u32 j = 0; j < numMasks; j++) {
|
||||||
|
const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
|
||||||
|
const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
|
||||||
|
const u32 lo_base0 = msk_id_lo * 32;
|
||||||
|
const u32 lo_base1 = msk_id_lo * 32 + 16;
|
||||||
|
const u32 hi_base0 = msk_id_hi * 32;
|
||||||
|
const u32 hi_base1 = msk_id_hi * 32 + 16;
|
||||||
|
|
||||||
|
// if we don't have a char at this position, fill in i
|
||||||
|
// locations in these masks with '1'
|
||||||
|
if (j >= sz) {
|
||||||
|
for (u32 n = 0; n < 16; n++) {
|
||||||
|
baseMsk[lo_base0 + n] &= ~bmsk;
|
||||||
|
baseMsk[lo_base1 + n] &= ~bmsk;
|
||||||
|
baseMsk[hi_base0 + n] &= ~bmsk;
|
||||||
|
baseMsk[hi_base1 + n] &= ~bmsk;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
u8 c = l.s[sz - 1 - j];
|
||||||
|
// if we do have a char at this position
|
||||||
|
const u32 hiShift = 4;
|
||||||
|
u32 n_hi = (c >> hiShift) & 0xf;
|
||||||
|
u32 n_lo = c & 0xf;
|
||||||
|
|
||||||
|
if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
|
||||||
|
u8 m = l.msk[l.msk.size() - 1 - j];
|
||||||
|
u8 m_hi = (m >> hiShift) & 0xf;
|
||||||
|
u8 m_lo = m & 0xf;
|
||||||
|
u8 cmp = l.cmp[l.msk.size() - 1 - j];
|
||||||
|
u8 cmp_lo = cmp & 0xf;
|
||||||
|
u8 cmp_hi = (cmp >> hiShift) & 0xf;
|
||||||
|
|
||||||
|
for (u8 cm = 0; cm < 0x10; cm++) {
|
||||||
|
if ((cm & m_lo) == (cmp_lo & m_lo)) {
|
||||||
|
baseMsk[lo_base0 + cm] &= ~bmsk;
|
||||||
|
baseMsk[lo_base1 + cm] &= ~bmsk;
|
||||||
|
}
|
||||||
|
if ((cm & m_hi) == (cmp_hi & m_hi)) {
|
||||||
|
baseMsk[hi_base0 + cm] &= ~bmsk;
|
||||||
|
baseMsk[hi_base1 + cm] &= ~bmsk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (l.nocase && ourisalpha(c)) {
|
||||||
|
u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
|
||||||
|
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
|
||||||
|
baseMsk[hi_base0 + (n_hi & cmHalfClear)] &= ~bmsk;
|
||||||
|
baseMsk[hi_base1 + (n_hi & cmHalfClear)] &= ~bmsk;
|
||||||
|
baseMsk[hi_base0 + (n_hi | cmHalfSet)] &= ~bmsk;
|
||||||
|
baseMsk[hi_base1 + (n_hi | cmHalfSet)] &= ~bmsk;
|
||||||
|
} else {
|
||||||
|
baseMsk[hi_base0 + n_hi] &= ~bmsk;
|
||||||
|
baseMsk[hi_base1 + n_hi] &= ~bmsk;
|
||||||
|
}
|
||||||
|
baseMsk[lo_base0 + n_lo] &= ~bmsk;
|
||||||
|
baseMsk[lo_base1 + n_lo] &= ~bmsk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillNibbleMasks(const map<BucketIndex,
|
void fillNibbleMasks(const map<BucketIndex,
|
||||||
vector<LiteralIndex>> &bucketToLits,
|
vector<LiteralIndex>> &bucketToLits,
|
||||||
@ -479,14 +562,17 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
|||||||
|
|
||||||
size_t headerSize = sizeof(Teddy);
|
size_t headerSize = sizeof(Teddy);
|
||||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||||
size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;
|
size_t reinforcedDupMaskLen = RTABLE_SIZE * maskWidth;
|
||||||
|
if (maskWidth == 2) { // dup nibble mask table in Fat Teddy
|
||||||
|
reinforcedDupMaskLen = maskLen * 2;
|
||||||
|
}
|
||||||
|
|
||||||
auto floodTable = setupFDRFloodControl(lits, eng, grey);
|
auto floodTable = setupFDRFloodControl(lits, eng, grey);
|
||||||
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
|
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||||
|
|
||||||
// Note: we place each major structure here on a cacheline boundary.
|
// Note: we place each major structure here on a cacheline boundary.
|
||||||
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
||||||
ROUNDUP_CL(reinforcedMaskLen) +
|
ROUNDUP_CL(reinforcedDupMaskLen) +
|
||||||
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
|
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
|
||||||
|
|
||||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||||
@ -502,7 +588,7 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
|||||||
|
|
||||||
// Write confirm structures.
|
// Write confirm structures.
|
||||||
u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
||||||
ROUNDUP_CL(reinforcedMaskLen);
|
ROUNDUP_CL(reinforcedDupMaskLen);
|
||||||
assert(ISALIGNED_CL(ptr));
|
assert(ISALIGNED_CL(ptr));
|
||||||
teddy->confOffset = verify_u32(ptr - teddy_base);
|
teddy->confOffset = verify_u32(ptr - teddy_base);
|
||||||
memcpy(ptr, confirmTable.get(), confirmTable.size());
|
memcpy(ptr, confirmTable.get(), confirmTable.size());
|
||||||
@ -519,9 +605,16 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
|||||||
fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
|
fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
|
||||||
baseMsk);
|
baseMsk);
|
||||||
|
|
||||||
// Write reinforcement masks.
|
if (maskWidth == 1) { // reinforcement table in Teddy
|
||||||
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
|
// Write reinforcement masks.
|
||||||
fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
|
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
|
||||||
|
fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
|
||||||
|
} else { // dup nibble mask table in Fat Teddy
|
||||||
|
assert(maskWidth == 2);
|
||||||
|
u8 *dupMsk = baseMsk + ROUNDUP_CL(maskLen);
|
||||||
|
fillDupNibbleMasks(bucketToLits, lits, eng.numMasks,
|
||||||
|
reinforcedDupMaskLen, dupMsk);
|
||||||
|
}
|
||||||
|
|
||||||
return fdr;
|
return fdr;
|
||||||
}
|
}
|
||||||
|
@ -45,6 +45,16 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
|
|||||||
extern const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64];
|
extern const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static const u8 ALIGN_DIRECTIVE p_sh_mask_arr[80] = {
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||||
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||||
|
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||||||
|
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define TEDDY_CONF_TYPE u64a
|
#define TEDDY_CONF_TYPE u64a
|
||||||
#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_64(conf)
|
#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_64(conf)
|
||||||
|
13
src/hs.cpp
13
src/hs.cpp
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -120,9 +120,10 @@ bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
||||||
static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM;
|
static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_ICX;
|
||||||
static constexpr u32 HS_CPU_FEATURES_ALL =
|
static constexpr u32 HS_CPU_FEATURES_ALL =
|
||||||
HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512;
|
HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 |
|
||||||
|
HS_CPU_FEATURES_AVX512VBMI;
|
||||||
|
|
||||||
if (!p) {
|
if (!p) {
|
||||||
return true;
|
return true;
|
||||||
@ -513,6 +514,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
|||||||
return HS_COMPILER_ERROR;
|
return HS_COMPILER_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (flags & HS_FLAG_COMBINATION) {
|
||||||
|
*error = generateCompileError("Invalid parameter: unsupported "
|
||||||
|
"logical combination expression", -1);
|
||||||
|
return HS_COMPILER_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
*info = nullptr;
|
*info = nullptr;
|
||||||
*error = nullptr;
|
*error = nullptr;
|
||||||
|
|
||||||
|
4
src/hs.h
4
src/hs.h
@ -42,8 +42,8 @@
|
|||||||
/* The current Hyperscan version information. */
|
/* The current Hyperscan version information. */
|
||||||
|
|
||||||
#define HS_MAJOR 5
|
#define HS_MAJOR 5
|
||||||
#define HS_MINOR 3
|
#define HS_MINOR 4
|
||||||
#define HS_PATCH 0
|
#define HS_PATCH 2
|
||||||
|
|
||||||
#include "hs_compile.h"
|
#include "hs_compile.h"
|
||||||
#include "hs_runtime.h"
|
#include "hs_runtime.h"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2020, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
|
|||||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||||
* when a match is found.
|
* when a match is found.
|
||||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
* - HS_FLAG_QUIET - This flag will be ignored.
|
||||||
* syntax.
|
|
||||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
|
||||||
* the sub-expressions in logical combinations.
|
|
||||||
*
|
*
|
||||||
* @param info
|
* @param info
|
||||||
* On success, a pointer to the pattern information will be returned in
|
* On success, a pointer to the pattern information will be returned in
|
||||||
@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
|
|||||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||||
* when a match is found.
|
* when a match is found.
|
||||||
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
|
* - HS_FLAG_QUIET - This flag will be ignored.
|
||||||
* syntax.
|
|
||||||
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
|
|
||||||
* the sub-expressions in logical combinations.
|
|
||||||
*
|
*
|
||||||
* @param ext
|
* @param ext
|
||||||
* A pointer to a filled @ref hs_expr_ext_t structure that defines
|
* A pointer to a filled @ref hs_expr_ext_t structure that defines
|
||||||
@ -1034,6 +1028,15 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
*/
|
*/
|
||||||
#define HS_CPU_FEATURES_AVX512 (1ULL << 3)
|
#define HS_CPU_FEATURES_AVX512 (1ULL << 3)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CPU features flag - Intel(R) Advanced Vector Extensions 512
|
||||||
|
* Vector Byte Manipulation Instructions (Intel(R) AVX512VBMI)
|
||||||
|
*
|
||||||
|
* Setting this flag indicates that the target platform supports AVX512VBMI
|
||||||
|
* instructions. Using AVX512VBMI implies the use of AVX512.
|
||||||
|
*/
|
||||||
|
#define HS_CPU_FEATURES_AVX512VBMI (1ULL << 4)
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1114,6 +1117,22 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
*/
|
*/
|
||||||
#define HS_TUNE_FAMILY_GLM 8
|
#define HS_TUNE_FAMILY_GLM 8
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tuning Parameter - Intel(R) microarchitecture code name Icelake
|
||||||
|
*
|
||||||
|
* This indicates that the compiled database should be tuned for the
|
||||||
|
* Icelake microarchitecture.
|
||||||
|
*/
|
||||||
|
#define HS_TUNE_FAMILY_ICL 9
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tuning Parameter - Intel(R) microarchitecture code name Icelake Server
|
||||||
|
*
|
||||||
|
* This indicates that the compiled database should be tuned for the
|
||||||
|
* Icelake Server microarchitecture.
|
||||||
|
*/
|
||||||
|
#define HS_TUNE_FAMILY_ICX 10
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2019, Intel Corporation
|
* Copyright (c) 2019-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -80,7 +80,9 @@ extern "C"
|
|||||||
| HS_FLAG_PREFILTER \
|
| HS_FLAG_PREFILTER \
|
||||||
| HS_FLAG_SINGLEMATCH \
|
| HS_FLAG_SINGLEMATCH \
|
||||||
| HS_FLAG_ALLOWEMPTY \
|
| HS_FLAG_ALLOWEMPTY \
|
||||||
| HS_FLAG_SOM_LEFTMOST)
|
| HS_FLAG_SOM_LEFTMOST \
|
||||||
|
| HS_FLAG_COMBINATION \
|
||||||
|
| HS_FLAG_QUIET)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
@ -106,7 +106,7 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
|
|||||||
if (!l) {
|
if (!l) {
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
assert(l <= 32);
|
assert(l <= 16);
|
||||||
|
|
||||||
DEBUG_PRINTF("d %zu\n", d - buf);
|
DEBUG_PRINTF("d %zu\n", d - buf);
|
||||||
m128 v = zeroes128();
|
m128 v = zeroes128();
|
||||||
|
@ -207,6 +207,10 @@ void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
|
|||||||
assert(contains(src_slots, slot_id));
|
assert(contains(src_slots, slot_id));
|
||||||
|
|
||||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||||
|
if (!vmin) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
cfg[e].vars.push_back(vmin);
|
cfg[e].vars.push_back(vmin);
|
||||||
final_var = vmin.get();
|
final_var = vmin.get();
|
||||||
|
|
||||||
@ -318,6 +322,10 @@ void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
|
|||||||
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
|
||||||
} else {
|
} else {
|
||||||
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
|
||||||
|
if (!vmin) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
cfg[e].vars.push_back(vmin);
|
cfg[e].vars.push_back(vmin);
|
||||||
final_var = vmin.get();
|
final_var = vmin.get();
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -85,6 +85,18 @@ namespace ue2 {
|
|||||||
*/
|
*/
|
||||||
static constexpr u32 NO_STATE = ~0;
|
static constexpr u32 NO_STATE = ~0;
|
||||||
|
|
||||||
|
/* Maximum number of states taken as a small NFA */
|
||||||
|
static constexpr u32 MAX_SMALL_NFA_STATES = 64;
|
||||||
|
|
||||||
|
/* Maximum bounded repeat upper bound to consider as a fast NFA */
|
||||||
|
static constexpr u64a MAX_REPEAT_SIZE = 200;
|
||||||
|
|
||||||
|
/* Maximum bounded repeat char reach size to consider as a fast NFA */
|
||||||
|
static constexpr u32 MAX_REPEAT_CHAR_REACH = 26;
|
||||||
|
|
||||||
|
/* Minimum bounded repeat trigger distance to consider as a fast NFA */
|
||||||
|
static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct precalcAccel {
|
struct precalcAccel {
|
||||||
@ -1910,7 +1922,8 @@ struct Factory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap,
|
void writeExceptions(const build_info &args,
|
||||||
|
const map<ExceptionProto, vector<u32>> &exceptionMap,
|
||||||
const vector<u32> &repeatOffsets, implNFA_t *limex,
|
const vector<u32> &repeatOffsets, implNFA_t *limex,
|
||||||
const u32 exceptionsOffset,
|
const u32 exceptionsOffset,
|
||||||
const u32 reportListOffset) {
|
const u32 reportListOffset) {
|
||||||
@ -1962,6 +1975,59 @@ struct Factory {
|
|||||||
|
|
||||||
limex->exceptionOffset = exceptionsOffset;
|
limex->exceptionOffset = exceptionsOffset;
|
||||||
limex->exceptionCount = ecount;
|
limex->exceptionCount = ecount;
|
||||||
|
|
||||||
|
if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
|
||||||
|
const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
|
||||||
|
u8 *shufMask = (u8 *)&limex->exceptionShufMask;
|
||||||
|
u8 *bitMask = (u8 *)&limex->exceptionBitMask;
|
||||||
|
u8 *andMask = (u8 *)&limex->exceptionAndMask;
|
||||||
|
|
||||||
|
u32 tot_cnt = 0;
|
||||||
|
u32 pos = 0;
|
||||||
|
bool valid = true;
|
||||||
|
size_t tot = sizeof(limex->exceptionMask);
|
||||||
|
size_t base = 0;
|
||||||
|
|
||||||
|
// We normally have up to 64 exceptions to handle,
|
||||||
|
// but treat 384 state Limex differently to simplify operations
|
||||||
|
size_t limit = 64;
|
||||||
|
if (args.num_states > 256 && args.num_states <= 384) {
|
||||||
|
limit = 48;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < tot; i++) {
|
||||||
|
if (!exceptionMask[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u32 bit_cnt = popcount32(exceptionMask[i]);
|
||||||
|
|
||||||
|
tot_cnt += bit_cnt;
|
||||||
|
if (tot_cnt > limit) {
|
||||||
|
valid = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 emsk = exceptionMask[i];
|
||||||
|
while (emsk) {
|
||||||
|
u32 t = findAndClearLSB_32(&emsk);
|
||||||
|
bitMask[pos] = 1U << t;
|
||||||
|
andMask[pos] = 1U << t;
|
||||||
|
shufMask[pos++] = i + base;
|
||||||
|
|
||||||
|
if (pos == 32 &&
|
||||||
|
(args.num_states > 128 && args.num_states <= 256)) {
|
||||||
|
base += 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Avoid matching unused bytes
|
||||||
|
for (u32 i = pos; i < 64; i++) {
|
||||||
|
bitMask[i] = 0xff;
|
||||||
|
}
|
||||||
|
if (valid) {
|
||||||
|
setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -2287,7 +2353,7 @@ struct Factory {
|
|||||||
writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
|
writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
|
||||||
repeatsOffset);
|
repeatsOffset);
|
||||||
|
|
||||||
writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset,
|
writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
|
||||||
reportListOffset);
|
reportListOffset);
|
||||||
|
|
||||||
writeLimexMasks(args, limex);
|
writeLimexMasks(args, limex);
|
||||||
@ -2422,6 +2488,68 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
|
|||||||
}
|
}
|
||||||
#endif // NDEBUG
|
#endif // NDEBUG
|
||||||
|
|
||||||
|
static
|
||||||
|
bool isFast(const build_info &args) {
|
||||||
|
const NGHolder &h = args.h;
|
||||||
|
const u32 num_states = args.num_states;
|
||||||
|
|
||||||
|
if (num_states > MAX_SMALL_NFA_STATES) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
unordered_map<NFAVertex, bool> pos_trigger;
|
||||||
|
for (u32 i = 0; i < args.repeats.size(); i++) {
|
||||||
|
const BoundedRepeatData &br = args.repeats[i];
|
||||||
|
assert(!contains(pos_trigger, br.pos_trigger));
|
||||||
|
pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Small NFA without bounded repeat should be fast.
|
||||||
|
if (pos_trigger.empty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<NFAVertex> cur;
|
||||||
|
unordered_set<NFAVertex> visited;
|
||||||
|
for (const auto &m : args.tops) {
|
||||||
|
for (NFAVertex v : m.second) {
|
||||||
|
cur.push_back(v);
|
||||||
|
visited.insert(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u8 pos_dist = 0;
|
||||||
|
while (!cur.empty()) {
|
||||||
|
vector<NFAVertex> next;
|
||||||
|
for (const auto &v : cur) {
|
||||||
|
if (contains(pos_trigger, v)) {
|
||||||
|
const CharReach &cr = h[v].char_reach;
|
||||||
|
if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const auto &w : adjacent_vertices_range(v, h)) {
|
||||||
|
if (w == v) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u32 j = args.state_ids.at(w);
|
||||||
|
if (j == NO_STATE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!contains(visited, w)) {
|
||||||
|
next.push_back(w);
|
||||||
|
visited.insert(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
swap(cur, next);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
|
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
|
||||||
u32 rv = 0;
|
u32 rv = 0;
|
||||||
@ -2442,7 +2570,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
|||||||
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||||
const map<u32, set<NFAVertex>> &tops,
|
const map<u32, set<NFAVertex>> &tops,
|
||||||
const set<NFAVertex> &zombies, bool do_accel,
|
const set<NFAVertex> &zombies, bool do_accel,
|
||||||
bool stateCompression, u32 hint,
|
bool stateCompression, bool &fast, u32 hint,
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
const u32 num_states = max_state(states) + 1;
|
const u32 num_states = max_state(states) + 1;
|
||||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||||
@ -2497,6 +2625,7 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
|||||||
if (nfa) {
|
if (nfa) {
|
||||||
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
||||||
nfa_type_name(limex_model));
|
nfa_type_name(limex_model));
|
||||||
|
fast = isFast(arg);
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -78,6 +78,7 @@ bytecode_ptr<NFA> generate(NGHolder &g,
|
|||||||
const std::set<NFAVertex> &zombies,
|
const std::set<NFAVertex> &zombies,
|
||||||
bool do_accel,
|
bool do_accel,
|
||||||
bool stateCompression,
|
bool stateCompression,
|
||||||
|
bool &fast,
|
||||||
u32 hint,
|
u32 hint,
|
||||||
const CompileContext &cc);
|
const CompileContext &cc);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -47,6 +47,8 @@
|
|||||||
#define AND_STATE JOIN(and_, STATE_T)
|
#define AND_STATE JOIN(and_, STATE_T)
|
||||||
#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
|
#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
|
||||||
#define OR_STATE JOIN(or_, STATE_T)
|
#define OR_STATE JOIN(or_, STATE_T)
|
||||||
|
#define EXPAND_STATE JOIN(expand_, STATE_T)
|
||||||
|
#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
|
||||||
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
|
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
|
||||||
#define EXCEPTION_T JOIN(struct NFAException, SIZE)
|
#define EXCEPTION_T JOIN(struct NFAException, SIZE)
|
||||||
#define CONTEXT_T JOIN(NFAContext, SIZE)
|
#define CONTEXT_T JOIN(NFAContext, SIZE)
|
||||||
@ -208,7 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
|
|||||||
/** \brief Process all of the exceptions associated with the states in the \a
|
/** \brief Process all of the exceptions associated with the states in the \a
|
||||||
* estate. */
|
* estate. */
|
||||||
static really_inline
|
static really_inline
|
||||||
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
|
||||||
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
||||||
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
|
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
|
||||||
assert(diffmask > 0); // guaranteed by caller macro
|
assert(diffmask > 0); // guaranteed by caller macro
|
||||||
@ -233,6 +235,72 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
ctx->local_succ = ZERO_STATE;
|
ctx->local_succ = ZERO_STATE;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
struct proto_cache new_cache = {0, NULL};
|
||||||
|
enum CacheResult cacheable = CACHE_RESULT;
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI) && SIZE > 64
|
||||||
|
if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) {
|
||||||
|
m512 emask = EXPAND_STATE(*STATE_ARG_P);
|
||||||
|
emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask);
|
||||||
|
emask = and512(emask, load_m512(&limex->exceptionAndMask));
|
||||||
|
u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask));
|
||||||
|
|
||||||
|
do {
|
||||||
|
u32 bit = FIND_AND_CLEAR_FN(&word);
|
||||||
|
const EXCEPTION_T *e = &exceptions[bit];
|
||||||
|
|
||||||
|
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
|
||||||
|
#ifndef BIG_MODEL
|
||||||
|
&local_succ,
|
||||||
|
#endif
|
||||||
|
limex, offset, ctx, &new_cache, &cacheable,
|
||||||
|
in_rev, flags)) {
|
||||||
|
return PE_RV_HALT;
|
||||||
|
}
|
||||||
|
} while (word);
|
||||||
|
} else {
|
||||||
|
// A copy of the estate as an array of GPR-sized chunks.
|
||||||
|
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
|
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
|
#ifdef ESTATE_ON_STACK
|
||||||
|
memcpy(chunks, &estate, sizeof(STATE_T));
|
||||||
|
#else
|
||||||
|
memcpy(chunks, estatep, sizeof(STATE_T));
|
||||||
|
#endif
|
||||||
|
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
||||||
|
|
||||||
|
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
|
base_index[0] = 0;
|
||||||
|
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
||||||
|
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
u32 t = findAndClearLSB_32(&diffmask);
|
||||||
|
#ifdef ARCH_64_BIT
|
||||||
|
t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
|
||||||
|
#endif
|
||||||
|
assert(t < ARRAY_LENGTH(chunks));
|
||||||
|
CHUNK_T word = chunks[t];
|
||||||
|
assert(word != 0);
|
||||||
|
do {
|
||||||
|
u32 bit = FIND_AND_CLEAR_FN(&word);
|
||||||
|
u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
|
||||||
|
u32 idx = local_index + base_index[t];
|
||||||
|
const EXCEPTION_T *e = &exceptions[idx];
|
||||||
|
|
||||||
|
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
|
||||||
|
#ifndef BIG_MODEL
|
||||||
|
&local_succ,
|
||||||
|
#endif
|
||||||
|
limex, offset, ctx, &new_cache, &cacheable,
|
||||||
|
in_rev, flags)) {
|
||||||
|
return PE_RV_HALT;
|
||||||
|
}
|
||||||
|
} while (word);
|
||||||
|
} while (diffmask);
|
||||||
|
}
|
||||||
|
#else
|
||||||
// A copy of the estate as an array of GPR-sized chunks.
|
// A copy of the estate as an array of GPR-sized chunks.
|
||||||
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
@ -243,9 +311,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
#endif
|
#endif
|
||||||
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
||||||
|
|
||||||
struct proto_cache new_cache = {0, NULL};
|
|
||||||
enum CacheResult cacheable = CACHE_RESULT;
|
|
||||||
|
|
||||||
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
base_index[0] = 0;
|
base_index[0] = 0;
|
||||||
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
|
||||||
@ -276,6 +341,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
}
|
}
|
||||||
} while (word);
|
} while (word);
|
||||||
} while (diffmask);
|
} while (diffmask);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef BIG_MODEL
|
#ifndef BIG_MODEL
|
||||||
*succ = OR_STATE(*succ, local_succ);
|
*succ = OR_STATE(*succ, local_succ);
|
||||||
@ -307,6 +373,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
#undef AND_STATE
|
#undef AND_STATE
|
||||||
#undef EQ_STATE
|
#undef EQ_STATE
|
||||||
#undef OR_STATE
|
#undef OR_STATE
|
||||||
|
#undef EXPAND_STATE
|
||||||
|
#undef SHUFFLE_BYTE_STATE
|
||||||
#undef TESTBIT_STATE
|
#undef TESTBIT_STATE
|
||||||
#undef PE_FN
|
#undef PE_FN
|
||||||
#undef RUN_EXCEPTION_FN
|
#undef RUN_EXCEPTION_FN
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -86,6 +86,7 @@
|
|||||||
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
||||||
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
||||||
#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */
|
#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */
|
||||||
|
#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */
|
||||||
|
|
||||||
enum LimExTrigger {
|
enum LimExTrigger {
|
||||||
LIMEX_TRIGGER_NONE = 0,
|
LIMEX_TRIGGER_NONE = 0,
|
||||||
@ -157,6 +158,9 @@ struct LimExNFA##size { \
|
|||||||
u_##size shift[MAX_SHIFT_COUNT]; \
|
u_##size shift[MAX_SHIFT_COUNT]; \
|
||||||
u32 shiftCount; /**< number of shift masks used */ \
|
u32 shiftCount; /**< number of shift masks used */ \
|
||||||
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
|
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
|
||||||
|
m512 exceptionShufMask; /**< exception byte shuffle mask */ \
|
||||||
|
m512 exceptionBitMask; /**< exception bit mask */ \
|
||||||
|
m512 exceptionAndMask; /**< exception and mask */ \
|
||||||
};
|
};
|
||||||
|
|
||||||
CREATE_NFA_LIMEX(32)
|
CREATE_NFA_LIMEX(32)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2018, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -1082,7 +1082,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
|||||||
// Use the daddy already set for this state so long as it isn't already
|
// Use the daddy already set for this state so long as it isn't already
|
||||||
// a Sherman state.
|
// a Sherman state.
|
||||||
dstate_id_t daddy = currState.daddy;
|
dstate_id_t daddy = currState.daddy;
|
||||||
if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
|
if (info.is_widestate(daddy)) {
|
||||||
|
return;
|
||||||
|
} else if (!info.is_sherman(daddy)) {
|
||||||
hinted.insert(currState.daddy);
|
hinted.insert(currState.daddy);
|
||||||
} else {
|
} else {
|
||||||
// Fall back to granddaddy, which has already been processed (due
|
// Fall back to granddaddy, which has already been processed (due
|
||||||
@ -1477,6 +1479,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
|||||||
|
|
||||||
bytecode_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
if (!using8bit) {
|
if (!using8bit) {
|
||||||
|
// Wide state optimization
|
||||||
if (cc.grey.allowWideStates && strat.getType() == McClellan
|
if (cc.grey.allowWideStates && strat.getType() == McClellan
|
||||||
&& !is_triggered(raw.kind)) {
|
&& !is_triggered(raw.kind)) {
|
||||||
find_wide_state(info);
|
find_wide_state(info);
|
||||||
@ -1486,19 +1489,22 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
|||||||
bool any_cyclic_near_anchored_state
|
bool any_cyclic_near_anchored_state
|
||||||
= is_cyclic_near(raw, raw.start_anchored);
|
= is_cyclic_near(raw, raw.start_anchored);
|
||||||
|
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
// Sherman optimization
|
||||||
if (info.is_widestate(i)) {
|
if (info.impl_alpha_size > 16) {
|
||||||
continue;
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
|
if (info.is_widestate(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
find_better_daddy(info, i, using8bit,
|
||||||
|
any_cyclic_near_anchored_state,
|
||||||
|
trust_daddy_states, cc.grey);
|
||||||
|
total_daddy += info.extra[i].daddytaken;
|
||||||
}
|
}
|
||||||
find_better_daddy(info, i, using8bit,
|
|
||||||
any_cyclic_near_anchored_state,
|
|
||||||
trust_daddy_states, cc.grey);
|
|
||||||
total_daddy += info.extra[i].daddytaken;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
info.size() * info.impl_alpha_size, info.size(),
|
info.size() * info.impl_alpha_size, info.size(),
|
||||||
info.impl_alpha_size);
|
info.impl_alpha_size);
|
||||||
|
}
|
||||||
|
|
||||||
nfa = mcclellanCompile16(info, cc, accel_states);
|
nfa = mcclellanCompile16(info, cc, accel_states);
|
||||||
} else {
|
} else {
|
||||||
|
1333
src/nfa/mcsheng.c
1333
src/nfa/mcsheng.c
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -80,5 +80,78 @@ char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
|
|||||||
|
|
||||||
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
|
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
|
||||||
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
/* 64-8 bit Sheng-McClellan hybrid */
|
||||||
|
char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
|
||||||
|
#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
|
||||||
|
/* 64-16 bit Sheng-McClellan hybrid */
|
||||||
|
char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#else // !HAVE_AVX512VBMI
|
||||||
|
#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL
|
||||||
|
|
||||||
|
#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL
|
||||||
|
|
||||||
|
#endif //end of HAVE_AVX512VBMI
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -64,7 +64,6 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/range/adaptor/map.hpp>
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -244,6 +243,106 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
mstate_aux *getAux64(NFA *n, dstate_id_t i) {
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n);
|
||||||
|
mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
|
||||||
|
|
||||||
|
mstate_aux *aux = aux_base + i;
|
||||||
|
assert((const char *)aux < (const char *)n + m->length);
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void createShuffleMasks64(mcsheng64 *m, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
||||||
|
DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
|
||||||
|
assert(sheng_end > DEAD_STATE + 1);
|
||||||
|
assert(sheng_end <= sizeof(m512) + 1);
|
||||||
|
vector<array<u8, sizeof(m512)>> masks;
|
||||||
|
masks.resize(info.alpha_size);
|
||||||
|
/* -1 to avoid wasting a slot as we do not include dead state */
|
||||||
|
vector<dstate_id_t> raw_ids;
|
||||||
|
raw_ids.resize(sheng_end - 1);
|
||||||
|
for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
|
||||||
|
assert(info.implId(s)); /* should not map to DEAD_STATE */
|
||||||
|
if (info.is_sheng(s)) {
|
||||||
|
raw_ids[info.extra[s].sheng_id] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < info.alpha_size; i++) {
|
||||||
|
if (i == info.alpha_remap[TOP]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto &mask = masks[i];
|
||||||
|
assert(sizeof(mask) == sizeof(m512));
|
||||||
|
mask.fill(0);
|
||||||
|
|
||||||
|
for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
|
||||||
|
dstate_id_t raw_id = raw_ids[sheng_id];
|
||||||
|
dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
|
||||||
|
if (next_id == DEAD_STATE) {
|
||||||
|
next_id = sheng_end - 1;
|
||||||
|
} else if (next_id < sheng_end) {
|
||||||
|
next_id--;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
|
||||||
|
mask[sheng_id] = verify_u8(next_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
|
assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
|
||||||
|
memcpy((u8 *)&m->sheng_succ_masks[i],
|
||||||
|
(u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512));
|
||||||
|
}
|
||||||
|
m->sheng_end = sheng_end;
|
||||||
|
m->sheng_accel_limit = sheng_end - 1;
|
||||||
|
|
||||||
|
for (dstate_id_t s : raw_ids) {
|
||||||
|
if (contains(accel_escape_info, s)) {
|
||||||
|
LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void populateBasicInfo64(size_t state_size, const dfa_info &info,
|
||||||
|
u32 total_size, u32 aux_offset, u32 accel_offset,
|
||||||
|
u32 accel_count, ReportID arb, bool single, NFA *nfa) {
|
||||||
|
assert(state_size == sizeof(u16) || state_size == sizeof(u8));
|
||||||
|
|
||||||
|
nfa->length = total_size;
|
||||||
|
nfa->nPositions = info.states.size();
|
||||||
|
|
||||||
|
nfa->scratchStateSize = verify_u32(state_size);
|
||||||
|
nfa->streamStateSize = verify_u32(state_size);
|
||||||
|
|
||||||
|
if (state_size == sizeof(u8)) {
|
||||||
|
nfa->type = MCSHENG_64_NFA_8;
|
||||||
|
} else {
|
||||||
|
nfa->type = MCSHENG_64_NFA_16;
|
||||||
|
}
|
||||||
|
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
for (u32 i = 0; i < 256; i++) {
|
||||||
|
m->remap[i] = verify_u8(info.alpha_remap[i]);
|
||||||
|
}
|
||||||
|
m->alphaShift = info.getAlphaShift();
|
||||||
|
m->length = total_size;
|
||||||
|
m->aux_offset = aux_offset;
|
||||||
|
m->accel_offset = accel_offset;
|
||||||
|
m->arb_report = arb;
|
||||||
|
m->state_count = verify_u16(info.size());
|
||||||
|
m->start_anchored = info.implId(info.raw.start_anchored);
|
||||||
|
m->start_floating = info.implId(info.raw.start_floating);
|
||||||
|
m->has_accel = accel_count ? 1 : 0;
|
||||||
|
|
||||||
|
if (single) {
|
||||||
|
m->flags |= MCSHENG_FLAG_SINGLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
size_t calcShermanRegionSize(const dfa_info &info) {
|
size_t calcShermanRegionSize(const dfa_info &info) {
|
||||||
size_t rv = 0;
|
size_t rv = 0;
|
||||||
@ -272,7 +371,7 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
|
|||||||
/* returns false on error */
|
/* returns false on error */
|
||||||
static
|
static
|
||||||
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
|
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
|
||||||
dstate_id_t *sherman_base) {
|
dstate_id_t *sherman_base) {
|
||||||
info.states[0].impl_id = 0; /* dead is always 0 */
|
info.states[0].impl_id = 0; /* dead is always 0 */
|
||||||
|
|
||||||
vector<dstate_id_t> norm;
|
vector<dstate_id_t> norm;
|
||||||
@ -382,6 +481,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define MAX_SHENG_STATES 16
|
#define MAX_SHENG_STATES 16
|
||||||
|
#define MAX_SHENG64_STATES 64
|
||||||
#define MAX_SHENG_LEAKINESS 0.05
|
#define MAX_SHENG_LEAKINESS 0.05
|
||||||
|
|
||||||
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
|
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
|
||||||
@ -435,7 +535,8 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
|
|||||||
|
|
||||||
static
|
static
|
||||||
dstate_id_t find_sheng_states(dfa_info &info,
|
dstate_id_t find_sheng_states(dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
|
size_t max_sheng_states) {
|
||||||
RdfaGraph g(info.raw);
|
RdfaGraph g(info.raw);
|
||||||
auto cyclics = find_vertices_in_cycles(g);
|
auto cyclics = find_vertices_in_cycles(g);
|
||||||
|
|
||||||
@ -470,7 +571,7 @@ dstate_id_t find_sheng_states(dfa_info &info,
|
|||||||
flat_set<dstate_id_t> considered = { DEAD_STATE };
|
flat_set<dstate_id_t> considered = { DEAD_STATE };
|
||||||
bool seen_back_edge = false;
|
bool seen_back_edge = false;
|
||||||
while (!to_consider.empty()
|
while (!to_consider.empty()
|
||||||
&& sheng_states.size() < MAX_SHENG_STATES) {
|
&& sheng_states.size() < max_sheng_states) {
|
||||||
auto v = to_consider.front();
|
auto v = to_consider.front();
|
||||||
to_consider.pop_front();
|
to_consider.pop_front();
|
||||||
if (!considered.insert(g[v].index).second) {
|
if (!considered.insert(g[v].index).second) {
|
||||||
@ -616,6 +717,80 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_aux_info64(NFA *nfa, const dfa_info &info,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
|
u32 accel_offset, UNUSED u32 accel_end_offset,
|
||||||
|
const vector<u32> &reports,
|
||||||
|
const vector<u32> &reports_eod,
|
||||||
|
u32 report_base_offset,
|
||||||
|
const raw_report_info &ri) {
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
|
||||||
|
vector<u32> reportOffsets;
|
||||||
|
|
||||||
|
ri.fillReportLists(nfa, report_base_offset, reportOffsets);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
|
u16 impl_id = info.implId(i);
|
||||||
|
mstate_aux *this_aux = getAux64(nfa, impl_id);
|
||||||
|
|
||||||
|
fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
|
||||||
|
if (contains(accel_escape_info, i)) {
|
||||||
|
this_aux->accel_offset = accel_offset;
|
||||||
|
accel_offset += info.strat.accelSize();
|
||||||
|
assert(accel_offset <= accel_end_offset);
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
info.strat.buildAccel(i, accel_escape_info.at(i),
|
||||||
|
(void *)((char *)m + this_aux->accel_offset));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) {
|
||||||
|
mstate_aux *aux = getAux64(nfa, target_impl_id);
|
||||||
|
u16 flags = 0;
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
flags |= ACCEPT_FLAG;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accel_offset) {
|
||||||
|
flags |= ACCEL_FLAG;
|
||||||
|
}
|
||||||
|
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end,
|
||||||
|
UNUSED dstate_id_t sherman_base) {
|
||||||
|
u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64));
|
||||||
|
|
||||||
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
if (!info.is_normal(i)) {
|
||||||
|
assert(info.implId(i) < sheng_end || info.is_sherman(i));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(info.implId(i) < sherman_base);
|
||||||
|
u16 normal_id = verify_u16(info.implId(i) - sheng_end);
|
||||||
|
|
||||||
|
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
dstate_id_t raw_succ = info.states[i].next[s];
|
||||||
|
u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];
|
||||||
|
|
||||||
|
entry = info.implId(raw_succ);
|
||||||
|
entry |= get_edge_flags64(nfa, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define MAX_SHERMAN_LIST_LEN 8
|
#define MAX_SHERMAN_LIST_LEN 8
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -842,17 +1017,20 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
|
|
||||||
assert(info.getAlphaShift() <= 8);
|
assert(info.getAlphaShift() <= 8);
|
||||||
|
|
||||||
u16 total_daddy = 0;
|
// Sherman optimization
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
if (info.impl_alpha_size > 16) {
|
||||||
find_better_daddy(info, i,
|
u16 total_daddy = 0;
|
||||||
is_cyclic_near(info.raw, info.raw.start_anchored),
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
grey);
|
find_better_daddy(info, i,
|
||||||
total_daddy += info.extra[i].daddytaken;
|
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||||
}
|
grey);
|
||||||
|
total_daddy += info.extra[i].daddytaken;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
info.size() * info.impl_alpha_size, info.size(),
|
info.size() * info.impl_alpha_size, info.size(),
|
||||||
info.impl_alpha_size);
|
info.impl_alpha_size);
|
||||||
|
}
|
||||||
|
|
||||||
u16 sherman_limit;
|
u16 sherman_limit;
|
||||||
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||||
@ -931,6 +1109,160 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
||||||
|
char *nfa_base = (char *)nfa;
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa);
|
||||||
|
char *sherman_table = nfa_base + m->sherman_offset;
|
||||||
|
|
||||||
|
assert(ISALIGNED_16(sherman_table));
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
if (!info.is_sherman(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u16 fs = verify_u16(info.implId(i));
|
||||||
|
DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);
|
||||||
|
|
||||||
|
assert(fs >= sherman_limit);
|
||||||
|
|
||||||
|
char *curr_sherman_entry
|
||||||
|
= sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
|
||||||
|
assert(curr_sherman_entry <= nfa_base + m->length);
|
||||||
|
|
||||||
|
u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
|
||||||
|
assert(len <= 9);
|
||||||
|
dstate_id_t d = info.states[i].daddy;
|
||||||
|
|
||||||
|
*(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
|
||||||
|
*(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
|
||||||
|
*(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
|
||||||
|
u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
|
||||||
|
|
||||||
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
|
*(chars++) = (u8)s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
|
||||||
|
for (u16 s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
if (info.states[i].next[s] != info.states[d].next[s]) {
|
||||||
|
DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
|
||||||
|
info.implId(d),
|
||||||
|
info.implId(info.states[i].next[s]));
|
||||||
|
u16 entry_val = info.implId(info.states[i].next[s]);
|
||||||
|
entry_val |= get_edge_flags64(nfa, entry_val);
|
||||||
|
unaligned_store_u16((u8 *)states++, entry_val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme>&accel_escape_info,
|
||||||
|
const Grey &grey) {
|
||||||
|
DEBUG_PRINTF("building mcsheng 64-16\n");
|
||||||
|
|
||||||
|
vector<u32> reports; /* index in ri for the appropriate report list */
|
||||||
|
vector<u32> reports_eod; /* as above */
|
||||||
|
ReportID arb;
|
||||||
|
u8 single;
|
||||||
|
|
||||||
|
assert(info.getAlphaShift() <= 8);
|
||||||
|
|
||||||
|
// Sherman optimization
|
||||||
|
if (info.impl_alpha_size > 16) {
|
||||||
|
u16 total_daddy = 0;
|
||||||
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
|
find_better_daddy(info, i,
|
||||||
|
is_cyclic_near(info.raw, info.raw.start_anchored),
|
||||||
|
grey);
|
||||||
|
total_daddy += info.extra[i].daddytaken;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||||
|
info.size() * info.impl_alpha_size, info.size(),
|
||||||
|
info.impl_alpha_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 sherman_limit;
|
||||||
|
if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
|
||||||
|
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
|
||||||
|
info.size());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
u16 count_real_states = sherman_limit - sheng_end;
|
||||||
|
|
||||||
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
|
|
||||||
|
size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
|
||||||
|
* count_real_states;
|
||||||
|
|
||||||
|
size_t aux_size = sizeof(mstate_aux) * info.size();
|
||||||
|
|
||||||
|
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
|
||||||
|
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
|
||||||
|
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
|
||||||
|
+ ri->getReportListSize(), 32);
|
||||||
|
size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
|
||||||
|
size_t sherman_size = calcShermanRegionSize(info);
|
||||||
|
|
||||||
|
size_t total_size = sherman_offset + sherman_size;
|
||||||
|
|
||||||
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
|
populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
|
createShuffleMasks64(m, info, sheng_end, accel_escape_info);
|
||||||
|
|
||||||
|
/* copy in the mc header information */
|
||||||
|
m->sherman_offset = sherman_offset;
|
||||||
|
m->sherman_end = total_size;
|
||||||
|
m->sherman_limit = sherman_limit;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
|
||||||
|
count_real_states, info.size());
|
||||||
|
|
||||||
|
fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
|
||||||
|
sherman_offset - sizeof(NFA), reports, reports_eod,
|
||||||
|
aux_offset + aux_size, *ri);
|
||||||
|
|
||||||
|
fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit);
|
||||||
|
|
||||||
|
fill_in_sherman64(nfa.get(), info, sherman_limit);
|
||||||
|
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info,
|
||||||
|
dstate_id_t sheng_end) {
|
||||||
|
u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64);
|
||||||
|
|
||||||
|
u8 alphaShift = info.getAlphaShift();
|
||||||
|
assert(alphaShift <= 8);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < info.size(); i++) {
|
||||||
|
assert(!info.is_sherman(i));
|
||||||
|
if (!info.is_normal(i)) {
|
||||||
|
assert(info.implId(i) < sheng_end);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u8 normal_id = verify_u8(info.implId(i) - sheng_end);
|
||||||
|
|
||||||
|
for (size_t s = 0; s < info.impl_alpha_size; s++) {
|
||||||
|
dstate_id_t raw_succ = info.states[i].next[s];
|
||||||
|
succ_table[((size_t)normal_id << alphaShift) + s]
|
||||||
|
= info.implId(raw_succ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
|
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
|
||||||
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
@ -1028,6 +1360,58 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end,
|
||||||
|
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
||||||
|
DEBUG_PRINTF("building mcsheng 64-8\n");
|
||||||
|
|
||||||
|
vector<u32> reports;
|
||||||
|
vector<u32> reports_eod;
|
||||||
|
ReportID arb;
|
||||||
|
u8 single;
|
||||||
|
|
||||||
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
|
|
||||||
|
size_t normal_count = info.size() - sheng_end;
|
||||||
|
|
||||||
|
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
|
||||||
|
size_t aux_size = sizeof(mstate_aux) * info.size();
|
||||||
|
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size);
|
||||||
|
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
|
||||||
|
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
|
||||||
|
+ ri->getReportListSize(), 32);
|
||||||
|
size_t total_size = accel_offset + accel_size;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("aux_size %zu\n", aux_size);
|
||||||
|
DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
|
||||||
|
DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
|
||||||
|
DEBUG_PRINTF("accel_size %zu\n", accel_size);
|
||||||
|
DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
|
||||||
|
DEBUG_PRINTF("total_size %zu\n", total_size);
|
||||||
|
|
||||||
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
|
mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
|
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||||
|
&m->accept_limit_8);
|
||||||
|
|
||||||
|
populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset,
|
||||||
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
|
createShuffleMasks64(m, info, sheng_end, accel_escape_info);
|
||||||
|
|
||||||
|
fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset,
|
||||||
|
total_size - sizeof(NFA), reports, reports_eod,
|
||||||
|
aux_offset + aux_size, *ri);
|
||||||
|
|
||||||
|
fill_in_succ_table_64_8(nfa.get(), info, sheng_end);
|
||||||
|
DEBUG_PRINTF("rl size %zu\n", ri->size());
|
||||||
|
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowMcSheng) {
|
if (!cc.grey.allowMcSheng) {
|
||||||
@ -1047,19 +1431,83 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
|
|
||||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
= info.strat.getAccelInfo(cc.grey);
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
auto old_states = info.states;
|
||||||
|
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES);
|
||||||
|
|
||||||
dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
|
|
||||||
if (sheng_end <= DEAD_STATE + 1) {
|
if (sheng_end <= DEAD_STATE + 1) {
|
||||||
|
info.states = old_states;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
|
|
||||||
if (!using8bit) {
|
if (!using8bit) {
|
||||||
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
||||||
} else {
|
} else {
|
||||||
nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
|
nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!nfa) {
|
||||||
|
info.states = old_states;
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_eod_reports) {
|
||||||
|
nfa->flags |= NFA_ACCEPTS_EOD;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("compile done\n");
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm) {
|
||||||
|
if (!cc.grey.allowMcSheng) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
|
DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
mcclellan_build_strat mbs(raw, rm, false);
|
||||||
|
dfa_info info(mbs);
|
||||||
|
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
|
||||||
|
|
||||||
|
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
|
||||||
|
* mode with our semantics */
|
||||||
|
raw.stripExtraEodReports();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_eod_reports = raw.hasEodReports();
|
||||||
|
|
||||||
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
bool using64state = false; /*default flag*/
|
||||||
|
dstate_id_t sheng_end64;
|
||||||
|
sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES);
|
||||||
|
|
||||||
|
if (sheng_end64 <= DEAD_STATE + 1) {
|
||||||
|
return nullptr;
|
||||||
|
} else {
|
||||||
|
using64state = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> nfa;
|
||||||
|
|
||||||
|
if (using64state) {
|
||||||
|
assert((sheng_end64 > 17) && (sheng_end64 <= 65));
|
||||||
|
if (!using8bit) {
|
||||||
|
nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey);
|
||||||
|
} else {
|
||||||
|
assert(using8bit);
|
||||||
|
nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info);
|
||||||
|
assert(nfa);
|
||||||
|
assert(nfa->type == MCSHENG_64_NFA_8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!nfa) {
|
if (!nfa) {
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,8 @@ struct raw_dfa;
|
|||||||
|
|
||||||
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm);
|
const ReportManager &rm);
|
||||||
|
bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm);
|
||||||
bool has_accel_mcsheng(const NFA *nfa);
|
bool has_accel_mcsheng(const NFA *nfa);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,3 +41,15 @@ const u64a mcsheng_pext_mask[8] = {
|
|||||||
0x00ff00000000000f,
|
0x00ff00000000000f,
|
||||||
0xff0000000000000f,
|
0xff0000000000000f,
|
||||||
};
|
};
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
/* 64-bit bit-selection masks for the 64-state sheng variant. Entry i (1..7)
 * has the low six bits set (0x3f) plus all eight bits of byte i; entry 0 is
 * an unused placeholder.
 * NOTE(review): the name suggests these are PEXT operands - confirm against
 * the runtime code, which is not visible here. */
const u64a mcsheng64_pext_mask[8] = {
|
||||||
|
0, /* dummy */
|
||||||
|
0x000000000000ff3f,
|
||||||
|
0x0000000000ff003f,
|
||||||
|
0x00000000ff00003f,
|
||||||
|
0x000000ff0000003f,
|
||||||
|
0x0000ff000000003f,
|
||||||
|
0x00ff00000000003f,
|
||||||
|
0xff0000000000003f,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -174,6 +174,124 @@ void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
const mstate_aux *getAux64(const NFA *n, dstate_id_t i) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(n);
|
||||||
|
auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset);
|
||||||
|
|
||||||
|
const mstate_aux *aux = aux_base + i;
|
||||||
|
|
||||||
|
assert((const char *)aux < (const char *)n + m->length);
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void next_states64(const NFA *n, u16 s, u16 *t) {
|
||||||
|
const mcsheng64 *m = (const mcsheng64 *)getImplNfa(n);
|
||||||
|
const mstate_aux *aux = getAux64(n, s);
|
||||||
|
const u32 as = m->alphaShift;
|
||||||
|
assert(s != DEAD_STATE);
|
||||||
|
|
||||||
|
if (s < m->sheng_end) {
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u8 sheng_s = s - 1;
|
||||||
|
auto trans_for_c = (const char *)&m->sheng_succ_masks[c];
|
||||||
|
assert(sheng_s < sizeof(m512));
|
||||||
|
u8 raw_succ = trans_for_c[sheng_s];
|
||||||
|
if (raw_succ == m->sheng_end - 1) {
|
||||||
|
t[c] = DEAD_STATE;
|
||||||
|
} else if (raw_succ < m->sheng_end) {
|
||||||
|
t[c] = raw_succ + 1;
|
||||||
|
} else {
|
||||||
|
t[c] = raw_succ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (n->type == MCSHENG_64_NFA_8) {
|
||||||
|
const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng64));
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u32 normal_id = s - m->sheng_end;
|
||||||
|
t[c] = succ_table[(normal_id << as) + m->remap[c]];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
u16 base_s = s;
|
||||||
|
const char *winfo_base = (const char *)n + m->sherman_offset;
|
||||||
|
const char *state_base
|
||||||
|
= winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);
|
||||||
|
|
||||||
|
if (s >= m->sherman_limit) {
|
||||||
|
base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
|
||||||
|
assert(base_s >= m->sheng_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
const u16 *succ_table = (const u16 *)((const char *)m
|
||||||
|
+ sizeof(mcsheng64));
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
u32 normal_id = base_s - m->sheng_end;
|
||||||
|
t[c] = succ_table[(normal_id << as) + m->remap[c]];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s >= m->sherman_limit) {
|
||||||
|
UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
|
||||||
|
assert(type == SHERMAN_STATE);
|
||||||
|
u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
|
||||||
|
const char *chars = state_base + SHERMAN_CHARS_OFFSET;
|
||||||
|
const u16 *states = (const u16 *)(state_base
|
||||||
|
+ SHERMAN_STATES_OFFSET(len));
|
||||||
|
|
||||||
|
for (u8 i = 0; i < len; i++) {
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
if (m->remap[c] == chars[i]) {
|
||||||
|
t[c] = unaligned_load_u16((const u8*)&states[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u16 c = 0; c < N_CHARS; c++) {
|
||||||
|
t[c] &= STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
t[TOP] = aux->top & STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeEdge64(FILE *f, const mcsheng64 *m, const u16 *t, u16 i) {
|
||||||
|
for (u16 s = 0; s < N_CHARS; s++) {
|
||||||
|
if (!t[s]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 ss;
|
||||||
|
for (ss = 0; ss < s; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ss != s) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
CharReach reach;
|
||||||
|
for (ss = s; ss < 256; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
reach.set(ss);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "%u -> %u [ ", i, t[s]);
|
||||||
|
if (i < m->sheng_end && t[s] < m->sheng_end) {
|
||||||
|
fprintf(f, "color = red, fontcolor = red ");
|
||||||
|
}
|
||||||
|
fprintf(f, "label = \"");
|
||||||
|
describeClass(f, reach, 5, CC_OUT_DOT);
|
||||||
|
|
||||||
|
fprintf(f, "\" ];\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
|
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
|
||||||
switch(accel->accel_type) {
|
switch(accel->accel_type) {
|
||||||
@ -256,6 +374,66 @@ void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeNode64(const NFA *n, const mcsheng64 *m, u16 i, FILE *f) {
|
||||||
|
const mstate_aux *aux = getAux64(n, i);
|
||||||
|
|
||||||
|
bool isSherman = m->sherman_limit && i >= m->sherman_limit;
|
||||||
|
|
||||||
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
|
"label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");
|
||||||
|
|
||||||
|
if (aux->accel_offset) {
|
||||||
|
dumpAccelDot(f, i, (const union AccelAux *)
|
||||||
|
((const char *)m + aux->accel_offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i && i < m->sheng_end) {
|
||||||
|
fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->top && aux->top != i) {
|
||||||
|
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||||
|
aux->top);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == m->start_anchored) {
|
||||||
|
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == m->start_floating) {
|
||||||
|
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSherman) {
|
||||||
|
const char *winfo_base = (const char *)n + m->sherman_offset;
|
||||||
|
const char *state_base
|
||||||
|
= winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
|
||||||
|
assert(state_base < (const char *)m + m->length - sizeof(NFA));
|
||||||
|
UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
|
||||||
|
assert(type == SHERMAN_STATE);
|
||||||
|
fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
|
||||||
|
u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
|
||||||
|
if (daddy) {
|
||||||
|
fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
|
||||||
|
i, daddy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i && i < m->sheng_end) {
|
||||||
|
fprintf(f, "subgraph cluster_sheng { %u } \n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpDotPreambleDfa(FILE *f) {
|
void dumpDotPreambleDfa(FILE *f) {
|
||||||
dumpDotPreamble(f);
|
dumpDotPreamble(f);
|
||||||
@ -392,6 +570,131 @@ void dump_text_8(const NFA *nfa, FILE *f) {
|
|||||||
dumpTextReverse(nfa, f);
|
dumpTextReverse(nfa, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_dot_16(const NFA *nfa, FILE *f) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < m->state_count; i++) {
|
||||||
|
describeNode64(nfa, m, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
next_states64(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge64(f, m, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_dot_8(const NFA *nfa, FILE *f) {
|
||||||
|
auto m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < m->state_count; i++) {
|
||||||
|
describeNode64(nfa, m, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
next_states64(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge64(f, m, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpAccelMasks64(FILE *f, const mcsheng64 *m, const mstate_aux *aux) {
|
||||||
|
fprintf(f, "\n");
|
||||||
|
fprintf(f, "Acceleration\n");
|
||||||
|
fprintf(f, "------------\n");
|
||||||
|
|
||||||
|
for (u16 i = 0; i < m->state_count; i++) {
|
||||||
|
if (!aux[i].accel_offset) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
|
||||||
|
fprintf(f, "%05hu ", i);
|
||||||
|
dumpAccelInfo(f, *accel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeAlphabet64(FILE *f, const mcsheng64 *m) {
|
||||||
|
map<u8, CharReach> rev;
|
||||||
|
|
||||||
|
for (u16 i = 0; i < N_CHARS; i++) {
|
||||||
|
rev[m->remap[i]].clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u16 i = 0; i < N_CHARS; i++) {
|
||||||
|
rev[m->remap[i]].set(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
map<u8, CharReach>::const_iterator it;
|
||||||
|
fprintf(f, "\nAlphabet\n");
|
||||||
|
for (it = rev.begin(); it != rev.end(); ++it) {
|
||||||
|
fprintf(f, "%3hhu: ", it->first);
|
||||||
|
describeClass(f, it->second, 10240, CC_OUT_TEXT);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpCommonHeader64(FILE *f, const mcsheng64 *m) {
|
||||||
|
fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
|
||||||
|
m->state_count, m->length);
|
||||||
|
fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
|
||||||
|
m->start_floating);
|
||||||
|
fprintf(f, "single accept: %d, has_accel: %d\n",
|
||||||
|
!!(int)m->flags & MCSHENG_FLAG_SINGLE, m->has_accel);
|
||||||
|
fprintf(f, "sheng_end: %hu\n", m->sheng_end);
|
||||||
|
fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_text_8(const NFA *nfa, FILE *f) {
|
||||||
|
auto m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
|
||||||
|
|
||||||
|
fprintf(f, "mcsheng 64-8\n");
|
||||||
|
dumpCommonHeader64(f, m);
|
||||||
|
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
|
||||||
|
m->accept_limit_8);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
describeAlphabet64(f, m);
|
||||||
|
dumpAccelMasks64(f, m, aux);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
dumpTextReverse(nfa, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump64_text_16(const NFA *nfa, FILE *f) {
|
||||||
|
auto *m = (const mcsheng64 *)getImplNfa(nfa);
|
||||||
|
auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
|
||||||
|
|
||||||
|
fprintf(f, "mcsheng 64-16\n");
|
||||||
|
dumpCommonHeader64(f, m);
|
||||||
|
fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
|
||||||
|
(int)m->sherman_end);
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
describeAlphabet64(f, m);
|
||||||
|
dumpAccelMasks64(f, m, aux);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
dumpTextReverse(nfa, f);
|
||||||
|
}
|
||||||
|
|
||||||
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
|
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
|
||||||
assert(nfa->type == MCSHENG_NFA_16);
|
assert(nfa->type == MCSHENG_NFA_16);
|
||||||
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
|
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
|
||||||
@ -404,4 +707,16 @@ void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
|
|||||||
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \brief Dump a 16-bit McSheng64 NFA to "<base>.txt" (text form) and
 * "<base>.dot" (graph form). */
void nfaExecMcSheng64_16_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
    assert(nfa->type == MCSHENG_64_NFA_16);

    const string text_path = base + ".txt";
    dump64_text_16(nfa, StdioFile(text_path, "w"));

    const string dot_path = base + ".dot";
    dump64_dot_16(nfa, StdioFile(dot_path, "w"));
}
|
||||||
|
|
||||||
|
/** \brief Dump an 8-bit McSheng64 NFA to "<base>.txt" (text form) and
 * "<base>.dot" (graph form). */
void nfaExecMcSheng64_8_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
    assert(nfa->type == MCSHENG_64_NFA_8);

    const string text_path = base + ".txt";
    dump64_text_8(nfa, StdioFile(text_path, "w"));

    const string dot_path = base + ".dot";
    dump64_dot_8(nfa, StdioFile(dot_path, "w"));
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,7 +42,8 @@ namespace ue2 {
|
|||||||
|
|
||||||
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
|
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
|
||||||
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
|
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecMcSheng64_8_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecMcSheng64_16_dump(const struct NFA *nfa, const std::string &base);
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif // DUMP_SUPPORT
|
#endif // DUMP_SUPPORT
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2018, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -92,4 +92,33 @@ struct mcsheng {
|
|||||||
* representing the data from a u64a. */
|
* representing the data from a u64a. */
|
||||||
extern const u64a mcsheng_pext_mask[8];
|
extern const u64a mcsheng_pext_mask[8];
|
||||||
|
|
||||||
|
/** Header of the McSheng64 implementation; the successor table follows it
 * directly in the bytecode. Field order and types are part of the bytecode
 * layout and must not be changed. */
struct mcsheng64 {
|
||||||
|
u16 state_count; /**< total number of states */
|
||||||
|
u32 length; /**< length of dfa in bytes */
|
||||||
|
u16 start_anchored; /**< anchored start state */
|
||||||
|
u16 start_floating; /**< floating start state */
|
||||||
|
u32 aux_offset; /**< offset of the aux structures relative to the start of
|
||||||
|
* the nfa structure */
|
||||||
|
u32 sherman_offset; /**< offset of the array of fixed-size sherman state
|
||||||
|
* entries, relative to the start of the
|
||||||
|
* nfa structure */
|
||||||
|
u32 sherman_end; /**< offset of the end of the state_info structures
|
||||||
|
* relative to the start of the nfa structure */
|
||||||
|
u16 sheng_end; /**< first non-sheng state */
|
||||||
|
u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
|
||||||
|
* internal sheng ids */
|
||||||
|
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
|
||||||
|
u16 accept_limit_8; /**< 8 bit, lowest accept state */
|
||||||
|
u16 sherman_limit; /**< lowest sherman state */
|
||||||
|
u8 alphaShift; /**< shift applied to a normal state id to index its row in the successor table */
|
||||||
|
u8 flags; /**< flag bits, e.g. MCSHENG_FLAG_SINGLE */
|
||||||
|
u8 has_accel; /**< 1 iff there are any accel plans */
|
||||||
|
u8 remap[256]; /**< remaps characters to a smaller alphabet */
|
||||||
|
ReportID arb_report; /**< one of the accepts that this dfa may raise */
|
||||||
|
u32 accel_offset; /**< offset of accel structures from start of McClellan */
|
||||||
|
m512 sheng_succ_masks[N_CHARS]; /**< per input character: successor of each sheng state, one byte per internal sheng id */
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const u64a mcsheng64_pext_mask[8];
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -76,6 +76,10 @@
|
|||||||
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
|
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
|
||||||
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
|
||||||
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -181,7 +181,6 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
|
|||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
|
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
|
||||||
static const u32 stateAlign = \
|
static const u32 stateAlign = \
|
||||||
MAX(mlt_align, alignof(RepeatControl)); \
|
MAX(mlt_align, alignof(RepeatControl)); \
|
||||||
static const bool fast = mlt_size <= 64; \
|
|
||||||
}; \
|
}; \
|
||||||
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
|
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
|
||||||
= has_accel_limex<LimExNFA##mlt_size>; \
|
= has_accel_limex<LimExNFA##mlt_size>; \
|
||||||
@ -210,7 +209,6 @@ template<> struct NFATraits<MCCLELLAN_NFA_8> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 1;
|
static const u32 stateAlign = 1;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -226,7 +224,6 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 2;
|
static const u32 stateAlign = 2;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -242,7 +239,6 @@ template<> struct NFATraits<GOUGH_NFA_8> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -258,7 +254,6 @@ template<> struct NFATraits<GOUGH_NFA_16> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -274,7 +269,6 @@ template<> struct NFATraits<MPV_NFA> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -290,7 +284,6 @@ template<> struct NFATraits<CASTLE_NFA> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -306,7 +299,6 @@ template<> struct NFATraits<LBR_NFA_DOT> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -322,7 +314,6 @@ template<> struct NFATraits<LBR_NFA_VERM> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -338,7 +329,6 @@ template<> struct NFATraits<LBR_NFA_NVERM> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -354,7 +344,6 @@ template<> struct NFATraits<LBR_NFA_SHUF> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -370,7 +359,6 @@ template<> struct NFATraits<LBR_NFA_TRUF> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -386,7 +374,6 @@ template<> struct NFATraits<SHENG_NFA> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 1;
|
static const u32 stateAlign = 1;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -402,7 +389,6 @@ template<> struct NFATraits<TAMARAMA_NFA> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 64;
|
static const u32 stateAlign = 64;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -418,7 +404,6 @@ template<> struct NFATraits<MCSHENG_NFA_8> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 1;
|
static const u32 stateAlign = 1;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -434,7 +419,6 @@ template<> struct NFATraits<MCSHENG_NFA_16> {
|
|||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 2;
|
static const u32 stateAlign = 2;
|
||||||
static const bool fast = true;
|
|
||||||
static const nfa_dispatch_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
static const nfa_dispatch_fn has_repeats;
|
static const nfa_dispatch_fn has_repeats;
|
||||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
@ -446,6 +430,65 @@ const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts =
|
|||||||
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
|
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<SHENG_NFA_32> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 1;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_accel = has_accel_sheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<SHENG_NFA_32>::name = "Sheng 32";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<SHENG_NFA_64> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 1;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_accel = has_accel_sheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<MCSHENG_64_NFA_8> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 1;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<MCSHENG_64_NFA_16> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 2;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16";
|
||||||
|
#endif
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
@ -473,20 +516,6 @@ u32 state_alignment(const NFA &nfa) {
|
|||||||
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr);
|
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
template<NFAEngineType t>
|
|
||||||
struct getFastness {
|
|
||||||
static u32 call(void *) {
|
|
||||||
return NFATraits<t>::fast;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_fast(const NFA &nfa) {
|
|
||||||
NFAEngineType t = (NFAEngineType)nfa.type;
|
|
||||||
return DISPATCH_BY_NFA_TYPE(t, getFastness, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
template<NFAEngineType t>
|
template<NFAEngineType t>
|
||||||
struct is_limex {
|
struct is_limex {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -47,10 +47,6 @@ std::string describe(const NFA &nfa);
|
|||||||
// For a given NFA, retrieve the alignment required by its uncompressed state.
|
// For a given NFA, retrieve the alignment required by its uncompressed state.
|
||||||
u32 state_alignment(const NFA &nfa);
|
u32 state_alignment(const NFA &nfa);
|
||||||
|
|
||||||
/* returns true if the nfa is considered 'fast'. TODO: work out what we mean by
|
|
||||||
* fast. */
|
|
||||||
bool is_fast(const NFA &n);
|
|
||||||
|
|
||||||
bool has_bounded_repeats_other_than_firsts(const NFA &n);
|
bool has_bounded_repeats_other_than_firsts(const NFA &n);
|
||||||
|
|
||||||
bool has_bounded_repeats(const NFA &n);
|
bool has_bounded_repeats(const NFA &n);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -81,6 +81,10 @@ namespace ue2 {
|
|||||||
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
|
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
|
||||||
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
|
||||||
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \
|
||||||
|
DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -72,6 +72,10 @@ enum NFAEngineType {
|
|||||||
TAMARAMA_NFA, /**< magic nfa container */
|
TAMARAMA_NFA, /**< magic nfa container */
|
||||||
MCSHENG_NFA_8, /**< magic pseudo nfa */
|
MCSHENG_NFA_8, /**< magic pseudo nfa */
|
||||||
MCSHENG_NFA_16, /**< magic pseudo nfa */
|
MCSHENG_NFA_16, /**< magic pseudo nfa */
|
||||||
|
SHENG_NFA_32, /**< magic pseudo nfa */
|
||||||
|
SHENG_NFA_64, /**< magic pseudo nfa */
|
||||||
|
MCSHENG_64_NFA_8, /**< magic pseudo nfa */
|
||||||
|
MCSHENG_64_NFA_16, /**< magic pseudo nfa */
|
||||||
/** \brief bogus NFA - not used */
|
/** \brief bogus NFA - not used */
|
||||||
INVALID_NFA
|
INVALID_NFA
|
||||||
};
|
};
|
||||||
@ -148,7 +152,8 @@ static really_inline int isMcClellanType(u8 t) {
|
|||||||
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
|
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
|
||||||
* DFA. */
|
* DFA. */
|
||||||
static really_inline int isShengMcClellanType(u8 t) {
|
static really_inline int isShengMcClellanType(u8 t) {
|
||||||
return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16;
|
return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 ||
|
||||||
|
t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
|
/** \brief True if the given type (from NFA::type) is a Gough DFA. */
|
||||||
@ -157,10 +162,25 @@ static really_inline int isGoughType(u8 t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
|
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
|
||||||
static really_inline int isShengType(u8 t) {
|
static really_inline int isSheng16Type(u8 t) {
|
||||||
return t == SHENG_NFA;
|
return t == SHENG_NFA;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */
|
||||||
|
static really_inline int isSheng32Type(u8 t) {
|
||||||
|
return t == SHENG_NFA_32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */
|
||||||
|
static really_inline int isSheng64Type(u8 t) {
|
||||||
|
return t == SHENG_NFA_64;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */
|
||||||
|
static really_inline int isShengType(u8 t) {
|
||||||
|
return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
|
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
|
||||||
* Sheng DFA.
|
* Sheng DFA.
|
||||||
|
@ -124,6 +124,10 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
|
|||||||
const depth &repeatMax, u32 minPeriod)
|
const depth &repeatMax, u32 minPeriod)
|
||||||
: stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
|
: stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
|
||||||
patchSize(0), encodingSize(0), patchesOffset(0) {
|
patchSize(0), encodingSize(0), patchesOffset(0) {
|
||||||
|
if (type == REPEAT_SPARSE_OPTIMAL_P && minPeriod == 0) {
|
||||||
|
assert(0);
|
||||||
|
throw std::domain_error("SPARSE_OPTIMAL_P must have non-zero minPeriod.");
|
||||||
|
}
|
||||||
assert(repeatMin <= repeatMax);
|
assert(repeatMin <= repeatMax);
|
||||||
assert(repeatMax.is_reachable());
|
assert(repeatMax.is_reachable());
|
||||||
assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);
|
assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);
|
||||||
|
1206
src/nfa/sheng.c
1206
src/nfa/sheng.c
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -58,4 +58,86 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q);
|
|||||||
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
size_t length, NfaCallback cb, void *context);
|
size_t length, NfaCallback cb, void *context);
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
|
||||||
|
char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q);
|
||||||
|
char nfaExecSheng32_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecSheng32_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
|
||||||
|
char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
|
size_t length, NfaCallback cb, void *context);
|
||||||
|
|
||||||
|
#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
|
||||||
|
char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
|
char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
|
char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report,
|
||||||
|
struct mq *q);
|
||||||
|
char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
|
char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q);
|
||||||
|
char nfaExecSheng64_queueCompressState(const struct NFA *nfa,
|
||||||
|
const struct mq *q, s64a loc);
|
||||||
|
char nfaExecSheng64_expandState(const struct NFA *nfa, void *dest,
|
||||||
|
const void *src, u64a offset, u8 key);
|
||||||
|
char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||||
|
void *state, u8 key);
|
||||||
|
char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
const char *streamState, u64a offset,
|
||||||
|
NfaCallback callback, void *context);
|
||||||
|
char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
|
|
||||||
|
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
|
size_t length, NfaCallback cb, void *context);
|
||||||
|
|
||||||
|
#else // !HAVE_AVX512VBMI
|
||||||
|
|
||||||
|
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecSheng32_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_reportCurrent NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng32_B NFA_API_NO_IMPL
|
||||||
|
|
||||||
|
#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL
|
||||||
|
#define nfaExecSheng64_Q NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_Q2 NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_QR NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_inAnyAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_queueInitState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_queueCompressState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_expandState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_initCompressedState NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
|
||||||
|
#define nfaExecSheng64_B NFA_API_NO_IMPL
|
||||||
|
#endif // end of HAVE_AVX512VBMI
|
||||||
|
|
||||||
#endif /* SHENG_H_ */
|
#endif /* SHENG_H_ */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -52,6 +52,43 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
|||||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static really_inline
|
||||||
|
u8 isDeadState32(const u8 a) {
|
||||||
|
return a & SHENG32_STATE_DEAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isAcceptState32(const u8 a) {
|
||||||
|
return a & SHENG32_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isAccelState32(const u8 a) {
|
||||||
|
return a & SHENG32_STATE_ACCEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||||
|
return (a | b | c | d) & (SHENG32_STATE_FLAG_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isDeadState64(const u8 a) {
|
||||||
|
return a & SHENG64_STATE_DEAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isAcceptState64(const u8 a) {
|
||||||
|
return a & SHENG64_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||||
|
return (a | b | c | d) & (SHENG64_STATE_FLAG_MASK);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* these functions should be optimized out, used by NO_MATCHES mode */
|
/* these functions should be optimized out, used by NO_MATCHES mode */
|
||||||
static really_inline
|
static really_inline
|
||||||
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
|
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
|
||||||
@ -71,66 +108,162 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define SHENG_IMPL sheng_cod
|
#define SHENG_IMPL sheng_cod
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_cod
|
||||||
|
#define DEAD_FUNC32 isDeadState32
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_cod
|
||||||
|
#define DEAD_FUNC64 isDeadState64
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* callback output, can't die */
|
/* callback output, can't die */
|
||||||
#define SHENG_IMPL sheng_co
|
#define SHENG_IMPL sheng_co
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_co
|
||||||
|
#define DEAD_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_co
|
||||||
|
#define DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can die */
|
/* stop at match, can die */
|
||||||
#define SHENG_IMPL sheng_samd
|
#define SHENG_IMPL sheng_samd
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_samd
|
||||||
|
#define DEAD_FUNC32 isDeadState32
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_samd
|
||||||
|
#define DEAD_FUNC64 isDeadState64
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can't die */
|
/* stop at match, can't die */
|
||||||
#define SHENG_IMPL sheng_sam
|
#define SHENG_IMPL sheng_sam
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_sam
|
||||||
|
#define DEAD_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_sam
|
||||||
|
#define DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* no match, can die */
|
/* no match, can die */
|
||||||
#define SHENG_IMPL sheng_nmd
|
#define SHENG_IMPL sheng_nmd
|
||||||
#define DEAD_FUNC isDeadState
|
#define DEAD_FUNC isDeadState
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_nmd
|
||||||
|
#define DEAD_FUNC32 isDeadState32
|
||||||
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
|
#define SHENG64_IMPL sheng64_nmd
|
||||||
|
#define DEAD_FUNC64 isDeadState64
|
||||||
|
#define ACCEPT_FUNC64 dummyFunc
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* no match, can't die */
|
/* no match, can't die */
|
||||||
#define SHENG_IMPL sheng_nm
|
#define SHENG_IMPL sheng_nm
|
||||||
#define DEAD_FUNC dummyFunc
|
#define DEAD_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_nm
|
||||||
|
#define DEAD_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
|
#define SHENG64_IMPL sheng64_nm
|
||||||
|
#define DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 dummyFunc
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl.h"
|
#include "sheng_impl.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
#undef DEAD_FUNC
|
#undef DEAD_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef DEAD_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -144,6 +277,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_coda
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 isAccelState32
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -153,6 +296,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* callback output, can die, not accelerated */
|
/* callback output, can die, not accelerated */
|
||||||
@ -163,6 +316,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_cod
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_4_cod
|
||||||
|
#define INTERESTING_FUNC64 hasInterestingStates64
|
||||||
|
#define INNER_DEAD_FUNC64 isDeadState64
|
||||||
|
#define OUTER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -172,6 +339,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* callback output, can't die, accelerated */
|
/* callback output, can't die, accelerated */
|
||||||
@ -182,6 +363,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_coa
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 isAccelState32
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -191,6 +382,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* callback output, can't die, not accelerated */
|
/* callback output, can't die, not accelerated */
|
||||||
@ -201,6 +402,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_co
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_4_co
|
||||||
|
#define INTERESTING_FUNC64 hasInterestingStates64
|
||||||
|
#define INNER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -210,6 +425,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can die, accelerated */
|
/* stop at match, can die, accelerated */
|
||||||
@ -220,6 +449,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_samda
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 isAccelState32
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -229,6 +468,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can die, not accelerated */
|
/* stop at match, can die, not accelerated */
|
||||||
@ -239,6 +488,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_samd
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_4_samd
|
||||||
|
#define INTERESTING_FUNC64 hasInterestingStates64
|
||||||
|
#define INNER_DEAD_FUNC64 isDeadState64
|
||||||
|
#define OUTER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -248,6 +511,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can't die, accelerated */
|
/* stop at match, can't die, accelerated */
|
||||||
@ -258,6 +535,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC isAccelState
|
#define INNER_ACCEL_FUNC isAccelState
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_sama
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 isAccelState32
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -267,6 +554,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* stop at match, can't die, not accelerated */
|
/* stop at match, can't die, not accelerated */
|
||||||
@ -277,6 +574,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC isAcceptState
|
#define ACCEPT_FUNC isAcceptState
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_sam
|
||||||
|
#define INTERESTING_FUNC32 hasInterestingStates32
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 isAcceptState32
|
||||||
|
#define SHENG64_IMPL sheng64_4_sam
|
||||||
|
#define INTERESTING_FUNC64 hasInterestingStates64
|
||||||
|
#define INNER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 isAcceptState64
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 1
|
#define STOP_AT_MATCH 1
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -286,6 +597,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* no-match have interesting func as dummy, and die/accel checks are outer */
|
/* no-match have interesting func as dummy, and die/accel checks are outer */
|
||||||
@ -298,6 +623,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC isAccelState
|
#define OUTER_ACCEL_FUNC isAccelState
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_nmda
|
||||||
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 isAccelState32
|
||||||
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
|
#define NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -307,6 +642,16 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* no match, can die, not accelerated */
|
/* no match, can die, not accelerated */
|
||||||
@ -317,6 +662,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_nmd
|
||||||
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 isDeadState32
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
|
#define SHENG64_IMPL sheng64_4_nmd
|
||||||
|
#define INTERESTING_FUNC64 dummyFunc4
|
||||||
|
#define INNER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC64 isDeadState64
|
||||||
|
#define ACCEPT_FUNC64 dummyFunc
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -326,6 +685,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
/* there is no performance benefit in accelerating a no-match case that can't
|
/* there is no performance benefit in accelerating a no-match case that can't
|
||||||
@ -339,6 +712,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#define INNER_ACCEL_FUNC dummyFunc
|
#define INNER_ACCEL_FUNC dummyFunc
|
||||||
#define OUTER_ACCEL_FUNC dummyFunc
|
#define OUTER_ACCEL_FUNC dummyFunc
|
||||||
#define ACCEPT_FUNC dummyFunc
|
#define ACCEPT_FUNC dummyFunc
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define SHENG32_IMPL sheng32_4_nm
|
||||||
|
#define INTERESTING_FUNC32 dummyFunc4
|
||||||
|
#define INNER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC32 dummyFunc
|
||||||
|
#define INNER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define OUTER_ACCEL_FUNC32 dummyFunc
|
||||||
|
#define ACCEPT_FUNC32 dummyFunc
|
||||||
|
#define SHENG64_IMPL sheng64_4_nm
|
||||||
|
#define INTERESTING_FUNC64 dummyFunc4
|
||||||
|
#define INNER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define OUTER_DEAD_FUNC64 dummyFunc
|
||||||
|
#define ACCEPT_FUNC64 dummyFunc
|
||||||
|
#endif
|
||||||
#define STOP_AT_MATCH 0
|
#define STOP_AT_MATCH 0
|
||||||
#include "sheng_impl4.h"
|
#include "sheng_impl4.h"
|
||||||
#undef SHENG_IMPL
|
#undef SHENG_IMPL
|
||||||
@ -348,6 +735,20 @@ u8 dummyFunc(UNUSED const u8 a) {
|
|||||||
#undef INNER_ACCEL_FUNC
|
#undef INNER_ACCEL_FUNC
|
||||||
#undef OUTER_ACCEL_FUNC
|
#undef OUTER_ACCEL_FUNC
|
||||||
#undef ACCEPT_FUNC
|
#undef ACCEPT_FUNC
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#undef SHENG32_IMPL
|
||||||
|
#undef INTERESTING_FUNC32
|
||||||
|
#undef INNER_DEAD_FUNC32
|
||||||
|
#undef OUTER_DEAD_FUNC32
|
||||||
|
#undef INNER_ACCEL_FUNC32
|
||||||
|
#undef OUTER_ACCEL_FUNC32
|
||||||
|
#undef ACCEPT_FUNC32
|
||||||
|
#undef SHENG64_IMPL
|
||||||
|
#undef INTERESTING_FUNC64
|
||||||
|
#undef INNER_DEAD_FUNC64
|
||||||
|
#undef OUTER_DEAD_FUNC64
|
||||||
|
#undef ACCEPT_FUNC64
|
||||||
|
#endif
|
||||||
#undef STOP_AT_MATCH
|
#undef STOP_AT_MATCH
|
||||||
|
|
||||||
#endif // SHENG_DEFS_H
|
#endif // SHENG_DEFS_H
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -95,3 +95,127 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
|||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
/*
 * Byte-at-a-time scan loop for a sheng32 DFA.
 *
 * This is a macro-expanded template: the function name (SHENG32_IMPL) and the
 * DEAD_FUNC32 / ACCEPT_FUNC32 / STOP_AT_MATCH hooks must be defined by the
 * including file before this code is expanded.
 *
 * state               in/out: starting state byte; overwritten with the state
 *                     reached when the function returns MO_CONTINUE_MATCHING
 *                     after scanning, or MO_MATCHES_PENDING.
 * cb, ctxt            forwarded to fireSingleReport / fireReports32.
 * s                   supplies the per-byte successor masks (succ_masks) and
 *                     the report used in the single-report case.
 * cached_accept_state,
 * cached_accept_id    forwarded unchanged to fireReports32.
 * single              non-zero selects fireSingleReport, zero fireReports32.
 * base_offset, buf    a match on the byte at cur_buf is reported at
 *                     base_offset + (cur_buf - buf) + 1.
 * start, end          half-open range of bytes to scan.
 * scan_end            out: where scanning stopped.
 *
 * Returns MO_CONTINUE_MATCHING when the range was consumed (or the start state
 * was dead), MO_MATCHES_PENDING when a stop-at-match variant hit an accept
 * state, and MO_HALT_MATCHING when a report call returned MO_HALT_MATCHING.
 */
static really_inline
|
||||||
|
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
|
const struct sheng32 *s,
|
||||||
|
u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFA execution in state %u\n",
|
||||||
|
*state & SHENG32_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
/* A dead start state skips the scan and reports the whole range consumed. */
if (DEAD_FUNC32(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||||
|
|
||||||
|
/* The state lives in a 512-bit register; presumably set64x8 broadcasts the
 * state byte to every lane -- confirm against the SIMD helper. */
m512 cur_state = set64x8(*state);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
|
||||||
|
while (likely(cur_buf != end)) {
|
||||||
|
const u8 c = *cur_buf;
|
||||||
|
const m512 succ_mask = masks[c];
|
||||||
|
/* One transition: the successor mask chosen by the input byte is combined
 * with the current state (presumably a VPERMB byte permute -- confirm). */
cur_state = vpermb512(cur_state, succ_mask);
|
||||||
|
/* Scalar copy of the new state byte, flag bits included, for the checks
 * below. */
const u8 tmp = movd512(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
|
||||||
|
tmp & SHENG32_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
if (unlikely(ACCEPT_FUNC32(tmp))) {
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK);
|
||||||
|
/* +1: the reported offset is one past the index of the current byte
 * relative to buf. */
u64a match_offset = base_offset + (cur_buf - buf) + 1;
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
/* Stop-at-match variants save the accepting state and leave scan_end on the
 * matching byte (cur_buf is not advanced) without firing any report. */
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
/* Signed cast so the argument matches the %lli conversion. */
(s64a)(cur_buf - start));
|
||||||
|
*state = tmp;
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
/* Halting here leaves *state and *scan_end unwritten. */
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports32(s, cb, ctxt, tmp, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
/* Halting here leaves *state and *scan_end unwritten. */
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cur_buf++;
|
||||||
|
}
|
||||||
|
/* Range exhausted: publish the final state and the stop position. */
*state = movd512(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Byte-at-a-time scan loop for a sheng64 DFA.
 *
 * This is a macro-expanded template: the function name (SHENG64_IMPL) and the
 * DEAD_FUNC64 / ACCEPT_FUNC64 / STOP_AT_MATCH hooks must be defined by the
 * including file before this code is expanded.
 *
 * state               in/out: starting state byte; overwritten with the state
 *                     reached when the function returns MO_CONTINUE_MATCHING
 *                     after scanning, or MO_MATCHES_PENDING.
 * cb, ctxt            forwarded to fireSingleReport / fireReports64.
 * s                   supplies the per-byte successor masks (succ_masks) and
 *                     the report used in the single-report case.
 * cached_accept_state,
 * cached_accept_id    forwarded unchanged to fireReports64.
 * single              non-zero selects fireSingleReport, zero fireReports64.
 * base_offset, buf    a match on the byte at cur_buf is reported at
 *                     base_offset + (cur_buf - buf) + 1.
 * start, end          half-open range of bytes to scan.
 * scan_end            out: where scanning stopped.
 *
 * Returns MO_CONTINUE_MATCHING when the range was consumed (or the start state
 * was dead), MO_MATCHES_PENDING when a stop-at-match variant hit an accept
 * state, and MO_HALT_MATCHING when a report call returned MO_HALT_MATCHING.
 */
static really_inline
|
||||||
|
char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
|
const struct sheng64 *s,
|
||||||
|
u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFA execution in state %u\n",
|
||||||
|
*state & SHENG64_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
/* A dead start state skips the scan and reports the whole range consumed. */
if (DEAD_FUNC64(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||||
|
|
||||||
|
/* The state lives in a 512-bit register; presumably set64x8 broadcasts the
 * state byte to every lane -- confirm against the SIMD helper. */
m512 cur_state = set64x8(*state);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
|
||||||
|
while (likely(cur_buf != end)) {
|
||||||
|
const u8 c = *cur_buf;
|
||||||
|
const m512 succ_mask = masks[c];
|
||||||
|
/* One transition: the successor mask chosen by the input byte is combined
 * with the current state (presumably a VPERMB byte permute -- confirm). */
cur_state = vpermb512(cur_state, succ_mask);
|
||||||
|
/* Scalar copy of the new state byte, flag bits included, for the checks
 * below. */
const u8 tmp = movd512(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
|
||||||
|
tmp & SHENG64_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
if (unlikely(ACCEPT_FUNC64(tmp))) {
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK);
|
||||||
|
/* +1: the reported offset is one past the index of the current byte
 * relative to buf. */
u64a match_offset = base_offset + (cur_buf - buf) + 1;
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
/* Stop-at-match variants save the accepting state and leave scan_end on the
 * matching byte (cur_buf is not advanced) without firing any report. */
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
/* Signed cast so the argument matches the %lli conversion. */
(s64a)(cur_buf - start));
|
||||||
|
*state = tmp;
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
/* Halting here leaves *state and *scan_end unwritten. */
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports64(s, cb, ctxt, tmp, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
/* Halting here leaves *state and *scan_end unwritten. */
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cur_buf++;
|
||||||
|
}
|
||||||
|
/* Range exhausted: publish the final state and the stop position. */
*state = movd512(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -282,3 +282,430 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
|||||||
*scan_end = cur_buf;
|
*scan_end = cur_buf;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static really_inline
|
||||||
|
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
|
const struct sheng32 *s,
|
||||||
|
u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
|
||||||
|
*state & SHENG32_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
const u8 *min_accel_dist = start;
|
||||||
|
base_offset++;
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
|
||||||
|
|
||||||
|
if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ 0\n");
|
||||||
|
const union AccelAux *aaux =
|
||||||
|
get_accel32(s, *state & SHENG32_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf, end);
|
||||||
|
if (new_offset < cur_buf + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
|
||||||
|
}
|
||||||
|
if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 cur_state = set64x8(*state);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
|
||||||
|
while (likely(end - cur_buf >= 4)) {
|
||||||
|
const u8 *b1 = cur_buf;
|
||||||
|
const u8 *b2 = cur_buf + 1;
|
||||||
|
const u8 *b3 = cur_buf + 2;
|
||||||
|
const u8 *b4 = cur_buf + 3;
|
||||||
|
const u8 c1 = *b1;
|
||||||
|
const u8 c2 = *b2;
|
||||||
|
const u8 c3 = *b3;
|
||||||
|
const u8 c4 = *b4;
|
||||||
|
|
||||||
|
const m512 succ_mask1 = masks[c1];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask1);
|
||||||
|
const u8 a1 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask2 = masks[c2];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask2);
|
||||||
|
const u8 a2 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask3 = masks[c3];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask3);
|
||||||
|
const u8 a3 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask4 = masks[c4];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask4);
|
||||||
|
const u8 a4 = movd512(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
|
||||||
|
a1 & SHENG32_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK,
|
||||||
|
a2 & SHENG32_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK,
|
||||||
|
a3 & SHENG32_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK,
|
||||||
|
a4 & SHENG32_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) {
|
||||||
|
if (ACCEPT_FUNC32(a1)) {
|
||||||
|
u64a match_offset = base_offset + b1 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a1 & SHENG32_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b1 - start));
|
||||||
|
*scan_end = b1;
|
||||||
|
*state = a1;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports32(s, cb, ctxt, a1, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC32(a2)) {
|
||||||
|
u64a match_offset = base_offset + b2 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a2 & SHENG32_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b2 - start));
|
||||||
|
*scan_end = b2;
|
||||||
|
*state = a2;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports32(s, cb, ctxt, a2, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC32(a3)) {
|
||||||
|
u64a match_offset = base_offset + b3 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a3 & SHENG32_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b3 - start));
|
||||||
|
*scan_end = b3;
|
||||||
|
*state = a3;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports32(s, cb, ctxt, a3, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC32(a4)) {
|
||||||
|
u64a match_offset = base_offset + b4 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a4 & SHENG32_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b4 - start));
|
||||||
|
*scan_end = b4;
|
||||||
|
*state = a4;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports32(s, cb, ctxt, a4, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (INNER_DEAD_FUNC32(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
const union AccelAux *aaux =
|
||||||
|
get_accel32(s, a4 & SHENG32_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||||
|
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||||
|
(u64a)(new_offset - cur_buf - 4));
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (OUTER_DEAD_FUNC32(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
};
|
||||||
|
if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
const union AccelAux *aaux =
|
||||||
|
get_accel32(s, a4 & SHENG32_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||||
|
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||||
|
(u64a)(new_offset - cur_buf - 4));
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
cur_buf += 4;
|
||||||
|
}
|
||||||
|
*state = movd512(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef NO_SHENG64_IMPL
|
||||||
|
static really_inline
|
||||||
|
char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt,
|
||||||
|
const struct sheng64 *s,
|
||||||
|
u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
|
||||||
|
*state & SHENG64_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
base_offset++;
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
|
||||||
|
|
||||||
|
if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 cur_state = set64x8(*state);
|
||||||
|
const m512 *masks = s->succ_masks;
|
||||||
|
|
||||||
|
while (likely(end - cur_buf >= 4)) {
|
||||||
|
const u8 *b1 = cur_buf;
|
||||||
|
const u8 *b2 = cur_buf + 1;
|
||||||
|
const u8 *b3 = cur_buf + 2;
|
||||||
|
const u8 *b4 = cur_buf + 3;
|
||||||
|
const u8 c1 = *b1;
|
||||||
|
const u8 c2 = *b2;
|
||||||
|
const u8 c3 = *b3;
|
||||||
|
const u8 c4 = *b4;
|
||||||
|
|
||||||
|
const m512 succ_mask1 = masks[c1];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask1);
|
||||||
|
const u8 a1 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask2 = masks[c2];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask2);
|
||||||
|
const u8 a2 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask3 = masks[c3];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask3);
|
||||||
|
const u8 a3 = movd512(cur_state);
|
||||||
|
|
||||||
|
const m512 succ_mask4 = masks[c4];
|
||||||
|
cur_state = vpermb512(cur_state, succ_mask4);
|
||||||
|
const u8 a4 = movd512(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
|
||||||
|
a1 & SHENG64_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK,
|
||||||
|
a2 & SHENG64_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK,
|
||||||
|
a3 & SHENG64_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK,
|
||||||
|
a4 & SHENG64_STATE_FLAG_MASK);
|
||||||
|
|
||||||
|
if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) {
|
||||||
|
if (ACCEPT_FUNC64(a1)) {
|
||||||
|
u64a match_offset = base_offset + b1 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a1 & SHENG64_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b1 - start));
|
||||||
|
*scan_end = b1;
|
||||||
|
*state = a1;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports64(s, cb, ctxt, a1, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC64(a2)) {
|
||||||
|
u64a match_offset = base_offset + b2 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a2 & SHENG64_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b2 - start));
|
||||||
|
*scan_end = b2;
|
||||||
|
*state = a2;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports64(s, cb, ctxt, a2, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC64(a3)) {
|
||||||
|
u64a match_offset = base_offset + b3 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a3 & SHENG64_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b3 - start));
|
||||||
|
*scan_end = b3;
|
||||||
|
*state = a3;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports64(s, cb, ctxt, a3, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC64(a4)) {
|
||||||
|
u64a match_offset = base_offset + b4 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a4 & SHENG64_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b4 - start));
|
||||||
|
*scan_end = b4;
|
||||||
|
*state = a4;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports64(s, cb, ctxt, a4, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (INNER_DEAD_FUNC64(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (OUTER_DEAD_FUNC64(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
cur_buf += 4;
|
||||||
|
}
|
||||||
|
*state = movd512(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
#endif // !NO_SHENG64_IMPL
|
||||||
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,6 +38,17 @@
|
|||||||
#define SHENG_STATE_MASK 0xF
|
#define SHENG_STATE_MASK 0xF
|
||||||
#define SHENG_STATE_FLAG_MASK 0x70
|
#define SHENG_STATE_FLAG_MASK 0x70
|
||||||
|
|
||||||
|
/*
 * Sheng32 state byte: the low five bits hold the state index, the top three
 * bits are the accept / dead / accel flags.
 */
#define SHENG32_STATE_ACCEPT    0x20
#define SHENG32_STATE_DEAD      0x40
#define SHENG32_STATE_ACCEL     0x80
#define SHENG32_STATE_MASK      0x1F
#define SHENG32_STATE_FLAG_MASK \
    (SHENG32_STATE_ACCEPT | SHENG32_STATE_DEAD | SHENG32_STATE_ACCEL)

/*
 * Sheng64 state byte: the low six bits hold the state index, the top two bits
 * are the accept / dead flags. No accel flag is defined for this variant.
 */
#define SHENG64_STATE_ACCEPT    0x40
#define SHENG64_STATE_DEAD      0x80
#define SHENG64_STATE_MASK      0x3F
#define SHENG64_STATE_FLAG_MASK (SHENG64_STATE_ACCEPT | SHENG64_STATE_DEAD)
|
||||||
|
|
||||||
#define SHENG_FLAG_SINGLE_REPORT 0x1
|
#define SHENG_FLAG_SINGLE_REPORT 0x1
|
||||||
#define SHENG_FLAG_CAN_DIE 0x2
|
#define SHENG_FLAG_CAN_DIE 0x2
|
||||||
#define SHENG_FLAG_HAS_ACCEL 0x4
|
#define SHENG_FLAG_HAS_ACCEL 0x4
|
||||||
@ -67,4 +78,30 @@ struct sheng {
|
|||||||
ReportID report;
|
ReportID report;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct sheng32 {
|
||||||
|
m512 succ_masks[256];
|
||||||
|
u32 length;
|
||||||
|
u32 aux_offset;
|
||||||
|
u32 report_offset;
|
||||||
|
u32 accel_offset;
|
||||||
|
u8 n_states;
|
||||||
|
u8 anchored;
|
||||||
|
u8 floating;
|
||||||
|
u8 flags;
|
||||||
|
ReportID report;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct sheng64 {
|
||||||
|
m512 succ_masks[256];
|
||||||
|
u32 length;
|
||||||
|
u32 aux_offset;
|
||||||
|
u32 report_offset;
|
||||||
|
u32 accel_offset;
|
||||||
|
u8 n_states;
|
||||||
|
u8 anchored;
|
||||||
|
u8 floating;
|
||||||
|
u8 flags;
|
||||||
|
ReportID report;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* SHENG_INTERNAL_H_ */
|
#endif /* SHENG_INTERNAL_H_ */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -301,6 +301,28 @@ void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
|
|||||||
}
|
}
|
||||||
DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
|
DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Debug helper: prints the state indices (flag bits masked off) stored in the
 * \p sz bytes at \p buf, i.e. the sheng32 successor table for input byte
 * \p chr, as one DEBUG_PRINTF line.
 */
static really_inline
void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) {
    stringstream line;

    const u8 *const stop = buf + sz;
    for (const u8 *entry = buf; entry != stop; ++entry) {
        const int state_idx = *entry & SHENG32_STATE_MASK;
        line.width(2); // width resets after every insertion, so set it each time
        line << state_idx << " ";
    }
    DEBUG_PRINTF("chr %3u: %s\n", chr, line.str().c_str());
}
|
||||||
|
|
||||||
|
/**
 * Debug helper: prints the state indices (flag bits masked off) stored in the
 * \p sz bytes at \p buf, i.e. the sheng64 successor table for input byte
 * \p chr, as one DEBUG_PRINTF line.
 */
static really_inline
void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) {
    stringstream line;

    const u8 *const stop = buf + sz;
    for (const u8 *entry = buf; entry != stop; ++entry) {
        const int state_idx = *entry & SHENG64_STATE_MASK;
        line.width(2); // width resets after every insertion, so set it each time
        line << state_idx << " ";
    }
    DEBUG_PRINTF("chr %3u: %s\n", chr, line.str().c_str());
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -311,9 +333,16 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
u8 getShengState(dstate &state, dfa_info &info,
|
u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
u8 getShengState<sheng>(dstate &state, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
u8 s = state.impl_id;
|
u8 s = state.impl_id;
|
||||||
if (!state.reports.empty()) {
|
if (!state.reports.empty()) {
|
||||||
s |= SHENG_STATE_ACCEPT;
|
s |= SHENG_STATE_ACCEPT;
|
||||||
@ -327,11 +356,41 @@ u8 getShengState(dstate &state, dfa_info &info,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
u8 getShengState<sheng32>(dstate &state, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
u8 s = state.impl_id;
|
||||||
|
if (!state.reports.empty()) {
|
||||||
|
s |= SHENG32_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
if (info.isDead(state)) {
|
||||||
|
s |= SHENG32_STATE_DEAD;
|
||||||
|
}
|
||||||
|
if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
|
||||||
|
s |= SHENG32_STATE_ACCEL;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
u8 getShengState<sheng64>(dstate &state, dfa_info &info,
|
||||||
|
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
u8 s = state.impl_id;
|
||||||
|
if (!state.reports.empty()) {
|
||||||
|
s |= SHENG64_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
if (info.isDead(state)) {
|
||||||
|
s |= SHENG64_STATE_DEAD;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void fillAccelAux(struct NFA *n, dfa_info &info,
|
void fillAccelAux(struct NFA *n, dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
DEBUG_PRINTF("Filling accel aux structures\n");
|
DEBUG_PRINTF("Filling accel aux structures\n");
|
||||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
T *s = (T *)getMutableImplNfa(n);
|
||||||
u32 offset = s->accel_offset;
|
u32 offset = s->accel_offset;
|
||||||
|
|
||||||
for (dstate_id_t i = 0; i < info.size(); i++) {
|
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||||
@ -349,11 +408,21 @@ void fillAccelAux(struct NFA *n, dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void populateBasicInfo(struct NFA *n, dfa_info &info,
|
void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
|
UNUSED map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
u32 report_offset, u32 accel_offset, u32 total_size,
|
UNUSED u32 aux_offset, UNUSED u32 report_offset,
|
||||||
u32 dfa_size) {
|
UNUSED u32 accel_offset, UNUSED u32 total_size,
|
||||||
|
UNUSED u32 dfa_size) {
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
|
u32 aux_offset, u32 report_offset,
|
||||||
|
u32 accel_offset, u32 total_size,
|
||||||
|
u32 dfa_size) {
|
||||||
n->length = total_size;
|
n->length = total_size;
|
||||||
n->scratchStateSize = 1;
|
n->scratchStateSize = 1;
|
||||||
n->streamStateSize = 1;
|
n->streamStateSize = 1;
|
||||||
@ -369,14 +438,65 @@ void populateBasicInfo(struct NFA *n, dfa_info &info,
|
|||||||
s->length = dfa_size;
|
s->length = dfa_size;
|
||||||
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||||
|
|
||||||
s->anchored = getShengState(info.anchored, info, accelInfo);
|
s->anchored = getShengState<sheng>(info.anchored, info, accelInfo);
|
||||||
s->floating = getShengState(info.floating, info, accelInfo);
|
s->floating = getShengState<sheng>(info.floating, info, accelInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
|
u32 aux_offset, u32 report_offset,
|
||||||
|
u32 accel_offset, u32 total_size,
|
||||||
|
u32 dfa_size) {
|
||||||
|
n->length = total_size;
|
||||||
|
n->scratchStateSize = 1;
|
||||||
|
n->streamStateSize = 1;
|
||||||
|
n->nPositions = info.size();
|
||||||
|
n->type = SHENG_NFA_32;
|
||||||
|
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
|
||||||
|
|
||||||
|
sheng32 *s = (sheng32 *)getMutableImplNfa(n);
|
||||||
|
s->aux_offset = aux_offset;
|
||||||
|
s->report_offset = report_offset;
|
||||||
|
s->accel_offset = accel_offset;
|
||||||
|
s->n_states = info.size();
|
||||||
|
s->length = dfa_size;
|
||||||
|
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||||
|
|
||||||
|
s->anchored = getShengState<sheng32>(info.anchored, info, accelInfo);
|
||||||
|
s->floating = getShengState<sheng32>(info.floating, info, accelInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo,
|
||||||
|
u32 aux_offset, u32 report_offset,
|
||||||
|
u32 accel_offset, u32 total_size,
|
||||||
|
u32 dfa_size) {
|
||||||
|
n->length = total_size;
|
||||||
|
n->scratchStateSize = 1;
|
||||||
|
n->streamStateSize = 1;
|
||||||
|
n->nPositions = info.size();
|
||||||
|
n->type = SHENG_NFA_64;
|
||||||
|
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
|
||||||
|
|
||||||
|
sheng64 *s = (sheng64 *)getMutableImplNfa(n);
|
||||||
|
s->aux_offset = aux_offset;
|
||||||
|
s->report_offset = report_offset;
|
||||||
|
s->accel_offset = accel_offset;
|
||||||
|
s->n_states = info.size();
|
||||||
|
s->length = dfa_size;
|
||||||
|
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||||
|
|
||||||
|
s->anchored = getShengState<sheng64>(info.anchored, info, accelInfo);
|
||||||
|
s->floating = getShengState<sheng64>(info.floating, info, accelInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
T *s = (T *)getMutableImplNfa(n);
|
||||||
u32 aux_base = s->aux_offset;
|
u32 aux_base = s->aux_offset;
|
||||||
|
|
||||||
DEBUG_PRINTF("Filling tops for state %u\n", id);
|
DEBUG_PRINTF("Filling tops for state %u\n", id);
|
||||||
@ -393,13 +513,14 @@ void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
|||||||
|
|
||||||
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
|
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
|
||||||
|
|
||||||
aux->top = getShengState(top_state, info, accelInfo);
|
aux->top = getShengState<T>(top_state, info, accelInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
||||||
vector<u32> &reports_eod, vector<u32> &report_offsets) {
|
vector<u32> &reports_eod, vector<u32> &report_offsets) {
|
||||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
T *s = (T *)getMutableImplNfa(n);
|
||||||
u32 aux_base = s->aux_offset;
|
u32 aux_base = s->aux_offset;
|
||||||
auto raw_id = info.raw_id(id);
|
auto raw_id = info.raw_id(id);
|
||||||
|
|
||||||
@ -419,37 +540,163 @@ void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
|||||||
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
|
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void fillSingleReport(NFA *n, ReportID r_id) {
|
void fillSingleReport(NFA *n, ReportID r_id) {
|
||||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
T *s = (T *)getMutableImplNfa(n);
|
||||||
|
|
||||||
DEBUG_PRINTF("Single report ID: %u\n", r_id);
|
DEBUG_PRINTF("Single report ID: %u\n", r_id);
|
||||||
s->report = r_id;
|
s->report = r_id;
|
||||||
s->flags |= SHENG_FLAG_SINGLE_REPORT;
|
s->flags |= SHENG_FLAG_SINGLE_REPORT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void createShuffleMasks(sheng *s, dfa_info &info,
|
bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info,
|
||||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
UNUSED map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool createShuffleMasks<sheng>(sheng *s, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
for (u16 chr = 0; chr < 256; chr++) {
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
u8 buf[16] = {0};
|
u8 buf[16] = {0};
|
||||||
|
|
||||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
auto &succ_state = info.next(idx, chr);
|
auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
buf[idx] = getShengState(succ_state, info, accelInfo);
|
buf[idx] = getShengState<sheng>(succ_state, info, accelInfo);
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
dumpShuffleMask(chr, buf, sizeof(buf));
|
dumpShuffleMask(chr, buf, sizeof(buf));
|
||||||
#endif
|
#endif
|
||||||
memcpy(&s->shuffle_masks[chr], buf, sizeof(m128));
|
memcpy(&s->shuffle_masks[chr], buf, sizeof(m128));
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[64] = {0};
|
||||||
|
|
||||||
|
assert(info.size() <= 32);
|
||||||
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
|
auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
|
buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo);
|
||||||
|
buf[32 + idx] = buf[idx];
|
||||||
|
}
|
||||||
|
#ifdef DEBUG
|
||||||
|
dumpShuffleMask32(chr, buf, sizeof(buf));
|
||||||
|
#endif
|
||||||
|
memcpy(&s->succ_masks[chr], buf, sizeof(m512));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[64] = {0};
|
||||||
|
|
||||||
|
assert(info.size() <= 64);
|
||||||
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
|
auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
|
if (accelInfo.find(info.raw_id(succ_state.impl_id))
|
||||||
|
!= accelInfo.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
buf[idx] = getShengState<sheng64>(succ_state, info, accelInfo);
|
||||||
|
}
|
||||||
|
#ifdef DEBUG
|
||||||
|
dumpShuffleMask64(chr, buf, sizeof(buf));
|
||||||
|
#endif
|
||||||
|
memcpy(&s->succ_masks[chr], buf, sizeof(m512));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_accel_sheng(const NFA *) {
|
bool has_accel_sheng(const NFA *) {
|
||||||
return true; /* consider the sheng region as accelerated */
|
return true; /* consider the sheng region as accelerated */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static
|
||||||
|
bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
set<dstate_id_t> *accel_states,
|
||||||
|
sheng_build_strat &strat,
|
||||||
|
dfa_info &info) {
|
||||||
|
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
|
||||||
|
* mode with our semantics */
|
||||||
|
raw.stripExtraEodReports();
|
||||||
|
}
|
||||||
|
auto accelInfo = strat.getAccelInfo(cc.grey);
|
||||||
|
|
||||||
|
// set impl_id of each dfa state
|
||||||
|
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||||
|
info[i].impl_id = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
|
||||||
|
info.anchored.impl_id, info.floating.impl_id);
|
||||||
|
|
||||||
|
u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T));
|
||||||
|
vector<u32> reports, eod_reports, report_offsets;
|
||||||
|
u8 isSingle = 0;
|
||||||
|
ReportID single_report = 0;
|
||||||
|
|
||||||
|
auto ri =
|
||||||
|
strat.gatherReports(reports, eod_reports, &isSingle, &single_report);
|
||||||
|
|
||||||
|
u32 total_aux = sizeof(sstate_aux) * info.size();
|
||||||
|
u32 total_accel = strat.accelSize() * accelInfo.size();
|
||||||
|
u32 total_reports = ri->getReportListSize();
|
||||||
|
|
||||||
|
u32 reports_offset = nfa_size + total_aux;
|
||||||
|
u32 accel_offset =
|
||||||
|
ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
|
||||||
|
u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
|
||||||
|
nfa_size, total_aux, total_reports, total_accel, total_size);
|
||||||
|
|
||||||
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
|
|
||||||
|
populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size,
|
||||||
|
reports_offset, accel_offset, total_size,
|
||||||
|
total_size - sizeof(NFA));
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Setting up aux and report structures\n");
|
||||||
|
|
||||||
|
ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
|
||||||
|
|
||||||
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
|
fillTops<T>(nfa.get(), info, idx, accelInfo);
|
||||||
|
fillAux<T>(nfa.get(), info, idx, reports, eod_reports,
|
||||||
|
report_offsets);
|
||||||
|
}
|
||||||
|
if (isSingle) {
|
||||||
|
fillSingleReport<T>(nfa.get(), single_report);
|
||||||
|
}
|
||||||
|
|
||||||
|
fillAccelAux<T>(nfa.get(), info, accelInfo);
|
||||||
|
|
||||||
|
if (accel_states) {
|
||||||
|
fillAccelOut(accelInfo, accel_states);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm, bool only_accel_init,
|
const ReportManager &rm, bool only_accel_init,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
@ -473,65 +720,75 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
|
return shengCompile_int<sheng>(raw, cc, accel_states, strat, info);
|
||||||
* mode with our semantics */
|
}
|
||||||
raw.stripExtraEodReports();
|
|
||||||
}
|
|
||||||
auto accelInfo = strat.getAccelInfo(cc.grey);
|
|
||||||
|
|
||||||
// set impl_id of each dfa state
|
bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||||
for (dstate_id_t i = 0; i < info.size(); i++) {
|
const ReportManager &rm, bool only_accel_init,
|
||||||
info[i].impl_id = i;
|
set<dstate_id_t> *accel_states) {
|
||||||
|
if (!cc.grey.allowSheng) {
|
||||||
|
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
info.anchored.impl_id, info.floating.impl_id);
|
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
|
return nullptr;
|
||||||
u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
|
|
||||||
vector<u32> reports, eod_reports, report_offsets;
|
|
||||||
u8 isSingle = 0;
|
|
||||||
ReportID single_report = 0;
|
|
||||||
|
|
||||||
auto ri =
|
|
||||||
strat.gatherReports(reports, eod_reports, &isSingle, &single_report);
|
|
||||||
|
|
||||||
u32 total_aux = sizeof(sstate_aux) * info.size();
|
|
||||||
u32 total_accel = strat.accelSize() * accelInfo.size();
|
|
||||||
u32 total_reports = ri->getReportListSize();
|
|
||||||
|
|
||||||
u32 reports_offset = nfa_size + total_aux;
|
|
||||||
u32 accel_offset =
|
|
||||||
ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
|
|
||||||
u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
|
|
||||||
nfa_size, total_aux, total_reports, total_accel, total_size);
|
|
||||||
|
|
||||||
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
|
||||||
|
|
||||||
populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset,
|
|
||||||
accel_offset, total_size, total_size - sizeof(NFA));
|
|
||||||
|
|
||||||
DEBUG_PRINTF("Setting up aux and report structures\n");
|
|
||||||
|
|
||||||
ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
|
|
||||||
|
|
||||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
|
||||||
fillTops(nfa.get(), info, idx, accelInfo);
|
|
||||||
fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets);
|
|
||||||
}
|
|
||||||
if (isSingle) {
|
|
||||||
fillSingleReport(nfa.get(), single_report);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fillAccelAux(nfa.get(), info, accelInfo);
|
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||||
|
dfa_info info(strat);
|
||||||
|
|
||||||
if (accel_states) {
|
DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
|
||||||
fillAccelOut(accelInfo, accel_states);
|
|
||||||
|
DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
|
||||||
|
raw.start_anchored, raw.start_floating);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
|
||||||
|
info.can_die ? "can" : "cannot", info.size());
|
||||||
|
assert(info.size() > 16);
|
||||||
|
if (info.size() > 32) {
|
||||||
|
DEBUG_PRINTF("Too many states\n");
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo);
|
return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm, bool only_accel_init,
|
||||||
|
set<dstate_id_t> *accel_states) {
|
||||||
|
if (!cc.grey.allowSheng) {
|
||||||
|
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!cc.target_info.has_avx512vbmi()) {
|
||||||
|
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
sheng_build_strat strat(raw, rm, only_accel_init);
|
||||||
|
dfa_info info(strat);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
|
||||||
|
raw.start_anchored, raw.start_floating);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
|
||||||
|
info.can_die ? "can" : "cannot", info.size());
|
||||||
|
assert(info.size() > 32);
|
||||||
|
if (info.size() > 64) {
|
||||||
|
DEBUG_PRINTF("Too many states\n");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
vector<dstate> old_states;
|
||||||
|
old_states = info.states;
|
||||||
|
auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info);
|
||||||
|
if (!nfa) {
|
||||||
|
info.states = old_states;
|
||||||
|
}
|
||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2018, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -71,6 +71,14 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
const ReportManager &rm, bool only_accel_init,
|
const ReportManager &rm, bool only_accel_init,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm, bool only_accel_init,
|
||||||
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
|
bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc,
|
||||||
|
const ReportManager &rm, bool only_accel_init,
|
||||||
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
struct sheng_escape_info {
|
struct sheng_escape_info {
|
||||||
CharReach outs;
|
CharReach outs;
|
||||||
CharReach outs2_single;
|
CharReach outs2_single;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -51,7 +51,7 @@ namespace ue2 {
|
|||||||
|
|
||||||
static
|
static
|
||||||
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
||||||
assert(n && isShengType(n->type));
|
assert(n && isSheng16Type(n->type));
|
||||||
|
|
||||||
const sheng *s = (const sheng *)getImplNfa(n);
|
const sheng *s = (const sheng *)getImplNfa(n);
|
||||||
const sstate_aux *aux_base =
|
const sstate_aux *aux_base =
|
||||||
@ -64,6 +64,36 @@ const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
|||||||
return aux;
|
return aux;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
const sstate_aux *get_aux32(const NFA *n, dstate_id_t i) {
|
||||||
|
assert(n && isSheng32Type(n->type));
|
||||||
|
|
||||||
|
const sheng32 *s = (const sheng32 *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux_base =
|
||||||
|
(const sstate_aux *)((const char *)n + s->aux_offset);
|
||||||
|
|
||||||
|
const sstate_aux *aux = aux_base + i;
|
||||||
|
|
||||||
|
assert((const char *)aux < (const char *)s + s->length);
|
||||||
|
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
const sstate_aux *get_aux64(const NFA *n, dstate_id_t i) {
|
||||||
|
assert(n && isSheng64Type(n->type));
|
||||||
|
|
||||||
|
const sheng64 *s = (const sheng64 *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux_base =
|
||||||
|
(const sstate_aux *)((const char *)n + s->aux_offset);
|
||||||
|
|
||||||
|
const sstate_aux *aux = aux_base + i;
|
||||||
|
|
||||||
|
assert((const char *)aux < (const char *)s + s->length);
|
||||||
|
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpHeader(FILE *f, const sheng *s) {
|
void dumpHeader(FILE *f, const sheng *s) {
|
||||||
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||||
@ -79,6 +109,36 @@ void dumpHeader(FILE *f, const sheng *s) {
|
|||||||
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpHeader32(FILE *f, const sheng32 *s) {
|
||||||
|
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||||
|
s->length);
|
||||||
|
fprintf(f, "aux base offset: %u, reports base offset: %u, "
|
||||||
|
"accel offset: %u\n",
|
||||||
|
s->aux_offset, s->report_offset, s->accel_offset);
|
||||||
|
fprintf(f, "anchored start state: %u, floating start state: %u\n",
|
||||||
|
s->anchored & SHENG32_STATE_MASK, s->floating & SHENG32_STATE_MASK);
|
||||||
|
fprintf(f, "has accel: %u can die: %u single report: %u\n",
|
||||||
|
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
|
||||||
|
!!(s->flags & SHENG_FLAG_CAN_DIE),
|
||||||
|
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpHeader64(FILE *f, const sheng64 *s) {
|
||||||
|
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||||
|
s->length);
|
||||||
|
fprintf(f, "aux base offset: %u, reports base offset: %u, "
|
||||||
|
"accel offset: %u\n",
|
||||||
|
s->aux_offset, s->report_offset, s->accel_offset);
|
||||||
|
fprintf(f, "anchored start state: %u, floating start state: %u\n",
|
||||||
|
s->anchored & SHENG64_STATE_MASK, s->floating & SHENG64_STATE_MASK);
|
||||||
|
fprintf(f, "has accel: %u can die: %u single report: %u\n",
|
||||||
|
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
|
||||||
|
!!(s->flags & SHENG_FLAG_CAN_DIE),
|
||||||
|
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
||||||
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||||
@ -87,6 +147,22 @@ void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
|||||||
aux->top & SHENG_STATE_MASK);
|
aux->top & SHENG_STATE_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpAux32(FILE *f, u32 state, const sstate_aux *aux) {
|
||||||
|
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||||
|
"accel offset: %u, top: %u\n",
|
||||||
|
state, aux->accept, aux->accept_eod, aux->accel,
|
||||||
|
aux->top & SHENG32_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpAux64(FILE *f, u32 state, const sstate_aux *aux) {
|
||||||
|
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||||
|
"accel offset: %u, top: %u\n",
|
||||||
|
state, aux->accept, aux->accept_eod, aux->accel,
|
||||||
|
aux->top & SHENG64_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpReports(FILE *f, const report_list *rl) {
|
void dumpReports(FILE *f, const report_list *rl) {
|
||||||
fprintf(f, "reports count: %u\n", rl->count);
|
fprintf(f, "reports count: %u\n", rl->count);
|
||||||
@ -115,6 +191,46 @@ void dumpMasks(FILE *f, const sheng *s) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpMasks32(FILE *f, const sheng32 *s) {
|
||||||
|
for (u32 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[64];
|
||||||
|
m512 succ_mask = s->succ_masks[chr];
|
||||||
|
memcpy(buf, &succ_mask, sizeof(m512));
|
||||||
|
|
||||||
|
fprintf(f, "%3u: ", chr);
|
||||||
|
for (u32 pos = 0; pos < 64; pos++) {
|
||||||
|
u8 c = buf[pos];
|
||||||
|
if (c & SHENG32_STATE_FLAG_MASK) {
|
||||||
|
fprintf(f, "%2u* ", c & SHENG32_STATE_MASK);
|
||||||
|
} else {
|
||||||
|
fprintf(f, "%2u ", c & SHENG32_STATE_MASK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpMasks64(FILE *f, const sheng64 *s) {
|
||||||
|
for (u32 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[64];
|
||||||
|
m512 succ_mask = s->succ_masks[chr];
|
||||||
|
memcpy(buf, &succ_mask, sizeof(m512));
|
||||||
|
|
||||||
|
fprintf(f, "%3u: ", chr);
|
||||||
|
for (u32 pos = 0; pos < 64; pos++) {
|
||||||
|
u8 c = buf[pos];
|
||||||
|
if (c & SHENG64_STATE_FLAG_MASK) {
|
||||||
|
fprintf(f, "%2u* ", c & SHENG64_STATE_MASK);
|
||||||
|
} else {
|
||||||
|
fprintf(f, "%2u ", c & SHENG64_STATE_MASK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) {
|
void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) {
|
||||||
assert(nfa->type == SHENG_NFA);
|
assert(nfa->type == SHENG_NFA);
|
||||||
@ -153,6 +269,82 @@ void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) {
|
|||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void nfaExecSheng32_dumpText(const NFA *nfa, FILE *f) {
|
||||||
|
assert(nfa->type == SHENG_NFA_32);
|
||||||
|
const sheng32 *s = (const sheng32 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
fprintf(f, "sheng32 DFA\n");
|
||||||
|
dumpHeader32(f, s);
|
||||||
|
|
||||||
|
for (u32 state = 0; state < s->n_states; state++) {
|
||||||
|
const sstate_aux *aux = get_aux32(nfa, state);
|
||||||
|
dumpAux32(f, state, aux);
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "EOD report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept_eod);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accel) {
|
||||||
|
fprintf(f, "accel:\n");
|
||||||
|
const AccelAux *accel =
|
||||||
|
(const AccelAux *)((const char *)nfa + aux->accel);
|
||||||
|
dumpAccelInfo(f, *accel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
dumpMasks32(f, s);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void nfaExecSheng64_dumpText(const NFA *nfa, FILE *f) {
|
||||||
|
assert(nfa->type == SHENG_NFA_64);
|
||||||
|
const sheng64 *s = (const sheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
fprintf(f, "sheng64 DFA\n");
|
||||||
|
dumpHeader64(f, s);
|
||||||
|
|
||||||
|
for (u32 state = 0; state < s->n_states; state++) {
|
||||||
|
const sstate_aux *aux = get_aux64(nfa, state);
|
||||||
|
dumpAux64(f, state, aux);
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "EOD report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept_eod);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accel) {
|
||||||
|
fprintf(f, "accel:\n");
|
||||||
|
const AccelAux *accel =
|
||||||
|
(const AccelAux *)((const char *)nfa + aux->accel);
|
||||||
|
dumpAccelInfo(f, *accel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
dumpMasks64(f, s);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpDotPreambleDfa(FILE *f) {
|
void dumpDotPreambleDfa(FILE *f) {
|
||||||
dumpDotPreamble(f);
|
dumpDotPreamble(f);
|
||||||
@ -163,8 +355,14 @@ void dumpDotPreambleDfa(FILE *f) {
|
|||||||
fprintf(f, "0 [style=invis];\n");
|
fprintf(f, "0 [style=invis];\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
static
|
static
|
||||||
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
void describeNode(UNUSED const NFA *n, UNUSED const T *s, UNUSED u16 i,
|
||||||
|
UNUSED FILE *f) {
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void describeNode<sheng>(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
||||||
const sstate_aux *aux = get_aux(n, i);
|
const sstate_aux *aux = get_aux(n, i);
|
||||||
|
|
||||||
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
@ -193,6 +391,66 @@ void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void describeNode<sheng32>(const NFA *n, const sheng32 *s, u16 i, FILE *f) {
|
||||||
|
const sstate_aux *aux = get_aux32(n, i);
|
||||||
|
|
||||||
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
|
"label = \"%u\" ]; \n",
|
||||||
|
i, i);
|
||||||
|
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->top && (aux->top & SHENG32_STATE_MASK) != i) {
|
||||||
|
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||||
|
aux->top & SHENG32_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->anchored & SHENG32_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->floating & SHENG32_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void describeNode<sheng64>(const NFA *n, const sheng64 *s, u16 i, FILE *f) {
|
||||||
|
const sstate_aux *aux = get_aux64(n, i);
|
||||||
|
|
||||||
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
|
"label = \"%u\" ]; \n",
|
||||||
|
i, i);
|
||||||
|
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->top && (aux->top & SHENG64_STATE_MASK) != i) {
|
||||||
|
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||||
|
aux->top & SHENG64_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->anchored & SHENG64_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->floating & SHENG64_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void describeEdge(FILE *f, const u16 *t, u16 i) {
|
void describeEdge(FILE *f, const u16 *t, u16 i) {
|
||||||
for (u16 s = 0; s < N_CHARS; s++) {
|
for (u16 s = 0; s < N_CHARS; s++) {
|
||||||
@ -228,7 +486,7 @@ void describeEdge(FILE *f, const u16 *t, u16 i) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||||
assert(isShengType(n->type));
|
assert(isSheng16Type(n->type));
|
||||||
const sheng *s = (const sheng *)getImplNfa(n);
|
const sheng *s = (const sheng *)getImplNfa(n);
|
||||||
const sstate_aux *aux = get_aux(n, state);
|
const sstate_aux *aux = get_aux(n, state);
|
||||||
|
|
||||||
@ -244,6 +502,42 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
|||||||
t[TOP] = aux->top & SHENG_STATE_MASK;
|
t[TOP] = aux->top & SHENG_STATE_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void sheng32GetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||||
|
assert(isSheng32Type(n->type));
|
||||||
|
const sheng32 *s = (const sheng32 *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux = get_aux32(n, state);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||||
|
u8 buf[64];
|
||||||
|
m512 succ_mask = s->succ_masks[i];
|
||||||
|
|
||||||
|
memcpy(buf, &succ_mask, sizeof(m512));
|
||||||
|
|
||||||
|
t[i] = buf[state] & SHENG32_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
t[TOP] = aux->top & SHENG32_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void sheng64GetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||||
|
assert(isSheng64Type(n->type));
|
||||||
|
const sheng64 *s = (const sheng64 *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux = get_aux64(n, state);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||||
|
u8 buf[64];
|
||||||
|
m512 succ_mask = s->succ_masks[i];
|
||||||
|
|
||||||
|
memcpy(buf, &succ_mask, sizeof(m512));
|
||||||
|
|
||||||
|
t[i] = buf[state] & SHENG64_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
t[TOP] = aux->top & SHENG64_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
|
void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
|
||||||
assert(nfa->type == SHENG_NFA);
|
assert(nfa->type == SHENG_NFA);
|
||||||
@ -252,7 +546,7 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
|
|||||||
dumpDotPreambleDfa(f);
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
for (u16 i = 1; i < s->n_states; i++) {
|
for (u16 i = 1; i < s->n_states; i++) {
|
||||||
describeNode(nfa, s, i, f);
|
describeNode<sheng>(nfa, s, i, f);
|
||||||
|
|
||||||
u16 t[ALPHABET_SIZE];
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
@ -264,10 +558,62 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
|
|||||||
fprintf(f, "}\n");
|
fprintf(f, "}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void nfaExecSheng32_dumpDot(const NFA *nfa, FILE *f) {
|
||||||
|
assert(nfa->type == SHENG_NFA_32);
|
||||||
|
const sheng32 *s = (const sheng32 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < s->n_states; i++) {
|
||||||
|
describeNode<sheng32>(nfa, s, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
sheng32GetTransitions(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge(f, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void nfaExecSheng64_dumpDot(const NFA *nfa, FILE *f) {
|
||||||
|
assert(nfa->type == SHENG_NFA_64);
|
||||||
|
const sheng64 *s = (const sheng64 *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < s->n_states; i++) {
|
||||||
|
describeNode<sheng64>(nfa, s, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
sheng64GetTransitions(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge(f, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
void nfaExecSheng_dump(const NFA *nfa, const string &base) {
|
void nfaExecSheng_dump(const NFA *nfa, const string &base) {
|
||||||
assert(nfa->type == SHENG_NFA);
|
assert(nfa->type == SHENG_NFA);
|
||||||
nfaExecSheng_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
nfaExecSheng_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||||
nfaExecSheng_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
nfaExecSheng_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nfaExecSheng32_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
|
||||||
|
assert(nfa->type == SHENG_NFA_32);
|
||||||
|
nfaExecSheng32_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||||
|
nfaExecSheng32_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||||
|
}
|
||||||
|
|
||||||
|
void nfaExecSheng64_dump(UNUSED const NFA *nfa, UNUSED const string &base) {
|
||||||
|
assert(nfa->type == SHENG_NFA_64);
|
||||||
|
nfaExecSheng64_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||||
|
nfaExecSheng64_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,6 +38,8 @@ struct NFA;
|
|||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecSheng_dump(const struct NFA *nfa, const std::string &base);
|
void nfaExecSheng_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecSheng32_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
void nfaExecSheng64_dump(const struct NFA *nfa, const std::string &base);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -632,8 +632,8 @@ bytecode_ptr<NFA>
|
|||||||
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||||
const map<u32, u32> &fixed_depth_tops,
|
const map<u32, u32> &fixed_depth_tops,
|
||||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||||
bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
|
bool compress_state, bool do_accel, bool impl_test_only,
|
||||||
const CompileContext &cc) {
|
bool &fast, u32 hint, const CompileContext &cc) {
|
||||||
if (!has_managed_reports(h_in)) {
|
if (!has_managed_reports(h_in)) {
|
||||||
rm = nullptr;
|
rm = nullptr;
|
||||||
} else {
|
} else {
|
||||||
@ -684,19 +684,19 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||||
zombies, do_accel, compress_state, hint, cc);
|
zombies, do_accel, compress_state, fast, hint, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||||
const map<u32, u32> &fixed_depth_tops,
|
const map<u32, u32> &fixed_depth_tops,
|
||||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||||
bool compress_state, const CompileContext &cc) {
|
bool compress_state, bool &fast, const CompileContext &cc) {
|
||||||
const u32 hint = INVALID_NFA;
|
const u32 hint = INVALID_NFA;
|
||||||
const bool do_accel = cc.grey.accelerateNFA;
|
const bool do_accel = cc.grey.accelerateNFA;
|
||||||
const bool impl_test_only = false;
|
const bool impl_test_only = false;
|
||||||
return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
|
return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
|
||||||
do_accel, impl_test_only, hint, cc);
|
do_accel, impl_test_only, fast, hint, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef RELEASE_BUILD
|
#ifndef RELEASE_BUILD
|
||||||
@ -705,11 +705,11 @@ bytecode_ptr<NFA>
|
|||||||
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||||
const map<u32, u32> &fixed_depth_tops,
|
const map<u32, u32> &fixed_depth_tops,
|
||||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||||
bool compress_state, u32 hint, const CompileContext &cc) {
|
bool compress_state, bool &fast, u32 hint, const CompileContext &cc) {
|
||||||
const bool do_accel = cc.grey.accelerateNFA;
|
const bool do_accel = cc.grey.accelerateNFA;
|
||||||
const bool impl_test_only = false;
|
const bool impl_test_only = false;
|
||||||
return constructNFA(h_in, rm, fixed_depth_tops, triggers,
|
return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
|
||||||
compress_state, do_accel, impl_test_only, hint, cc);
|
do_accel, impl_test_only, fast, hint, cc);
|
||||||
}
|
}
|
||||||
#endif // RELEASE_BUILD
|
#endif // RELEASE_BUILD
|
||||||
|
|
||||||
@ -739,9 +739,10 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
|
|||||||
vector<BoundedRepeatData> repeats;
|
vector<BoundedRepeatData> repeats;
|
||||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||||
|
UNUSED bool fast = false;
|
||||||
|
|
||||||
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||||
zombies, false, false, hint, cc);
|
zombies, false, false, fast, hint, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
|
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -100,7 +100,7 @@ bytecode_ptr<NFA>
|
|||||||
constructNFA(const NGHolder &g, const ReportManager *rm,
|
constructNFA(const NGHolder &g, const ReportManager *rm,
|
||||||
const std::map<u32, u32> &fixed_depth_tops,
|
const std::map<u32, u32> &fixed_depth_tops,
|
||||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||||
bool compress_state, const CompileContext &cc);
|
bool compress_state, bool &fast, const CompileContext &cc);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Build a reverse NFA from the graph given, which should have already
|
* \brief Build a reverse NFA from the graph given, which should have already
|
||||||
@ -129,7 +129,7 @@ bytecode_ptr<NFA>
|
|||||||
constructNFA(const NGHolder &g, const ReportManager *rm,
|
constructNFA(const NGHolder &g, const ReportManager *rm,
|
||||||
const std::map<u32, u32> &fixed_depth_tops,
|
const std::map<u32, u32> &fixed_depth_tops,
|
||||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||||
bool compress_state, u32 hint, const CompileContext &cc);
|
bool compress_state, bool &fast, u32 hint, const CompileContext &cc);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Build a reverse NFA (with model type hint) from the graph given,
|
* \brief Build a reverse NFA (with model type hint) from the graph given,
|
||||||
|
@ -69,14 +69,14 @@ struct LitGraphVertexProps {
|
|||||||
LitGraphVertexProps() = default;
|
LitGraphVertexProps() = default;
|
||||||
explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
|
explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
|
||||||
ue2_literal::elem c; // string element (char + bool)
|
ue2_literal::elem c; // string element (char + bool)
|
||||||
size_t index; // managed by ue2_graph
|
size_t index = 0; // managed by ue2_graph
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LitGraphEdgeProps {
|
struct LitGraphEdgeProps {
|
||||||
LitGraphEdgeProps() = default;
|
LitGraphEdgeProps() = default;
|
||||||
explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
|
explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
|
||||||
u64a score = NO_LITERAL_AT_EDGE_SCORE;
|
u64a score = NO_LITERAL_AT_EDGE_SCORE;
|
||||||
size_t index; // managed by ue2_graph
|
size_t index = 0; // managed by ue2_graph
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LitGraph
|
struct LitGraph
|
||||||
|
@ -2446,6 +2446,10 @@ static
|
|||||||
bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
|
bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
|
||||||
ue2_literal lit;
|
ue2_literal lit;
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
|
if (!rhs) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
if (!ng.cc.grey.allowLitHaig) {
|
if (!ng.cc.grey.allowLitHaig) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2510,6 +2514,11 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g,
|
|||||||
ue2_literal lit;
|
ue2_literal lit;
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||||
|
if (!rhs || !lhs) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
|
||||||
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
|
if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1036,6 +1036,11 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
|
|||||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||||
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
|
||||||
|
|
||||||
|
if (!lhs || !rhs) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
|
||||||
unordered_map<NFAVertex, NFAVertex> lhs_map;
|
unordered_map<NFAVertex, NFAVertex> lhs_map;
|
||||||
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
||||||
|
|
||||||
@ -1229,6 +1234,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
|
|||||||
DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
|
DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
|
||||||
unordered_map<NFAVertex, NFAVertex> temp_map;
|
unordered_map<NFAVertex, NFAVertex> temp_map;
|
||||||
shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
|
||||||
|
if (!new_lhs) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
splitLHS(h, pivot, new_lhs.get(), &temp_map);
|
splitLHS(h, pivot, new_lhs.get(), &temp_map);
|
||||||
|
|
||||||
/* want to cut off paths to pivot from things other than the pivot -
|
/* want to cut off paths to pivot from things other than the pivot -
|
||||||
@ -1310,6 +1319,10 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
|
|||||||
if (!contains(done_rhs, adj)) {
|
if (!contains(done_rhs, adj)) {
|
||||||
unordered_map<NFAVertex, NFAVertex> temp_map;
|
unordered_map<NFAVertex, NFAVertex> temp_map;
|
||||||
shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> new_rhs = make_shared<NGHolder>();
|
||||||
|
if (!new_rhs) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
splitRHS(h, adj, new_rhs.get(), &temp_map);
|
splitRHS(h, adj, new_rhs.get(), &temp_map);
|
||||||
remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
|
remove_edge(new_rhs->start, new_rhs->accept, *new_rhs);
|
||||||
remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
|
remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs);
|
||||||
@ -2281,6 +2294,10 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg,
|
|||||||
assert(!splitters.empty());
|
assert(!splitters.empty());
|
||||||
|
|
||||||
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
|
||||||
|
if (!lhs) {
|
||||||
|
assert(0);
|
||||||
|
throw bad_alloc();
|
||||||
|
}
|
||||||
unordered_map<NFAVertex, NFAVertex> v_map;
|
unordered_map<NFAVertex, NFAVertex> v_map;
|
||||||
cloneHolder(*lhs, base_graph, &v_map);
|
cloneHolder(*lhs, base_graph, &v_map);
|
||||||
lhs->kind = NFA_INFIX;
|
lhs->kind = NFA_INFIX;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2019, Intel Corporation
|
* Copyright (c) 2018-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,6 +33,7 @@
|
|||||||
#include "parser/parse_error.h"
|
#include "parser/parse_error.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "hs_compile.h"
|
#include "hs_compile.h"
|
||||||
|
#include "allocator.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -139,7 +140,8 @@ void ParsedLogical::validateSubIDs(const unsigned *ids,
|
|||||||
}
|
}
|
||||||
hs_compile_error_t *compile_err = NULL;
|
hs_compile_error_t *compile_err = NULL;
|
||||||
hs_expr_info_t *info = NULL;
|
hs_expr_info_t *info = NULL;
|
||||||
hs_error_t err = hs_expression_info(expressions[i], flags[i], &info,
|
hs_error_t err = hs_expression_info(expressions[i],
|
||||||
|
flags ? flags[i] : 0, &info,
|
||||||
&compile_err);
|
&compile_err);
|
||||||
if (err != HS_SUCCESS) {
|
if (err != HS_SUCCESS) {
|
||||||
hs_free_compile_error(compile_err);
|
hs_free_compile_error(compile_err);
|
||||||
@ -151,7 +153,7 @@ void ParsedLogical::validateSubIDs(const unsigned *ids,
|
|||||||
if (info->unordered_matches) {
|
if (info->unordered_matches) {
|
||||||
throw CompileError("Have unordered match in sub-expressions.");
|
throw CompileError("Have unordered match in sub-expressions.");
|
||||||
}
|
}
|
||||||
free(info);
|
hs_misc_free(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2022, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -72,7 +72,7 @@ bool isValidUtf8(const char *expression, const size_t len) {
|
|||||||
while (i < len) {
|
while (i < len) {
|
||||||
DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]);
|
DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]);
|
||||||
// One octet.
|
// One octet.
|
||||||
if (s[i] < 0x7f) {
|
if (s[i] <= 0x7f) {
|
||||||
DEBUG_PRINTF("one octet\n");
|
DEBUG_PRINTF("one octet\n");
|
||||||
i++;
|
i++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -767,10 +767,10 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
c_shift = c_len - ci->len;
|
c_shift = c_len - ci->len;
|
||||||
c_len = ci->len;
|
c_len = ci->len;
|
||||||
}
|
}
|
||||||
copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
|
copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
|
||||||
}
|
}
|
||||||
assert(h_shift + h_len + c_len + c_shift == 32);
|
assert(h_shift + h_len + c_len + c_shift == 32);
|
||||||
copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
} else {
|
} else {
|
||||||
if (offset + 32 > (s64a)ci->len) {
|
if (offset + 32 > (s64a)ci->len) {
|
||||||
if (offset >= (s64a)ci->len) {
|
if (offset >= (s64a)ci->len) {
|
||||||
@ -779,7 +779,7 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
}
|
}
|
||||||
c_len = ci->len - offset;
|
c_len = ci->len - offset;
|
||||||
c_shift = 32 - c_len;
|
c_shift = 32 - c_len;
|
||||||
copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
|
copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
|
||||||
} else {
|
} else {
|
||||||
data = loadu256(ci->buf + offset);
|
data = loadu256(ci->buf + offset);
|
||||||
}
|
}
|
||||||
@ -800,12 +800,90 @@ int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// get 128/256 bits data from history and current buffer.
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckMask64(const struct core_info *ci, const u8 *and_mask,
|
||||||
|
const u8 *cmp_mask, const u64a neg_mask,
|
||||||
|
s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 data = zeroes512(); // consists of the following four parts.
|
||||||
|
s32 c_shift = 0; // blank bytes after current.
|
||||||
|
s32 h_shift = 0; // blank bytes before history.
|
||||||
|
s32 h_len = 64; // number of bytes from history buffer.
|
||||||
|
s32 c_len = 0; // number of bytes from current buffer.
|
||||||
|
/* h_shift + h_len + c_len + c_shift = 64 need to be hold.*/
|
||||||
|
|
||||||
|
if (offset < 0) {
|
||||||
|
s32 h_offset = 0; // the start offset in history buffer.
|
||||||
|
if (offset < -(s64a)ci->hlen) {
|
||||||
|
if (offset + 64 <= -(s64a)ci->hlen) {
|
||||||
|
DEBUG_PRINTF("all before history\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
h_shift = -(offset + (s64a)ci->hlen);
|
||||||
|
h_len = 64 - h_shift;
|
||||||
|
} else {
|
||||||
|
h_offset = ci->hlen + offset;
|
||||||
|
}
|
||||||
|
if (offset + 64 > 0) {
|
||||||
|
// part in current buffer.
|
||||||
|
c_len = offset + 64;
|
||||||
|
h_len = -(offset + h_shift);
|
||||||
|
if (c_len > (s64a)ci->len) {
|
||||||
|
// out of current buffer.
|
||||||
|
c_shift = c_len - ci->len;
|
||||||
|
c_len = ci->len;
|
||||||
|
}
|
||||||
|
copy_upto_64_bytes((u8 *)&data - offset, ci->buf, c_len);
|
||||||
|
}
|
||||||
|
assert(h_shift + h_len + c_len + c_shift == 64);
|
||||||
|
copy_upto_64_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
|
} else {
|
||||||
|
if (offset + 64 > (s64a)ci->len) {
|
||||||
|
if (offset >= (s64a)ci->len) {
|
||||||
|
DEBUG_PRINTF("all in the future.\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
c_len = ci->len - offset;
|
||||||
|
c_shift = 64 - c_len;
|
||||||
|
copy_upto_64_bytes((u8 *)&data, ci->buf + offset, c_len);
|
||||||
|
} else {
|
||||||
|
data = loadu512(ci->buf + offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
||||||
|
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
||||||
|
// we use valid_data_mask to blind bytes before history/in the future.
|
||||||
|
u64a valid_data_mask;
|
||||||
|
valid_data_mask = (~0ULL) << (h_shift + c_shift) >> (c_shift);
|
||||||
|
|
||||||
|
m512 and_mask_m512 = loadu512(and_mask);
|
||||||
|
m512 cmp_mask_m512 = loadu512(cmp_mask);
|
||||||
|
|
||||||
|
if (validateMask64(data, valid_data_mask, and_mask_m512,
|
||||||
|
cmp_mask_m512, neg_mask)) {
|
||||||
|
DEBUG_PRINTF("Mask64 passed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// get 128/256/512 bits data from history and current buffer.
|
||||||
// return data and valid_data_mask.
|
// return data and valid_data_mask.
|
||||||
static rose_inline
|
static rose_inline
|
||||||
u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
u64a getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
||||||
u8 *data, const u32 data_len) {
|
u8 *data, const u32 data_len) {
|
||||||
assert(data_len == 16 || data_len == 32);
|
assert(data_len == 16 || data_len == 32 || data_len == 64);
|
||||||
s32 c_shift = 0; // blank bytes after current.
|
s32 c_shift = 0; // blank bytes after current.
|
||||||
s32 h_shift = 0; // blank bytes before history.
|
s32 h_shift = 0; // blank bytes before history.
|
||||||
s32 h_len = data_len; // number of bytes from history buffer.
|
s32 h_len = data_len; // number of bytes from history buffer.
|
||||||
@ -831,10 +909,10 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
c_shift = c_len - ci->len;
|
c_shift = c_len - ci->len;
|
||||||
c_len = ci->len;
|
c_len = ci->len;
|
||||||
}
|
}
|
||||||
copy_upto_32_bytes(data - loc, ci->buf, c_len);
|
copy_upto_64_bytes(data - loc, ci->buf, c_len);
|
||||||
}
|
}
|
||||||
assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
|
assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
|
||||||
copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
|
copy_upto_64_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
|
||||||
} else {
|
} else {
|
||||||
if (loc + data_len > (s64a)ci->len) {
|
if (loc + data_len > (s64a)ci->len) {
|
||||||
if (loc >= (s64a)ci->len) {
|
if (loc >= (s64a)ci->len) {
|
||||||
@ -843,8 +921,14 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
}
|
}
|
||||||
c_len = ci->len - loc;
|
c_len = ci->len - loc;
|
||||||
c_shift = data_len - c_len;
|
c_shift = data_len - c_len;
|
||||||
copy_upto_32_bytes(data, ci->buf + loc, c_len);
|
copy_upto_64_bytes(data, ci->buf + loc, c_len);
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (data_len == 64) {
|
||||||
|
storeu512(data, loadu512(ci->buf + loc));
|
||||||
|
return ~0ULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (data_len == 16) {
|
if (data_len == 16) {
|
||||||
storeu128(data, loadu128(ci->buf + loc));
|
storeu128(data, loadu128(ci->buf + loc));
|
||||||
return 0xffff;
|
return 0xffff;
|
||||||
@ -857,6 +941,11 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
|
|||||||
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
|
||||||
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
if (data_len == 64) {
|
||||||
|
return (~0ULL) << (h_shift + c_shift) >> c_shift;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (data_len == 16) {
|
if (data_len == 16) {
|
||||||
return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
|
return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
|
||||||
} else {
|
} else {
|
||||||
@ -886,6 +975,19 @@ m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
|
|||||||
return *(m256 *)data;
|
return *(m256 *)data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
m512 getData512(const struct core_info *ci, s64a offset, u64a *valid_data_mask) {
|
||||||
|
if (offset > 0 && offset + sizeof(m512) <= ci->len) {
|
||||||
|
*valid_data_mask = ~0ULL;
|
||||||
|
return loadu512(ci->buf + offset);
|
||||||
|
}
|
||||||
|
ALIGN_CL_DIRECTIVE u8 data[sizeof(m512)];
|
||||||
|
*valid_data_mask = getBufferDataComplex(ci, offset, data, 64);
|
||||||
|
return *(m512 *)data;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static rose_inline
|
static rose_inline
|
||||||
int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
|
int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
|
||||||
const u8 *bucket_select_mask, u32 neg_mask,
|
const u8 *bucket_select_mask, u32 neg_mask,
|
||||||
@ -1025,6 +1127,83 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckShufti64x8(const struct core_info *ci, const u8 *hi_mask,
|
||||||
|
const u8 *lo_mask, const u8 *bucket_select_mask,
|
||||||
|
u64a neg_mask, s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a valid_data_mask = 0;
|
||||||
|
m512 data = getData512(ci, offset, &valid_data_mask);
|
||||||
|
|
||||||
|
if (unlikely(!valid_data_mask)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 hi_mask_m512 = loadu512(hi_mask);
|
||||||
|
m512 lo_mask_m512 = loadu512(lo_mask);
|
||||||
|
m512 bucket_select_mask_m512 = loadu512(bucket_select_mask);
|
||||||
|
if (validateShuftiMask64x8(data, hi_mask_m512, lo_mask_m512,
|
||||||
|
bucket_select_mask_m512,
|
||||||
|
neg_mask, valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("check shufti 64x8 successfully\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static rose_inline
|
||||||
|
int roseCheckShufti64x16(const struct core_info *ci, const u8 *hi_mask_1,
|
||||||
|
const u8 *hi_mask_2, const u8 *lo_mask_1,
|
||||||
|
const u8 *lo_mask_2, const u8 *bucket_select_mask_hi,
|
||||||
|
const u8 *bucket_select_mask_lo, u64a neg_mask,
|
||||||
|
s32 checkOffset, u64a end) {
|
||||||
|
const s64a base_offset = (s64a)end - ci->buf_offset;
|
||||||
|
s64a offset = base_offset + checkOffset;
|
||||||
|
DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
|
||||||
|
DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
|
||||||
|
|
||||||
|
if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
|
||||||
|
DEBUG_PRINTF("too early, fail\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a valid_data_mask = 0;
|
||||||
|
m512 data = getData512(ci, offset, &valid_data_mask);
|
||||||
|
if (unlikely(!valid_data_mask)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
m512 hi_mask_1_m512 = loadu512(hi_mask_1);
|
||||||
|
m512 hi_mask_2_m512 = loadu512(hi_mask_2);
|
||||||
|
m512 lo_mask_1_m512 = loadu512(lo_mask_1);
|
||||||
|
m512 lo_mask_2_m512 = loadu512(lo_mask_2);
|
||||||
|
|
||||||
|
m512 bucket_select_mask_hi_m512 = loadu512(bucket_select_mask_hi);
|
||||||
|
m512 bucket_select_mask_lo_m512 = loadu512(bucket_select_mask_lo);
|
||||||
|
if (validateShuftiMask64x16(data, hi_mask_1_m512, hi_mask_2_m512,
|
||||||
|
lo_mask_1_m512, lo_mask_2_m512,
|
||||||
|
bucket_select_mask_hi_m512,
|
||||||
|
bucket_select_mask_lo_m512,
|
||||||
|
neg_mask, valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("check shufti 64x16 successfully\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static rose_inline
|
static rose_inline
|
||||||
int roseCheckSingleLookaround(const struct RoseEngine *t,
|
int roseCheckSingleLookaround(const struct RoseEngine *t,
|
||||||
const struct hs_scratch *scratch,
|
const struct hs_scratch *scratch,
|
||||||
@ -2068,6 +2247,12 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
|||||||
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
|
&&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
|
||||||
&&LABEL_ROSE_INSTR_SET_EXHAUST,
|
&&LABEL_ROSE_INSTR_SET_EXHAUST,
|
||||||
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
|
&&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
,
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x8, //!< Check 64-byte data by 8-bucket shufti.
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_SHUFTI_64x16, //!< Check 64-byte data by 16-bucket shufti.
|
||||||
|
&&LABEL_ROSE_INSTR_CHECK_MASK_64 //!< 64-bytes and/cmp/neg mask check.
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2258,6 +2443,45 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x8) {
|
||||||
|
const struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckShufti64x8(ci, ri->hi_mask, ri->lo_mask,
|
||||||
|
ri->bucket_select_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x16) {
|
||||||
|
const struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckShufti64x16(ci, ri->hi_mask_1, ri->hi_mask_2,
|
||||||
|
ri->lo_mask_1, ri->lo_mask_2,
|
||||||
|
ri->bucket_select_mask_hi,
|
||||||
|
ri->bucket_select_mask_lo,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
PROGRAM_NEXT_INSTRUCTION_JUMP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
#endif
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_INFIX) {
|
PROGRAM_CASE(CHECK_INFIX) {
|
||||||
if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
|
if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
|
||||||
end)) {
|
end)) {
|
||||||
@ -2886,6 +3110,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
|||||||
|
|
||||||
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
|
const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
|
||||||
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
|
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
|
||||||
|
const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
|
||||||
|
|
||||||
const char *pc_base = getByOffset(t, programOffset);
|
const char *pc_base = getByOffset(t, programOffset);
|
||||||
const char *pc = pc_base;
|
const char *pc = pc_base;
|
||||||
@ -2945,6 +3170,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
L_PROGRAM_NEXT_INSTRUCTION
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
L_PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
struct core_info *ci = &scratch->core_info;
|
||||||
|
if (!roseCheckMask64(ci, ri->and_mask, ri->cmp_mask,
|
||||||
|
ri->neg_mask, ri->offset, end)) {
|
||||||
|
assert(ri->fail_jump);
|
||||||
|
pc += ri->fail_jump;
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION_JUMP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
#endif
|
||||||
|
|
||||||
L_PROGRAM_CASE(CHECK_BYTE) {
|
L_PROGRAM_CASE(CHECK_BYTE) {
|
||||||
const struct core_info *ci = &scratch->core_info;
|
const struct core_info *ci = &scratch->core_info;
|
||||||
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
|
||||||
@ -2969,6 +3207,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
L_PROGRAM_NEXT_INSTRUCTION
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
L_PROGRAM_CASE(CATCH_UP_MPV) {
|
||||||
|
if (from_mpv || skip_mpv_catchup) {
|
||||||
|
DEBUG_PRINTF("skipping mpv catchup\n");
|
||||||
|
} else if (roseCatchUpMPV(t,
|
||||||
|
end - scratch->core_info.buf_offset,
|
||||||
|
scratch) == HWLM_TERMINATE_MATCHING) {
|
||||||
|
return HWLM_TERMINATE_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
L_PROGRAM_CASE(SOM_FROM_REPORT) {
|
L_PROGRAM_CASE(SOM_FROM_REPORT) {
|
||||||
som = handleSomExternal(scratch, &ri->som, end);
|
som = handleSomExternal(scratch, &ri->som, end);
|
||||||
DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
|
DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
|
||||||
@ -2976,6 +3225,15 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
|
|||||||
}
|
}
|
||||||
L_PROGRAM_NEXT_INSTRUCTION
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
L_PROGRAM_CASE(TRIGGER_SUFFIX) {
|
||||||
|
if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som,
|
||||||
|
end) == HWLM_TERMINATE_MATCHING) {
|
||||||
|
return HWLM_TERMINATE_MATCHING;
|
||||||
|
}
|
||||||
|
work_done = 1;
|
||||||
|
}
|
||||||
|
L_PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
L_PROGRAM_CASE(DEDUPE) {
|
L_PROGRAM_CASE(DEDUPE) {
|
||||||
updateSeqPoint(tctxt, end, from_mpv);
|
updateSeqPoint(tctxt, end, from_mpv);
|
||||||
const char do_som = t->hasSom; // TODO: constant propagate
|
const char do_som = t->hasSom; // TODO: constant propagate
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -554,7 +554,8 @@ void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers,
|
|||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl,
|
bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl,
|
||||||
bytecode_ptr<NFA> nfa_impl) {
|
bytecode_ptr<NFA> nfa_impl,
|
||||||
|
bool fast_nfa) {
|
||||||
assert(nfa_impl);
|
assert(nfa_impl);
|
||||||
assert(dfa_impl);
|
assert(dfa_impl);
|
||||||
assert(isDfaType(dfa_impl->type));
|
assert(isDfaType(dfa_impl->type));
|
||||||
@ -584,7 +585,7 @@ bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl,
|
|||||||
return nfa_impl;
|
return nfa_impl;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (n_accel) {
|
if (n_accel && fast_nfa) {
|
||||||
return nfa_impl;
|
return nfa_impl;
|
||||||
} else {
|
} else {
|
||||||
return dfa_impl;
|
return dfa_impl;
|
||||||
@ -632,6 +633,15 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
|
|||||||
* bytecode and that they are usually run on small blocks */
|
* bytecode and that they are usually run on small blocks */
|
||||||
dfa = mcshengCompile(rdfa, cc, rm);
|
dfa = mcshengCompile(rdfa, cc, rm);
|
||||||
}
|
}
|
||||||
|
if (!dfa) {
|
||||||
|
dfa = sheng32Compile(rdfa, cc, rm, false);
|
||||||
|
}
|
||||||
|
if (!dfa) {
|
||||||
|
dfa = sheng64Compile(rdfa, cc, rm, false);
|
||||||
|
}
|
||||||
|
if (!dfa && !is_transient) {
|
||||||
|
dfa = mcshengCompile64(rdfa, cc, rm);
|
||||||
|
}
|
||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
// Sheng wasn't successful, so unleash McClellan!
|
// Sheng wasn't successful, so unleash McClellan!
|
||||||
dfa = mcclellanCompile(rdfa, cc, rm, false);
|
dfa = mcclellanCompile(rdfa, cc, rm, false);
|
||||||
@ -678,20 +688,21 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool fast_nfa = false;
|
||||||
auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
|
auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
|
||||||
compress_state, cc);
|
compress_state, fast_nfa, cc);
|
||||||
assert(n);
|
assert(n);
|
||||||
|
|
||||||
if (oneTop && cc.grey.roseMcClellanSuffix) {
|
if (oneTop && cc.grey.roseMcClellanSuffix) {
|
||||||
if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
|
if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
|
||||||
!has_bounded_repeats_other_than_firsts(*n)) {
|
!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa) {
|
||||||
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
|
auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
|
||||||
cc.grey);
|
cc.grey);
|
||||||
if (rdfa) {
|
if (rdfa) {
|
||||||
auto d = getDfa(*rdfa, false, cc, rm);
|
auto d = getDfa(*rdfa, false, cc, rm);
|
||||||
assert(d);
|
assert(d);
|
||||||
if (cc.grey.roseMcClellanSuffix != 2) {
|
if (cc.grey.roseMcClellanSuffix != 2) {
|
||||||
n = pickImpl(move(d), move(n));
|
n = pickImpl(move(d), move(n), fast_nfa);
|
||||||
} else {
|
} else {
|
||||||
n = move(d);
|
n = move(d);
|
||||||
}
|
}
|
||||||
@ -826,23 +837,24 @@ bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
|
|||||||
n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm);
|
n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool fast_nfa = false;
|
||||||
if (!n && left.graph()) {
|
if (!n && left.graph()) {
|
||||||
map<u32, vector<vector<CharReach>>> triggers;
|
map<u32, vector<vector<CharReach>>> triggers;
|
||||||
if (left.graph()->kind == NFA_INFIX) {
|
if (left.graph()->kind == NFA_INFIX) {
|
||||||
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
|
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
|
||||||
}
|
}
|
||||||
n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
|
n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
|
||||||
compress_state, cc);
|
compress_state, fast_nfa, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
|
if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
|
||||||
&& left.graph()
|
&& left.graph()
|
||||||
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) {
|
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
|
||||||
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
|
auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
|
||||||
if (rdfa) {
|
if (rdfa) {
|
||||||
auto d = getDfa(*rdfa, is_transient, cc, rm);
|
auto d = getDfa(*rdfa, is_transient, cc, rm);
|
||||||
assert(d);
|
assert(d);
|
||||||
n = pickImpl(move(d), move(n));
|
n = pickImpl(move(d), move(n), fast_nfa);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1627,17 +1639,18 @@ public:
|
|||||||
const map<u32, u32> fixed_depth_tops; /* no tops */
|
const map<u32, u32> fixed_depth_tops; /* no tops */
|
||||||
const map<u32, vector<vector<CharReach>>> triggers; /* no tops */
|
const map<u32, vector<vector<CharReach>>> triggers; /* no tops */
|
||||||
bool compress_state = cc.streaming;
|
bool compress_state = cc.streaming;
|
||||||
|
bool fast_nfa = false;
|
||||||
auto n = constructNFA(h, &rm, fixed_depth_tops, triggers,
|
auto n = constructNFA(h, &rm, fixed_depth_tops, triggers,
|
||||||
compress_state, cc);
|
compress_state, fast_nfa, cc);
|
||||||
|
|
||||||
// Try for a DFA upgrade.
|
// Try for a DFA upgrade.
|
||||||
if (n && cc.grey.roseMcClellanOutfix &&
|
if (n && cc.grey.roseMcClellanOutfix &&
|
||||||
!has_bounded_repeats_other_than_firsts(*n)) {
|
(!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
|
||||||
auto rdfa = buildMcClellan(h, &rm, cc.grey);
|
auto rdfa = buildMcClellan(h, &rm, cc.grey);
|
||||||
if (rdfa) {
|
if (rdfa) {
|
||||||
auto d = getDfa(*rdfa, false, cc, rm);
|
auto d = getDfa(*rdfa, false, cc, rm);
|
||||||
if (d) {
|
if (d) {
|
||||||
n = pickImpl(move(d), move(n));
|
n = pickImpl(move(d), move(n), fast_nfa);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -562,6 +562,10 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
|
|||||||
DEBUG_PRINTF("woot?\n");
|
DEBUG_PRINTF("woot?\n");
|
||||||
|
|
||||||
shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
|
shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
|
||||||
|
if (!h_new) {
|
||||||
|
assert(0);
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
||||||
vector<NFAVertex> exits_vec;
|
vector<NFAVertex> exits_vec;
|
||||||
insert(&exits_vec, exits_vec.end(), exits);
|
insert(&exits_vec, exits_vec.end(), exits);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -757,13 +757,12 @@ CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||||
const u8 *bucket_mask, u32 neg_mask, s32 offset) {
|
const u8 *bucket_mask, u64a neg_mask, s32 offset) {
|
||||||
assert(len == 16 || len == 32);
|
assert(len == 16 || len == 32 || len == 64);
|
||||||
os << " contents:" << endl;
|
os << " contents:" << endl;
|
||||||
for (u32 idx = 0; idx < len; idx++) {
|
for (u32 idx = 0; idx < len; idx++) {
|
||||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||||
|
if (neg_mask & (1ULL << idx)) {
|
||||||
if (neg_mask & (1U << idx)) {
|
|
||||||
cr.flip();
|
cr.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -779,14 +778,13 @@ void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
|||||||
static
|
static
|
||||||
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi,
|
||||||
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask,
|
||||||
const u8 *bucket_mask_2, u32 neg_mask, s32 offset) {
|
const u8 *bucket_mask_2, u64a neg_mask, s32 offset) {
|
||||||
assert(len == 16 || len == 32);
|
assert(len == 16 || len == 32 || len == 64);
|
||||||
os << " contents:" << endl;
|
os << " contents:" << endl;
|
||||||
for (u32 idx = 0; idx < len; idx++) {
|
for (u32 idx = 0; idx < len; idx++) {
|
||||||
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]);
|
||||||
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]);
|
||||||
|
if (neg_mask & (1ULL << idx)) {
|
||||||
if (neg_mask & (1U << idx)) {
|
|
||||||
cr.flip();
|
cr.flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -970,6 +968,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_MASK_64) {
|
||||||
|
os << " and_mask "
|
||||||
|
<< dumpStrMask(ri->and_mask, sizeof(ri->and_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " cmp_mask "
|
||||||
|
<< dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_BYTE) {
|
PROGRAM_CASE(CHECK_BYTE) {
|
||||||
os << " and_mask 0x" << std::hex << std::setw(2)
|
os << " and_mask 0x" << std::hex << std::setw(2)
|
||||||
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
<< std::setfill('0') << u32{ri->and_mask} << std::dec
|
||||||
@ -1072,6 +1084,60 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
|||||||
}
|
}
|
||||||
PROGRAM_NEXT_INSTRUCTION
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x8) {
|
||||||
|
os << " hi_mask "
|
||||||
|
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask "
|
||||||
|
<< dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask,
|
||||||
|
sizeof(ri->bucket_select_mask))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
dumpLookaroundShufti(os, 64, ri->lo_mask, ri->hi_mask,
|
||||||
|
ri->bucket_select_mask, ri->neg_mask,
|
||||||
|
ri->offset);
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
|
PROGRAM_CASE(CHECK_SHUFTI_64x16) {
|
||||||
|
os << " hi_mask_1 "
|
||||||
|
<< dumpStrMask(ri->hi_mask_1, sizeof(ri->hi_mask_1))
|
||||||
|
<< endl;
|
||||||
|
os << " hi_mask_2 "
|
||||||
|
<< dumpStrMask(ri->hi_mask_2, sizeof(ri->hi_mask_2))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask_1 "
|
||||||
|
<< dumpStrMask(ri->lo_mask_1, sizeof(ri->lo_mask_1))
|
||||||
|
<< endl;
|
||||||
|
os << " lo_mask_2 "
|
||||||
|
<< dumpStrMask(ri->lo_mask_2, sizeof(ri->lo_mask_2))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask_hi "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask_hi,
|
||||||
|
sizeof(ri->bucket_select_mask_hi))
|
||||||
|
<< endl;
|
||||||
|
os << " bucket_select_mask_lo "
|
||||||
|
<< dumpStrMask(ri->bucket_select_mask_lo,
|
||||||
|
sizeof(ri->bucket_select_mask_lo))
|
||||||
|
<< endl;
|
||||||
|
os << " neg_mask 0x" << std::hex << std::setw(8)
|
||||||
|
<< std::setfill('0') << ri->neg_mask << std::dec << endl;
|
||||||
|
os << " offset " << ri->offset << endl;
|
||||||
|
os << " fail_jump " << offset + ri->fail_jump << endl;
|
||||||
|
dumpLookaroundShufti(os, 64, ri->lo_mask_1, ri->hi_mask_1,
|
||||||
|
ri->lo_mask_2, ri->hi_mask_2,
|
||||||
|
ri->bucket_select_mask_lo,
|
||||||
|
ri->bucket_select_mask_hi,
|
||||||
|
ri->neg_mask, ri->offset);
|
||||||
|
}
|
||||||
|
PROGRAM_NEXT_INSTRUCTION
|
||||||
|
|
||||||
PROGRAM_CASE(CHECK_INFIX) {
|
PROGRAM_CASE(CHECK_INFIX) {
|
||||||
os << " queue " << ri->queue << endl;
|
os << " queue " << ri->queue << endl;
|
||||||
os << " lag " << ri->lag << endl;
|
os << " lag " << ri->lag << endl;
|
||||||
|
@ -96,7 +96,7 @@ bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) {
|
|||||||
static
|
static
|
||||||
bool requires_group_assignment(const rose_literal_id &lit,
|
bool requires_group_assignment(const rose_literal_id &lit,
|
||||||
const rose_literal_info &info) {
|
const rose_literal_info &info) {
|
||||||
if (lit.delay) { /* we will check the shadow's master */
|
if (lit.delay) { /* we will check the shadow's leader */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -162,6 +162,17 @@ void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob,
|
|||||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckMask64::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(and_mask), end(and_mask), inst->and_mask);
|
||||||
|
copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
|
void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob,
|
||||||
const OffsetMap &offset_map) const {
|
const OffsetMap &offset_map) const {
|
||||||
RoseInstrBase::write(dest, blob, offset_map);
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
@ -227,6 +238,36 @@ void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob,
|
|||||||
inst->fail_jump = calc_jump(offset_map, this, target);
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckShufti64x8::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(hi_mask), end(hi_mask), inst->hi_mask);
|
||||||
|
copy(begin(lo_mask), end(lo_mask), inst->lo_mask);
|
||||||
|
copy(begin(bucket_select_mask), end(bucket_select_mask),
|
||||||
|
inst->bucket_select_mask);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RoseInstrCheckShufti64x16::write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const {
|
||||||
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
auto *inst = static_cast<impl_type *>(dest);
|
||||||
|
copy(begin(hi_mask_1), end(hi_mask_1), inst->hi_mask_1);
|
||||||
|
copy(begin(hi_mask_2), end(hi_mask_2), inst->hi_mask_2);
|
||||||
|
copy(begin(lo_mask_1), end(lo_mask_1), inst->lo_mask_1);
|
||||||
|
copy(begin(lo_mask_2), end(lo_mask_2), inst->lo_mask_2);
|
||||||
|
copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi),
|
||||||
|
inst->bucket_select_mask_hi);
|
||||||
|
copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo),
|
||||||
|
inst->bucket_select_mask_lo);
|
||||||
|
inst->neg_mask = neg_mask;
|
||||||
|
inst->offset = offset;
|
||||||
|
inst->fail_jump = calc_jump(offset_map, this, target);
|
||||||
|
}
|
||||||
|
|
||||||
void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
|
void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob,
|
||||||
const OffsetMap &offset_map) const {
|
const OffsetMap &offset_map) const {
|
||||||
RoseInstrBase::write(dest, blob, offset_map);
|
RoseInstrBase::write(dest, blob, offset_map);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -519,6 +519,43 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckMask64
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_64,
|
||||||
|
ROSE_STRUCT_CHECK_MASK_64,
|
||||||
|
RoseInstrCheckMask64> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> and_mask;
|
||||||
|
std::array<u8, 64> cmp_mask;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckMask64(std::array<u8, 64> and_mask_in,
|
||||||
|
std::array<u8, 64> cmp_mask_in, u64a neg_mask_in,
|
||||||
|
s32 offset_in, const RoseInstruction *target_in)
|
||||||
|
: and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
bool operator==(const RoseInstrCheckMask64 &ri) const {
|
||||||
|
return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckMask64 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class RoseInstrCheckByte
|
class RoseInstrCheckByte
|
||||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE,
|
||||||
ROSE_STRUCT_CHECK_BYTE,
|
ROSE_STRUCT_CHECK_BYTE,
|
||||||
@ -738,6 +775,109 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckShufti64x8
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_64x8,
|
||||||
|
ROSE_STRUCT_CHECK_SHUFTI_64x8,
|
||||||
|
RoseInstrCheckShufti64x8> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> hi_mask;
|
||||||
|
std::array<u8, 64> lo_mask;
|
||||||
|
std::array<u8, 64> bucket_select_mask;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckShufti64x8(std::array<u8, 64> hi_mask_in,
|
||||||
|
std::array<u8, 64> lo_mask_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_in,
|
||||||
|
u64a neg_mask_in, s32 offset_in,
|
||||||
|
const RoseInstruction *target_in)
|
||||||
|
: hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)),
|
||||||
|
bucket_select_mask(std::move(bucket_select_mask_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
|
||||||
|
bool operator==(const RoseInstrCheckShufti64x8 &ri) const {
|
||||||
|
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||||
|
bucket_select_mask == ri.bucket_select_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckShufti64x8 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
|
||||||
|
bucket_select_mask == ri.bucket_select_mask &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class RoseInstrCheckShufti64x16
|
||||||
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_64x16,
|
||||||
|
ROSE_STRUCT_CHECK_SHUFTI_64x16,
|
||||||
|
RoseInstrCheckShufti64x16> {
|
||||||
|
public:
|
||||||
|
std::array<u8, 64> hi_mask_1;
|
||||||
|
std::array<u8, 64> hi_mask_2;
|
||||||
|
std::array<u8, 64> lo_mask_1;
|
||||||
|
std::array<u8, 64> lo_mask_2;
|
||||||
|
std::array<u8, 64> bucket_select_mask_hi;
|
||||||
|
std::array<u8, 64> bucket_select_mask_lo;
|
||||||
|
u64a neg_mask;
|
||||||
|
s32 offset;
|
||||||
|
const RoseInstruction *target;
|
||||||
|
|
||||||
|
RoseInstrCheckShufti64x16(std::array<u8, 64> hi_mask_1_in,
|
||||||
|
std::array<u8, 64> hi_mask_2_in,
|
||||||
|
std::array<u8, 64> lo_mask_1_in,
|
||||||
|
std::array<u8, 64> lo_mask_2_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_hi_in,
|
||||||
|
std::array<u8, 64> bucket_select_mask_lo_in,
|
||||||
|
u64a neg_mask_in, s32 offset_in,
|
||||||
|
const RoseInstruction *target_in)
|
||||||
|
: hi_mask_1(std::move(hi_mask_1_in)), hi_mask_2(std::move(hi_mask_2_in)),
|
||||||
|
lo_mask_1(std::move(lo_mask_1_in)), lo_mask_2(std::move(lo_mask_2_in)),
|
||||||
|
bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)),
|
||||||
|
bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)),
|
||||||
|
neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
|
||||||
|
|
||||||
|
bool operator==(const RoseInstrCheckShufti64x16 &ri) const {
|
||||||
|
return hi_mask_1 == ri.hi_mask_1 && hi_mask_2 == ri.hi_mask_2 &&
|
||||||
|
lo_mask_1 == ri.lo_mask_1 && lo_mask_2 == ri.lo_mask_2 &&
|
||||||
|
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||||
|
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
target == ri.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hash() const override {
|
||||||
|
return hash_all(opcode, hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2,
|
||||||
|
bucket_select_mask_hi, bucket_select_mask_lo, neg_mask,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(void *dest, RoseEngineBlob &blob,
|
||||||
|
const OffsetMap &offset_map) const override;
|
||||||
|
|
||||||
|
bool equiv_to(const RoseInstrCheckShufti64x16 &ri, const OffsetMap &offsets,
|
||||||
|
const OffsetMap &other_offsets) const {
|
||||||
|
return hi_mask_1 == ri.hi_mask_1 && hi_mask_2 == ri.hi_mask_2 &&
|
||||||
|
lo_mask_1 == ri.lo_mask_1 && lo_mask_2 == ri.lo_mask_2 &&
|
||||||
|
bucket_select_mask_hi == ri.bucket_select_mask_hi &&
|
||||||
|
bucket_select_mask_lo == ri.bucket_select_mask_lo &&
|
||||||
|
neg_mask == ri.neg_mask && offset == ri.offset &&
|
||||||
|
offsets.at(target) == other_offsets.at(ri.target);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class RoseInstrCheckInfix
|
class RoseInstrCheckInfix
|
||||||
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
|
: public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
|
||||||
ROSE_STRUCT_CHECK_INFIX,
|
ROSE_STRUCT_CHECK_INFIX,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -58,7 +58,7 @@ static const u32 MAX_FWD_LEN = 64;
|
|||||||
static const u32 MAX_BACK_LEN = 64;
|
static const u32 MAX_BACK_LEN = 64;
|
||||||
|
|
||||||
/** \brief Max lookaround entries for a role. */
|
/** \brief Max lookaround entries for a role. */
|
||||||
static const u32 MAX_LOOKAROUND_ENTRIES = 16;
|
static const u32 MAX_LOOKAROUND_ENTRIES = 32;
|
||||||
|
|
||||||
/** \brief We would rather have lookarounds with smaller reach than this. */
|
/** \brief We would rather have lookarounds with smaller reach than this. */
|
||||||
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2019, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -1061,6 +1061,49 @@ bool makeRoleMask32(const vector<LookEntry> &look,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool makeRoleMask64(const vector<LookEntry> &look,
|
||||||
|
RoseProgram &program, const target_t &target) {
|
||||||
|
if (!target.has_avx512()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (look.back().offset >= look.front().offset + 64) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
s32 base_offset = verify_s32(look.front().offset);
|
||||||
|
array<u8, 64> and_mask, cmp_mask;
|
||||||
|
and_mask.fill(0);
|
||||||
|
cmp_mask.fill(0);
|
||||||
|
u64a neg_mask = 0;
|
||||||
|
for (const auto &entry : look) {
|
||||||
|
u8 andmask_u8, cmpmask_u8, flip;
|
||||||
|
if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
u32 shift = entry.offset - base_offset;
|
||||||
|
assert(shift < 64);
|
||||||
|
and_mask[shift] = andmask_u8;
|
||||||
|
cmp_mask[shift] = cmpmask_u8;
|
||||||
|
if (flip) {
|
||||||
|
neg_mask |= 1ULL << shift;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("and_mask %s\n",
|
||||||
|
convertMaskstoString(and_mask.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("cmp_mask %s\n",
|
||||||
|
convertMaskstoString(cmp_mask.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
|
||||||
|
DEBUG_PRINTF("base_offset %d\n", base_offset);
|
||||||
|
|
||||||
|
const auto *end_inst = program.end_instruction();
|
||||||
|
auto ri = make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask,
|
||||||
|
base_offset, end_inst);
|
||||||
|
program.add_before_end(move(ri));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Sorting by the size of every bucket.
|
// Sorting by the size of every bucket.
|
||||||
// Used in map<u32, vector<s8>, cmpNibble>.
|
// Used in map<u32, vector<s8>, cmpNibble>.
|
||||||
struct cmpNibble {
|
struct cmpNibble {
|
||||||
@ -1084,6 +1127,7 @@ void getAllBuckets(const vector<LookEntry> &look,
|
|||||||
} else {
|
} else {
|
||||||
neg_mask ^= 1ULL << (entry.offset - base_offset);
|
neg_mask ^= 1ULL << (entry.offset - base_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
map <u16, u16> lo2hi;
|
map <u16, u16> lo2hi;
|
||||||
// We treat Ascii Table as a 16x16 grid.
|
// We treat Ascii Table as a 16x16 grid.
|
||||||
// Push every row in cr into lo2hi and mark the row number.
|
// Push every row in cr into lo2hi and mark the row number.
|
||||||
@ -1237,6 +1281,7 @@ makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
|
|||||||
(hi_mask, lo_mask, bucket_select_mask_32,
|
(hi_mask, lo_mask, bucket_select_mask_32,
|
||||||
neg_mask & 0xffff, base_offset, end_inst);
|
neg_mask & 0xffff, base_offset, end_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
unique_ptr<RoseInstruction>
|
unique_ptr<RoseInstruction>
|
||||||
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
||||||
@ -1255,10 +1300,83 @@ makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
/**
 * Build a CHECK_SHUFTI_64x8 instruction, or return nullptr when the offset
 * range exceeds 64 or more than 8 buckets are in use.
 *
 * The first 16 bytes of \p hi_mask and \p lo_mask are replicated into each
 * of the four 16-byte lanes of the 64-byte masks given to the instruction.
 */
unique_ptr<RoseInstruction>
makeCheckShufti64x8(u32 offset_range, u8 bucket_idx,
                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
                    const array<u8, 64> &bucket_select_mask,
                    u64a neg_mask, s32 base_offset,
                    const RoseInstruction *end_inst) {
    if (offset_range > 64 || bucket_idx > 8) {
        return nullptr;
    }

    array<u8, 64> hi_mask_64;
    array<u8, 64> lo_mask_64;

    // Four lanes of 16 bytes each cover the full 64-byte masks.
    for (size_t lane = 0; lane < 4; ++lane) {
        const size_t dst = lane * 16;
        copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + dst);
        copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + dst);
    }

    return make_unique<RoseInstrCheckShufti64x8>
        (hi_mask_64, lo_mask_64, bucket_select_mask,
         neg_mask, base_offset, end_inst);
}
|
||||||
|
|
||||||
|
static
|
||||||
|
unique_ptr<RoseInstruction>
|
||||||
|
makeCheckShufti64x16(u32 offset_range, u8 bucket_idx,
|
||||||
|
const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
|
||||||
|
const array<u8, 64> &bucket_select_mask_lo,
|
||||||
|
const array<u8, 64> &bucket_select_mask_hi,
|
||||||
|
u64a neg_mask, s32 base_offset,
|
||||||
|
const RoseInstruction *end_inst) {
|
||||||
|
if (offset_range > 64 || bucket_idx > 16) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
array<u8, 64> hi_mask_1;
|
||||||
|
array<u8, 64> hi_mask_2;
|
||||||
|
array<u8, 64> lo_mask_1;
|
||||||
|
array<u8, 64> lo_mask_2;
|
||||||
|
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin());
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32);
|
||||||
|
copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin());
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32);
|
||||||
|
copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48);
|
||||||
|
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin());
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32);
|
||||||
|
copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin());
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32);
|
||||||
|
copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48);
|
||||||
|
|
||||||
|
return make_unique<RoseInstrCheckShufti64x16>
|
||||||
|
(hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi,
|
||||||
|
bucket_select_mask_lo, neg_mask, base_offset, end_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
|
||||||
|
const target_t &target) {
|
||||||
|
s32 offset_limit;
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
offset_limit = 64;
|
||||||
|
} else {
|
||||||
|
offset_limit = 32;
|
||||||
|
}
|
||||||
s32 base_offset = verify_s32(look.front().offset);
|
s32 base_offset = verify_s32(look.front().offset);
|
||||||
if (look.back().offset >= base_offset + 32) {
|
if (look.back().offset >= base_offset + offset_limit) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1266,17 +1384,40 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
u64a neg_mask_64;
|
u64a neg_mask_64;
|
||||||
array<u8, 32> hi_mask;
|
array<u8, 32> hi_mask;
|
||||||
array<u8, 32> lo_mask;
|
array<u8, 32> lo_mask;
|
||||||
|
array<u8, 64> bucket_select_hi_64; // for AVX512
|
||||||
|
array<u8, 64> bucket_select_lo_64; // for AVX512
|
||||||
array<u8, 32> bucket_select_hi;
|
array<u8, 32> bucket_select_hi;
|
||||||
array<u8, 32> bucket_select_lo;
|
array<u8, 32> bucket_select_lo;
|
||||||
hi_mask.fill(0);
|
hi_mask.fill(0);
|
||||||
lo_mask.fill(0);
|
lo_mask.fill(0);
|
||||||
|
bucket_select_hi_64.fill(0);
|
||||||
|
bucket_select_lo_64.fill(0);
|
||||||
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
|
||||||
bucket_select_lo.fill(0);
|
bucket_select_lo.fill(0);
|
||||||
|
|
||||||
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
|
if (target.has_avx512()) {
|
||||||
bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
|
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(),
|
||||||
return false;
|
bucket_select_lo_64.data(), neg_mask_64, bucket_idx,
|
||||||
|
32)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32,
|
||||||
|
bucket_select_hi.begin());
|
||||||
|
copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32,
|
||||||
|
bucket_select_lo.begin());
|
||||||
|
|
||||||
|
DEBUG_PRINTF("bucket_select_hi_64 %s\n",
|
||||||
|
convertMaskstoString(bucket_select_hi_64.data(), 64).c_str());
|
||||||
|
DEBUG_PRINTF("bucket_select_lo_64 %s\n",
|
||||||
|
convertMaskstoString(bucket_select_lo_64.data(), 64).c_str());
|
||||||
|
} else {
|
||||||
|
if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
|
||||||
|
bucket_select_lo.data(), neg_mask_64, bucket_idx,
|
||||||
|
32)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 neg_mask = (u32)neg_mask_64;
|
u32 neg_mask = (u32)neg_mask_64;
|
||||||
|
|
||||||
DEBUG_PRINTF("hi_mask %s\n",
|
DEBUG_PRINTF("hi_mask %s\n",
|
||||||
@ -1299,6 +1440,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
bucket_select_lo, neg_mask, base_offset,
|
bucket_select_lo, neg_mask, base_offset,
|
||||||
end_inst);
|
end_inst);
|
||||||
}
|
}
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
if (!ri) {
|
||||||
|
ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
|
bucket_select_lo_64, neg_mask_64,
|
||||||
|
base_offset, end_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!ri) {
|
if (!ri) {
|
||||||
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
bucket_select_lo, bucket_select_hi,
|
bucket_select_lo, bucket_select_hi,
|
||||||
@ -1309,6 +1457,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
bucket_select_lo, bucket_select_hi,
|
bucket_select_lo, bucket_select_hi,
|
||||||
neg_mask, base_offset, end_inst);
|
neg_mask, base_offset, end_inst);
|
||||||
}
|
}
|
||||||
|
if (target.has_avx512()) {
|
||||||
|
if (!ri) {
|
||||||
|
ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask,
|
||||||
|
bucket_select_lo_64, bucket_select_hi_64,
|
||||||
|
neg_mask_64, base_offset, end_inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
assert(ri);
|
assert(ri);
|
||||||
program.add_before_end(move(ri));
|
program.add_before_end(move(ri));
|
||||||
|
|
||||||
@ -1321,7 +1476,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
|
|||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void makeLookaroundInstruction(const vector<LookEntry> &look,
|
void makeLookaroundInstruction(const vector<LookEntry> &look,
|
||||||
RoseProgram &program) {
|
RoseProgram &program, const target_t &target) {
|
||||||
assert(!look.empty());
|
assert(!look.empty());
|
||||||
|
|
||||||
if (makeRoleByte(look, program)) {
|
if (makeRoleByte(look, program)) {
|
||||||
@ -1345,7 +1500,11 @@ void makeLookaroundInstruction(const vector<LookEntry> &look,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (makeRoleShufti(look, program)) {
|
if (makeRoleMask64(look, program, target)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (makeRoleShufti(look, program, target)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1386,7 +1545,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
|
|||||||
return; // all caseful chars handled by HWLM mask.
|
return; // all caseful chars handled by HWLM mask.
|
||||||
}
|
}
|
||||||
|
|
||||||
makeLookaroundInstruction(look, program);
|
makeLookaroundInstruction(look, program, build.cc.target_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -1730,7 +1889,7 @@ void makeRoleLookaround(const RoseBuildImpl &build,
|
|||||||
findLookaroundMasks(build, v, look_more);
|
findLookaroundMasks(build, v, look_more);
|
||||||
mergeLookaround(look, look_more);
|
mergeLookaround(look, look_more);
|
||||||
if (!look.empty()) {
|
if (!look.empty()) {
|
||||||
makeLookaroundInstruction(look, program);
|
makeLookaroundInstruction(look, program, build.cc.target_info);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -208,7 +208,11 @@ enum RoseInstructionCode {
|
|||||||
*/
|
*/
|
||||||
ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
ROSE_INSTR_LAST_FLUSH_COMBINATION,
|
||||||
|
|
||||||
LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel.
|
ROSE_INSTR_CHECK_SHUFTI_64x8, //!< Check 64-byte data by 8-bucket shufti.
|
||||||
|
ROSE_INSTR_CHECK_SHUFTI_64x16, //!< Check 64-byte data by 16-bucket shufti.
|
||||||
|
ROSE_INSTR_CHECK_MASK_64, //!< 64-bytes and/cmp/neg mask check.
|
||||||
|
|
||||||
|
LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MASK_64 //!< Sentinel.
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_END {
|
struct ROSE_STRUCT_END {
|
||||||
@ -285,6 +289,15 @@ struct ROSE_STRUCT_CHECK_MASK_32 {
|
|||||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_MASK_64 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 and_mask[64]; //!< 64-byte and mask.
|
||||||
|
u8 cmp_mask[64]; //!< 64-byte cmp mask.
|
||||||
|
u64a neg_mask; //!< negation mask with 32 bits.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_CHECK_BYTE {
|
struct ROSE_STRUCT_CHECK_BYTE {
|
||||||
u8 code; //!< From enum RoseInstructionCode.
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
u8 and_mask; //!< 8-bits and mask.
|
u8 and_mask; //!< 8-bits and mask.
|
||||||
@ -336,6 +349,29 @@ struct ROSE_STRUCT_CHECK_SHUFTI_32x16 {
|
|||||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_SHUFTI_64x8 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 hi_mask[64]; //!< High nibble mask in shufti.
|
||||||
|
u8 lo_mask[64]; //!< Low nibble mask in shufti.
|
||||||
|
u8 bucket_select_mask[64]; //!< Mask for bucket assigning.
|
||||||
|
u64a neg_mask; //!< 64 bits negation mask.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ROSE_STRUCT_CHECK_SHUFTI_64x16 {
|
||||||
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
|
u8 hi_mask_1[64]; //!< 4 copies of 0-15 High nibble mask.
|
||||||
|
u8 hi_mask_2[64]; //!< 4 copies of 16-32 High nibble mask.
|
||||||
|
u8 lo_mask_1[64]; //!< 4 copies of 0-15 Low nibble mask.
|
||||||
|
u8 lo_mask_2[64]; //!< 4 copies of 16-32 Low nibble mask.
|
||||||
|
u8 bucket_select_mask_hi[64]; //!< Bucket mask for high 8 buckets.
|
||||||
|
u8 bucket_select_mask_lo[64]; //!< Bucket mask for low 8 buckets.
|
||||||
|
u64a neg_mask; //!< 64 bits negation mask.
|
||||||
|
s32 offset; //!< Relative offset of the first byte.
|
||||||
|
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||||
|
};
|
||||||
|
|
||||||
struct ROSE_STRUCT_CHECK_INFIX {
|
struct ROSE_STRUCT_CHECK_INFIX {
|
||||||
u8 code; //!< From enum RoseInstructionCode.
|
u8 code; //!< From enum RoseInstructionCode.
|
||||||
u32 queue; //!< Queue of leftfix to check.
|
u32 queue; //!< Queue of leftfix to check.
|
||||||
|
@ -201,12 +201,12 @@ const u8 *prepScanBuffer(const struct core_info *ci,
|
|||||||
} else {
|
} else {
|
||||||
// Copy: first chunk from history buffer.
|
// Copy: first chunk from history buffer.
|
||||||
assert(overhang <= ci->hlen);
|
assert(overhang <= ci->hlen);
|
||||||
copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
|
copy_upto_64_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
|
||||||
overhang);
|
overhang);
|
||||||
// Copy: second chunk from current buffer.
|
// Copy: second chunk from current buffer.
|
||||||
size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
|
size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
|
||||||
assert(copy_buf_len <= ci->len);
|
assert(copy_buf_len <= ci->len);
|
||||||
copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
|
copy_upto_64_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
|
||||||
// Read from our temporary buffer for the hash.
|
// Read from our temporary buffer for the hash.
|
||||||
base = tempbuf;
|
base = tempbuf;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,6 +41,17 @@ void validateMask32Print(const u8 *mask) {
|
|||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static
|
||||||
|
void validateMask64Print(const u8 *mask) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 64; i++) {
|
||||||
|
printf("%02x ", mask[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// check positive bytes in cmp_result.
|
// check positive bytes in cmp_result.
|
||||||
@ -115,4 +126,29 @@ int validateMask32(const m256 data, const u32 valid_data_mask,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static really_inline
|
||||||
|
int validateMask64(const m512 data, const u64a valid_data_mask,
|
||||||
|
const m512 and_mask, const m512 cmp_mask,
|
||||||
|
const u64a neg_mask) {
|
||||||
|
u64a cmp_result = ~eq512mask(and512(data, and_mask), cmp_mask);
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
validateMask64Print((const u8 *)&data);
|
||||||
|
DEBUG_PRINTF("cmp_result\n");
|
||||||
|
validateMask64Print((const u8 *)&cmp_result);
|
||||||
|
#endif
|
||||||
|
DEBUG_PRINTF("cmp_result %016llx neg_mask %016llx\n", cmp_result, neg_mask);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %016llx\n", valid_data_mask);
|
||||||
|
|
||||||
|
if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) {
|
||||||
|
DEBUG_PRINTF("checkCompareResult64 passed\n");
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("checkCompareResult64 failed\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016-2017, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -175,6 +175,84 @@ int validateShuftiMask32x16(const m256 data,
|
|||||||
return !cmp_result;
|
return !cmp_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
static really_inline
|
||||||
|
int validateShuftiMask64x8(const m512 data, const m512 hi_mask,
|
||||||
|
const m512 lo_mask, const m512 and_mask,
|
||||||
|
const u64a neg_mask, const u64a valid_data_mask) {
|
||||||
|
m512 low4bits = set64x8(0xf);
|
||||||
|
m512 c_lo = pshufb_m512(lo_mask, and512(data, low4bits));
|
||||||
|
m512 c_hi = pshufb_m512(hi_mask,
|
||||||
|
rshift64_m512(andnot512(low4bits, data), 4));
|
||||||
|
m512 t = and512(c_lo, c_hi);
|
||||||
|
u64a nresult = eq512mask(and512(t, and_mask), zeroes512());
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
dumpMask(&data, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask\n");
|
||||||
|
dumpMask(&hi_mask, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask\n");
|
||||||
|
dumpMask(&lo_mask, 64);
|
||||||
|
DEBUG_PRINTF("c_lo\n");
|
||||||
|
dumpMask(&c_lo, 64);
|
||||||
|
DEBUG_PRINTF("c_hi\n");
|
||||||
|
dumpMask(&c_hi, 64);
|
||||||
|
DEBUG_PRINTF("nresult %llx\n", nresult);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
|
||||||
|
#endif
|
||||||
|
u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask;
|
||||||
|
return !cmp_result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
int validateShuftiMask64x16(const m512 data,
|
||||||
|
const m512 hi_mask_1, const m512 hi_mask_2,
|
||||||
|
const m512 lo_mask_1, const m512 lo_mask_2,
|
||||||
|
const m512 and_mask_hi, const m512 and_mask_lo,
|
||||||
|
const u64a neg_mask, const u64a valid_data_mask) {
|
||||||
|
m512 low4bits = set64x8(0xf);
|
||||||
|
m512 data_lo = and512(data, low4bits);
|
||||||
|
m512 data_hi = and512(rshift64_m512(data, 4), low4bits);
|
||||||
|
m512 c_lo_1 = pshufb_m512(lo_mask_1, data_lo);
|
||||||
|
m512 c_lo_2 = pshufb_m512(lo_mask_2, data_lo);
|
||||||
|
m512 c_hi_1 = pshufb_m512(hi_mask_1, data_hi);
|
||||||
|
m512 c_hi_2 = pshufb_m512(hi_mask_2, data_hi);
|
||||||
|
m512 t1 = and512(c_lo_1, c_hi_1);
|
||||||
|
m512 t2 = and512(c_lo_2, c_hi_2);
|
||||||
|
m512 result = or512(and512(t1, and_mask_lo), and512(t2, and_mask_hi));
|
||||||
|
u64a nresult = eq512mask(result, zeroes512());
|
||||||
|
#ifdef DEBUG
|
||||||
|
DEBUG_PRINTF("data\n");
|
||||||
|
dumpMask(&data, 64);
|
||||||
|
DEBUG_PRINTF("data_lo\n");
|
||||||
|
dumpMask(&data_lo, 64);
|
||||||
|
DEBUG_PRINTF("data_hi\n");
|
||||||
|
dumpMask(&data_hi, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask_1\n");
|
||||||
|
dumpMask(&hi_mask_1, 64);
|
||||||
|
DEBUG_PRINTF("hi_mask_2\n");
|
||||||
|
dumpMask(&hi_mask_2, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask_1\n");
|
||||||
|
dumpMask(&lo_mask_1, 64);
|
||||||
|
DEBUG_PRINTF("lo_mask_2\n");
|
||||||
|
dumpMask(&lo_mask_2, 64);
|
||||||
|
DEBUG_PRINTF("c_lo_1\n");
|
||||||
|
dumpMask(&c_lo_1, 64);
|
||||||
|
DEBUG_PRINTF("c_lo_2\n");
|
||||||
|
dumpMask(&c_lo_2, 64);
|
||||||
|
DEBUG_PRINTF("c_hi_1\n");
|
||||||
|
dumpMask(&c_hi_1, 64);
|
||||||
|
DEBUG_PRINTF("c_hi_2\n");
|
||||||
|
dumpMask(&c_hi_2, 64);
|
||||||
|
DEBUG_PRINTF("result\n");
|
||||||
|
dumpMask(&result, 64);
|
||||||
|
DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
|
||||||
|
#endif
|
||||||
|
u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask;
|
||||||
|
return !cmp_result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
|
int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
|
||||||
u32 t = ~(data | hi_bits);
|
u32 t = ~(data | hi_bits);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2022, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -1013,6 +1013,7 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
|||||||
report_eod_matches(id, scratch, onEvent, context);
|
report_eod_matches(id, scratch, onEvent, context);
|
||||||
if (unlikely(internal_matching_error(scratch))) {
|
if (unlikely(internal_matching_error(scratch))) {
|
||||||
unmarkScratchInUse(scratch);
|
unmarkScratchInUse(scratch);
|
||||||
|
hs_stream_free(id);
|
||||||
return HS_UNKNOWN_ERROR;
|
return HS_UNKNOWN_ERROR;
|
||||||
}
|
}
|
||||||
unmarkScratchInUse(scratch);
|
unmarkScratchInUse(scratch);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2023, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2019, Intel Corporation
|
* Copyright (c) 2015-2023, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -78,7 +78,7 @@ namespace ue2 {
|
|||||||
struct LitTrieVertexProps {
|
struct LitTrieVertexProps {
|
||||||
LitTrieVertexProps() = default;
|
LitTrieVertexProps() = default;
|
||||||
explicit LitTrieVertexProps(u8 c_in) : c(c_in) {}
|
explicit LitTrieVertexProps(u8 c_in) : c(c_in) {}
|
||||||
size_t index; // managed by ue2_graph
|
size_t index = 0; // managed by ue2_graph
|
||||||
u8 c = 0; //!< character reached on this vertex
|
u8 c = 0; //!< character reached on this vertex
|
||||||
flat_set<ReportID> reports; //!< managed reports fired on this vertex
|
flat_set<ReportID> reports; //!< managed reports fired on this vertex
|
||||||
};
|
};
|
||||||
@ -793,6 +793,12 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
|
|||||||
bytecode_ptr<NFA> dfa = nullptr;
|
bytecode_ptr<NFA> dfa = nullptr;
|
||||||
if (cc.grey.allowSmallWriteSheng) {
|
if (cc.grey.allowSmallWriteSheng) {
|
||||||
dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
|
dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states);
|
||||||
|
if (!dfa) {
|
||||||
|
dfa = sheng32Compile(rdfa, cc, rm, only_accel_init, &accel_states);
|
||||||
|
}
|
||||||
|
if (!dfa) {
|
||||||
|
dfa = sheng64Compile(rdfa, cc, rm, only_accel_init, &accel_states);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!dfa) {
|
if (!dfa) {
|
||||||
dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init,
|
dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2023, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2018, Intel Corporation
|
* Copyright (c) 2017-2023, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
@ -76,7 +76,11 @@ public:
|
|||||||
|
|
||||||
T *allocate(std::size_t size) const {
|
T *allocate(std::size_t size) const {
|
||||||
size_t alloc_size = size * sizeof(T);
|
size_t alloc_size = size * sizeof(T);
|
||||||
return static_cast<T *>(aligned_malloc_internal(alloc_size, N));
|
T *ptr = static_cast<T *>(aligned_malloc_internal(alloc_size, N));
|
||||||
|
if (!ptr) {
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deallocate(T *x, std::size_t) const noexcept {
|
void deallocate(T *x, std::size_t) const noexcept {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,7 +33,7 @@
|
|||||||
#include "simd_utils.h"
|
#include "simd_utils.h"
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
void copy_upto_64_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
||||||
switch (len) {
|
switch (len) {
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
@ -72,14 +72,41 @@ void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) {
|
|||||||
case 16:
|
case 16:
|
||||||
storeu128(dst, loadu128(src));
|
storeu128(dst, loadu128(src));
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 17:
|
||||||
storeu256(dst, loadu256(src));
|
case 18:
|
||||||
break;
|
case 19:
|
||||||
default:
|
case 20:
|
||||||
assert(len < 32);
|
case 21:
|
||||||
|
case 22:
|
||||||
|
case 23:
|
||||||
|
case 24:
|
||||||
|
case 25:
|
||||||
|
case 26:
|
||||||
|
case 27:
|
||||||
|
case 28:
|
||||||
|
case 29:
|
||||||
|
case 30:
|
||||||
|
case 31:
|
||||||
storeu128(dst + len - 16, loadu128(src + len - 16));
|
storeu128(dst + len - 16, loadu128(src + len - 16));
|
||||||
storeu128(dst, loadu128(src));
|
storeu128(dst, loadu128(src));
|
||||||
break;
|
break;
|
||||||
|
case 32:
|
||||||
|
storeu256(dst, loadu256(src));
|
||||||
|
break;
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
case 64:
|
||||||
|
storebytes512(dst, loadu512(src), 64);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(len < 64);
|
||||||
|
u64a k = (1ULL << len) - 1;
|
||||||
|
storeu_mask_m512(dst, k, loadu_maskz_m512(k, src));
|
||||||
|
break;
|
||||||
|
#else
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -50,6 +50,11 @@ u64a cpuid_flags(void) {
|
|||||||
cap |= HS_CPU_FEATURES_AVX512;
|
cap |= HS_CPU_FEATURES_AVX512;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (check_avx512vbmi()) {
|
||||||
|
DEBUG_PRINTF("AVX512VBMI enabled\n");
|
||||||
|
cap |= HS_CPU_FEATURES_AVX512VBMI;
|
||||||
|
}
|
||||||
|
|
||||||
#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2)
|
#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2)
|
||||||
cap &= ~HS_CPU_FEATURES_AVX2;
|
cap &= ~HS_CPU_FEATURES_AVX2;
|
||||||
#endif
|
#endif
|
||||||
@ -59,6 +64,11 @@ u64a cpuid_flags(void) {
|
|||||||
cap &= ~HS_CPU_FEATURES_AVX512;
|
cap &= ~HS_CPU_FEATURES_AVX512;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512VBMI)) || \
|
||||||
|
(defined(FAT_RUNTIME) && !defined(BUILD_AVX512VBMI))
|
||||||
|
cap &= ~HS_CPU_FEATURES_AVX512VBMI;
|
||||||
|
#endif
|
||||||
|
|
||||||
return cap;
|
return cap;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,6 +115,11 @@ static const struct family_id known_microarch[] = {
|
|||||||
{ 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */
|
{ 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */
|
||||||
{ 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */
|
{ 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */
|
||||||
|
|
||||||
|
{ 0x6, 0x7D, HS_TUNE_FAMILY_ICL }, /* Icelake */
|
||||||
|
{ 0x6, 0x7E, HS_TUNE_FAMILY_ICL }, /* Icelake */
|
||||||
|
{ 0x6, 0x6A, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon-D */
|
||||||
|
{ 0x6, 0x6C, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon */
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef DUMP_SUPPORT
|
#ifdef DUMP_SUPPORT
|
||||||
@ -120,6 +135,8 @@ const char *dumpTune(u32 tune) {
|
|||||||
T_CASE(HS_TUNE_FAMILY_BDW);
|
T_CASE(HS_TUNE_FAMILY_BDW);
|
||||||
T_CASE(HS_TUNE_FAMILY_SKL);
|
T_CASE(HS_TUNE_FAMILY_SKL);
|
||||||
T_CASE(HS_TUNE_FAMILY_SKX);
|
T_CASE(HS_TUNE_FAMILY_SKX);
|
||||||
|
T_CASE(HS_TUNE_FAMILY_ICL);
|
||||||
|
T_CASE(HS_TUNE_FAMILY_ICX);
|
||||||
}
|
}
|
||||||
#undef T_CASE
|
#undef T_CASE
|
||||||
return "unknown";
|
return "unknown";
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, Intel Corporation
|
* Copyright (c) 2017-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -74,11 +74,12 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
|
|||||||
#define CPUID_HTT (1 << 28)
|
#define CPUID_HTT (1 << 28)
|
||||||
|
|
||||||
// Structured Extended Feature Flags Enumeration Leaf ECX values
|
// Structured Extended Feature Flags Enumeration Leaf ECX values
|
||||||
|
#define CPUID_AVX512VBMI (1 << 1)
|
||||||
|
|
||||||
|
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
||||||
#define CPUID_BMI (1 << 3)
|
#define CPUID_BMI (1 << 3)
|
||||||
#define CPUID_AVX2 (1 << 5)
|
#define CPUID_AVX2 (1 << 5)
|
||||||
#define CPUID_BMI2 (1 << 8)
|
#define CPUID_BMI2 (1 << 8)
|
||||||
|
|
||||||
// Structured Extended Feature Flags Enumeration Leaf EBX values
|
|
||||||
#define CPUID_AVX512F (1 << 16)
|
#define CPUID_AVX512F (1 << 16)
|
||||||
#define CPUID_AVX512BW (1 << 30)
|
#define CPUID_AVX512BW (1 << 30)
|
||||||
|
|
||||||
@ -186,6 +187,51 @@ int check_avx512(void) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
int check_avx512vbmi(void) {
|
||||||
|
#if defined(__INTEL_COMPILER)
|
||||||
|
return _may_i_use_cpu_feature(_FEATURE_AVX512VBMI);
|
||||||
|
#else
|
||||||
|
unsigned int eax, ebx, ecx, edx;
|
||||||
|
|
||||||
|
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
/* check XSAVE is enabled by OS */
|
||||||
|
if (!(ecx & CPUID_XSAVE)) {
|
||||||
|
DEBUG_PRINTF("AVX and XSAVE not supported\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check that AVX 512 registers are enabled by OS */
|
||||||
|
u64a xcr0 = xgetbv(0);
|
||||||
|
if ((xcr0 & CPUID_XCR0_AVX512) != CPUID_XCR0_AVX512) {
|
||||||
|
DEBUG_PRINTF("AVX512 registers not enabled\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ECX and EDX contain capability flags */
|
||||||
|
ecx = 0;
|
||||||
|
cpuid(7, 0, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
if (!(ebx & CPUID_AVX512F)) {
|
||||||
|
DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(ebx & CPUID_AVX512BW)) {
|
||||||
|
DEBUG_PRINTF("AVX512BW instructions not enabled\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ecx & CPUID_AVX512VBMI) {
|
||||||
|
DEBUG_PRINTF("AVX512VBMI instructions enabled\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
int check_ssse3(void) {
|
int check_ssse3(void) {
|
||||||
unsigned int eax, ebx, ecx, edx;
|
unsigned int eax, ebx, ecx, edx;
|
||||||
|
@ -170,6 +170,7 @@ find_vertices_in_cycles(const Graph &g) {
|
|||||||
assert(!comp.empty());
|
assert(!comp.empty());
|
||||||
if (comp.size() > 1) {
|
if (comp.size() > 1) {
|
||||||
insert(&rv, comp);
|
insert(&rv, comp);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
vertex_descriptor v = *comp.begin();
|
vertex_descriptor v = *comp.begin();
|
||||||
if (hasSelfLoop(v, g)) {
|
if (hasSelfLoop(v, g)) {
|
||||||
|
@ -70,8 +70,8 @@ class undirected_graph_edge_descriptor
|
|||||||
using base_vertex_type = typename base_graph_traits::vertex_descriptor;
|
using base_vertex_type = typename base_graph_traits::vertex_descriptor;
|
||||||
|
|
||||||
base_edge_type underlying_edge;
|
base_edge_type underlying_edge;
|
||||||
const base_graph_type *g;
|
const base_graph_type *g = nullptr;
|
||||||
bool reverse; // if true, reverse vertices in source() and target()
|
bool reverse = false; // if true, reverse vertices in source() and target()
|
||||||
|
|
||||||
inline std::pair<base_vertex_type, base_vertex_type>
|
inline std::pair<base_vertex_type, base_vertex_type>
|
||||||
canonical_edge() const {
|
canonical_edge() const {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2020, Intel Corporation
|
* Copyright (c) 2015-2021, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -138,6 +138,12 @@ m128 lshift64_m128(m128 a, unsigned b) {
|
|||||||
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
|
||||||
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
static really_inline m128 cast512to128(const m512 in) {
|
||||||
|
return _mm512_castsi512_si128(in);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline m128 set16x8(u8 c) {
|
static really_inline m128 set16x8(u8 c) {
|
||||||
return _mm_set1_epi8(c);
|
return _mm_set1_epi8(c);
|
||||||
}
|
}
|
||||||
@ -150,14 +156,6 @@ static really_inline u32 movd(const m128 in) {
|
|||||||
return _mm_cvtsi128_si32(in);
|
return _mm_cvtsi128_si32(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512)
|
|
||||||
static really_inline u32 movd512(const m512 in) {
|
|
||||||
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
|
||||||
// so we use 2-step convertions to work around.
|
|
||||||
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static really_inline u64a movq(const m128 in) {
|
static really_inline u64a movq(const m128 in) {
|
||||||
#if defined(ARCH_X86_64)
|
#if defined(ARCH_X86_64)
|
||||||
return _mm_cvtsi128_si64(in);
|
return _mm_cvtsi128_si64(in);
|
||||||
@ -168,6 +166,20 @@ static really_inline u64a movq(const m128 in) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
static really_inline u32 movd512(const m512 in) {
|
||||||
|
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
|
||||||
|
// so we use 2-step convertions to work around.
|
||||||
|
return _mm_cvtsi128_si32(_mm512_castsi512_si128(in));
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline u64a movq512(const m512 in) {
|
||||||
|
// NOTE: seems AVX512 doesn't support _mm512_cvtsi512_si64(in),
|
||||||
|
// so we use 2-step convertions to work around.
|
||||||
|
return movq(_mm512_castsi512_si128(in));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* another form of movq */
|
/* another form of movq */
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 load_m128_from_u64a(const u64a *p) {
|
m128 load_m128_from_u64a(const u64a *p) {
|
||||||
@ -211,6 +223,24 @@ static really_inline m128 or128(m128 a, m128 b) {
|
|||||||
return _mm_or_si128(a,b);
|
return _mm_or_si128(a,b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
static really_inline m512 expand128(m128 a) {
|
||||||
|
return _mm512_broadcast_i32x4(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline m512 expand256(m256 a) {
|
||||||
|
return _mm512_broadcast_i64x4(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline m512 expand384(m384 a) {
|
||||||
|
u64a *lo = (u64a*)&a.lo;
|
||||||
|
u64a *mid = (u64a*)&a.mid;
|
||||||
|
u64a *hi = (u64a*)&a.hi;
|
||||||
|
return _mm512_set_epi64(0ULL, 0ULL, hi[1], hi[0], mid[1], mid[0],
|
||||||
|
lo[1], lo[0]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static really_inline m128 andnot128(m128 a, m128 b) {
|
static really_inline m128 andnot128(m128 a, m128 b) {
|
||||||
return _mm_andnot_si128(a, b);
|
return _mm_andnot_si128(a, b);
|
||||||
}
|
}
|
||||||
@ -761,7 +791,7 @@ m128 movdq_lo(m256 x) {
|
|||||||
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
|
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
|
||||||
#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
|
#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
|
||||||
#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
|
#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
|
||||||
#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
|
#define extractlow64from256(a) movq(cast256to128(a))
|
||||||
#define extractlow32from256(a) movd(cast256to128(a))
|
#define extractlow32from256(a) movd(cast256to128(a))
|
||||||
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
|
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
|
||||||
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
|
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
|
||||||
@ -1000,6 +1030,11 @@ m512 set8x64(u64a a) {
|
|||||||
return _mm512_set1_epi64(a);
|
return _mm512_set1_epi64(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 set16x32(u32 a) {
|
||||||
|
return _mm512_set1_epi32(a);
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
|
m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0,
|
||||||
u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) {
|
u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) {
|
||||||
@ -1017,6 +1052,26 @@ static really_inline
|
|||||||
m512 set4x128(m128 a) {
|
m512 set4x128(m128 a) {
|
||||||
return _mm512_broadcast_i32x4(a);
|
return _mm512_broadcast_i32x4(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 sadd_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_adds_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 max_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_max_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 min_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_min_epu8(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 sub_u8_m512(m512 a, m512 b) {
|
||||||
|
return _mm512_sub_epi8(a, b);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -1204,6 +1259,22 @@ m512 loadu512(const void *ptr) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// unaligned store
|
||||||
|
static really_inline
|
||||||
|
void storeu512(void *ptr, m512 a) {
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
_mm512_storeu_si512((m512 *)ptr, a);
|
||||||
|
#elif defined(HAVE_AVX2)
|
||||||
|
storeu256(ptr, a.lo);
|
||||||
|
storeu256((char *)ptr + 32, a.hi);
|
||||||
|
#else
|
||||||
|
storeu128(ptr, a.lo.lo);
|
||||||
|
storeu128((char *)ptr + 16, a.lo.hi);
|
||||||
|
storeu128((char *)ptr + 32, a.hi.lo);
|
||||||
|
storeu128((char *)ptr + 48, a.hi.hi);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(HAVE_AVX512)
|
#if defined(HAVE_AVX512)
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 loadu_maskz_m512(__mmask64 k, const void *ptr) {
|
m512 loadu_maskz_m512(__mmask64 k, const void *ptr) {
|
||||||
@ -1215,10 +1286,20 @@ m512 loadu_mask_m512(m512 src, __mmask64 k, const void *ptr) {
|
|||||||
return _mm512_mask_loadu_epi8(src, k, ptr);
|
return _mm512_mask_loadu_epi8(src, k, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
void storeu_mask_m512(void *ptr, __mmask64 k, m512 a) {
|
||||||
|
_mm512_mask_storeu_epi8(ptr, k, a);
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m512 set_mask_m512(__mmask64 k) {
|
m512 set_mask_m512(__mmask64 k) {
|
||||||
return _mm512_movm_epi8(k);
|
return _mm512_movm_epi8(k);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m256 loadu_maskz_m256(__mmask32 k, const void *ptr) {
|
||||||
|
return _mm256_maskz_loadu_epi8(k, ptr);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// packed unaligned store of first N bytes
|
// packed unaligned store of first N bytes
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2017, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -50,6 +50,10 @@ bool target_t::can_run_on_code_built_for(const target_t &code_target) const {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!has_avx512vbmi() && code_target.has_avx512vbmi()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,6 +68,10 @@ bool target_t::has_avx512(void) const {
|
|||||||
return cpu_features & HS_CPU_FEATURES_AVX512;
|
return cpu_features & HS_CPU_FEATURES_AVX512;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool target_t::has_avx512vbmi(void) const {
|
||||||
|
return cpu_features & HS_CPU_FEATURES_AVX512VBMI;
|
||||||
|
}
|
||||||
|
|
||||||
bool target_t::is_atom_class(void) const {
|
bool target_t::is_atom_class(void) const {
|
||||||
return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM;
|
return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,6 +42,8 @@ struct target_t {
|
|||||||
|
|
||||||
bool has_avx512(void) const;
|
bool has_avx512(void) const;
|
||||||
|
|
||||||
|
bool has_avx512vbmi(void) const;
|
||||||
|
|
||||||
bool is_atom_class(void) const;
|
bool is_atom_class(void) const;
|
||||||
|
|
||||||
// This asks: can this target (the object) run on code that was built for
|
// This asks: can this target (the object) run on code that was built for
|
||||||
|
@ -133,7 +133,7 @@ public:
|
|||||||
: lit(&lit_in), idx(idx_in) {}
|
: lit(&lit_in), idx(idx_in) {}
|
||||||
|
|
||||||
const ue2_literal *lit = nullptr;
|
const ue2_literal *lit = nullptr;
|
||||||
size_t idx;
|
size_t idx = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2020, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -101,6 +101,18 @@
|
|||||||
#define or_m384(a, b) (or384(a, b))
|
#define or_m384(a, b) (or384(a, b))
|
||||||
#define or_m512(a, b) (or512(a, b))
|
#define or_m512(a, b) (or512(a, b))
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512VBMI)
|
||||||
|
#define expand_m128(a) (expand128(a))
|
||||||
|
#define expand_m256(a) (expand256(a))
|
||||||
|
#define expand_m384(a) (expand384(a))
|
||||||
|
#define expand_m512(a) (a)
|
||||||
|
|
||||||
|
#define shuffle_byte_m128(a, b) (pshufb_m512(b, a))
|
||||||
|
#define shuffle_byte_m256(a, b) (vpermb512(a, b))
|
||||||
|
#define shuffle_byte_m384(a, b) (vpermb512(a, b))
|
||||||
|
#define shuffle_byte_m512(a, b) (vpermb512(a, b))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define and_u8(a, b) ((a) & (b))
|
#define and_u8(a, b) ((a) & (b))
|
||||||
#define and_u32(a, b) ((a) & (b))
|
#define and_u32(a, b) ((a) & (b))
|
||||||
#define and_u64a(a, b) ((a) & (b))
|
#define and_u64a(a, b) ((a) & (b))
|
||||||
|
@ -58,7 +58,10 @@ void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
|
|||||||
}
|
}
|
||||||
auto internal_stream_index = stream_indices[stream_id];
|
auto internal_stream_index = stream_indices[stream_id];
|
||||||
|
|
||||||
assert(blob || bytes > 0);
|
if (!(blob && bytes > 0)) {
|
||||||
|
assert(0);
|
||||||
|
throw std::domain_error("Invalid blob or bytes from sqlite3.");
|
||||||
|
}
|
||||||
blocks.emplace_back(id, stream_id, internal_stream_index,
|
blocks.emplace_back(id, stream_id, internal_stream_index,
|
||||||
string(blob, blob + bytes));
|
string(blob, blob + bytes));
|
||||||
}
|
}
|
||||||
|
@ -88,6 +88,8 @@ public:
|
|||||||
|
|
||||||
virtual void printStats() const = 0;
|
virtual void printStats() const = 0;
|
||||||
|
|
||||||
|
virtual void printCsvStats() const = 0;
|
||||||
|
|
||||||
virtual void sqlStats(SqlDB &db) const = 0;
|
virtual void sqlStats(SqlDB &db) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -187,6 +187,16 @@ void EngineChimera::printStats() const {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EngineChimera::printCsvStats() const {
|
||||||
|
printf(",\"%s\"", compile_stats.signatures.c_str());
|
||||||
|
printf(",\"%zu\"", compile_stats.expressionCount);
|
||||||
|
printf(",\"0x%x\"", compile_stats.crc32);
|
||||||
|
printf(",\"%zu\"", compile_stats.compiledSize);
|
||||||
|
printf(",\"%zu\"", compile_stats.scratchSize);
|
||||||
|
printf(",\"%0.3Lf\"", compile_stats.compileSecs);
|
||||||
|
printf(",\"%u\"", compile_stats.peakMemorySize);
|
||||||
|
}
|
||||||
|
|
||||||
void EngineChimera::sqlStats(SqlDB &sqldb) const {
|
void EngineChimera::sqlStats(SqlDB &sqldb) const {
|
||||||
ostringstream crc;
|
ostringstream crc;
|
||||||
crc << "0x" << hex << compile_stats.crc32;
|
crc << "0x" << hex << compile_stats.crc32;
|
||||||
|
@ -89,6 +89,8 @@ public:
|
|||||||
|
|
||||||
void printStats() const;
|
void printStats() const;
|
||||||
|
|
||||||
|
void printCsvStats() const;
|
||||||
|
|
||||||
void sqlStats(SqlDB &db) const;
|
void sqlStats(SqlDB &db) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -276,6 +276,17 @@ void EngineHyperscan::printStats() const {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EngineHyperscan::printCsvStats() const {
|
||||||
|
printf(",\"%s\"", compile_stats.signatures.c_str());
|
||||||
|
printf(",\"%zu\"", compile_stats.expressionCount);
|
||||||
|
printf(",\"0x%x\"", compile_stats.crc32);
|
||||||
|
printf(",\"%zu\"", compile_stats.compiledSize);
|
||||||
|
printf(",\"%zu\"", compile_stats.streamSize);
|
||||||
|
printf(",\"%zu\"", compile_stats.scratchSize);
|
||||||
|
printf(",\"%0.3Lf\"", compile_stats.compileSecs);
|
||||||
|
printf(",\"%u\"", compile_stats.peakMemorySize);
|
||||||
|
}
|
||||||
|
|
||||||
void EngineHyperscan::sqlStats(SqlDB &sqldb) const {
|
void EngineHyperscan::sqlStats(SqlDB &sqldb) const {
|
||||||
ostringstream crc;
|
ostringstream crc;
|
||||||
crc << "0x" << hex << compile_stats.crc32;
|
crc << "0x" << hex << compile_stats.crc32;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user