Mirror of https://github.com/VectorCamp/vectorscan.git (synced 2025-06-28 16:41:01 +03:00)

Merge branch develop into master

Commit e3e0a0fab0

.gitignore (vendored) | 4
@@ -46,10 +46,6 @@ sqlite3
 src/config.h
 src/config.h.in
 src/hs_version.h
-src/fdr/fdr_autogen.c
-src/fdr/fdr_autogen_compiler.cpp
-src/fdr/teddy_autogen.c
-src/fdr/teddy_autogen_compiler.cpp
 src/parser/Parser.cpp

 # Generated PCRE files
CHANGELOG.md | 34
@@ -2,6 +2,40 @@
 This is a list of notable changes to Hyperscan, in reverse chronological order.

+## [4.2.0] 2016-05-31
+- Introduce an interpreter for many complex actions to replace the use of
+  internal reports within the core of Hyperscan (the "Rose" engine). This
+  improves scanning performance and reduces database size for many pattern
+  sets.
+- Many enhancements to the acceleration framework used by NFA and DFA engines,
+  including more flexible multibyte implementations and more AVX2 support. This
+  improves scanning performance for many pattern sets.
+- Improved prefiltering support for complex patterns containing very large
+  bounded repeats (`R{M,N}` with large `N`).
+- Improve scanning performance of pattern sets with a very large number of
+  EOD-anchored patterns.
+- Improve scanning performance of large pattern sets that use the
+  `HS_FLAG_SINGLEMATCH` flag.
+- Improve scanning performance of pattern sets that contain a single literal by
+  improving the "Noodle" literal matcher.
+- Small reductions in total stream state for many pattern sets.
+- Improve runtime detection of AVX2 support.
+- Disable -Werror for release builds, in order to behave better for packagers
+  and users with different compiler combinations than those that we test.
+- Improve support for building on Windows with MSVC 2015 (github issue #14).
+  Support for Hyperscan on Windows is still experimental.
+- Small updates to fix warnings identified by Coverity.
+- Remove Python codegen for the "FDR" and "Teddy" literal matchers. These are
+  now implemented directly in C code.
+- Remove the specialist "Sidecar" engine in favour of using our more general
+  repeat engines.
+- New API function: add the `hs_expression_ext_info()` function. This is a
+  variant of `hs_expression_info()` that can accept patterns with extended
+  parameters.
+- New API error value: add the `HS_SCRATCH_IN_USE` error, which is returned
+  when Hyperscan detects that a scratch region is already in use on entry to an
+  API function.
+
 ## [4.1.0] 2015-12-18
 - Update version of PCRE used by testing tools as a syntax and semantic
   reference to PCRE 8.38.
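As a usage sketch (not part of this commit), the new `hs_expression_ext_info()` call added in 4.2.0 can be exercised as below. The pattern, flags, and the 16-byte minimum offset are illustrative values chosen for the example:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <hs/hs.h>

    int main(void) {
        /* extended parameters: require matches to end at offset >= 16 */
        hs_expr_ext_t ext;
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_MIN_OFFSET;
        ext.min_offset = 16;

        hs_expr_info_t *info = NULL;
        hs_compile_error_t *err = NULL;
        if (hs_expression_ext_info("foo.*bar", HS_FLAG_DOTALL, &ext, &info,
                                   &err) != HS_SUCCESS) {
            fprintf(stderr, "error: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }
        printf("match width: %u..%u\n", info->min_width, info->max_width);
        free(info); /* info uses the misc allocator (malloc by default) */
        return 0;
    }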
CMakeLists.txt | 115
@@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.11)
 project (Hyperscan C CXX)

 set (HS_MAJOR_VERSION 4)
-set (HS_MINOR_VERSION 1)
+set (HS_MINOR_VERSION 2)
 set (HS_PATCH_VERSION 0)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
@@ -75,7 +75,7 @@ if(NOT Boost_FOUND)
     set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
     find_package(Boost ${BOOST_MINVERSION})
     if(NOT Boost_FOUND)
-        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system pacakges if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
+        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
     endif()
 endif()
@@ -115,7 +115,9 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
 endif()

 #for config
-set(HS_OPTIMIZE OPTIMISE)
+if (OPTIMISE)
+    set(HS_OPTIMIZE ON)
+endif()

 CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)
@@ -171,8 +173,14 @@
 endif()

 # set compiler flags - more are tested and added later
-set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Werror")
-set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Werror -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")
+set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual")
+set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")
+if (NOT RELEASE_BUILD)
+    # -Werror is most useful during development, don't potentially break
+    # release builds
+    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
+    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
+endif()

 if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
     message(STATUS "Building for current host CPU")
@@ -229,6 +237,9 @@ if (RELEASE_BUILD)
     endif()
 endif()

+# ensure we are building for the right target arch
+include (${CMAKE_MODULE_PATH}/arch.cmake)
+
 # testing a builtin takes a little more work
 CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
 CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
@@ -332,7 +343,7 @@ endif()
 add_subdirectory(util)
 add_subdirectory(unit)
 add_subdirectory(doc/dev-reference)
-if (EXISTS ${CMAKE_SOURCE_DIR}/tools)
+if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
     add_subdirectory(tools)
 endif()
@@ -340,8 +351,15 @@ endif()
 configure_file(${CMAKE_MODULE_PATH}/config.h.in ${PROJECT_BINARY_DIR}/config.h)
 configure_file(src/hs_version.h.in ${PROJECT_BINARY_DIR}/hs_version.h)

-if (PKG_CONFIG_FOUND)
-    # we really only need to do this if we have pkg-config
+if (NOT WIN32)
+    # expand out library names for pkgconfig static link info
+    foreach (LIB ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES})
+        # this is fragile, but protects us from toolchain specific files
+        if (NOT EXISTS ${LIB})
+            set(PRIVATE_LIBS "${PRIVATE_LIBS} -l${LIB}")
+        endif()
+    endforeach()
+
     configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars
     install(FILES ${CMAKE_BINARY_DIR}/libhs.pc
         DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig")
@@ -352,11 +370,6 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

-# include the autogen targets
-add_subdirectory(src/fdr)
-
-include_directories(${PROJECT_BINARY_DIR}/src/fdr)
-
 if(NOT WIN32)
     set(RAGEL_C_FLAGS "-Wno-unused")
 endif()
@@ -376,14 +389,13 @@ SET(hs_HEADERS
 )
 install(FILES ${hs_HEADERS} DESTINATION include/hs)

-set(fdr_autogen_targets autogen_runtime autogen_teddy_runtime)
-
 set (hs_exec_SRCS
     ${hs_HEADERS}
     src/hs_version.h
     src/ue2common.h
     src/alloc.c
     src/allocator.h
     src/report.h
     src/runtime.c
     src/fdr/fdr.c
     src/fdr/fdr.h
@@ -394,7 +406,9 @@ set (hs_exec_SRCS
     src/fdr/flood_runtime.h
     src/fdr/fdr_loadval.h
     src/fdr/teddy.c
     src/fdr/teddy.h
     src/fdr/teddy_internal.h
     src/fdr/teddy_runtime_common.h
     src/hwlm/hwlm.c
     src/hwlm/hwlm.h
     src/hwlm/hwlm_internal.h
@@ -437,6 +451,25 @@ set (hs_exec_SRCS
     src/nfa/mpv.h
     src/nfa/mpv.c
     src/nfa/mpv_internal.h
+    src/nfa/multiaccel_common.h
+    src/nfa/multiaccel_doubleshift.h
+    src/nfa/multiaccel_doubleshiftgrab.h
+    src/nfa/multiaccel_long.h
+    src/nfa/multiaccel_longgrab.h
+    src/nfa/multiaccel_shift.h
+    src/nfa/multiaccel_shiftgrab.h
+    src/nfa/multishufti.c
+    src/nfa/multishufti_avx2.h
+    src/nfa/multishufti_sse.h
+    src/nfa/multishufti.h
+    src/nfa/multitruffle.c
+    src/nfa/multitruffle_avx2.h
+    src/nfa/multitruffle_sse.h
+    src/nfa/multitruffle.h
+    src/nfa/multivermicelli.c
+    src/nfa/multivermicelli.h
+    src/nfa/multivermicelli_sse.h
+    src/nfa/multivermicelli_avx2.h
     src/nfa/nfa_api.h
     src/nfa/nfa_api_dispatch.c
     src/nfa/nfa_internal.h
@@ -444,20 +477,17 @@ set (hs_exec_SRCS
     src/nfa/repeat.c
     src/nfa/repeat.h
     src/nfa/repeat_internal.h
+    src/nfa/shufti_common.h
     src/nfa/shufti.c
     src/nfa/shufti.h
+    src/nfa/truffle_common.h
     src/nfa/truffle.c
     src/nfa/truffle.h
     src/nfa/vermicelli.h
     src/nfa/vermicelli_run.h
     src/nfa/vermicelli_sse.h
-    src/sidecar/sidecar.c
-    src/sidecar/sidecar.h
-    src/sidecar/sidecar_generic.h
-    src/sidecar/sidecar_internal.h
-    src/sidecar/sidecar_shufti.c
-    src/sidecar/sidecar_shufti.h
     src/som/som.h
+    src/som/som_operation.h
     src/som/som_runtime.h
     src/som/som_runtime.c
     src/som/som_stream.c
@@ -473,10 +503,11 @@ set (hs_exec_SRCS
     src/rose/match.h
     src/rose/match.c
     src/rose/miracle.h
+    src/rose/program_runtime.h
     src/rose/runtime.h
-    src/rose/rose_sidecar_runtime.h
     src/rose/rose.h
     src/rose/rose_internal.h
+    src/rose/rose_program.h
     src/rose/rose_types.h
     src/rose/rose_common.h
     src/util/bitutils.h
@@ -484,7 +515,6 @@ set (hs_exec_SRCS
     src/util/fatbit.h
     src/util/fatbit.c
     src/util/join.h
-    src/util/masked_move.c
     src/util/masked_move.h
     src/util/multibit.h
     src/util/multibit_internal.h
@@ -498,6 +528,7 @@ set (hs_exec_SRCS
     src/util/shuffle_ssse3.h
     src/util/simd_utils.h
     src/util/simd_utils_ssse3.h
+    src/util/simd_utils_ssse3.c
     src/util/state_compress.h
     src/util/state_compress.c
     src/util/unaligned.h
@@ -510,6 +541,14 @@ set (hs_exec_SRCS
     src/database.h
 )

+if (HAVE_AVX2)
+    set (hs_exec_SRCS
+        ${hs_exec_SRCS}
+        src/fdr/teddy_avx2.c
+        src/util/masked_move.c
+    )
+endif ()
+

 SET (hs_SRCS
     ${hs_HEADERS}
@@ -574,6 +613,8 @@ SET (hs_SRCS
     src/nfa/mcclellan_internal.h
     src/nfa/mcclellancompile.cpp
     src/nfa/mcclellancompile.h
+    src/nfa/mcclellancompile_accel.cpp
+    src/nfa/mcclellancompile_accel.h
     src/nfa/mcclellancompile_util.cpp
     src/nfa/mcclellancompile_util.h
     src/nfa/limex_compile.cpp
@@ -583,6 +624,8 @@ SET (hs_SRCS
     src/nfa/mpv_internal.h
     src/nfa/mpvcompile.cpp
     src/nfa/mpvcompile.h
+    src/nfa/multiaccel_compilehelper.cpp
+    src/nfa/multiaccel_compilehelper.h
     src/nfa/nfa_api.h
     src/nfa/nfa_api_queue.h
     src/nfa/nfa_api_util.h
@@ -762,8 +805,6 @@ SET (hs_SRCS
     src/parser/unsupported.h
     src/parser/utf8_validate.h
     src/parser/utf8_validate.cpp
-    src/sidecar/sidecar_compile.cpp
-    src/sidecar/sidecar_compile.h
     src/smallwrite/smallwrite_build.cpp
     src/smallwrite/smallwrite_build.h
     src/smallwrite/smallwrite_internal.h
@@ -771,6 +812,7 @@ SET (hs_SRCS
     src/som/slot_manager.h
     src/som/slot_manager_internal.h
     src/som/som.h
+    src/som/som_operation.h
     src/rose/rose_build.h
     src/rose/rose_build_add.cpp
     src/rose/rose_build_add_internal.h
@@ -778,6 +820,8 @@ SET (hs_SRCS
     src/rose/rose_build_anchored.cpp
     src/rose/rose_build_anchored.h
     src/rose/rose_build_bytecode.cpp
+    src/rose/rose_build_castle.h
+    src/rose/rose_build_castle.cpp
     src/rose/rose_build_compile.cpp
     src/rose/rose_build_convert.cpp
     src/rose/rose_build_convert.h
@@ -786,6 +830,8 @@ SET (hs_SRCS
     src/rose/rose_build_infix.h
     src/rose/rose_build_lookaround.cpp
     src/rose/rose_build_lookaround.h
+    src/rose/rose_build_matchers.cpp
+    src/rose/rose_build_matchers.h
     src/rose/rose_build_merge.cpp
     src/rose/rose_build_merge.h
     src/rose/rose_build_misc.cpp
@@ -799,6 +845,7 @@ SET (hs_SRCS
     src/rose/rose_in_graph.h
     src/rose/rose_in_util.cpp
     src/rose/rose_in_util.h
+    src/util/accel_scheme.h
     src/util/alloc.cpp
     src/util/alloc.h
     src/util/bitfield.h
@@ -820,7 +867,6 @@ SET (hs_SRCS
     src/util/dump_mask.cpp
     src/util/dump_mask.h
     src/util/graph.h
-    src/util/internal_report.h
     src/util/multibit_build.cpp
     src/util/multibit_build.h
     src/util/order_check.h
@@ -828,7 +874,6 @@ SET (hs_SRCS
    src/util/partitioned_set.h
    src/util/popcount.h
    src/util/queue_index_factory.h
    src/util/report.cpp
    src/util/report.h
    src/util/report_manager.cpp
    src/util/report_manager.h
@@ -874,8 +919,6 @@ set(hs_dump_SRCS
     src/parser/dump.cpp
     src/parser/dump.h
     src/parser/position_dump.h
-    src/sidecar/sidecar_dump.cpp
-    src/sidecar/sidecar_dump.h
     src/smallwrite/smallwrite_dump.cpp
     src/smallwrite/smallwrite_dump.h
     src/som/slot_manager_dump.cpp
@@ -901,11 +944,9 @@ set (LIB_VERSION ${HS_VERSION})
 set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})

 add_library(hs_exec OBJECT ${hs_exec_SRCS})
-add_dependencies(hs_exec ${fdr_autogen_targets})

 if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
     add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
-    add_dependencies(hs_exec_shared ${fdr_autogen_targets})
     set_target_properties(hs_exec_shared PROPERTIES
         POSITION_INDEPENDENT_CODE TRUE)
 endif()
@@ -929,14 +970,16 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
         OUTPUT_NAME hs_runtime
         MACOSX_RPATH ON
         LINKER_LANGUAGE C)
-    install(TARGETS hs_runtime_shared DESTINATION lib)
+    install(TARGETS hs_runtime_shared
+        RUNTIME DESTINATION bin
+        ARCHIVE DESTINATION lib
+        LIBRARY DESTINATION lib)
 endif()

 # we want the static lib for testing
 add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>)

 add_dependencies(hs ragel_Parser)
-add_dependencies(hs autogen_compiler autogen_teddy_compiler)

 if (NOT BUILD_SHARED_LIBS)
     install(TARGETS hs DESTINATION lib)
@@ -945,13 +988,15 @@ endif()
 if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
     add_library(hs_shared SHARED ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
     add_dependencies(hs_shared ragel_Parser)
-    add_dependencies(hs_shared autogen_compiler autogen_teddy_compiler)
     set_target_properties(hs_shared PROPERTIES
         OUTPUT_NAME hs
         VERSION ${LIB_VERSION}
         SOVERSION ${LIB_SOVERSION}
         MACOSX_RPATH ON)
-    install(TARGETS hs_shared DESTINATION lib)
+    install(TARGETS hs_shared
+        RUNTIME DESTINATION bin
+        ARCHIVE DESTINATION lib
+        LIBRARY DESTINATION lib)
 endif()

 if(NOT WIN32)
cmake/arch.cmake (new file) | 42
@@ -0,0 +1,42 @@
# detect architecture features
#
# must be called after determining where compiler intrinsics are defined

if (HAVE_C_X86INTRIN_H)
    set (INTRIN_INC_H "x86intrin.h")
elseif (HAVE_C_INTRIN_H)
    set (INTRIN_INC_H "intrin.h")
else ()
    message (FATAL_ERROR "No intrinsics header found")
endif ()


set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
    __m128i a = _mm_set1_epi8(1);
    (void)_mm_shuffle_epi8(a, a);
}" HAVE_SSSE3)

if (NOT HAVE_SSSE3)
    message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
endif ()

# now look for AVX2
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
#if !defined(__AVX2__)
#error no avx2
#endif

int main(){
    __m256i z = _mm256_setzero_si256();
    (void)_mm256_xor_si256(z, z);
}" HAVE_AVX2)

if (NOT HAVE_AVX2)
    message(STATUS "Building without AVX2 support")
endif ()

unset (CMAKE_REQUIRED_FLAGS)
unset (INTRIN_INC_H)
cmake/config.h.in

@@ -15,9 +15,6 @@
 /* internal build, switch on dump support. */
 #cmakedefine DUMP_SUPPORT

-/* Build tools with threading support */
-#cmakedefine ENABLE_TOOLS_THREADS
-
 /* Define to 1 if `backtrace' works. */
 #cmakedefine HAVE_BACKTRACE

@@ -39,10 +36,6 @@
 /* C compiler has intrin.h */
 #cmakedefine HAVE_C_INTRIN_H

-/* Define to 1 if you have the declaration of `pthread_barrier_init', and to 0
-   if you don't. */
-#cmakedefine HAVE_DECL_PTHREAD_BARRIER_INIT
-
 /* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
    0 if you don't. */
 #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP

@@ -59,9 +52,6 @@
 /* Define to 1 if `posix_memalign' works. */
 #cmakedefine HAVE_POSIX_MEMALIGN

-/* Define to 1 if you have the <pthread.h> header file. */
-#cmakedefine HAVE_PTHREAD_H
-
 /* Define to 1 if you have the `setrlimit' function. */
 #cmakedefine HAVE_SETRLIMIT
doc/dev-reference/compilation.rst

@@ -119,12 +119,21 @@ The following regex constructs are supported by Hyperscan:
 * The anchors :regexp:`^`, :regexp:`$`, :regexp:`\\A`, :regexp:`\\Z` and
   :regexp:`\\z`.

-* Option modifiers for:
+* Option modifiers:

-  * Case-sensitivity: :regexp:`(?i)` and :regexp:`(?-i)`
-  * Multi-line: :regexp:`(?m)` and :regexp:`(?-m)`
-  * Dot-all: :regexp:`(?s)` and :regexp:`(?-s)`
-  * Extended syntax: :regexp:`(?s)` and :regexp:`(?-s)`
+  These allow behaviour to be switched on (with :regexp:`(?<option>)`) and off
+  (with :regexp:`(?-<option>)`) for a sub-pattern. The supported options are:
+
+  * :regexp:`i`: Case-insensitive matching, as per
+    :c:member:`HS_FLAG_CASELESS`.
+  * :regexp:`m`: Multi-line matching, as per :c:member:`HS_FLAG_MULTILINE`.
+  * :regexp:`s`: Interpret ``.`` as "any character", as per
+    :c:member:`HS_FLAG_DOTALL`.
+  * :regexp:`x`: Extended syntax, which will ignore most whitespace in the
+    pattern for compatibility with libpcre's ``PCRE_EXTENDED`` option.
+
+  For example, the expression :regexp:`foo(?i)bar(?-i)baz` will switch on
+  case-insensitive matching *only* for the ``bar`` portion of the match.

 * The :regexp:`\\b` and :regexp:`\\B` zero-width assertions (word boundary and
   'not word boundary', respectively).
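The modifier semantics documented above can be checked with a small block-mode program. This sketch is not part of the diff, and the input strings in the comment are illustrative:

    #include <stdio.h>
    #include <hs/hs.h>

    int main(void) {
        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;
        /* (?i)/(?-i) make only the "bar" portion caseless */
        if (hs_compile("foo(?i)bar(?-i)baz", 0, HS_MODE_BLOCK, NULL, &db,
                       &err) != HS_SUCCESS) {
            fprintf(stderr, "compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return 1;
        }
        /* "fooBARbaz" would match; "FOObarbaz" would not */
        hs_free_database(db);
        return 0;
    }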
doc/dev-reference/conf.py

@@ -44,7 +44,7 @@ master_doc = 'index'

 # General information about the project.
 project = u'Hyperscan'
-copyright = u'2015, Intel Corporation'
+copyright = u'2015-2016, Intel Corporation'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
doc/dev-reference/copyright.rst

@@ -30,4 +30,4 @@ and/or other countries.

 \*Other names and brands may be claimed as the property of others.

-Copyright |copy| 2015, Intel Corporation. All rights reserved.
+Copyright |copy| 2015-2016, Intel Corporation. All rights reserved.
doc/dev-reference/index.rst

@@ -15,6 +15,7 @@ Hyperscan |version| Developer's Reference Guide
    getting_started
    compilation
    runtime
+   serialization
    performance
    api_constants
    api_files
doc/dev-reference/runtime.rst

@@ -124,13 +124,19 @@ databases, only a single scratch region is necessary: in this case, calling
 will ensure that the scratch space is large enough to support scanning against
 any of the given databases.

-Importantly, only one such space is required per thread and can (and indeed
-should) be allocated before data scanning is to commence. In a scenario where a
-set of expressions are compiled by a single "master" thread and data will be
-scanned by multiple "worker" threads, the convenience function
-:c:func:`hs_clone_scratch` allows multiple copies of an existing scratch space
-to be made for each thread (rather than forcing the caller to pass all the
-compiled databases through :c:func:`hs_alloc_scratch` multiple times).
+While the Hyperscan library is re-entrant, the use of scratch spaces is not.
+For example, if by design it is deemed necessary to run recursive or nested
+scanning (say, from the match callback function), then an additional scratch
+space is required for that context.
+
+In the absence of recursive scanning, only one such space is required per thread
+and can (and indeed should) be allocated before data scanning is to commence.
+
+In a scenario where a set of expressions are compiled by a single "master"
+thread and data will be scanned by multiple "worker" threads, the convenience
+function :c:func:`hs_clone_scratch` allows multiple copies of an existing
+scratch space to be made for each thread (rather than forcing the caller to pass
+all the compiled databases through :c:func:`hs_alloc_scratch` multiple times).

 For example:

@@ -163,14 +169,6 @@ For example:
     /* Now two threads can both scan against database db,
        each with its own scratch space. */

-While the Hyperscan library is re-entrant, the use of scratch spaces is not.
-For example, if by design it is deemed necessary to run recursive or nested
-scanning (say, from the match callback function), then an additional scratch
-space is required for that context.
-
 The easiest way to achieve this is to build up a single scratch space as a
 prototype, then clone it for each context:
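(The documentation's code block at this point is not reproduced in this diff view. What follows is a minimal sketch of the prototype-then-clone pattern it describes; the helper name and the two-database setup are illustrative, not part of the documentation.)

    #include <hs/hs.h>

    /* Build one scratch prototype sized for every database, then clone a
     * private copy for each scanning context (e.g. each worker thread). */
    static hs_error_t make_scratches(const hs_database_t *db1,
                                     const hs_database_t *db2,
                                     hs_scratch_t **out, unsigned n) {
        hs_scratch_t *proto = NULL;
        hs_error_t rc = hs_alloc_scratch(db1, &proto); /* sized for db1 */
        if (rc == HS_SUCCESS) {
            rc = hs_alloc_scratch(db2, &proto); /* grown to also cover db2 */
        }
        for (unsigned i = 0; rc == HS_SUCCESS && i < n; i++) {
            rc = hs_clone_scratch(proto, &out[i]);
        }
        hs_free_scratch(proto); /* contexts keep their clones */
        return rc;
    }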
*****************
Custom Allocators
*****************
doc/dev-reference/serialization.rst (new file) | 67
@@ -0,0 +1,67 @@
.. _serialization:

#############
Serialization
#############

For some applications, compiling Hyperscan pattern databases immediately prior
to use is not an appropriate design. Some users may wish to:

* Compile pattern databases on a different host;

* Persist compiled databases to storage and only re-compile pattern databases
  when the patterns change;

* Control the region of memory in which the compiled database is located.

Hyperscan pattern databases are not completely flat in memory: they contain
pointers and have specific alignment requirements. Therefore, they cannot be
copied (or otherwise relocated) directly. To enable these use cases, Hyperscan
provides functionality for serializing and deserializing compiled pattern
databases.

The API provides the following functions:

#. :c:func:`hs_serialize_database`: serializes a pattern database into a
   flat relocatable buffer of bytes.

#. :c:func:`hs_deserialize_database`: reconstructs a newly allocated pattern
   database from the output of :c:func:`hs_serialize_database`.

#. :c:func:`hs_deserialize_database_at`: reconstructs a pattern
   database at a given memory location from the output of
   :c:func:`hs_serialize_database`.

#. :c:func:`hs_serialized_database_size`: given a serialized pattern database,
   returns the size of the memory block required by the database when
   deserialized.

#. :c:func:`hs_serialized_database_info`: given a serialized pattern database,
   returns a string containing information about the database. This call is
   analogous to :c:func:`hs_database_info`.

.. note:: Hyperscan performs both version and platform compatibility checks
   upon deserialization. The :c:func:`hs_deserialize_database` and
   :c:func:`hs_deserialize_database_at` functions will only permit the
   deserialization of databases compiled with (a) the same version of Hyperscan
   and (b) platform features supported by the current host platform. See
   :ref:`instr_specialization` for more information on platform specialization.

===================
The Runtime Library
===================

The main Hyperscan library (``libhs``) contains both the compiler and runtime
portions of the library. This means that in order to support the Hyperscan
compiler, which is written in C++, it requires C++ linkage and has a
dependency on the C++ standard library.

Many embedded applications require only the scanning ("runtime") portion of the
Hyperscan library. In these cases, pattern compilation generally takes place on
another host, and serialized pattern databases are delivered to the application
for use.

To support these applications without requiring the C++ dependency, a
runtime-only version of the Hyperscan library, called ``libhs_runtime``, is also
distributed. This library does not depend on the C++ standard library and
provides all Hyperscan functions other than those used to compile databases.
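The serialization functions introduced by this file combine into a round trip roughly as follows. This sketch (including the helper name) is illustrative rather than part of the documentation:

    #include <stdlib.h>
    #include <hs/hs.h>

    /* Serialize a compiled database to a flat buffer (e.g. on a build host)
     * and reconstruct it, as a scanning host would after loading the bytes. */
    static hs_database_t *round_trip(const hs_database_t *db) {
        char *bytes = NULL;
        size_t length = 0;
        if (hs_serialize_database(db, &bytes, &length) != HS_SUCCESS) {
            return NULL;
        }
        hs_database_t *copy = NULL;
        if (hs_deserialize_database(bytes, length, &copy) != HS_SUCCESS) {
            copy = NULL; /* version/platform compatibility checks failed */
        }
        free(bytes); /* buffer uses the misc allocator (malloc by default) */
        return copy;
    }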
libhs.pc.in

@@ -7,4 +7,5 @@ Name: libhs
 Description: Intel(R) Hyperscan Library
 Version: @HS_VERSION@
 Libs: -L${libdir} -lhs
+Libs.private: @PRIVATE_LIBS@
 Cflags: -I${includedir}/hs
src/fdr/CMakeLists.txt (deleted)

@@ -1,39 +0,0 @@
# The set of rules and other nastiness for generating FDR/Teddy source

# we need to add these as explicit dependencies
set(AUTOGEN_PY_FILES
    arch.py
    autogen.py
    autogen_utils.py
    base_autogen.py
    fdr_autogen.py
    teddy_autogen.py
)

function(fdr_autogen type out)
    add_custom_command (
        COMMENT "AUTOGEN ${out}"
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        DEPENDS ${AUTOGEN_PY_FILES}
    )
    add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
endfunction(fdr_autogen)

#now build the functions
fdr_autogen(runtime fdr_autogen.c)
fdr_autogen(compiler fdr_autogen_compiler.cpp)
fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)

set(fdr_GENERATED_SRC
    ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen.c
    ${PROJECT_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
    ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen.c
    ${PROJECT_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
    PARENT_SCOPE)

set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
src/fdr/arch.py (deleted)

@@ -1,58 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import autogen_utils

# wrapper for architectures

class Arch:
    def __init__(self, name, extensions = []):
        self.name = name
        self.extensions = extensions
        self.target = None

    def get_guard(self):
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if (self.guard_list == []):
            return "#if 1"

        return "#if " + " && ".join(self.guard_list)

class X86Arch(Arch):
    def __init__(self, name, extensions = []):
        Arch.__init__(self, name, extensions)
        self.guard_list = [ ]
        self.target = "0"

        if "AVX2" in extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += [ "defined(__AVX2__)" ]


arch_x86_64 = X86Arch("x86_64", extensions = [ ])
arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions = [ "AVX2" ])
src/fdr/autogen.py (deleted)

@@ -1,154 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
from autogen_utils import *
from fdr_autogen import *
from teddy_autogen import *
from arch import *

# FDR setup

# these are either produced - if the guard succeeds, or #defined to zeroes.
# either the function or the zero is fine in our array of function pointers

def produce_fdr_runtimes(l):
    for m in l:
        m.produce_code()

def produce_fdr_compiles(l):
    print "void getFdrDescriptions(vector<FDREngineDescription> *out) {"
    print " static const FDREngineDef defns[] = {"
    for m in l:
        m.produce_compile_call()
    print " };"
    print " out->clear();"
    print " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {"
    print " out->push_back(FDREngineDescription(defns[i]));"
    print " }"
    print "}"

def build_fdr_matchers():
    all_matchers = [ ]
    strides = [ 1, 2, 4 ]

    common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }
    for s in strides:
        all_matchers += [ M3(stride = s, **common) ]

    return all_matchers

# teddy setup

def build_teddy_matchers():
    all_matchers = [ ]

    # AVX2
    all_matchers += [ MTFast(arch = arch_x86_64_avx2, packed = False) ]
    all_matchers += [ MTFast(arch = arch_x86_64_avx2, packed = True) ]
    for n_msk in range(1, 5):
        all_matchers += [ MTFat(arch = arch_x86_64_avx2, packed = False, num_masks = n_msk, num_buckets = 16) ]
        all_matchers += [ MTFat(arch = arch_x86_64_avx2, packed = True, num_masks = n_msk, num_buckets = 16) ]

    # SSE/SSE2/SSSE3
    for n_msk in range(1, 5):
        all_matchers += [ MT(arch = arch_x86_64, packed = False, num_masks = n_msk, num_buckets = 8) ]
        all_matchers += [ MT(arch = arch_x86_64, packed = True, num_masks = n_msk, num_buckets = 8) ]

    return all_matchers

def produce_teddy_compiles(l):
    print "void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {"
    print " static const TeddyEngineDef defns[] = {"
    for m in l:
        m.produce_compile_call()
    print " };"
    print " out->clear();"
    print " for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {"
    print " out->push_back(TeddyEngineDescription(defns[i]));"
    print " }"
    print "}"

# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
# are linked. So we either generate the function or we don't - then at the point of the
# header in fdr_autogen.c we either generate the header or we #define the zero.

def produce_teddy_runtimes(l):
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        print m.produce_header(visible = True, header_only = True)
        m.close_guard()

    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()

# see produce_teddy_runtimes() comment for the rationale

def produce_teddy_headers(l):
    for m in l:
        m.produce_guard()
        print m.produce_header(visible = True, header_only = True)
        m.produce_zero_alternative()

# general utilities

def make_fdr_function_pointers(matcher_list):
    print """
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
"""
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print all_funcs
    print """
};
"""

def assign_ids(matcher_list, next_id):
    for m in matcher_list:
        m.id = next_id
        next_id += 1
    return next_id

# Main entry point

m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)
if sys.argv[1] == "compiler":
    produce_fdr_compiles(m)
elif sys.argv[1] == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif sys.argv[1] == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif sys.argv[1] == "teddy_compiler":
    produce_teddy_compiles(tm)
src/fdr/autogen_utils.py (deleted)

@@ -1,285 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

def fail_out(msg = ""):
    print >>sys.stderr, "Internal failure in autogen.py: " + msg
    sys.exit(1)

class IntegerType:
    def __init__(self, size):
        self.size = size

    def get_name(self):
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        return self.size / 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        return (1 << n) - 1

    def highbits(self, n):
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    # code is:
    # "normal" (always between buf and len) - the default
    # "aligned" (means normal + aligned to a natural boundary)
    # "cautious_forward" (means may go off the end of buf+len)
    # "cautious_backwards" (means may go off the start of buf)
    # "cautious_everywhere" (means may go off both)

    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        if code is "normal":
            return "lv_%s(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi)
        elif code is "aligned":
            if self.size is 8:
                fail_out("no aligned byte loads")
            return "lv_%s_a(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi)
        elif code is "cautious_forward":
            return "lv_%s_cf(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi)
        elif code is "cautious_backward":
            return "lv_%s_cb(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi)
        elif code is "cautious_everywhere":
            return "lv_%s_ce(%s + %d, %s, %s)" % (self.get_name(), base_string, offset, bounds_lo, bounds_hi)


class SIMDIntegerType(IntegerType):
    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        if (n <= 32):
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif (32 < n <= 64):
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    def bit_extract_expr(self, expr_string, low, high, flip):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")

        # should check that n is divisible by 8
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            return "_mm_slli_si128(%s, %s)" % (expr_string, n / 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, -n / 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        return self.shift_expr("ones128()", -(128 - n))

def getRequiredType(bits):
    if bits == 128:
        return SIMDIntegerType(bits)
    for b in [ 8, 16, 32, 64]:
        if (bits <= b):
            return IntegerType(b)
    return None

class IntegerVariable:
    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        if initialization_string:
            return "%s %s = %s;" % (self.type.get_name(), self.name, initialization_string)
        else:
            return "%s %s;" % (self.type.get_name(), self.name)


class Step:
    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []
        self.latest = None
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        indent = " " * (self.offset*2 + self.matcher.default_body_indent)
        s = "\n".join( [ indent + line for line in self.val.split("\n")] )
        if self.latest:
            s += " // " + str(self.debug_step) + " L" + str(self.latency) + " LTST:%d" % self.latest
        if self.dependency_list:
            s += " Derps: "
            for (d,l) in self.dependency_list:
                s += "%d/%d " % (d.debug_step,l)
        return s

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        if anti_dependency or output_dependency:
            self.dependency_list += [ (step, 1) ]
        else:
            self.dependency_list += [ (step, step.latency) ]

    def nv(self, type, var_name):
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        return self.context.get_var(self, var_name, reader = reader, writer = writer)

# utility steps, generic

class LabelStep(Step):
    def __init__(self, context, offset = 0, label_prefix = "off"):
        Step.__init__(self, context, offset)
        self.val = "%s%d: UNUSED;" % (label_prefix, offset)

class OpenScopeStep(Step):
    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "{"

class CloseScopeStep(Step):
    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "}"


class CodeGenContext:
    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {} # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        if reader:
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            if self.var_readers.has_key(var_name):
                for reader in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(reader, anti_dependency = True)
                self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        self.steps += [ step ]
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        for f in finals:
            f.latest = f.latency
        worklist = finals
        while worklist:
            current = worklist[0]
            worklist = worklist[1:]
            for (dep, lat) in current.dependency_list:
                if dep.latest is None or dep.latest < (current.latest + dep.latency):
                    dep.latest = current.latest + lat
                    if dep not in worklist:
                        worklist += [ dep ]
        self.steps.sort(reverse = True, key = lambda s : s.latest)
        return "\n".join( [ s.emit() for s in self.steps ] )
src/fdr/base_autogen.py (deleted)

@@ -1,167 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
from autogen_utils import *
from base_autogen import *
from string import Template

class MatcherBase:

    def __init__(self):
        pass

    def get_name(self):
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        s = ""
        if not visible:
            s += "static never_inline"
        s += """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
                UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        if header_only:
            s += ";"
        else:
            s += "{"
        s += "\n"
        return s

    def produce_guard(self):
        print self.arch.get_guard()

    def produce_zero_alternative(self):
        print """
#else
#define %s 0
#endif
""" % self.get_name()

    # trivial function for documentation/modularity
    def close_guard(self):
        print "#endif"

    def produce_common_declarations(self):
        return """
    const u8 * buf = a->buf;
    const size_t len = a->len;
    const u8 * ptr = buf + a->start_offset;
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t * control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    const u8 * tryFloodDetect = a->firstFloodDetect;
    UNUSED u32 bit, bitRem, confSplit, idx;
    u32 byte, cf;
    const struct FDRConfirm *fdrc;
    u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
    *a->groups = controlVal;
    return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        return """
        if (P0(ptr > tryFloodDetect)) {
            tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
            if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
                *a->groups = controlVal;
                return HWLM_TERMINATED;
            }
        }
"""

    def produce_footer(self):
        return """
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        if cautious:
            caution_string = "VECTORING"
        else:
            caution_string = "NOT_CAUTIOUS"
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)
        if enable_confirmless:
            quick_check_string = """
            if (!fdrc->mult) {
                u32 id = fdrc->nBitsOrSoleID;
                if ((last_match == id) && (fdrc->flags & NoRepeat))
                    continue;
                last_match = id;
                controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
                continue;
            } """
        else:
            quick_check_string = ""
        if do_bailout:
            bailout_string = """
                if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""
        else:
            bailout_string = ""

        return Template("""
if (P0(!!$CONFVAR)) {
    do {
        bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
        byte = bit / $NUM_BUCKETS + $OFFSET;
        bitRem = bit % $NUM_BUCKETS;
        $BAILOUT_STRING
        confSplit = *(ptr+byte) & $SPLIT_MASK;
        idx = confSplit * $NUM_BUCKETS + bitRem;
        cf = confBase[idx];
        if (!cf)
            continue;
        fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
        if (!(fdrc->groups & *control))
            continue;
        $QUICK_CHECK_STRING
        confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
    } while(P0(!!$CONFVAR));
    if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
        *a->groups = controlVal;
        return HWLM_TERMINATED;
    }
}""").substitute(CONFVAR = conf_var_name,
                 CONFVAR_SIZE = conf_var_size,
                 NUM_BUCKETS = self.num_buckets,
                 OFFSET = offset,
                 SPLIT_MASK = conf_split_mask,
                 QUICK_CHECK_STRING = quick_check_string,
                 BAILOUT_STRING = bailout_string,
                 CAUTION_STRING = caution_string,
                 CONF_PULL_BACK = self.conf_pull_back)


def indent(block, depth):
    return "\n".join([ (" " * (4*depth)) + line for line in block.splitlines() ] )
src/fdr/engine_description.h

@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -55,6 +55,7 @@ public:
     u32 getNumBuckets() const { return numBuckets; }
     u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
     u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
+    void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; }

     bool isValidOnTarget(const target_t &target_in) const;
     virtual u32 getDefaultFloodSuffixLength() const = 0;
src/fdr/fdr.c | 794
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -26,34 +26,790 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
#include "fdr_streaming_runtime.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "fdr_autogen.c"
|
||||
#include "fdr_streaming_runtime.h"
|
||||
#include "flood_runtime.h"
|
||||
#include "teddy.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/** \brief number of bytes processed in each iteration */
|
||||
#define ITER_BYTES 16
|
||||
|
||||
/** \brief total zone buffer size */
|
||||
#define ZONE_TOTAL_SIZE 64
|
||||
|
||||
/** \brief maximum number of allowed zones */
|
||||
#define ZONE_MAX 3
|
||||
|
||||
/** \brief zone information.
|
||||
*
|
||||
* Zone represents a region of data to scan in FDR.
|
||||
*
|
||||
* The incoming buffer is to split in multiple zones to ensure two properties:
|
||||
* 1: that we can read 8? bytes behind to generate a hash safely
|
||||
* 2: that we can read the byte after the current byte (domain > 8)
|
||||
*/
struct zone {
    /** \brief copied buffer, used only when it is a boundary zone. */
    u8 ALIGN_CL_DIRECTIVE buf[ZONE_TOTAL_SIZE];

    /** \brief shift amount for fdr state to avoid unwanted match. */
    u8 shift;

    /** \brief if boundary zone, start points into the zone buffer after the
     * pre-padding. Otherwise, points to the main buffer, appropriately. */
    const u8 *start;

    /** \brief if boundary zone, end points to the end of the zone. Otherwise,
     * points to the main buffer, appropriately. */
    const u8 *end;

    /** \brief the amount to adjust to go from a pointer in the zone's region
     * (between start and end) to a pointer in the original data buffer. */
    ptrdiff_t zone_pointer_adjust;

    /** \brief firstFloodDetect from FDR_Runtime_Args for non-boundary zones,
     * otherwise the end of the zone buf. floodPtr always points inside the
     * same buffer as the start pointer. */
    const u8 *floodPtr;
};
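/*
 * Editorial sketch (not part of the engine; helper name invented): how
 * zone_pointer_adjust is meant to be used -- it translates a pointer inside
 * a zone's scanned region back into the original data buffer, as
 * do_confirm_fdr() does below when reporting matches.
 */
static UNUSED really_inline
const u8 *zone_to_main_ptr(const struct zone *z, const u8 *ptr_in_zone) {
    /* valid for any pointer between z->start and z->end */
    return (const u8 *)((uintptr_t)ptr_in_zone + z->zone_pointer_adjust);
}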
static
const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00 },
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 },
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
};
/* generates an initial state mask based on the last byte-ish of history rather
 * than being all accepting. If there is no history to consider, the state is
 * generated based on the minimum length of each bucket in order to prevent
 * confirms.
 */
static really_inline
m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
                  const struct zone *z) {
    m128 s;
    if (len_history) {
        /* +1: the zones ensure that we can read the byte at z->end */
        u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
        tmp &= fdr->domainMask;
        s = *((const m128 *)ft + tmp);
        s = shiftRight8Bits(s);
    } else {
        s = fdr->start;
    }
    return s;
}
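/*
 * Illustration only (function name invented): how the startup table index is
 * formed when history exists. The two bytes straddling the scan start are
 * read as a little-endian u16 and masked down to the domain, exactly as the
 * lv_u16() call above does.
 */
static UNUSED u32 example_init_index(u8 last_hist_byte, u8 first_buf_byte,
                                     u64a domainMask) {
    u32 tmp = (u32)last_hist_byte | ((u32)first_buf_byte << 8);
    return tmp & (u32)domainMask; /* index into the reach table */
}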
static really_inline
void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
                       u64a *conf8, m128 *s) {
    /* +1: the zones ensure that we can read the byte at z->end */

    u64a current_data_0;
    u64a current_data_8;

    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
    u64a v7 = (lv_u16(itPtr + 7, start_ptr, end_ptr + 1) << 1) &
              domain_mask_adjusted;
    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
    u64a v1 = (current_data_0 >> 7) & domain_mask_adjusted;
    u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
    u64a v3 = (current_data_0 >> 23) & domain_mask_adjusted;
    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
    u64a v5 = (current_data_0 >> 39) & domain_mask_adjusted;
    u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
    u64a v15 = (lv_u16(itPtr + 15, start_ptr, end_ptr + 1) << 1) &
               domain_mask_adjusted;
    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
    u64a v9 = (current_data_8 >> 7) & domain_mask_adjusted;
    u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
    u64a v11 = (current_data_8 >> 23) & domain_mask_adjusted;
    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
    u64a v13 = (current_data_8 >> 39) & domain_mask_adjusted;
    u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;

    m128 st0 = *(const m128 *)(ft + v0*8);
    m128 st1 = *(const m128 *)(ft + v1*8);
    m128 st2 = *(const m128 *)(ft + v2*8);
    m128 st3 = *(const m128 *)(ft + v3*8);
    m128 st4 = *(const m128 *)(ft + v4*8);
    m128 st5 = *(const m128 *)(ft + v5*8);
    m128 st6 = *(const m128 *)(ft + v6*8);
    m128 st7 = *(const m128 *)(ft + v7*8);
    m128 st8 = *(const m128 *)(ft + v8*8);
    m128 st9 = *(const m128 *)(ft + v9*8);
    m128 st10 = *(const m128 *)(ft + v10*8);
    m128 st11 = *(const m128 *)(ft + v11*8);
    m128 st12 = *(const m128 *)(ft + v12*8);
    m128 st13 = *(const m128 *)(ft + v13*8);
    m128 st14 = *(const m128 *)(ft + v14*8);
    m128 st15 = *(const m128 *)(ft + v15*8);

    st1 = byteShiftLeft128(st1, 1);
    st2 = byteShiftLeft128(st2, 2);
    st3 = byteShiftLeft128(st3, 3);
    st4 = byteShiftLeft128(st4, 4);
    st5 = byteShiftLeft128(st5, 5);
    st6 = byteShiftLeft128(st6, 6);
    st7 = byteShiftLeft128(st7, 7);
    st9 = byteShiftLeft128(st9, 1);
    st10 = byteShiftLeft128(st10, 2);
    st11 = byteShiftLeft128(st11, 3);
    st12 = byteShiftLeft128(st12, 4);
    st13 = byteShiftLeft128(st13, 5);
    st14 = byteShiftLeft128(st14, 6);
    st15 = byteShiftLeft128(st15, 7);

    *s = or128(*s, st0);
    *s = or128(*s, st1);
    *s = or128(*s, st2);
    *s = or128(*s, st3);
    *s = or128(*s, st4);
    *s = or128(*s, st5);
    *s = or128(*s, st6);
    *s = or128(*s, st7);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
    *s = or128(*s, st9);
    *s = or128(*s, st10);
    *s = or128(*s, st11);
    *s = or128(*s, st12);
    *s = or128(*s, st13);
    *s = or128(*s, st14);
    *s = or128(*s, st15);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf8 ^= ~0ULL;
}
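/*
 * Editorial note: domain_mask_adjusted == fdr->domainMask << 1, so each v
 * above is (two_byte_window & domainMask) << 1, and the subsequent
 * `ft + v*8` lookup addresses 16-byte m128 table entries without a separate
 * multiply: (x << 1) * 8 == x * sizeof(m128).
 */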
static really_inline
void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
                       u64a *conf8, m128 *s) {
    u64a current_data_0;
    u64a current_data_8;

    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
    u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
    u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
    u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
    u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;

    m128 st0 = *(const m128 *)(ft + v0*8);
    m128 st2 = *(const m128 *)(ft + v2*8);
    m128 st4 = *(const m128 *)(ft + v4*8);
    m128 st6 = *(const m128 *)(ft + v6*8);
    m128 st8 = *(const m128 *)(ft + v8*8);
    m128 st10 = *(const m128 *)(ft + v10*8);
    m128 st12 = *(const m128 *)(ft + v12*8);
    m128 st14 = *(const m128 *)(ft + v14*8);

    st2 = byteShiftLeft128(st2, 2);
    st4 = byteShiftLeft128(st4, 4);
    st6 = byteShiftLeft128(st6, 6);
    st10 = byteShiftLeft128(st10, 2);
    st12 = byteShiftLeft128(st12, 4);
    st14 = byteShiftLeft128(st14, 6);

    *s = or128(*s, st0);
    *s = or128(*s, st2);
    *s = or128(*s, st4);
    *s = or128(*s, st6);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
    *s = or128(*s, st10);
    *s = or128(*s, st12);
    *s = or128(*s, st14);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf8 ^= ~0ULL;
}

static really_inline
void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
                       u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
                       u64a *conf8, m128 *s) {
    u64a current_data_0;
    u64a current_data_8;

    current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
    u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
    u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
    current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
    u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
    u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;

    m128 st0 = *(const m128 *)(ft + v0*8);
    m128 st4 = *(const m128 *)(ft + v4*8);
    m128 st8 = *(const m128 *)(ft + v8*8);
    m128 st12 = *(const m128 *)(ft + v12*8);

    st4 = byteShiftLeft128(st4, 4);
    st12 = byteShiftLeft128(st12, 4);

    *s = or128(*s, st0);
    *s = or128(*s, st4);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
    *s = or128(*s, st12);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *conf8 ^= ~0ULL;
}
static really_inline
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
                    const u32 *confBase, const struct FDR_Runtime_Args *a,
                    const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id,
                    struct zone *z) {
    const u8 bucket = 8;
    const u8 pullback = 1;

    if (likely(!*conf)) {
        return;
    }

    /* ptr currently refers to a location in the zone's buffer; we also need
     * a pointer into the original, main buffer for the final string compare.
     */
    const u8 *ptr_main = (const u8 *)((uintptr_t)ptr + z->zone_pointer_adjust);

    const u8 *confLoc = ptr;

    do {
        u32 bit = findAndClearLSB_64(conf);
        u32 byte = bit / bucket + offset;
        u32 bitRem = bit % bucket;
        u32 confSplit = *(ptr + byte);
        u32 idx = confSplit * bucket + bitRem;
        u32 cf = confBase[idx];
        if (!cf) {
            continue;
        }
        const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
                                        ((const u8 *)confBase + cf);
        if (!(fdrc->groups & *control)) {
            continue;
        }
        if (!fdrc->mult) {
            u32 id = fdrc->nBitsOrSoleID;
            if ((*last_match_id == id) && (fdrc->flags & NoRepeat)) {
                continue;
            }
            *last_match_id = id;
            *controlVal = a->cb(ptr_main + byte - a->buf,
                                ptr_main + byte - a->buf, id, a->ctxt);
            continue;
        }
        u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
        confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback,
                    control, last_match_id, confVal);
    } while (unlikely(!!*conf));
}
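/*
 * Worked example (editorial): with 8 buckets, a set bit b in *conf encodes
 * byte position b / 8 (plus the 0 or 8 half-iteration offset) and bucket
 * b % 8; e.g. bit 19 with offset 8 confirms bucket 3 at iteration byte 10.
 */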
static really_inline
void dumpZoneInfo(UNUSED struct zone *z, UNUSED size_t zone_id) {
#ifdef DEBUG
    DEBUG_PRINTF("zone: zone=%zu, bufPtr=%p\n", zone_id, z->buf);
    DEBUG_PRINTF("zone: startPtr=%p, endPtr=%p, shift=%u\n",
                 z->start, z->end, z->shift);
    DEBUG_PRINTF("zone: zone_pointer_adjust=%zd, floodPtr=%p\n",
                 z->zone_pointer_adjust, z->floodPtr);
    DEBUG_PRINTF("zone buf:");
    for (size_t i = 0; i < ZONE_TOTAL_SIZE; i++) {
        if (i % 8 == 0) {
            printf("_");
        }
        if (z->buf[i]) {
            printf("%02x", z->buf[i]);
        } else {
            printf("..");
        }
    }
    printf("\n");
#endif
}
/**
 * \brief Updates attributes for a non-boundary region zone.
 */
static really_inline
void createMainZone(const u8 *flood, const u8 *begin, const u8 *end,
                    struct zone *z) {
    z->zone_pointer_adjust = 0; /* zone buffer is the main buffer */
    z->start = begin;
    z->end = end;
    z->floodPtr = flood;
    z->shift = 0;
}
/**
 * \brief Create a zone for short cases (<= ITER_BYTES).
 *
 * For this case we need to copy everything into the zone's internal buffer.
 *
 * We need to ensure that we run over real data if it exists (in history or
 * before the zone begins). We also need to ensure that the 8 bytes before any
 * data being matched can be read (to perform a conf hash).
 *
 * We also need to ensure that the data at z->end can be read.
 *
 * Hence, the zone consists of:
 * 16 bytes of history,
 * 1 - 24 bytes of data from the buffer (ending at end),
 * 1 byte of final padding
 */
static really_inline
void createShortZone(const u8 *buf, const u8 *hend, const u8 *begin,
                     const u8 *end, struct zone *z) {
    /* the floodPtr for boundary zones is set to the end of the zone buf to
     * avoid the flood checks within boundary zones. */
    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;

    ptrdiff_t z_len = end - begin;
    assert(z_len > 0);
    assert(z_len <= ITER_BYTES);

    z->shift = ITER_BYTES - z_len; /* ignore bytes outside region specified */

    static const size_t ZONE_SHORT_DATA_OFFSET = 16; /* after history */

    /* we are guaranteed to always have 16 initialised bytes at the end of
     * the history buffer (they may be garbage coming from the stream state
     * preceding hbuf, but bytes that don't correspond to actual history
     * shouldn't affect computations). */
    *(m128 *)z->buf = loadu128(hend - sizeof(m128));

    /* The amount of data we have to copy from the main buffer. */
    size_t copy_len = MIN((size_t)(end - buf),
                          ITER_BYTES + sizeof(CONF_TYPE));

    u8 *zone_data = z->buf + ZONE_SHORT_DATA_OFFSET;
    switch (copy_len) {
    case 1:
        *zone_data = *(end - 1);
        break;
    case 2:
        *(u16 *)zone_data = unaligned_load_u16(end - 2);
        break;
    case 3:
        *(u16 *)zone_data = unaligned_load_u16(end - 3);
        *(zone_data + 2) = *(end - 1);
        break;
    case 4:
        *(u32 *)zone_data = unaligned_load_u32(end - 4);
        break;
    case 5:
    case 6:
    case 7:
        /* perform copy with 2 overlapping 4-byte chunks from buf. */
        *(u32 *)zone_data = unaligned_load_u32(end - copy_len);
        unaligned_store_u32(zone_data + copy_len - sizeof(u32),
                            unaligned_load_u32(end - sizeof(u32)));
        break;
    case 8:
        *(u64a *)zone_data = unaligned_load_u64a(end - 8);
        break;
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
    case 14:
    case 15:
        /* perform copy with 2 overlapping 8-byte chunks from buf. */
        *(u64a *)zone_data = unaligned_load_u64a(end - copy_len);
        unaligned_store_u64a(zone_data + copy_len - sizeof(u64a),
                             unaligned_load_u64a(end - sizeof(u64a)));
        break;
    case 16:
        /* copy 16 bytes from buf. */
        *(m128 *)zone_data = loadu128(end - 16);
        break;
    default:
        assert(copy_len <= sizeof(m128) + sizeof(u64a));

        /* perform copy with (potentially overlapping) 8-byte and 16-byte
         * chunks. */
        *(u64a *)zone_data = unaligned_load_u64a(end - copy_len);
        storeu128(zone_data + copy_len - sizeof(m128),
                  loadu128(end - sizeof(m128)));
        break;
    }

    /* set the start and end locations of the zone buf to be scanned */
    u8 *z_end = z->buf + ZONE_SHORT_DATA_OFFSET + copy_len;
    assert(ZONE_SHORT_DATA_OFFSET + copy_len >= ITER_BYTES);

    /* write the post-padding byte; this is required for domain > 8 due to
     * overhang */
    *z_end = 0;

    z->end = z_end;
    z->start = z_end - ITER_BYTES;
    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
    assert(z->start + z->shift == z_end - z_len);
}
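/*
 * Worked example (editorial, values assumed): a 5-byte scan over a 5-byte
 * buffer gives copy_len = 5, z_len = 5 and shift = ITER_BYTES - 5 = 11.
 * z->buf then holds:
 *   [0..15]  the 16 bytes ending at hend (may include pre-history garbage)
 *   [16..20] the 5 buffer bytes
 *   [21]     the zero post-padding byte
 * so z->end = z->buf + 21 and z->start = z->buf + 5; matches in the first
 * `shift` bytes of the iteration are suppressed via zone_or_mask.
 */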
/**
 * \brief Create a zone for the start region.
 *
 * This function requires that there is > ITER_BYTES of data in the buffer to
 * scan. The start zone itself is always responsible for scanning exactly
 * ITER_BYTES of data - there are no warmup/junk bytes scanned.
 *
 * This zone ensures that the byte at z->end can be read and corresponds to
 * the next byte of data.
 *
 * 8 bytes of history data are provided before z->start to allow proper hash
 * generation in streaming mode. If buf != begin, up to 8 bytes of data
 * prior to begin are also provided.
 *
 * Although we are not interested in bare literals which start before begin
 * if buf != begin, lookarounds associated with the literal may require
 * the data prior to begin for hash purposes.
 */
static really_inline
void createStartZone(const u8 *buf, const u8 *hend, const u8 *begin,
                     struct zone *z) {
    assert(ITER_BYTES == sizeof(m128));
    assert(sizeof(CONF_TYPE) == 8);
    static const size_t ZONE_START_BEGIN = sizeof(CONF_TYPE);

    const u8 *end = begin + ITER_BYTES;

    /* set floodPtr to the end of the zone buf to avoid checks in the start
     * zone */
    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;

    z->shift = 0; /* we are processing ITER_BYTES of real data */

    /* we are guaranteed to always have 16 initialised bytes at the end of the
     * history buffer (they may be garbage coming from the stream state
     * preceding hbuf, but bytes that don't correspond to actual history
     * shouldn't affect computations). However, for start zones, history is
     * only required for conf hash purposes so we only need 8 bytes */
    unaligned_store_u64a(z->buf, unaligned_load_u64a(hend - sizeof(u64a)));

    /* The amount of data we have to copy from the main buffer. */
    size_t copy_len = MIN((size_t)(end - buf),
                          ITER_BYTES + sizeof(CONF_TYPE));
    assert(copy_len >= 16);

    /* copy the post-padding byte; this is required for domain > 8 due to
     * overhang. The start zone requires that there is data after the zone so
     * it is safe to dereference end */
    z->buf[ZONE_START_BEGIN + copy_len] = *end;

    /* set the start and end locations of the zone buf to be scanned */
    u8 *z_end = z->buf + ZONE_START_BEGIN + copy_len;
    z->end = z_end;
    z->start = z_end - ITER_BYTES;

    /* copy the first 8 bytes of the valid region */
    unaligned_store_u64a(z->buf + ZONE_START_BEGIN,
                         unaligned_load_u64a(end - copy_len));

    /* copy the last 16 bytes; this may overlap with the previous 8-byte
     * write */
    storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));

    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
}
/**
 * \brief Create a zone for the end region.
 *
 * This function requires that there is > ITER_BYTES of data in the buffer to
 * scan. The end zone, however, is only responsible for scanning the <=
 * ITER_BYTES rump of data. The end zone is required to handle a full
 * ITER_BYTES iteration as the main loop cannot handle the last byte of the
 * buffer.
 *
 * This zone ensures that the byte at z->end can be read by filling it with a
 * padding character.
 *
 * Up to 8 bytes of data prior to begin are also provided for the purposes of
 * generating hashes. History is not copied, as all locations which require
 * history for generating a hash are the responsibility of the start zone.
 */
static really_inline
void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
                   struct zone *z) {
    /* the floodPtr for boundary zones is set to the end of the zone buf to
     * avoid the flood checks within boundary zones. */
    z->floodPtr = z->buf + ZONE_TOTAL_SIZE;

    ptrdiff_t z_len = end - begin;
    assert(z_len > 0);
    assert(z_len <= ITER_BYTES);

    z->shift = ITER_BYTES - z_len;

    /* The amount of data we have to copy from the main buffer. */
    size_t copy_len = MIN((size_t)(end - buf),
                          ITER_BYTES + sizeof(CONF_TYPE));
    assert(copy_len >= 16);

    /* write the post-padding byte; this is required for domain > 8 due to
     * overhang */
    z->buf[copy_len] = 0;

    /* set the start and end locations of the zone buf to be scanned */
    u8 *z_end = z->buf + copy_len;
    z->end = z_end;
    z->start = z_end - ITER_BYTES;
    assert(z->start + z->shift == z_end - z_len);

    /* copy the first 8 bytes of the valid region */
    unaligned_store_u64a(z->buf, unaligned_load_u64a(end - copy_len));

    /* copy the last 16 bytes; this may overlap with the previous 8-byte
     * write */
    storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));

    z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
}
/**
 * \brief Prepare zones.
 *
 * This function prepares zones with the actual buffer and some padding bytes.
 * The actual ITER_BYTES bytes in each zone are preceded by the main buf
 * and/or the history buf, and followed by padding bytes, possibly from the
 * main buf, if available.
 */
static really_inline
size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
                    size_t start, const u8 *flood, struct zone *zoneArr) {
    const u8 *ptr = buf + start;
    size_t remaining = len - start;

    if (remaining <= ITER_BYTES) {
        /* enough bytes to make only one zone */
        createShortZone(buf, hend, ptr, buf + len, &zoneArr[0]);
        return 1;
    }

    /* enough bytes to make more than one zone */

    size_t numZone = 0;
    createStartZone(buf, hend, ptr, &zoneArr[numZone++]);
    ptr += ITER_BYTES;

    assert(ptr < buf + len);

    /* find the maximum buffer location that the main zone can scan
     * - must be a multiple of ITER_BYTES, and
     * - cannot contain the last byte (due to overhang)
     */
    const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 1, ITER_BYTES);
    assert(main_end >= ptr);

    /* create a main zone if a multiple of ITER_BYTES is available */
    if (main_end != ptr) {
        createMainZone(flood, ptr, main_end, &zoneArr[numZone++]);
        ptr = main_end;
    }
    /* create a zone with the rest of the data from the main buffer */
    createEndZone(buf, ptr, buf + len, &zoneArr[numZone++]);
    return numZone;
}
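/*
 * Worked example (editorial): for len = 100, start = 0 and ITER_BYTES = 16,
 * the start zone scans [0, 16), main_end = buf + ROUNDDOWN_N(99, 16) =
 * buf + 96, so the main zone scans [16, 96) and the end zone scans
 * [96, 100). A 10-byte buffer would instead take the single short-zone path.
 */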
#define INVALID_MATCH_ID (~0U)

#define FDR_MAIN_LOOP(zz, s, get_conf_fn)                                   \
    do {                                                                    \
        const u8 *tryFloodDetect = zz->floodPtr;                            \
        const u8 *start_ptr = zz->start;                                    \
        const u8 *end_ptr = zz->end;                                        \
                                                                            \
        for (const u8 *itPtr = start_ptr; itPtr + ITER_BYTES <= end_ptr;    \
             itPtr += ITER_BYTES) {                                         \
            if (unlikely(itPtr > tryFloodDetect)) {                         \
                tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
                                             &floodBackoff, &controlVal,    \
                                             ITER_BYTES);                   \
                if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {      \
                    return HWLM_TERMINATED;                                 \
                }                                                           \
            }                                                               \
            __builtin_prefetch(itPtr + (ITER_BYTES*4));                     \
            u64a conf0;                                                     \
            u64a conf8;                                                     \
            get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted,    \
                        ft, &conf0, &conf8, &s);                            \
            do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr,      \
                           control, &last_match_id, zz);                    \
            do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr,      \
                           control, &last_match_id, zz);                    \
            if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {          \
                return HWLM_TERMINATED;                                     \
            }                                                               \
        } /* end for loop */                                                \
    } while (0)
static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
                             const struct FDR_Runtime_Args *a) {
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    u32 last_match_id = INVALID_MATCH_ID;
    u64a domain_mask_adjusted = fdr->domainMask << 1;
    u8 stride = fdr->stride;
    const u8 *ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
    const u32 *confBase = (const u32 *)(ft + fdr->tabSize);
    struct zone zones[ZONE_MAX];
    assert(fdr->domain > 8 && fdr->domain < 16);

    size_t numZone = prepareZones(a->buf, a->len,
                                  a->buf_history + a->len_history,
                                  a->start_offset, a->firstFloodDetect, zones);
    assert(numZone <= ZONE_MAX);
    m128 state = getInitState(fdr, a->len_history, ft, &zones[0]);

    for (size_t curZone = 0; curZone < numZone; curZone++) {
        struct zone *z = &zones[curZone];
        dumpZoneInfo(z, curZone);

        /* When a zone contains less data than is processed in an iteration
         * of FDR_MAIN_LOOP(), we need to scan over some extra data.
         *
         * We have chosen to scan this extra data at the start of the
         * iteration. The extra data is either data we have already scanned or
         * garbage (if it is earlier than offset 0).
         *
         * As a result we need to shift the incoming state back so that it
         * lines up properly with the data being scanned.
         *
         * We also need to forbid reporting any matches in the data being
         * rescanned as they have already been reported (or are over garbage,
         * but later stages should also provide that safety guarantee).
         */

        u8 shift = z->shift;

        state = variable_byte_shift_m128(state, shift);

        state = or128(state, load128(zone_or_mask[shift]));

        switch (stride) {
        case 1:
            FDR_MAIN_LOOP(z, state, get_conf_stride_1);
            break;
        case 2:
            FDR_MAIN_LOOP(z, state, get_conf_stride_2);
            break;
        case 4:
            FDR_MAIN_LOOP(z, state, get_conf_stride_4);
            break;
        default:
            break;
        }
    }

    return HWLM_SUCCESS;
}
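/*
 * Editorial note on the suppression step above: a set bit in the state means
 * "no match may end here", so OR-ing with zone_or_mask[shift] (0xff in bytes
 * 0..shift-1) forbids reports in the rescanned/garbage prefix of a boundary
 * zone; e.g. zone_or_mask[11] masks bytes 0..10 and leaves bytes 11..15 live.
 */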
#if defined(__AVX2__)
#define ONLY_AVX2(func) func
#else
#define ONLY_AVX2(func) NULL
#endif

typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
                                    const struct FDR_Runtime_Args *a);

static const FDRFUNCTYPE funcs[] = {
    fdr_engine_exec,
    ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fast),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks2_pck_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks3_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks3_pck_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks4_fat),
    ONLY_AVX2(fdr_exec_teddy_avx2_msks4_pck_fat),
    fdr_exec_teddy_msks1,
    fdr_exec_teddy_msks1_pck,
    fdr_exec_teddy_msks2,
    fdr_exec_teddy_msks2_pck,
    fdr_exec_teddy_msks3,
    fdr_exec_teddy_msks3_pck,
    fdr_exec_teddy_msks4,
    fdr_exec_teddy_msks4_pck,
};
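/*
 * Editorial note: the dispatch through this table is not shown in this hunk;
 * presumably the runtime selects funcs[fdr->engineID]. The NULL ONLY_AVX2
 * slots are assumed to be unreachable on non-AVX2 builds because the compiler
 * side only builds Teddy AVX2 engines for targets that support them.
 */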
#define FAKE_HISTORY_SIZE 16
static const u8 fake_history[FAKE_HISTORY_SIZE];
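/*
 * Hypothetical usage sketch (callback name is invented): block-mode scanning
 * via fdrExec, assuming the hwlm.h callback signature
 * (size_t start, size_t end, u32 id, void *context).
 */
static UNUSED
hwlmcb_rv_t exampleMatch(UNUSED size_t start, UNUSED size_t end, UNUSED u32 id,
                         UNUSED void *context) {
    DEBUG_PRINTF("match id %u\n", id); /* handle the match here */
    return HWLM_CONTINUE_MATCHING;     /* or HWLM_TERMINATE_MATCHING */
}
/* ... fdrExec(fdr, data, data_len, 0, exampleMatch, NULL, HWLM_ALL_GROUPS); */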
-hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
-                     HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
+hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
+                     size_t start, HWLMCallback cb, void *ctxt,
+                     hwlm_group_t groups) {
+    // We guarantee (for safezone construction) that it is safe to read 16
+    // bytes before the end of the history buffer.
+    const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE;
+
     const struct FDR_Runtime_Args a = {
         buf,
         len,
+        fake_history,
+        hbuf,
+        0,
+        fake_history, // nocase
+        hbuf, // nocase
+        0,
         start,
         cb,
@@ -86,9 +842,9 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
         ctxt,
         &groups,
         nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
-        hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
-             : (u64a)0
+        /* we are guaranteed to always have 16 initialised bytes at the end of
+         * the history buffer (they may be garbage). */
+        hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0
     };
     fdrUnpackState(fdr, &a, stream_state);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -43,9 +43,6 @@ extern "C" {

 struct FDR;

-/** \brief Returns size in bytes of the given FDR engine. */
-size_t fdrSize(const struct FDR *fdr);
-
 /** \brief Returns non-zero if the contents of the stream state indicate that
  * there is active FDR history beyond the regularly used history. */
 u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
@@ -1,564 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class OrStep(Step):
    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        s_var = self.gv("st%d" % offset)
        if width < 128:
            self.val = "s |= %s;" % s_var.name
        else:
            self.val = "s = or%d(s, %s);" % (width, s_var.name)

class ShiftStateStep(Step):
    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        m = self.matcher
        state = m.state_variable
        shift_distance = -1 * stride_used * m.num_buckets
        self.val = "%s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
class BulkLoadStep(Step):
    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        blt = m.bulk_load_type
        if aligned:
            init_string = blt.load_expr_data(self.offset, code = "aligned")
        else:
            init_string = blt.load_expr_data(self.offset)

        var_name = "current_data_%d" % offset
        if define_var:
            lb_var = self.nv(blt, var_name)
            self.val = lb_var.gen_initializer_stmt(init_string)
        else:
            lb_var = self.gv(var_name, reader = False, writer = True)
            self.val = "%s = %s;" % (var_name, init_string)
class ValueExtractStep(Step):
    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 2
        dsb = m.datasize_bytes
        modval = offset % dsb

        if modval == dsb - 1:
            # Case 1: reading more than one byte over the end of the bulk load

            self.latency = 4
            if sub_load_cautious:
                code_string = "cautious_forward"
            else:
                code_string = "normal"
            load_string = m.single_load_type.load_expr_data(self.offset, code_string)
            temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust)
        else:
            # Case 2: reading a value that can be found entirely in the current register
            if m.fdr2_force_naive_load:
                load_string = m.single_load_type.load_expr_data(self.offset, "normal")
                temp_string = "(%s << %d)" % (load_string, m.reach_shift_adjust)
            else:
                lb_var = self.gv("current_data_%d" % (offset - modval))
                if modval == 0:
                    # Case 2a: value is at LSB end of the register and must be left-
                    # shifted into place if there is a "reach_shift_adjust" required
                    temp_string = "(%s << %d)" % (lb_var.name, m.reach_shift_adjust)
                else:
                    # Case 2b: value is in the middle of the register and will be
                    # right-shifted into place (adjusted by "reach_shift_adjust")
                    temp_string = "(%s >> %d)" % (lb_var.name, modval*8 - m.reach_shift_adjust)

        init_string = "(%s) & (domain_mask << %d)" % (temp_string, m.reach_shift_adjust)
        v_var = self.nv(m.value_extract_type, "v%d" % offset)
        self.val = v_var.gen_initializer_stmt(init_string)
class TableLookupStep(Step):
    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        v_var = self.gv("v%d" % offset)
        s_var = self.nv(m.state_type, "st%d" % offset)
        init_string = "*(const %s *)(ft + %s*%dU)" % (m.state_type.get_name(),
                                                      v_var.name, reach_multiplier)
        self.val = s_var.gen_initializer_stmt(init_string)

class ShiftReachMaskStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        extr = m.extract_frequency
        modval = offset % extr
        s_var = self.gv("st%d" % offset, writer = True)
        self.val = "%s = %s;" % (s_var.name, s_var.type.shift_expr(s_var.name, modval * m.num_buckets))
class ConfExtractStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        if m.state_type.isSIMDOnIntel():
            self.latency = 2
        init_string = m.state_type.lowbit_extract_expr("s", m.extract_size)
        extr_var = self.nv(m.extr_type, "extr%d" % offset)
        self.val = extr_var.gen_initializer_stmt(init_string)

class ConfAccumulateStep(Step):
    def __init__(self, context, extract_offset, conf_offset, define_var = True):
        Step.__init__(self, context, extract_offset)
        m = self.matcher
        extr_var = self.gv("extr%d" % extract_offset)
        extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name)
        if extract_offset == conf_offset:
            # create conf_var as a straight copy of extr
            if define_var:
                conf_var = self.nv(m.conf_type, "conf%d" % conf_offset)
                self.val = conf_var.gen_initializer_stmt(extr_var_cast)
            else:
                conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
                self.val = "%s = %s;" % (conf_var.name, extr_var_cast)
        else:
            # shift extr_var and insert/OR it in conf_var
            conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
            shift_dist = (extract_offset - conf_offset) * m.num_buckets
            self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist))
        self.latency = 2
class ConfirmFlipStep(Step):
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        conf_var = self.gv("conf%d" % self.offset, writer = True)
        self.val = "%s = %s;" % (conf_var.name,
                                 conf_var.type.flip_lowbits_expr(conf_var.name,
                                     self.matcher.confirm_frequency * m.num_buckets))

class ConfirmStep(Step):
    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        conf_var = self.gv("conf%d" % offset, writer = True)
        self.val = m.produce_confirm_base(conf_var.name, conf_var.type.size,
                                          offset, cautious,
                                          enable_confirmless = m.stride == 1,
                                          do_bailout = False)
class M3(MatcherBase):
    def produce_compile_call(self):
        print "    { %d, %d, %d, %d, %s, %d, %d }," % (
            self.id, self.state_width, self.num_buckets,
            self.stride,
            self.arch.target, self.conf_pull_back, self.conf_top_level_split)

    def produce_main_loop(self, switch_variant = False):
        stride_offsets = xrange(0, self.loop_bytes, self.stride)
        stride_offsetSet = set(stride_offsets)
        so_steps_last_block = []
        sh = None
        last_confirm = None
        ctxt = CodeGenContext(self)

        if switch_variant:
            print " ptr -= (iterBytes - dist);"
            print " { " # need an extra scope around switch variant to stop its globals escaping
        else:
            print " if (doMainLoop) {"
            print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
            print self.produce_flood_check()
            print " __builtin_prefetch(ptr + (iterBytes*4));"
            print " assert(((size_t)ptr % START_MOD) == 0);"

        # just do globally for now
        if switch_variant:
            subsidiary_load_cautious = True
            confirm_cautious = True
        else:
            subsidiary_load_cautious = False
            confirm_cautious = False

        if not self.fdr2_force_naive_load:
            bulk_load_steps = [ off for off in range(self.loop_bytes)
                                if off % self.datasize_bytes == 0 and
                                (set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet) ]
        else:
            bulk_load_steps = []

        confirm_steps = [ off for off in range(self.loop_bytes)
                          if off % self.confirm_frequency == 0 ]

        for off in bulk_load_steps:
            lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
            print " " + lb_var.gen_initializer_stmt()

        for off in confirm_steps:
            var_name = "conf%d" % off
            conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
            if switch_variant:
                init_string = "(%s)-1" % self.conf_type.get_name()
            else:
                init_string = ""
            print " " + conf_def_var.gen_initializer_stmt(init_string)

        if switch_variant:
            print " switch(iterBytes - dist) {"
            for i in range(0, self.loop_bytes):
                print " case %d:" % i

                # init and poison conf; over-precise but harmless
                conf_id = (i / self.confirm_frequency) * self.confirm_frequency
                if i % self.confirm_frequency:
                    conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
                    print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
                else:
                    print " conf%d = 0;" % conf_id

                # init state
                state_fixup = i % self.extract_frequency
                state = self.state_variable
                shift_distance = self.num_buckets * state_fixup
                if state_fixup:
                    print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
                    if self.state_width < 128:
                        print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
                    else:
                        print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))

                if not self.fdr2_force_naive_load:
                    # init current_data (could poison it in some cases)
                    load_mod = i % self.datasize_bytes
                    load_offset = i - load_mod
                    if load_mod:
                        # not coming in on an even boundary means having to do a load var
                        # actually, there are a bunch of things we can do on this bulk load
                        # to avoid having to be 'cautious_backwards' but I'm not completely
                        # sure they are good ideas
                        init_string = self.bulk_load_type.load_expr_data(load_offset,
                                                                         code = "cautious_backward")
                        var_name = "current_data_%d" % load_offset
                        lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
                        print " %s = %s;" % (lb_var.name, init_string)

                print " goto off%d;" % i
            print " case %d: goto skipSwitch;" % self.loop_bytes
            print " }"
            print " {"

        for off in range(self.loop_bytes):
            # X_mod is the offset we're up to relative to the last X operation
            # X_offset is which of the last X operations matches this iteration

            if (switch_variant):
                LabelStep(ctxt, off)

            if off in bulk_load_steps:
                if not self.fdr2_force_naive_load:
                    BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)

            if off in stride_offsets:
                if switch_variant:
                    OpenScopeStep(ctxt, off)
                ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
                TableLookupStep(ctxt, self.reach_mult, off)
                if off % self.extract_frequency:
                    ShiftReachMaskStep(ctxt, off)
                so = OrStep(ctxt, off, self.state_width)
                if switch_variant:
                    CloseScopeStep(ctxt, off)
                if sh != None:
                    so.add_dependency(sh)
                so_steps_last_block += [ so ]

            extract_mod = off % self.extract_frequency
            extract_offset = off - extract_mod
            extract_ready = extract_mod == self.extract_frequency - 1
            if extract_ready:
                if switch_variant:
                    OpenScopeStep(ctxt, off)
                ex = ConfExtractStep(ctxt, extract_offset)
                ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
                for so_step in so_steps_last_block:
                    ex.add_dependency(so_step)
                if switch_variant:
                    CloseScopeStep(ctxt, off)
                so_steps_last_block = []
                sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
                sh.add_dependency(ex)

            confirm_mod = off % self.confirm_frequency
            confirm_offset = off - confirm_mod
            confirm_ready = confirm_mod == self.confirm_frequency - 1
            if confirm_ready:
                cflip = ConfirmFlipStep(ctxt, confirm_offset)
                cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious)
                if last_confirm:
                    cf.add_dependency(last_confirm)
                last_confirm = cf

        if not switch_variant:
            print ctxt.schedule([ last_confirm, sh ])
        else:
            print ctxt.dontschedule([ last_confirm, sh ])

        if switch_variant:
            print "skipSwitch:;"
            print " ptr += iterBytes;"
            print " }" # close extra scope around switch variant
        print " }"
    def produce_init_state(self):
        state = self.state_variable
        s_type = self.state_type
        shift_distance = -1 * self.num_buckets
        shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance))

        s = Template("""
    $TYPENAME s;
    if (a->len_history) {
        u32 tmp = 0;
        if (a->start_offset == 0) {
            tmp = a->buf_history[a->len_history - 1];
            tmp |= (a->buf[0] << 8);
        } else {
            tmp = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
        }
        tmp &= fdr->domainMask;
        s = *((const $TYPENAME *)ft + tmp);
        $SHIFT_EXPR;
    } else {
        s = *(const $TYPENAME *)&fdr->start;
    }
""").substitute(TYPENAME = s_type.get_name(),
                ZERO_EXPR = s_type.zero_expression(),
                SHIFT_EXPR = shift_expr)
        return s
    def produce_code(self):

        loop_read_behind = 0
        loop_read_ahead = self.loop_bytes + 1

        # we set up mask and shift stuff for extracting our masks from registers
        #
        # we have a choice as to whether to mask out the value early or
        # extract the value (shift first) then mask it
        #
        # Intel has a free scaling factor from 1/2/4/8 so we want to combine
        # the extra needed shift for SSE registers with the mask operation

        ssb = self.state_type.size / 8 # state size in bytes

        # Intel path
        if ssb == 16:
            # obscure corner - we don't have the room in the register to
            # do this for all values so we don't. domain==16 is pretty
            # bad anyhow, of course
            self.reach_mult = 8
        else:
            self.reach_mult = ssb

        shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16 : 4 }
        self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]

        print self.produce_header(visible = False)

        print "// ",
        print " Arch: " + self.arch.name,
        print " State type: " + self.state_type.get_name(),
        print " Num buckets: %d" % self.num_buckets,
        print " Stride: %d" % self.stride

        print self.produce_common_declarations()

        print " assert(fdr->domain > 8 && fdr->domain < 16);"
        print
        print " u64a domain_mask = fdr->domainMask;"
        print " const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));"
        print " const u32 * confBase = (const u32 *)(ft + fdr->tabSize);"
        print self.produce_init_state()
        print " const size_t iterBytes = %d;" % self.loop_bytes
        print " const size_t START_MOD = %d;" % self.datasize_bytes
        print " const size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead

        print """
    while (ptr < buf + len) {

        u8 doMainLoop = 1;
        size_t remaining = len - (ptr - buf);
        size_t dist;
        if (remaining <= iterBytes) {
            dist = remaining; // once through the switch and we're done
        } else if (remaining < 2 * iterBytes) {
            // nibble some stuff off the front, skip the main loop,
            // then come back here
            dist = iterBytes; // maybe could be cleverer
        } else {
            // now, we need to see if we can make it to a main loop iteration
            // if so, we need to ensure that the main loop iteration is aligned
            // to a START_MOD boundary and i >= 8 so we can read ptr + i - 8

            // see if we can do it - if not, just switch the main loop off,
            // eat iterBytes in cautious mode, and come back to this loop

            const u8 * target = MAX(buf + 8, ptr);
            target = ROUNDUP_PTR(target, START_MOD);
            dist = target - ptr;
            if (dist > iterBytes) {
                doMainLoop = 0;
                dist = iterBytes;
            }
        }
"""
        self.produce_main_loop(switch_variant = True)
        self.produce_main_loop(switch_variant = False)
        print """
    }
"""
        print self.produce_footer()

    def get_name(self):
        return "fdr_exec_%s_s%d_w%d" % (self.arch.name, self.stride, self.state_width)
    def __init__(self, state_width, stride,
                 arch,
                 table_state_width = None,
                 num_buckets = 8,
                 extract_frequency = None,
                 confirm_frequency = None):

        # First - set up the values that are fundamental to how this matcher will operate
        self.arch = arch

        # get the width of the state on which we operate internally
        if state_width not in [ 128 ]:
            fail_out("Unknown state width: %d" % state_width)
        self.state_width = state_width
        self.state_type = getRequiredType(self.state_width)
        self.state_variable = IntegerVariable("s", self.state_type)

        table_state_width = state_width
        self.table_state_width = state_width
        self.table_state_type = getRequiredType(self.table_state_width)

        # this is the load type required for domain [9:15] if we want to
        # load it one at a time
        self.single_load_type = IntegerType(16)

        # stride is the frequency with which we make data-driven
        # accesses to our reach table
        if stride not in [ 1, 2, 4, 8 ]:
            fail_out("Unsupported stride: %d" % stride)
        if stride * num_buckets > state_width:
            fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
        self.stride = stride

        if num_buckets != 8:
            fail_out("Unsupported number of buckets: %d" % num_buckets)
        if state_width % num_buckets and state_width == 128:
            fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
        self.num_buckets = num_buckets

        # Second - set up derived or optimization values - these can be
        # overridden by arguments that are passed in

        self.datasize = 64
        self.bulk_load_type = IntegerType(self.datasize)
        self.datasize_bytes = self.datasize/8

        self.value_extract_type = IntegerType(self.datasize)

        self.fdr2_force_naive_load = False # disable everywhere for trunk

        # extract frequency is how frequently (in bytes) we destructively shift
        # our state value after having pulled out that many bytes into a
        # confirm register (of one sort or another).
        # None means a default value - datasize, our biggest easily available GPR
        if extract_frequency is None:
            extract_frequency = self.datasize_bytes
        self.extract_frequency = extract_frequency
        self.extract_size = self.extract_frequency*self.num_buckets
        if extract_frequency < stride:
            fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
        if extract_frequency not in [ None, 1, 2, 4, 8, 16 ]:
            fail_out("Weird extract frequency: %d" % extract_frequency)

        if self.extract_size <= 32:
            self.extr_type = IntegerType(32)
        elif self.extract_size <= 64:
            self.extr_type = IntegerType(64)
        else:
            fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

        # extract_frequency is how often we pull out our state and place
        # it somewhere in a lossless fashion
        # confirm_frequency, on the other hand, is how frequently we
        # take the state extracted by extract_frequency and cobble it
        # together into a matching loop
        # confirm_frequency must be a multiple of extract_frequency
        # and must fit into a fast register; for now, we're going to
        # stay in the GPR domain
        if confirm_frequency is None:
            confirm_frequency = self.extract_frequency
        self.confirm_frequency = confirm_frequency
        if confirm_frequency % self.extract_frequency:
            fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

        self.conf_size = self.confirm_frequency * self.num_buckets
        if self.conf_size <= 32:
            self.conf_type = IntegerType(32)
        elif self.conf_size <= 64:
            self.conf_type = IntegerType(64)
        else:
            fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

        # how many bytes in flight at once
        self.loop_bytes = 16

        # confirm configuration

        # how many entries in the top-level confirm table - 256 means
        # a complete split on the last character
        self.conf_top_level_split = 256

        # how much we 'pull back' in confirm - this is obviously related
        # to the first level conf but we will keep two separate parameters
        # for this to avoid the risk of conflating these
        self.conf_pull_back = 1

        if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
            fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

        # minor stuff
        self.default_body_indent = 8
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -29,7 +29,7 @@
 /** \file
  * \brief FDR literal matcher: build API.
  */
 #include "fdr.h"

 #include "fdr_internal.h"
 #include "fdr_compile.h"
 #include "fdr_confirm.h"
@@ -187,9 +187,9 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
     /* we are allowing domains 9 to 15 only */
     assert(eng.bits > 8 && eng.bits < 16);
     fdr->domain = eng.bits;
-    fdr->schemeWidthByte = eng.schemeWidth / 8;
     fdr->domainMask = (1 << eng.bits) - 1;
-    fdr->tabSize = (1 << eng.bits) * fdr->schemeWidthByte;
+    fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
+    fdr->stride = eng.stride;

     if (link.first) {
         fdr->link = verify_u32(ptr - fdr_base);
@@ -544,6 +544,7 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
     // temporary hack for unit testing
     if (hint != HINT_INVALID) {
         des->bits = 9;
+        des->stride = 1;
     }

     FDRCompiler fc(lits, *des, make_small);
@@ -571,10 +572,9 @@ fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,

 #endif

-} // namespace ue2
-
 // FIXME: should be compile-time only
 size_t fdrSize(const FDR *fdr) {
     assert(fdr);
     return fdr->size;
 }

+} // namespace ue2
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -61,6 +61,9 @@ fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,

 #endif

+/** \brief Returns size in bytes of the given FDR engine. */
+size_t fdrSize(const struct FDR *fdr);
+
 } // namespace ue2

 #endif
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,7 +26,6 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

 #include "fdr.h"
 #include "fdr_internal.h"
 #include "fdr_compile_internal.h"
 #include "fdr_confirm.h"
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -36,66 +36,48 @@
#include "util/bitutils.h"
#include "util/compare.h"

#define CONF_LOADVAL_CALL lv_u64a
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce

// this is ordinary confirmation function which runs through
// the whole confirmation procedure
static really_inline
void confWithBit(const struct FDRConfirm * fdrc,
                 const struct FDR_Runtime_Args * a,
                 size_t i,
                 CautionReason r,
                 u32 pullBackAmount,
                 hwlmcb_rv_t *control,
                 u32 * last_match) {
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
                 size_t i, u32 pullBackAmount, hwlmcb_rv_t *control,
                 u32 *last_match, u64a conf_key) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

    const u8 * buf = a->buf;
    const size_t len = a->len;

    CONF_TYPE v;
    const u8 * confirm_loc = buf + i - pullBackAmount - 7;
    if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
        v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
    } else { // r == VECTORING, confirm_loc < buf
        u64a histBytes = a->histBytes;
        v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
        // stitch together v (which doesn't move) and history (which does)
        u32 overhang = buf - confirm_loc;
        histBytes >>= 64 - (overhang * 8);
        v |= histBytes;
    u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult,
                           fdrc->nBitsOrSoleID);
    u32 start = getConfirmLitIndex(fdrc)[c];
    if (likely(!start)) {
        return;
    }

    u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
    u32 start = getConfirmLitIndex(fdrc)[c];
    if (P0(start)) {
        const struct LitInfo *l =
            (const struct LitInfo *)((const u8 *)fdrc + start);
    const struct LitInfo *li
        = (const struct LitInfo *)((const u8 *)fdrc + start);

    u8 oldNext; // initialized in loop
    do {
        assert(ISALIGNED(l));
        assert(ISALIGNED(li));

        if (P0( (v & l->msk) != l->v)) {
        if (unlikely((conf_key & li->msk) != li->v)) {
            goto out;
        }

        if ((*last_match == l->id) && (l->flags & NoRepeat)) {
        if ((*last_match == li->id) && (li->flags & NoRepeat)) {
            goto out;
        }

        const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
        const u8 *loc = buf + i - li->size + 1 - pullBackAmount;

        u8 caseless = l->flags & Caseless;
        u8 caseless = li->flags & Caseless;
        if (loc < buf) {
            u32 full_overhang = buf - loc;

            const u8 * history = (caseless) ?
                a->buf_history_nocase : a->buf_history;
            size_t len_history = (caseless) ?
                a->len_history_nocase : a->len_history;
            const u8 *history = caseless ? a->buf_history_nocase
                                         : a->buf_history;
            size_t len_history = caseless ? a->len_history_nocase
                                          : a->len_history;

            // can't do a vectored confirm either if we don't have
            // the bytes
@@ -105,17 +87,15 @@ void confWithBit(const struct FDRConfirm * fdrc,

            // as for the regular case, no need to do a full confirm if
            // we're a short literal
            if (unlikely(l->size > sizeof(CONF_TYPE))) {
                const u8 * s1 = l->s;
            if (unlikely(li->size > sizeof(CONF_TYPE))) {
                const u8 *s1 = li->s;
                const u8 *s2 = s1 + full_overhang;
                const u8 *loc1 = history + len_history - full_overhang;
                const u8 *loc2 = buf;
                size_t size1 = MIN(full_overhang,
                                   l->size - sizeof(CONF_TYPE));
                size_t wind_size2_back = sizeof(CONF_TYPE) +
                                         full_overhang;
                size_t size2 = wind_size2_back > l->size ?
                               0 : l->size - wind_size2_back;
                size_t size1 = MIN(full_overhang, li->size - sizeof(CONF_TYPE));
                size_t wind_size2_back = sizeof(CONF_TYPE) + full_overhang;
                size_t size2 = wind_size2_back > li->size ?
                               0 : li->size - wind_size2_back;

                if (cmpForward(loc1, s1, size1, caseless)) {
                    goto out;
@@ -127,53 +107,50 @@ void confWithBit(const struct FDRConfirm * fdrc,
        } else { // NON-VECTORING PATH

            // if string < conf_type we don't need regular string cmp
            if (unlikely(l->size > sizeof(CONF_TYPE))) {
                if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
            if (unlikely(li->size > sizeof(CONF_TYPE))) {
                if (cmpForward(loc, li->s, li->size - sizeof(CONF_TYPE),
                               caseless)) {
                    goto out;
                }
            }
        }

        if (P0(!(l->groups & *control))) {
        if (unlikely(!(li->groups & *control))) {
            goto out;
        }

        if (unlikely(l->flags & ComplexConfirm)) {
            const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
        if (unlikely(li->flags & ComplexConfirm)) {
            const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
            if (loc2 < buf) {
                u32 full_overhang = buf - loc2;
                size_t len_history = (caseless) ?
                    a->len_history_nocase : a->len_history;
                size_t len_history = caseless ? a->len_history_nocase
                                              : a->len_history;
                if (full_overhang > len_history) {
                    goto out;
                }
            }
        }

        *last_match = l->id;
        *control = a->cb(loc - buf, i, l->id, a->ctxt);
        *last_match = li->id;
        *control = a->cb(loc - buf, i, li->id, a->ctxt);
    out:
        oldNext = l->next; // oldNext is either 0 or an 'adjust' value
        l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
        oldNext = li->next; // oldNext is either 0 or an 'adjust' value
        li = (const struct LitInfo *)((const u8 *)li + oldNext + li->size);
    } while (oldNext);
}
}

// 'light-weight' confirmation function which is used by 1-mask Teddy;
// in the 'confirmless' case it simply calls callback function,
// otherwise it calls 'confWithBit' function for the full confirmation procedure
static really_inline
void confWithBit1(const struct FDRConfirm *fdrc,
                  const struct FDR_Runtime_Args * a,
                  size_t i,
                  CautionReason r,
                  hwlmcb_rv_t *control,
                  u32 * last_match) {
                  const struct FDR_Runtime_Args *a, size_t i,
                  hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

    if (unlikely(fdrc->mult)) {
        confWithBit(fdrc, a, i, r, 0, control, last_match);
        confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
        return;
    } else {
        u32 id = fdrc->nBitsOrSoleID;
@@ -191,11 +168,8 @@ void confWithBit1(const struct FDRConfirm * fdrc,
// otherwise it calls 'confWithBit' function for the full confirmation procedure
static really_inline
void confWithBitMany(const struct FDRConfirm *fdrc,
                     const struct FDR_Runtime_Args * a,
                     size_t i,
                     CautionReason r,
                     hwlmcb_rv_t *control,
                     u32 * last_match) {
                     const struct FDR_Runtime_Args *a, size_t i, CautionReason r,
                     hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

@@ -204,7 +178,7 @@ void confWithBitMany(const struct FDRConfirm * fdrc,
    }

    if (unlikely(fdrc->mult)) {
        confWithBit(fdrc, a, i, r, 0, control, last_match);
        confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
        return;
    } else {
        const u32 id = fdrc->nBitsOrSoleID;
@@ -215,7 +189,7 @@ void confWithBitMany(const struct FDRConfirm * fdrc,
    }

    if (r == VECTORING && len > i - a->start_offset) {
        if (len > (i + a->len_history)) {
        if (len > i + a->len_history) {
            return;
        }
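The old confWithBit above loads the 8 bytes ending at the candidate position; when that load would start before the buffer, the cautious load zeroes the out-of-range low bytes and the tail of the saved history word is shifted down and ORed in. A minimal standalone sketch of that stitch, with hypothetical byte values (little-endian packing, so buf[-1] is the most significant byte of histBytes):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* hypothetical last 8 bytes before the buffer, packed little-endian */
        uint64_t histBytes = 0x1122334455667788ULL;
        /* cautious load: the low 3 bytes fell before the buffer, so zero */
        uint64_t v = 0xb4b3b2b1b0000000ULL;
        uint32_t overhang = 3; /* bytes missing from the front of v */
        /* move the last 'overhang' history bytes into the low end */
        uint64_t hist = histBytes >> (64 - overhang * 8); /* 0x112233 */
        v |= hist;
        printf("stitched: 0x%016llx\n", (unsigned long long)v);
        return 0;
    }

This prints 0xb4b3b2b1b0112233: the three zeroed lanes are filled from history while the in-buffer bytes are untouched.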
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -28,11 +28,11 @@

#include "config.h"

#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile.h"
#include "fdr_compile_internal.h"
#include "fdr_dump.h"
#include "fdr_engine_description.h"
#include "fdr_internal.h"
#include "teddy_engine_description.h"
#include "ue2common.h"

@@ -68,8 +68,7 @@ void fdrPrintStats(const FDR *fdr, FILE *f) {
    }

    if (isTeddy) {
        unique_ptr<TeddyEngineDescription> des =
            getTeddyDescription(fdr->engineID);
        auto des = getTeddyDescription(fdr->engineID);
        if (des) {
            fprintf(f, " masks %u\n", des->numMasks);
            fprintf(f, " buckets %u\n", des->getNumBuckets());
@@ -78,16 +77,8 @@ void fdrPrintStats(const FDR *fdr, FILE *f) {
            fprintf(f, " <unknown engine>\n");
        }
    } else {
        unique_ptr<FDREngineDescription> des =
            getFdrDescription(fdr->engineID);
        if (des) {
            fprintf(f, " domain %u\n", des->bits);
            fprintf(f, " stride %u\n", des->stride);
            fprintf(f, " buckets %u\n", des->getNumBuckets());
            fprintf(f, " width %u\n", des->schemeWidth);
        } else {
            fprintf(f, " <unknown engine>\n");
        }
        fprintf(f, " domain %u\n", fdr->domain);
        fprintf(f, " stride %u\n", fdr->stride);
    }

    fprintf(f, " strings ???\n");
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -42,13 +42,11 @@ using namespace std;

namespace ue2 {

#include "fdr_autogen_compiler.cpp"

FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
    : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
                        def.numBuckets, def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      schemeWidth(def.schemeWidth), stride(def.stride), bits(0) {}
      schemeWidth(def.schemeWidth), stride(0), bits(0) {}

u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
@@ -56,6 +54,12 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    return ((getSchemeWidth() + getNumBuckets() - 1) / getNumBuckets()) + 1;
}

void getFdrDescriptions(vector<FDREngineDescription> *out) {
    static const FDREngineDef def = {0, 128, 8, 0, 1, 256};
    out->clear();
    out->emplace_back(def);
}

static
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
    u32 desiredStride = 1; // always our safe fallback
@@ -108,32 +112,33 @@ unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
    FDREngineDescription *best = nullptr;
    u32 best_score = 0;

    FDREngineDescription &eng = allDescs[0];

    for (u32 domain = 9; domain <= 15; domain++) {
    for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
        for (size_t stride = 1; stride <= 4; stride *= 2) {
            // to make sure that domains >=14 have stride 1 according to origin
            if (domain > 13 && engineID > 0) {
            if (domain > 13 && stride > 1) {
                continue;
            }
            FDREngineDescription &eng = allDescs[engineID];
            if (!eng.isValidOnTarget(target)) {
                continue;
            }
            if (msl < eng.stride) {
            if (msl < stride) {
                continue;
            }

            u32 score = 100;

            score -= absdiff(desiredStride, eng.stride);
            score -= absdiff(desiredStride, stride);

            if (eng.stride <= desiredStride) {
                score += eng.stride;
            if (stride <= desiredStride) {
                score += stride;
            }

            u32 effLits = vl.size(); /* * desiredStride;*/
            u32 ideal;
            if (effLits < eng.getNumBuckets()) {
                if (eng.stride == 1) {
                if (stride == 1) {
                    ideal = 8;
                } else {
                    ideal = 10;
@@ -158,27 +163,28 @@ unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
                ideal -= 2;
            }

            if (eng.stride > 1) {
            if (stride > 1) {
                ideal++;
            }

            DEBUG_PRINTF("effLits %u\n", effLits);

            if (target.is_atom_class() && !make_small && effLits < 4000) {
                /* Unless it is a very heavy case, we want to build smaller tables
                 * on lightweight machines due to their small caches. */
                /* Unless it is a very heavy case, we want to build smaller
                 * tables on lightweight machines due to their small caches. */
                ideal -= 2;
            }

            score -= absdiff(ideal, domain);

            DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
            DEBUG_PRINTF("fdr %u: width=%u, domain=%u, buckets=%u, stride=%zu "
                         "-> score=%u\n",
                         eng.getID(), eng.schemeWidth, eng.bits,
                         eng.getNumBuckets(), eng.stride, score);
                         eng.getID(), eng.schemeWidth, domain,
                         eng.getNumBuckets(), stride, score);

            if (!best || score > best_score) {
                eng.bits = domain;
                eng.stride = stride;
                best = &eng;
                best_score = score;
            }
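The new chooseEngine loop above scores every (domain, stride) pair: strides at or below the desired stride earn a bonus, and domains are penalised by their distance from a literal-count-derived ideal. A standalone sketch of that scoring shape (the 'ideal' value here is a hypothetical stand-in for the full heuristic):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t absdiff_u32(uint32_t a, uint32_t b) {
        return a > b ? a - b : b - a;
    }

    int main(void) {
        uint32_t desiredStride = 2, ideal = 11; /* hypothetical inputs */
        uint32_t best_domain = 0, best_stride = 0, best_score = 0;
        int have_best = 0;
        for (uint32_t domain = 9; domain <= 15; domain++) {
            for (uint32_t stride = 1; stride <= 4; stride *= 2) {
                if (domain > 13 && stride > 1) {
                    continue; /* large domains are restricted to stride 1 */
                }
                uint32_t score = 100;
                score -= absdiff_u32(desiredStride, stride);
                if (stride <= desiredStride) {
                    score += stride; /* reward strides we can afford */
                }
                score -= absdiff_u32(ideal, domain);
                if (!have_best || score > best_score) {
                    have_best = 1;
                    best_score = score;
                    best_domain = domain;
                    best_stride = stride;
                }
            }
        }
        printf("domain=%u stride=%u score=%u\n", best_domain, best_stride,
               best_score);
        return 0;
    }

With these inputs the pair (11, 2) wins with score 102, matching the "closest to ideal, stride no larger than desired" intent of the real loop.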
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -42,7 +42,6 @@ struct FDREngineDef {
    u32 id;
    u32 schemeWidth;
    u32 numBuckets;
    u32 stride;
    u64a cpu_features;
    u32 confirmPullBackDistance;
    u32 confirmTopLevelSplit;
@@ -73,7 +72,6 @@ chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
             bool make_small);
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
void getFdrDescriptions(std::vector<FDREngineDescription> *out);

} // namespace ue2

#endif
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -76,17 +76,17 @@ struct FDR {
     * structures (spillover strings and hash table) if we're a secondary
     * structure. */
    u32 link;
    u8 domain; /* dynamic domain info */
    u8 schemeWidthByte; /* scheme width in bytes */
    u8 stride; /* stride - how frequently the data is consulted by the first
                * stage matcher */
    u8 domain; /* number of bits used to index into main FDR table. This value
                * is used only for debugging/asserts. */
    u16 domainMask; /* pre-computed domain mask */
    u32 tabSize; /* pre-computed hashtable size in bytes */
    u32 pad1;
    u32 pad;

    union {
        u32 s_u32;
        u64a s_u64a;
        m128 s_m128;
    } start;
    m128 start; /* initial start state to use at offset 0. The state has been
                 * set up based on the min length of buckets to reduce the need
                 * for pointless confirms. */
};

/** \brief FDR runtime arguments.
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -29,188 +29,43 @@
#ifndef FDR_LOADVAL_H
#define FDR_LOADVAL_H

#include "fdr_internal.h"
#include "ue2common.h"
#include "util/unaligned.h"
#include "util/simd_utils.h"

#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)
    static really_inline \
    type name(const u8 *ptr, UNUSED const u8 *lo, UNUSED const u8 *hi)

#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)
#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0);
// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)
#define NORMAL_SAFE(type) \
    do { \
        assert(ptr >= lo); \
        assert(ptr + sizeof(type) - 1 < hi); \
    } while(0)

#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
#define CE_INDEX_CHECK (lo <= ptr + i) && (ptr + i < hi)

#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE) \
#define MAKE_LOOP_CE(TYPE) \
    TYPE v = 0; \
    for (TYPE i = 0; i < sizeof(TYPE); i++) { \
        if (COND) { \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8); \
        if ((lo <= ptr + i) && (ptr + i < hi)) { \
            v += (TYPE)ptr[i] << (i*8); \
        } \
    } \
    return v;

#define MAKE_LOOP_BE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

#define MAKE_LOOP_LE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, i)

#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)

// no suffix = normal (unaligned)
// _a = aligned
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
// _ce = cautious everywhere (in both directions); test against hi and lo

// u8 loadvals
MAKE_LOADVAL(u8, lv_u8) {
    NORMAL_SAFE(u8);
    return *ptr;
}

MAKE_LOADVAL(u8, lv_u8_cf) {
    CAUTIOUS_FORWARD_SAFE(u8);
    if (ptr < hi) {
        return *ptr;
    } else {
        return 0;
    }
}

MAKE_LOADVAL(u8, lv_u8_cb) {
    CAUTIOUS_BACKWARD_SAFE(u8);
    if (lo <= ptr) {
        return *ptr;
    } else {
        return 0;
    }
}

MAKE_LOADVAL(u8, lv_u8_ce) {
    if ((lo <= ptr) && (ptr < hi)) {
        return *ptr;
    } else {
        return 0;
    }
}

MAKE_LOADVAL(u16, lv_u16) {
    NORMAL_SAFE(u16);
    return unaligned_load_u16(ptr);
}

MAKE_LOADVAL(u16, lv_u16_a) {
    ALIGNED_SAFE(u16);
    return *(const u16 *)ptr;
}

MAKE_LOADVAL(u32, lv_u32) {
    NORMAL_SAFE(u32);
    return unaligned_load_u32(ptr);
}

MAKE_LOADVAL(u32, lv_u32_a) {
    ALIGNED_SAFE(u32);
    return *(const u32 *)ptr;
}

MAKE_LOADVAL(u64a, lv_u64a) {
    NORMAL_SAFE(u32);
    return unaligned_load_u64a(ptr);
}

MAKE_LOADVAL(u64a, lv_u64a_a) {
    ALIGNED_SAFE(u64a);
    return *(const u64a *)ptr;
}
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_CE(u16); }

MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }

MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }

MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }

MAKE_LOADVAL(m128, lv_m128) {
    NORMAL_SAFE(m128);
    return loadu128(ptr);
}

MAKE_LOADVAL(m128, lv_m128_a) {
    ALIGNED_SAFE(m128);
    assert((size_t)ptr % sizeof(m128) == 0);
    return *(const m128 *)ptr;
}

// m128 cases need to be manually created

MAKE_LOADVAL(m128, lv_m128_cf) {
    CAUTIOUS_FORWARD_SAFE(m128);
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        if (ptr + i < hi) {
            u.val8[i] = ptr[i];
        } else {
            u.val8[i] = 0;
        }
    }
    return u.val128;
}

MAKE_LOADVAL(m128, lv_m128_cb) {
    CAUTIOUS_BACKWARD_SAFE(m128);
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        if (lo <= ptr + i) {
            u.val8[i] = ptr[i];
        } else {
            u.val8[i] = 0;
        }
    }
    return u.val128;
}

MAKE_LOADVAL(m128, lv_m128_ce) {
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        if ((lo <= ptr + i) && (ptr + i < hi)) {
            u.val8[i] = ptr[i];
        } else {
            u.val8[i] = 0;
        }
    }
    return u.val128;
}
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_CE(u64a); }

#endif
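The 'cautious everywhere' loads kept above (MAKE_LOOP_CE) read byte-by-byte and substitute zero for anything outside [lo, hi), assembling the result little-endian. A standalone sketch of that contract with a hypothetical helper and buffer:

    #include <stdint.h>
    #include <stdio.h>

    /* bytes outside [lo, hi) read as zero; in-range bytes pack little-endian */
    static uint32_t load_u32_ce(const uint8_t *ptr, const uint8_t *lo,
                                const uint8_t *hi) {
        uint32_t v = 0;
        for (uint32_t i = 0; i < sizeof(v); i++) {
            if (lo <= ptr + i && ptr + i < hi) {
                v += (uint32_t)ptr[i] << (i * 8);
            }
        }
        return v;
    }

    int main(void) {
        uint8_t mem[6] = {0xAA, 0x11, 0x22, 0x33, 0x44, 0xBB};
        const uint8_t *lo = mem + 1, *hi = mem + 5;
        /* load straddles the low bound: mem[0] is excluded and reads as 0 */
        printf("0x%08x\n", load_u32_ce(mem, lo, hi)); /* 0x33221100 */
        return 0;
    }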
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,7 +26,6 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_streaming_internal.h"
#include "fdr_compile_internal.h"
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,8 @@
#include "fdr_streaming_internal.h"
#include "util/partial_store.h"

#include <string.h>

static really_inline
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
    const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,7 +26,6 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_confirm.h"
#include "fdr_compile_internal.h"
732
src/fdr/teddy.c
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -26,11 +26,19 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
/** \file
 * \brief Teddy literal matcher: SSSE3 engine runtime.
 */

#include "fdr_internal.h"
#include "flood_runtime.h"
#include "teddy.h"
#include "teddy_internal.h"
#include "teddy_runtime_common.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -67,178 +75,584 @@ static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
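The p_mask_arr table above drives the partial loads below: row k is 16 zero bytes followed by k 0xff bytes, so reading 16 bytes starting at offset 16 - start yields 0xff exactly for the lanes [start, start + k) that hold valid data. A scalar sketch of that indexing trick (hypothetical values):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        uint8_t row[32];
        uint32_t k = 5, start = 2; /* hypothetical: 5 valid bytes from lane 2 */
        memset(row, 0x00, sizeof(row));
        memset(row + 16, 0xff, k); /* row layout: 16 zeros, k ones, zeros */
        uint8_t mask[16];
        memcpy(mask, row + 16 - start, sizeof(mask)); /* shifted window */
        for (uint32_t i = 0; i < 16; i++) {
            printf("%c", mask[i] ? '1' : '0');
        }
        printf("\n"); /* prints 0011111000000000 */
        return 0;
    }

Lanes outside the mask are "poisoned" to zero after an AND, so garbage bytes near buffer edges can never produce a match.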
// Note: p_mask is an output param that initialises a poison mask.
UNUSED static really_inline
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history,
                     const u32 nMasks) {
    union {
        u8 val8[16];
        m128 val128;
    } u;
    u.val128 = zeroes128();

    if (ptr >= lo) {
        u32 avail = (u32)(hi - ptr);
        if (avail >= 16) {
            *p_mask = load128((const void*)(p_mask_arr[16] + 16));
            return loadu128(ptr);
        }
        *p_mask = load128((const void*)(p_mask_arr[avail] + 16));
        for (u32 i = 0; i < avail; i++) {
            u.val8[i] = ptr[i];
        }
    } else {
        u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
        u32 start = (u32)(lo - ptr);
        u32 i;
        for (i = start - need; ptr + i < lo; i++) {
            u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
        }
        u32 end = MIN(16, (u32)(hi - ptr));
        *p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start));
        for (; i < end; i++) {
            u.val8[i] = ptr[i];
        }
    }

    return u.val128;
}

#if defined(__AVX2__)

UNUSED static really_inline
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                       const u8 *buf_history, size_t len_history,
                       const u32 nMasks) {
    m128 p_mask128;
    m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks));
    *p_mask = set2x128(p_mask128);
    return ret;
}

#ifdef ARCH_64_BIT
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
do { \
    if (unlikely(isnonzero128(var))) { \
        u64a lo = movq(var); \
        u64a hi = movq(byteShiftRight128(var, 8)); \
        if (unlikely(lo)) { \
            conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
        if (unlikely(hi)) { \
            conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
    } \
} while (0);
#else
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
do { \
    if (unlikely(isnonzero128(var))) { \
        u32 part1 = movd(var); \
        u32 part2 = movd(byteShiftRight128(var, 4)); \
        u32 part3 = movd(byteShiftRight128(var, 8)); \
        u32 part4 = movd(byteShiftRight128(var, 12)); \
        if (unlikely(part1)) { \
            conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
        if (unlikely(part2)) { \
            conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
        if (unlikely(part3)) { \
            conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
        if (unlikely(part4)) { \
            conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
                    control, &last_match); \
            CHECK_HWLM_TERMINATE_MATCHING; \
        } \
    } \
} while (0);
#endif

static really_inline
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
    m128 mask = set16x8(0xf);
    m128 lo = and128(val, mask);
    m128 hi = and128(rshift2x64(val, 4), mask);
    return and128(and128(pshufb(maskBase[0*2], lo),
                         pshufb(maskBase[0*2+1], hi)), p_mask);
}

static really_inline
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
                        m128 val) {
    m128 mask = set16x8(0xf);
    m128 lo = and128(val, mask);
    m128 hi = and128(rshift2x64(val, 4), mask);
    m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);

    m128 res_1 = and128(pshufb(maskBase[1*2], lo),
                        pshufb(maskBase[1*2+1], hi));
    m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
    *old_1 = res_1;
    return and128(and128(r, p_mask), res_shifted_1);
}

static really_inline
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
                        m128 p_mask, m128 val) {
    m128 mask = set16x8(0xf);
    m128 lo = and128(val, mask);
    m128 hi = and128(rshift2x64(val, 4), mask);
    m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);

    m128 res_2 = and128(pshufb(maskBase[2*2], lo),
                        pshufb(maskBase[2*2+1], hi));
    m128 res_shifted_2 = palignr(res_2, *old_2, 16-2);
    *old_2 = res_2;
    return and128(r, res_shifted_2);
}
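The prep_conf_teddy_m* functions above are nibble shuffles: each input byte indexes one table with its low nibble and another with its high nibble, and a bucket bit survives only if both tables agree. A scalar sketch of that per-byte step, with hypothetical tables that accept only the byte 'a':

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint8_t lo_tbl[16] = {0}, hi_tbl[16] = {0};
        /* allow bucket bit 0 only for the byte 0x61 ('a') */
        lo_tbl[0x1] = 0x01;
        hi_tbl[0x6] = 0x01;
        const char *in = "xaz";
        for (int i = 0; in[i]; i++) {
            uint8_t b = (uint8_t)in[i];
            /* the SIMD code does both lookups for 16 bytes at once via pshufb */
            uint8_t res = lo_tbl[b & 0xf] & hi_tbl[b >> 4];
            printf("byte '%c' -> bucket bits 0x%02x\n", in[i], res);
        }
        return 0;
    }

Only 'a' keeps its bucket bit; the multi-mask variants additionally AND in the previous byte's result (shifted by one lane via palignr) so that an n-mask engine matches n consecutive bytes.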
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
UNUSED static really_inline
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history) {
    union {
        u8 val8[32];
        m256 val256;
    } u;

    if (ptr >= lo) {
        u32 avail = (u32)(hi - ptr);
        if (avail >= 32) {
            *p_mask = load256((const void*)(p_mask_arr256[32] + 32));
            return loadu256(ptr);
        }
        *p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
        for (u32 i = 0; i < avail; i++) {
            u.val8[i] = ptr[i];
        }
    } else {
        // need contains "how many chars to pull from history"
        // calculate based on what we need, what we have in the buffer
        // and only what we need to make primary confirm work
        u32 start = (u32)(lo - ptr);
        u32 i;
        for (i = start; ptr + i < lo; i++) {
            u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
        }
        u32 end = MIN(32, (u32)(hi - ptr));
        *p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
        for (; i < end; i++) {
            u.val8[i] = ptr[i];
        }
    }

    return u.val256;
}

static really_inline
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
                        m128 *old_3, m128 p_mask, m128 val) {
    m128 mask = set16x8(0xf);
    m128 lo = and128(val, mask);
    m128 hi = and128(rshift2x64(val, 4), mask);
    m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);

    m128 res_3 = and128(pshufb(maskBase[3*2], lo),
                        pshufb(maskBase[3*2+1], hi));
    m128 res_shifted_3 = palignr(res_3, *old_3, 16-3);
    *old_3 = res_3;
    return and128(r, res_shifted_3);
}
#endif // __AVX2__

#define P0(cnd) unlikely(cnd)

hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a) {
    const u8 *buf_end = a->buf + a->len;
    const u8 *ptr = a->buf + a->start_offset;
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    const u8 *tryFloodDetect = a->firstFloodDetect;
    u32 last_match = (u32)-1;
    const struct Teddy *teddy = (const struct Teddy *)fdr;
    const size_t iterBytes = 32;
    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
                 a->buf, a->len, a->start_offset);

    const m128 *maskBase = getMaskBase(teddy);
    const u32 *confBase = getConfBase(teddy, 1);

    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
    if (ptr < mainStart) {
        ptr = mainStart - 16;
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 1);
        m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
        ptr += 16;
    }

    if (ptr + 16 < buf_end) {
        m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
        ptr += 16;
    }

    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
        __builtin_prefetch(ptr + (iterBytes*4));
        CHECK_FLOOD;
        m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
        m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
    }

    for (; ptr < buf_end; ptr += 16) {
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 1);
        m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
    }
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}
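The CONFIRM_TEDDY calls above split the 128-bit per-lane match result into two 64-bit halves (or four 32-bit parts on 32-bit targets) and only run confirmation on the halves that contain set bits, offsetting the second half by 8 bytes. A scalar sketch of that lane split, with the vector modelled as two hypothetical u64 halves and a stub confirm:

    #include <stdint.h>
    #include <stdio.h>

    static void conf_stub(uint64_t bits, uint32_t offset) {
        printf("confirm: bits=0x%016llx at offset %u\n",
               (unsigned long long)bits, offset);
    }

    int main(void) {
        /* hypothetical match-bit vector: only the high half is nonzero */
        uint64_t var[2] = {0, 0x0000010000000000ULL};
        if (var[0] | var[1]) {        /* isnonzero128(var) */
            if (var[0]) {
                conf_stub(var[0], 0); /* low half covers bytes 0..7 */
            }
            if (var[1]) {
                conf_stub(var[1], 8); /* high half covers bytes 8..15 */
            }
        }
        return 0;
    }

Keeping the zero-half check outside the confirm path is what lets the common no-match case fall through with a couple of branches.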
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 1);
|
||||
|
||||
#include "fdr_loadval.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "teddy_internal.h"
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
#include "teddy_autogen.c"
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a) {
    const u8 *buf_end = a->buf + a->len;
    const u8 *ptr = a->buf + a->start_offset;
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    const u8 *tryFloodDetect = a->firstFloodDetect;
    u32 last_match = (u32)-1;
    const struct Teddy *teddy = (const struct Teddy *)fdr;
    const size_t iterBytes = 32;
    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
                 a->buf, a->len, a->start_offset);

    const m128 *maskBase = getMaskBase(teddy);
    const u32 *confBase = getConfBase(teddy, 2);

    m128 res_old_1 = ones128();
    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
    if (ptr < mainStart) {
        ptr = mainStart - 16;
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 2);
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
        ptr += 16;
    }

    if (ptr + 16 < buf_end) {
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
        ptr += 16;
    }

    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
        __builtin_prefetch(ptr + (iterBytes*4));
        CHECK_FLOOD;
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
        m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr + 16));
        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
    }

    for (; ptr < buf_end; ptr += 16) {
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 2);
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
    }
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
                                      const struct FDR_Runtime_Args *a) {
    const u8 *buf_end = a->buf + a->len;
    const u8 *ptr = a->buf + a->start_offset;
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    const u8 *tryFloodDetect = a->firstFloodDetect;
    u32 last_match = (u32)-1;
    const struct Teddy *teddy = (const struct Teddy *)fdr;
    const size_t iterBytes = 32;
    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
                 a->buf, a->len, a->start_offset);

    const m128 *maskBase = getMaskBase(teddy);
    const u32 *confBase = getConfBase(teddy, 2);

    m128 res_old_1 = ones128();
    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
    if (ptr < mainStart) {
        ptr = mainStart - 16;
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 2);
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
        ptr += 16;
    }

    if (ptr + 16 < buf_end) {
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
        ptr += 16;
    }

    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
        __builtin_prefetch(ptr + (iterBytes*4));
        CHECK_FLOOD;
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
        m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
                                      load128(ptr + 16));
        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
    }

    for (; ptr < buf_end; ptr += 16) {
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 2);
        m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
    }
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a) {
    const u8 *buf_end = a->buf + a->len;
    const u8 *ptr = a->buf + a->start_offset;
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
    u32 floodBackoff = FLOOD_BACKOFF_START;
    const u8 *tryFloodDetect = a->firstFloodDetect;
    u32 last_match = (u32)-1;
    const struct Teddy *teddy = (const struct Teddy *)fdr;
    const size_t iterBytes = 32;
    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
                 a->buf, a->len, a->start_offset);

    const m128 *maskBase = getMaskBase(teddy);
    const u32 *confBase = getConfBase(teddy, 3);

    m128 res_old_1 = ones128();
    m128 res_old_2 = ones128();
    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
    if (ptr < mainStart) {
        ptr = mainStart - 16;
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 3);
        m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
                                      p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
        ptr += 16;
    }

    if (ptr + 16 < buf_end) {
        m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
                                      ones128(), load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
        ptr += 16;
    }

    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
        __builtin_prefetch(ptr + (iterBytes*4));
        CHECK_FLOOD;
        m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
                                      ones128(), load128(ptr));
        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
        m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
                                      ones128(), load128(ptr + 16));
        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
    }

    for (; ptr < buf_end; ptr += 16) {
        m128 p_mask;
        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
                                     a->buf_history, a->len_history, 3);
        m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
                                      p_mask, val_0);
        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
    }
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 3);
|
||||
|
||||
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 4);
|
||||
|
||||
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
m128 res_old_3 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
||||
const size_t iterBytes = 32;
|
||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
||||
a->buf, a->len, a->start_offset);
|
||||
|
||||
const m128 *maskBase = getMaskBase(teddy);
|
||||
const u32 *confBase = getConfBase(teddy, 4);
|
||||
|
||||
m128 res_old_1 = ones128();
|
||||
m128 res_old_2 = ones128();
|
||||
m128 res_old_3 = ones128();
|
||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 16);
|
||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
||||
if (ptr < mainStart) {
|
||||
ptr = mainStart - 16;
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
for (; ptr < buf_end; ptr += 16) {
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
97
src/fdr/teddy.h
Normal file
@ -0,0 +1,97 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Teddy literal matcher: function declarations.
 */

#ifndef TEDDY_H_
#define TEDDY_H_

struct FDR; // forward declaration from fdr_internal.h
struct FDR_Runtime_Args;

hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
                                      const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
                                      const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
                                      const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
                                  const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
                                      const struct FDR_Runtime_Args *a);

#if defined(__AVX2__)

hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
                                           const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
                                               const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
                                           const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
                                               const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
                                           const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
                                               const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
                                           const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
                                               const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
                                            const struct FDR_Runtime_Args *a);

hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
                                                const struct FDR_Runtime_Args *a);

#endif /* __AVX2__ */

#endif /* TEDDY_H_ */
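The header above is the complete SSE entry-point family: one fdr_exec_teddy_msksN plus a "_pck" (packed confirm) variant per mask count. A minimal driver sketch follows; it is illustrative only. The wrapper name is ours, the FDR_Runtime_Args field assignments are inferred from the uses visible in teddy.c in this same commit, and the real caller is the FDR dispatch layer.

/* Hypothetical driver sketch -- not part of this commit. */
#include <string.h>
#include "fdr/teddy.h"
#include "fdr/fdr_internal.h"   /* assumed home of struct FDR_Runtime_Args */

static hwlm_error_t scan_block_with_teddy(const struct FDR *fdr,
                                          const u8 *data, size_t len,
                                          hwlmcb_rv_t *groups) {
    struct FDR_Runtime_Args a;
    memset(&a, 0, sizeof(a));
    a.buf = data;               /* block to scan */
    a.len = len;
    a.start_offset = 0;         /* scan from the start of the block */
    a.buf_history = NULL;       /* no stream history in block mode */
    a.len_history = 0;
    a.groups = groups;          /* live group mask, updated in place */
    a.firstFloodDetect = data + len; /* assumed semantics: defer flood checks */
    return fdr_exec_teddy_msks1(fdr, &a);
}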
@ -1,545 +0,0 @@
#!/usr/bin/python

# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of Intel Corporation nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
from autogen_utils import *
from base_autogen import *
from string import Template

class MT(MatcherBase):
    def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
        if self.packed:
            print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False)
        else:
            if self.num_masks == 1:
                conf_func = "confWithBit1"
            else:
                conf_func = "confWithBitMany"

            if cautious:
                caution_string = "VECTORING"
            else:
                caution_string = "NOT_CAUTIOUS"

            print " if (P0(!!%s)) {" % var_name
            print " do {"
            if bits == 64:
                print " bit = findAndClearLSB_64(&%s);" % (var_name)
            else:
                print " bit = findAndClearLSB_32(&%s);" % (var_name)
            print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset)
            print " idx = bit %% %d;" % self.num_buckets
            print " cf = confBase[idx];"
            print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
            print " if (!(fdrc->groups & *control))"
            print " continue;"
            print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string)
            print " } while(P0(!!%s));" % var_name
            print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
            print " *a->groups = controlVal;"
            print " return HWLM_TERMINATED;"
            print " }"
            print " }"

    def produce_needed_temporaries(self, max_iterations):
        print " m128 p_mask;"
        for iter in range(0, max_iterations):
            print " m128 val_%d;" % iter
            print " m128 val_%d_lo;" % iter
            print " m128 val_%d_hi;" % iter
            for x in range(self.num_masks):
                print " m128 res_%d_%d;" % (iter, x)
                if x != 0:
                    print " m128 res_shifted_%d_%d;" % (iter, x)
            print " m128 r_%d;" % iter
            print "#ifdef ARCH_64_BIT"
            print " u64a r_%d_lopart;" % iter
            print " u64a r_%d_hipart;" % iter
            print "#else"
            print " u32 r_%d_part1;" % iter
            print " u32 r_%d_part2;" % iter
            print " u32 r_%d_part3;" % iter
            print " u32 r_%d_part4;" % iter
            print "#endif"

    def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
                                         cautious, save_old):
        if cautious:
            print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
        else:
            print " val_%d = load128(ptr + %d);" % (iter, iter*16)
        print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter)
        print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter)
        print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter)
        print
        for x in range(self.num_masks):
            print Template("""
res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),
pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
            if x != 0:
                if iter == 0:
                    print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
                else:
                    print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
            if x != 0 and iter == effective_num_iterations - 1 and save_old:
                print " res_old_%d = res_%d_%d;" % (x, iter, x)
        print
        if cautious:
            print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter)
        else:
            print " r_%d = res_%d_0;" % (iter, iter)
        for x in range(1, self.num_masks):
            print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
        print

    def produce_one_iteration_confirm(self, iter, confirmCautious):
        setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
                    (8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ]

        setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter),
                    (4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter),
                    (8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter),
                    (12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ]

        print " if (P0(isnonzero128(r_%d))) {" % (iter)
        print "#ifdef ARCH_64_BIT"
        for (off, val, init) in setup64:
            print " %s = %s;" % (val, init)
        for (off, val, init) in setup64:
            self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
        print "#else"
        for (off, val, init) in setup32:
            print " %s = %s;" % (val, init)
        for (off, val, init) in setup32:
            self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
        print "#endif"
        print " }"

    def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
                              confirmCautious = True, save_old = True):
        self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old)
        self.produce_one_iteration_confirm(iter, confirmCautious)

    def produce_code(self):
        print self.produce_header(visible = True, header_only = False)
        print self.produce_common_declarations()
        print

        self.produce_needed_temporaries(self.num_iterations)
        print

        print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
        print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
        print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks
        print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
        print " const size_t iterBytes = %d;" % (self.num_iterations * 16)

        print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
              ' buf, len, a->start_offset);'
        print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
              ' mainStart);'

        for x in range(self.num_masks):
            if (x != 0):
                print " m128 res_old_%d = ones128();" % x
        print " m128 lomask = set16x8(0xf);"

        print " if (ptr < mainStart) {"
        print " ptr = mainStart - 16;"
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print " ptr += 16;"
        print " }"

        print " if (ptr + 16 < buf + len) {"
        self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
        print " ptr += 16;"
        print " }"

        print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
        print " __builtin_prefetch(ptr + (iterBytes*4));"
        print self.produce_flood_check()

        for iter in range(self.num_iterations):
            self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)

        print " }"

        print " for (; ptr < buf + len; ptr += 16) {"
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print " }"

        print self.produce_footer()

    def produce_compile_call(self):
        packed_str = { False : "false", True : "true"}[self.packed]
        print " { %d, %s, %d, %d, %s, %d, %d }," % (
            self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
            self.conf_pull_back, self.conf_top_level_split)

    def get_name(self):
        if self.packed:
            pck_string = "_pck"
        else:
            pck_string = ""

        if self.num_buckets == 16:
            type_string = "_fat"
        else:
            type_string = ""

        return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks, pck_string, type_string)

    def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
        self.arch = arch
        self.packed = packed
        self.num_masks = num_masks
        self.num_buckets = num_buckets
        self.num_iterations = 2

        if packed:
            self.conf_top_level_split = 32
        else:
            self.conf_top_level_split = 1
        self.conf_pull_back = 0

class MTFat(MT):
    def produce_needed_temporaries(self, max_iterations):
        print " m256 p_mask;"
        for iter in range(0, max_iterations):
            print " m256 val_%d;" % iter
            print " m256 val_%d_lo;" % iter
            print " m256 val_%d_hi;" % iter
            for x in range(self.num_masks):
                print " m256 res_%d_%d;" % (iter, x)
                if x != 0:
                    print " m256 res_shifted_%d_%d;" % (iter, x)
            print " m256 r_%d;" % iter
            print "#ifdef ARCH_64_BIT"
            print " u64a r_%d_part1;" % iter
            print " u64a r_%d_part2;" % iter
            print " u64a r_%d_part3;" % iter
            print " u64a r_%d_part4;" % iter
            print "#else"
            print " u32 r_%d_part1;" % iter
            print " u32 r_%d_part2;" % iter
            print " u32 r_%d_part3;" % iter
            print " u32 r_%d_part4;" % iter
            print " u32 r_%d_part5;" % iter
            print " u32 r_%d_part6;" % iter
            print " u32 r_%d_part7;" % iter
            print " u32 r_%d_part8;" % iter
            print "#endif"

    def produce_code(self):
        print self.produce_header(visible = True, header_only = False)
        print self.produce_common_declarations()
        print

        self.produce_needed_temporaries(self.num_iterations)
        print

        print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
        print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));"
        print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks
        print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
        print " const size_t iterBytes = %d;" % (self.num_iterations * 16)

        print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
              ' buf, len, a->start_offset);'
        print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
              ' mainStart);'

        for x in range(self.num_masks):
            if (x != 0):
                print " m256 res_old_%d = ones256();" % x
        print " m256 lomask = set32x8(0xf);"

        print " if (ptr < mainStart) {"
        print " ptr = mainStart - 16;"
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print " ptr += 16;"
        print " }"

        print " if (ptr + 16 < buf + len) {"
        self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
        print " ptr += 16;"
        print " }"

        print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
        print " __builtin_prefetch(ptr + (iterBytes*4));"
        print self.produce_flood_check()

        for iter in range(self.num_iterations):
            self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)

        print " }"

        print " for (; ptr < buf + len; ptr += 16) {"
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print " }"

        print self.produce_footer()

    def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
                                         cautious, save_old):
        if cautious:
            print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
        else:
            print " val_%d = load2x128(ptr + %d);" % (iter, iter*16)
        print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter)
        print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter)
        print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter)
        print
        for x in range(self.num_masks):
            print Template("""
res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),
vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
            if x != 0:
                if iter == 0:
                    print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
                else:
                    print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
            if x != 0 and iter == effective_num_iterations - 1 and save_old:
                print " res_old_%d = res_%d_%d;" % (x, iter, x)
        print
        if cautious:
            print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter)
        else:
            print " r_%d = res_%d_0;" % (iter, iter)
        for x in range(1, self.num_masks):
            print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
        print

    def produce_one_iteration_confirm(self, iter, confirmCautious):
        setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"),
                    (4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
                    (8, "r_%d_part3" % iter, "extractlow64from256(r)"),
                    (12, "r_%d_part4" % iter, "extract64from256(r, 1)") ]

        setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"),
                    (2, "r_%d_part2" % iter, "extract32from256(r, 1)"),
                    (4, "r_%d_part3" % iter, "extract32from256(r, 2)"),
                    (6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
                    (8, "r_%d_part5" % iter, "extractlow32from256(r)"),
                    (10, "r_%d_part6" % iter, "extract32from256(r, 1)"),
                    (12, "r_%d_part7" % iter, "extract32from256(r, 2)"),
                    (14, "r_%d_part8" % iter, "extract32from256(r, 3)") ]

        print " if (P0(isnonzero256(r_%d))) {" % (iter)
        print " m256 r_swap = swap128in256(r_%d);" % (iter)
        print " m256 r = interleave256lo(r_%d, r_swap);" % (iter)
        print "#ifdef ARCH_64_BIT"
        for (off, val, init) in setup64:
            print " %s = %s;" % (val, init)

        for (off, val, init) in setup64:
            self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
        print "#else"
        for (off, val, init) in setup32:
            print " %s = %s;" % (val, init)

        for (off, val, init) in setup32:
            self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
        print "#endif"
        print " }"

class MTFast(MatcherBase):

    def produce_confirm(self, cautious):
        if cautious:
            cautious_str = "VECTORING"
        else:
            cautious_str = "NOT_CAUTIOUS"

        print " for (u32 i = 0; i < arrCnt; i++) {"
        print " byte = bitArr[i] / 8;"
        if self.packed:
            conf_split_mask = IntegerType(32).constant_to_string(
                self.conf_top_level_split - 1)
            print " bitRem = bitArr[i] % 8;"
            print " confSplit = *(ptr+byte) & 0x1f;"
            print " idx = confSplit * %d + bitRem;" % self.num_buckets
            print " cf = confBase[idx];"
            print " if (!cf)"
            print " continue;"
            print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
            print " if (!(fdrc->groups & *control))"
            print " continue;"
            print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str
        else:
            print " cf = confBase[bitArr[i] % 8];"
            print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
            print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str
        print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
        print " *a->groups = controlVal;"
        print " return HWLM_TERMINATED;"
        print " }"
        print " }"

    def produce_needed_temporaries(self, max_iterations):
        print " u32 arrCnt;"
        print " u16 bitArr[512];"
        print " m256 p_mask;"
        print " m256 val_0;"
        print " m256 val_0_lo;"
        print " m256 val_0_hi;"
        print " m256 res_0;"
        print " m256 res_1;"
        print " m128 lo_part;"
        print " m128 hi_part;"
        print "#ifdef ARCH_64_BIT"
        print " u64a r_0_part;"
        print "#else"
        print " u32 r_0_part;"
        print "#endif"

    def produce_bit_scan(self, offset, bits):
        print " while (P0(!!r_0_part)) {"
        if bits == 64:
            print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset)
        else:
            print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset)
        print " }"

    def produce_bit_check_128(self, var_name, offset):
        print " if (P0(isnonzero128(%s))) {" % (var_name)
        print "#ifdef ARCH_64_BIT"
        print " r_0_part = movq(%s);" % (var_name)
        self.produce_bit_scan(offset, 64)
        print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name)
        self.produce_bit_scan(offset + 1, 64)
        print "#else"
        print " r_0_part = movd(%s);" % (var_name)
        self.produce_bit_scan(offset * 2, 32)
        for step in range(1, 4):
            print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4)
            self.produce_bit_scan(offset * 2 + step, 32)
        print "#endif"
        print " }"

    def produce_bit_check_256(self, iter, single_iter, cautious):
        print " if (P0(isnonzero256(res_%d))) {" % (iter)
        if single_iter:
            print " arrCnt = 0;"
        print " lo_part = cast256to128(res_%d);" % (iter)
        print " hi_part = cast256to128(swap128in256(res_%d));" % (iter)
        self.produce_bit_check_128("lo_part", iter * 4)
        self.produce_bit_check_128("hi_part", iter * 4 + 2)
        if single_iter:
            self.produce_confirm(cautious)
        print " }"

    def produce_one_iteration_state_calc(self, iter, cautious):
        if cautious:
            print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32)
        else:
            print " val_0 = load256(ptr + %d);" % (iter * 32)
        print " val_0_lo = and256(val_0, lomask);"
        print " val_0_hi = rshift4x64(val_0, 4);"
        print " val_0_hi = and256(val_0_hi, lomask);"
        print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter)
        if cautious:
            print " res_%d = and256(res_%d, p_mask);" % (iter, iter)

    def produce_code(self):
        print self.produce_header(visible = True, header_only = False)
        print self.produce_common_declarations()
        print

        self.produce_needed_temporaries(self.num_iterations)

        print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
        print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
        print " const m256 maskLo = set2x128(maskBase[0]);"
        print " const m256 maskHi = set2x128(maskBase[1]);"
        print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);"
        print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);"
        print " const size_t iterBytes = %d;" % (self.num_iterations * 32)

        print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
              ' buf, len, a->start_offset);'
        print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
              ' mainStart);'
        print " const m256 lomask = set32x8(0xf);"

        print " if (ptr < mainStart) {"
        print " ptr = mainStart - 32;"
        self.produce_one_iteration_state_calc(iter = 0, cautious = True)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print " ptr += 32;"
        print " }"

        print " if (ptr + 32 < buf + len) {"
        self.produce_one_iteration_state_calc(iter = 0, cautious = False)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print " ptr += 32;"
        print " }"
        print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
        print " __builtin_prefetch(ptr + (iterBytes*4));"
        print self.produce_flood_check()
        for iter in range (0, self.num_iterations):
            self.produce_one_iteration_state_calc(iter = iter, cautious = False)
        print " arrCnt = 0;"
        for iter in range (0, self.num_iterations):
            self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
        self.produce_confirm(cautious = False)
        print " }"

        print " for (; ptr < buf + len; ptr += 32) {"
        self.produce_one_iteration_state_calc(iter = 0, cautious = True)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print " }"

        print self.produce_footer()

    def get_name(self):
        if self.packed:
            pck_string = "_pck"
        else:
            pck_string = ""
        return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)

    def produce_compile_call(self):
        packed_str = { False : "false", True : "true"}[self.packed]
        print " { %d, %s, %d, %d, %s, %d, %d }," % (
            self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
            self.conf_pull_back, self.conf_top_level_split)

    def __init__(self, arch, packed = False):
        self.arch = arch
        self.packed = packed
        self.num_masks = 1
        self.num_buckets = 8
        self.num_iterations = 2

        self.conf_top_level_split = 1
        self.conf_pull_back = 0
        if packed:
            self.conf_top_level_split = 32
        else:
            self.conf_top_level_split = 1
        self.conf_pull_back = 0
1110
src/fdr/teddy_avx2.c
Normal file
File diff suppressed because it is too large
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -26,7 +26,6 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
@ -65,7 +64,32 @@ bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const
    return false;
}

#include "teddy_autogen_compiler.cpp"
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
    static const TeddyEngineDef defns[] = {
        { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 },
        { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 },
        { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 },
        { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 },
        { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 },
        { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 },
        { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 },
        { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 },
        { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 },
        { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 },
        { 11, 0, 1, 8, false, 0, 1 },
        { 12, 0, 1, 8, true, 0, 32 },
        { 13, 0, 2, 8, false, 0, 1 },
        { 14, 0, 2, 8, true, 0, 32 },
        { 15, 0, 3, 8, false, 0, 1 },
        { 16, 0, 3, 8, true, 0, 32 },
        { 17, 0, 4, 8, false, 0, 1 },
        { 18, 0, 4, 8, true, 0, 32 },
    };
    out->clear();
    for (const auto &def : defns) {
        out->emplace_back(def);
    }
}
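For readers decoding the initializer table above: the produce_compile_call() emitter in the Python codegen deleted earlier in this commit wrote each row as { id, arch.target, num_masks, num_buckets, packed, conf_pull_back, conf_top_level_split }, which gives the column meanings. A self-contained sketch of that layout follows; the struct name and field names here are ours, not the real TeddyEngineDef definition.

/* Inferred column layout of the TeddyEngineDef rows above -- a sketch. */
#include <stdint.h>

struct TeddyEngineDefSketch {
    uint32_t id;                /* engine id: 1-10 AVX2 rows, 11-18 SSE rows */
    uint64_t cpu_features;      /* 0, or HS_CPU_FEATURES_AVX2 for fat engines */
    uint32_t numMasks;          /* 1-4 shufti mask pairs applied per byte */
    uint32_t numBuckets;        /* 8 for 128-bit engines, 16 for AVX2 "fat" */
    char packed;                /* true for the "_pck" packed-confirm variants */
    uint32_t confPullBack;      /* always 0 in the table above */
    uint32_t confTopLevelSplit; /* 1 unpacked, 32 packed */
};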

static
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
256
src/fdr/teddy_runtime_common.h
Normal file
@ -0,0 +1,256 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Teddy literal matcher: common runtime procedures.
 */

#ifndef TEDDY_RUNTIME_COMMON_H_
#define TEDDY_RUNTIME_COMMON_H_

#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"

extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];

#ifdef ARCH_64_BIT
#define TEDDY_CONF_TYPE u64a
#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_64(conf)
#else
#define TEDDY_CONF_TYPE u32
#define TEDDY_FIND_AND_CLEAR_LSB(conf) findAndClearLSB_32(conf)
#endif

#define CHECK_HWLM_TERMINATE_MATCHING \
do { \
    if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
        *a->groups = controlVal; \
        return HWLM_TERMINATED; \
    } \
} while (0);

#define CHECK_FLOOD \
do { \
    if (unlikely(ptr > tryFloodDetect)) { \
        tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
                                     &floodBackoff, &controlVal, \
                                     iterBytes); \
        CHECK_HWLM_TERMINATE_MATCHING; \
    } \
} while (0);

/*
 * \brief Copy a block of [0,15] bytes efficiently.
 *
 * This function is a workaround intended to stop some compilers from
 * synthesizing a memcpy function call out of the copy of a small number of
 * bytes that we do in vectoredLoad128.
 */
static really_inline
void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) {
    switch (len) {
    case 0:
        break;
    case 1:
        *dst = *src;
        break;
    case 2:
        unaligned_store_u16(dst, unaligned_load_u16(src));
        break;
    case 3:
        unaligned_store_u16(dst, unaligned_load_u16(src));
        dst[2] = src[2];
        break;
    case 4:
        unaligned_store_u32(dst, unaligned_load_u32(src));
        break;
    case 5:
    case 6:
    case 7:
        /* Perform copy with two overlapping 4-byte chunks. */
        unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
        unaligned_store_u32(dst, unaligned_load_u32(src));
        break;
    case 8:
        unaligned_store_u64a(dst, unaligned_load_u64a(src));
        break;
    default:
        /* Perform copy with two overlapping 8-byte chunks. */
        assert(len < 16);
        unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
        unaligned_store_u64a(dst, unaligned_load_u64a(src));
        break;
    }
}

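The default branch above covers lengths 9 through 15 with two overlapping 8-byte copies: the first writes bytes [len-8, len), the second bytes [0, 8), and any bytes written twice carry identical data. A standalone demonstration of the same trick (names here are ours, not part of the diff):

/* Overlapping-chunk copy demo: for len = 11 the two 8-byte copies cover
 * bytes [3,11) and [0,8); bytes 3-7 are written twice, harmlessly. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void copy_runt_demo(uint8_t *dst, const uint8_t *src, size_t len) {
    assert(len > 8 && len < 16);
    uint64_t head, tail;
    memcpy(&tail, src + len - 8, 8); /* high chunk: src[len-8 .. len-1] */
    memcpy(&head, src, 8);           /* low chunk: src[0 .. 7] */
    memcpy(dst + len - 8, &tail, 8);
    memcpy(dst, &head, 8);
}

int main(void) {
    const uint8_t src[16] = "abcdefghijklmno";
    uint8_t dst[16] = {0};
    copy_runt_demo(dst, src, 11);
    printf("%.11s\n", dst); /* prints "abcdefghijk" */
    return 0;
}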
// Note: p_mask is an output param that initialises a poison mask.
static really_inline
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history,
                     const u32 nMasks) {
    union {
        u8 val8[16];
        m128 val128;
    } u;
    u.val128 = zeroes128();

    uintptr_t copy_start;
    uintptr_t copy_len;

    if (ptr >= lo) {
        uintptr_t avail = (uintptr_t)(hi - ptr);
        if (avail >= 16) {
            *p_mask = load128(p_mask_arr[16] + 16);
            return loadu128(ptr);
        }
        *p_mask = load128(p_mask_arr[avail] + 16);
        copy_start = 0;
        copy_len = avail;
    } else {
        uintptr_t need = MIN((uintptr_t)(lo - ptr),
                             MIN(len_history, nMasks - 1));
        uintptr_t start = (uintptr_t)(lo - ptr);
        uintptr_t i;
        for (i = start - need; ptr + i < lo; i++) {
            u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
        }
        uintptr_t end = MIN(16, (uintptr_t)(hi - ptr));
        *p_mask = loadu128(p_mask_arr[end - start] + 16 - start);
        copy_start = i;
        copy_len = end - i;
    }

    // Runt block from the buffer.
    copyRuntBlock128(&u.val8[copy_start], &ptr[copy_start], copy_len);

    return u.val128;
}

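Our reading of p_mask_arr, from the two load expressions above: row k holds 16 zero bytes, then k bytes of 0xff, then 16-k zero bytes, so a 16-byte load at offset 16 - start yields 0xff exactly in the lanes [start, end) that are backed by real input (end - start = k). Cautious callers AND the shufti result with this mask, so lanes filled from zeroed union bytes can never raise a confirm. A scalar sketch of the same mask, under that assumption:

/* Equivalent (slower) construction of the poison mask -- an assumption
 * about p_mask_arr's contents, not code from this commit. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void poison_mask_bytes(uint8_t mask[16], size_t start, size_t end) {
    memset(mask, 0x00, 16);          /* poisoned lanes contribute nothing */
    for (size_t i = start; i < end && i < 16; i++) {
        mask[i] = 0xff;              /* lanes backed by real input */
    }
}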
static really_inline
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
                CautionReason reason) {
    u64a confVal = 0;
    const u8 *buf = a->buf;
    size_t len = a->len;
    const u8 *confirm_loc = ptr + byte - 7;
    if (likely(reason == NOT_CAUTIOUS || confirm_loc >= buf)) {
        confVal = lv_u64a(confirm_loc, buf, buf + len);
    } else { // r == VECTORING, confirm_loc < buf
        u64a histBytes = a->histBytes;
        confVal = lv_u64a_ce(confirm_loc, buf, buf + len);
        // stitch together confVal and history
        u32 overhang = buf - confirm_loc;
        histBytes >>= 64 - (overhang * 8);
        confVal |= histBytes;
    }
    return confVal;
}

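A worked example of the stitch above, under the assumption (consistent with the right-shift) that a->histBytes carries the last eight stream-history bytes with the most recent in the most significant position. If the confirm byte sits at offset 2 of the buffer, then confirm_loc = buf - 5 and overhang = 5; the clamped load leaves the five out-of-buffer low bytes zero, and the shift drops the five most recent history bytes into exactly those lanes:

/* History-stitch arithmetic demo; all values are invented. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t histBytes = 0x1122334455667788ULL; /* 0x11 = most recent byte */
    uint64_t confVal = 0xAABB000000000000ULL;   /* low 5 bytes zeroed by the
                                                 * clamped (_ce) load */
    uint32_t overhang = 5;
    confVal |= histBytes >> (64 - overhang * 8);
    printf("stitched: %016llx\n", (unsigned long long)confVal);
    /* prints: stitched: aabb001122334455 */
    return 0;
}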
static really_inline
void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
                          const u32 *confBase, CautionReason reason,
                          const struct FDR_Runtime_Args *a, const u8 *ptr,
                          hwlmcb_rv_t *control, u32 *last_match) {
    do {
        u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
        u32 byte = bit / bucket + offset;
        u32 bitRem = bit % bucket;
        u32 confSplit = *(ptr+byte) & 0x1f;
        u32 idx = confSplit * bucket + bitRem;
        u32 cf = confBase[idx];
        if (!cf) {
            continue;
        }
        const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
                                        ((const u8 *)confBase + cf);
        if (!(fdrc->groups & *control)) {
            continue;
        }
        u64a confVal = getConfVal(a, ptr, byte, reason);
        confWithBit(fdrc, a, ptr - a->buf + byte, 0, control,
                    last_match, confVal);
    } while (unlikely(*conf));
}

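A toy walk-through of the packed-confirm indexing above, using the parameter values the callers in teddy.c pass (bucket = 8, offset = 16 for the second vector of an iteration); the concrete bit value and input byte are invented for illustration:

/* Packed-confirm index arithmetic demo; values are hypothetical. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t bucket = 8;       /* buckets per scanned byte position */
    uint32_t offset = 16;      /* this vector covers bytes [16,32) */
    uint32_t bit = 42;         /* LSB pulled out of the confirm word */
    uint8_t input_byte = 'T';  /* byte of input at the match position */

    uint32_t byte = bit / bucket + offset;      /* 42/8 + 16 = 21 */
    uint32_t bitRem = bit % bucket;             /* 42%8 = 2 */
    uint32_t confSplit = input_byte & 0x1f;     /* 'T' & 0x1f = 20 */
    uint32_t idx = confSplit * bucket + bitRem; /* 20*8 + 2 = 162 */
    printf("byte=%u idx=%u\n", byte, idx);
    return 0;
}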
static really_inline
void do_confWithBit1_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
                           const u32 *confBase, CautionReason reason,
                           const struct FDR_Runtime_Args *a, const u8 *ptr,
                           hwlmcb_rv_t *control, u32 *last_match) {
    do {
        u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
        u32 byte = bit / bucket + offset;
        u32 idx = bit % bucket;
        u32 cf = confBase[idx];
        const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
                                        ((const u8 *)confBase + cf);
        if (!(fdrc->groups & *control)) {
            continue;
        }
        u64a confVal = getConfVal(a, ptr, byte, reason);
        confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match,
                     confVal);
    } while (unlikely(*conf));
}

static really_inline
void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
                              const u32 *confBase, CautionReason reason,
                              const struct FDR_Runtime_Args *a, const u8 *ptr,
                              hwlmcb_rv_t *control, u32 *last_match) {
    do {
        u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
        u32 byte = bit / bucket + offset;
        u32 idx = bit % bucket;
        u32 cf = confBase[idx];
        const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
                                        ((const u8 *)confBase + cf);
        if (!(fdrc->groups & *control)) {
            continue;
        }
        u64a confVal = getConfVal(a, ptr, byte, reason);
        confWithBitMany(fdrc, a, ptr - a->buf + byte, reason, control,
                        last_match, confVal);
    } while (unlikely(*conf));
}

static really_inline
const m128 * getMaskBase(const struct Teddy *teddy) {
    return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy));
}

static really_inline
const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) {
    return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) +
                         (numMask*32));
}

#endif /* TEDDY_RUNTIME_COMMON_H_ */
@ -54,7 +54,6 @@ Grey::Grey(void) :
    allowRose(true),
    allowExtendedNFA(true), /* bounded repeats of course */
    allowLimExNFA(true),
    allowSidecar(true),
    allowAnchoredAcyclic(true),
    allowSmallLiteralSet(true),
    allowCastle(true),
@ -207,7 +206,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
    G_UPDATE(allowRose);
    G_UPDATE(allowExtendedNFA);
    G_UPDATE(allowLimExNFA);
    G_UPDATE(allowSidecar);
    G_UPDATE(allowAnchoredAcyclic);
    G_UPDATE(allowSmallLiteralSet);
    G_UPDATE(allowCastle);

@ -54,7 +54,6 @@ struct Grey {
    bool allowRose;
    bool allowExtendedNFA;
    bool allowLimExNFA;
    bool allowSidecar;
    bool allowAnchoredAcyclic;
    bool allowSmallLiteralSet;
    bool allowCastle;
18
src/hs.cpp
@ -39,6 +39,7 @@
#include "compiler/error.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_expr_info.h"
#include "nfagraph/ng_extparam.h"
#include "parser/parse_error.h"
#include "parser/Parser.h"
#include "parser/prefilter.h"
@ -310,7 +311,8 @@ hs_error_t hs_compile_ext_multi(const char * const *expressions,

static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
                                  unsigned int mode, hs_expr_info_t **info,
                                  const hs_expr_ext_t *ext, unsigned int mode,
                                  hs_expr_info_t **info,
                                  hs_compile_error_t **error) {
    if (!error) {
        // nowhere to write an error, but we can still return an error code.
@ -347,7 +349,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
    }

    ReportManager rm(cc.grey);
    ParsedExpression pe(0, expression, flags, 0);
    ParsedExpression pe(0, expression, flags, 0, ext);
    assert(pe.component);

    // Apply prefiltering transformations if desired.
@ -362,6 +364,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
        throw ParseError("Internal error.");
    }

    handleExtendedParams(rm, *g, cc);
    fillExpressionInfo(rm, *g, &local_info);
}
catch (const CompileError &e) {
@ -394,7 +397,16 @@ extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error) {
    return hs_expression_info_int(expression, flags, HS_MODE_BLOCK, info,
    return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
                                  info, error);
}

extern "C" HS_PUBLIC_API
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
                                  const hs_expr_ext_t *ext,
                                  hs_expr_info_t **info,
                                  hs_compile_error_t **error) {
    return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info,
                                  error);
}

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -500,6 +500,25 @@ const char *hs_version(void);
 */
#define HS_BAD_ALLOC (-9)

/**
 * The scratch region was already in use.
 *
 * This error is returned when Hyperscan is able to detect that the scratch
 * region given is already in use by another Hyperscan API call.
 *
 * A separate scratch region, allocated with @ref hs_alloc_scratch() or @ref
 * hs_clone_scratch(), is required for every concurrent caller of the Hyperscan
 * API.
 *
 * For example, this error might be returned when @ref hs_scan() has been
 * called inside a callback delivered by a currently-executing @ref hs_scan()
 * call using the same scratch region.
 *
 * Note: Not all concurrent uses of scratch regions may be detected. This error
 * is intended as a best-effort debugging tool, not a guarantee.
 */
#define HS_SCRATCH_IN_USE (-10)
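A minimal sketch of the misuse described above: re-entering hs_scan() from a match callback with the scratch region that is already driving the outer scan. Database and scratch setup are elided, and the context struct is hypothetical; the hs_scan() signature and callback type are the public API.

#include <hs.h>

struct scan_ctx {                /* hypothetical context carrying db + scratch */
    const hs_database_t *db;
    hs_scratch_t *scratch;
};

static int on_match(unsigned int id, unsigned long long from,
                    unsigned long long to, unsigned int flags, void *ctx) {
    struct scan_ctx *s = ctx;
    /* Same scratch as the outer hs_scan(): the call may now fail up front. */
    hs_error_t err = hs_scan(s->db, "more data", 9, 0, s->scratch,
                             on_match, ctx);
    if (err == HS_SCRATCH_IN_USE) {
        /* recover by using a separate scratch from hs_clone_scratch() */
    }
    return 0; /* continue matching */
}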

/** @} */

#ifdef __cplusplus

@ -158,7 +158,7 @@ typedef struct hs_platform_info {

/**
 * A type containing information related to an expression that is returned by
 * @ref hs_expression_info().
 * @ref hs_expression_info() or @ref hs_expression_ext_info.
 */
typedef struct hs_expr_info {
    /**
@ -201,7 +201,8 @@ typedef struct hs_expr_info {

/**
 * A structure containing additional parameters related to an expression,
 * passed in at build time to @ref hs_compile_ext_multi().
 * passed in at build time to @ref hs_compile_ext_multi() or @ref
 * hs_expression_ext_info.
 *
 * These parameters allow the set of matches produced by a pattern to be
 * constrained at compile time, rather than relying on the application to
@ -401,7 +402,7 @@ hs_error_t hs_compile_multi(const char *const *expressions,
                            hs_database_t **db, hs_compile_error_t **error);

/**
 * The multiple regular expression compiler with extended pattern support.
 * The multiple regular expression compiler with extended parameter support.
 *
 * This function call compiles a group of expressions into a database in the
 * same way as @ref hs_compile_multi(), but allows additional parameters to be
@ -550,6 +551,62 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error);

/**
 * Utility function providing information about a regular expression, with
 * extended parameter support. The information provided in @ref hs_expr_info_t
 * includes the minimum and maximum width of a pattern match.
 *
 * @param expression
 *      The NULL-terminated expression to parse. Note that this string must
 *      represent ONLY the pattern to be matched, with no delimiters or flags;
 *      any global flags should be specified with the @a flags argument. For
 *      example, the expression `/abc?def/i` should be compiled by providing
 *      `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
 *      flags.
 *
 * @param flags
 *      Flags which modify the behaviour of the expression. Multiple flags may
 *      be used by ORing them together. Valid values are:
 *       - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
 *       - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
 *       - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
 *       - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
 *             expression per stream.
 *       - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
 *             empty string, such as `.*`.
 *       - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
 *       - HS_FLAG_UCP - Use Unicode properties for character classes.
 *       - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
 *       - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
 *             when a match is found.
 *
 * @param ext
 *      A pointer to a filled @ref hs_expr_ext_t structure that defines
 *      extended behaviour for this pattern. NULL may be specified if no
 *      extended parameters are needed.
 *
 * @param info
 *      On success, a pointer to the pattern information will be returned in
 *      this parameter, or NULL on failure. This structure is allocated using
 *      the allocator supplied in @ref hs_set_allocator() (or malloc() if no
 *      allocator was set) and should be freed by the caller.
 *
 * @param error
 *      If the call fails, a pointer to a @ref hs_compile_error_t will be
 *      returned, providing details of the error condition. The caller is
 *      responsible for deallocating the buffer using the @ref
 *      hs_free_compile_error() function.
 *
 * @return
 *      @ref HS_SUCCESS is returned on successful compilation; @ref
 *      HS_COMPILER_ERROR on failure, with details provided in the error
 *      parameter.
 */
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
                                  const hs_expr_ext_t *ext,
                                  hs_expr_info_t **info,
                                  hs_compile_error_t **error);

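Example use of the new API (a sketch; error handling abbreviated). The hs_expr_ext_t structure and its HS_EXT_FLAG_* constants are the existing extended-parameter machinery documented above; the min_offset value chosen here is arbitrary.

#include <hs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int query_width(void) {
    hs_expr_ext_t ext;
    memset(&ext, 0, sizeof(ext));
    ext.flags = HS_EXT_FLAG_MIN_OFFSET;
    ext.min_offset = 16; /* matches may not end before offset 16 */

    hs_expr_info_t *info = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_expression_ext_info("foo.*bar", 0, &ext, &info, &err)
            != HS_SUCCESS) {
        fprintf(stderr, "error: %s\n", err->message);
        hs_free_compile_error(err);
        return -1;
    }
    printf("min width %u, max width %u\n", info->min_width, info->max_width);
    free(info); /* per the docs above, the caller frees the info block */
    return 0;
}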
/**
 * Populates the platform information based on the current host.
 *
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -37,7 +37,6 @@
#include "noodle_build.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "fdr/fdr.h"
#include "nfa/shufticompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
@ -526,8 +525,7 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
        DEBUG_PRINTF("build noodle table\n");
        engType = HWLM_ENGINE_NOOD;
        const hwlmLiteral &lit = lits.front();
        auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
                                     lit.nocase, lit.id);
        auto noodle = noodBuildTable(lit);
        if (noodle) {
            engSize = noodSize(noodle.get());
        }

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -34,13 +34,11 @@
#include "util/compare.h" // for ourisalpha
#include "util/ue2string.h" // for escapeString

#include <algorithm>
#include <iomanip>
#include <sstream>

#include <boost/algorithm/cxx11/all_of.hpp>

using namespace std;
using namespace boost::algorithm;

namespace ue2 {

@ -91,10 +89,17 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
    assert(msk.size() <= HWLM_MASKLEN);
    assert(msk.size() == cmp.size());

    DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n",
                 escapeString(s).c_str(), dumpMask(msk).c_str(),
    // If we've been handed a nocase literal, all letter characters must be
    // upper-case.
    if (nocase) {
        upperString(s);
    }

    DEBUG_PRINTF("literal '%s'%s, msk=%s, cmp=%s\n", escapeString(s).c_str(),
                 nocase ? " (nocase)" : "", dumpMask(msk).c_str(),
                 dumpMask(cmp).c_str());

    // Mask and compare vectors MUST be the same size.
    assert(msk.size() == cmp.size());

@ -102,7 +107,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
    assert(maskIsConsistent(s, nocase, msk, cmp));

    // In the name of good hygiene, zap msk/cmp if msk is all zeroes.
    if (all_of_equal(msk.begin(), msk.end(), 0)) {
    if (all_of(begin(msk), end(msk), [](u8 val) { return val == 0; })) {
        msk.clear();
        cmp.clear();
    }
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -95,11 +95,6 @@ struct hwlmLiteral {
     */
    std::vector<u8> cmp;

    /** \brief Simple constructor: no group information, no msk/cmp. */
    hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
        : s(s_in), id(id_in), nocase(nocase_in), noruns(false),
          groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {}

    /** \brief Complete constructor, takes group information and msk/cmp.
     *
     * This constructor takes a msk/cmp pair. Both must be vectors of length <=
@ -107,6 +102,10 @@ struct hwlmLiteral {
    hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
                u32 id_in, hwlm_group_t groups_in,
                const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);

    /** \brief Simple constructor: no group information, no msk/cmp. */
    hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
        : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
};

/**
@ -26,28 +26,35 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
/**
 * \file
 * \brief Noodle literal matcher: build code.
 */
#include <cstring> // for memcpy

#include "noodle_build.h"

#include "hwlm_literal.h"
#include "noodle_internal.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/verify_types.h"
#include "ue2common.h"

#include <cstring> // for memcpy

namespace ue2 {

static
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
size_t findNoodFragOffset(const hwlmLiteral &lit) {
    const auto &s = lit.s;
    const size_t len = lit.s.length();

    size_t offset = 0;
    for (size_t i = 0; i + 1 < len; i++) {
        int diff = 0;
        const char c = lit[i];
        const char d = lit[i + 1];
        if (nocase && ourisalpha(c)) {
        const char c = s[i];
        const char d = s[i + 1];
        if (lit.nocase && ourisalpha(c)) {
            diff = (mytoupper(c) != mytoupper(d));
        } else {
            diff = (c != d);
@ -60,21 +67,24 @@ size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
    return offset;
}

/** \brief Construct a Noodle matcher for the given literal. */
aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
                                             bool nocase, u32 id) {
    size_t noodle_len = sizeof(noodTable) + len;
    aligned_unique_ptr<noodTable> n =
        aligned_zmalloc_unique<noodTable>(noodle_len);
aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
    if (!lit.msk.empty()) {
        DEBUG_PRINTF("noodle can't handle supplementary masks\n");
        return nullptr;
    }

    const auto &s = lit.s;
    size_t noodle_len = sizeof(noodTable) + s.length();
    auto n = aligned_zmalloc_unique<noodTable>(noodle_len);
    assert(n);

    size_t key_offset = findNoodFragOffset(lit, len, nocase);
    size_t key_offset = findNoodFragOffset(lit);

    n->id = id;
    n->len = verify_u32(len);
    n->id = lit.id;
    n->len = verify_u32(s.length());
    n->key_offset = verify_u32(key_offset);
    n->nocase = nocase ? 1 : 0;
    memcpy(n->str, lit, len);
    n->nocase = lit.nocase ? 1 : 0;
    memcpy(n->str, s.c_str(), s.length());

    return n;
}
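
For orientation, a hypothetical caller of the new hwlmLiteral-based interface
(not from the patch; names, include paths and values are invented for the
example):

    #include "hwlm/hwlm_literal.h"
    #include "hwlm/noodle_build.h"

    using namespace ue2;

    static size_t exampleNoodleSize(void) {
        hwlmLiteral lit("abcdef", true /* nocase */, 0 /* id */);
        auto noodle = noodBuildTable(lit); // nullptr if lit carries msk/cmp
        return noodle ? noodSize(noodle.get()) : 0;
    }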
@ -40,9 +40,10 @@ struct noodTable;

namespace ue2 {

struct hwlmLiteral;

/** \brief Construct a Noodle matcher for the given literal. */
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
                                                  bool nocase, u32 id);
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit);

size_t noodSize(const noodTable *n);

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -37,6 +37,7 @@
#include "util/compare.h"
#include "util/masked_move.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

#include <ctype.h>
#include <stdbool.h>
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -221,7 +221,7 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
        u32 z0 = movemask256(eq256(mask1, v));
        u32 z1 = movemask256(eq256(mask2, v));
        u32 z = (lastz0 | (z0 << 1)) & z1;
        lastz0 = (z0 & 0x80000000) >> 31;
        lastz0 = z0 >> 31;

        // On large packet buffers, this prefetch appears to get us about 2%.
        __builtin_prefetch(d + 128);
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -190,8 +190,8 @@ hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
        m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
        m128 z1 = eq128(mask1, v);
        m128 z2 = eq128(mask2, v);
        u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2));
        lastz1 = _mm_srli_si128(z1, 15);
        u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
        lastz1 = z1;

        // On large packet buffers, this prefetch appears to get us about 2%.
        __builtin_prefetch(d + 128);
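
The new form pulls the carried byte and the shifted match mask out of the
lastz1:z1 pair with a single alignr. A standalone check of the identity this
relies on (illustrative only; uses raw SSSE3 intrinsics rather than the
library macros, and assumes palignr maps to _mm_alignr_epi8):

    #include <assert.h>
    #include <string.h>
    #include <tmmintrin.h> /* SSSE3: _mm_alignr_epi8 */

    int main(void) {
        __m128i prev = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15);
        __m128i cur = _mm_setr_epi8(16, 17, 18, 19, 20, 21, 22, 23,
                                    24, 25, 26, 27, 28, 29, 30, 31);
        /* old form: carry the last byte of prev, OR with cur shifted left */
        __m128i a = _mm_or_si128(_mm_srli_si128(prev, 15),
                                 _mm_slli_si128(cur, 1));
        /* new form: one palignr over the concatenated prev:cur pair */
        __m128i b = _mm_alignr_epi8(cur, prev, 15);
        assert(memcmp(&a, &b, sizeof(a)) == 0);
        return 0;
    }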

234
src/nfa/accel.c
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -30,6 +30,9 @@
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
#include "multishufti.h"
#include "multitruffle.h"
#include "multivermicelli.h"
#include "ue2common.h"

const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
@ -81,6 +84,18 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
                                  c_end - 1);
        break;

    case ACCEL_DVERM_MASKED:
        DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
        if (c + 16 + 1 >= c_end) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2,
                                        accel->dverm.m1, accel->dverm.m2,
                                        c, c_end - 1);
        break;

    case ACCEL_SHUFTI:
        DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
@ -117,6 +132,221 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
        rv = c_end;
        break;

    /* multibyte matchers */
    case ACCEL_MLVERM:
        DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MLVERM_NOCASE:
        DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MLGVERM:
        DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MLGVERM_NOCASE:
        DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MSVERM:
        DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MSVERM_NOCASE:
        DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MSGVERM:
        DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MSGVERM_NOCASE:
        DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
        break;
    case ACCEL_MDSVERM:
        DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
                                        accel->mdverm.len1, accel->mdverm.len2);
        break;
    case ACCEL_MDSVERM_NOCASE:
        DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
                                        accel->mdverm.len1, accel->mdverm.len2);
        break;
    case ACCEL_MDSGVERM:
        DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
                                            accel->mdverm.len1, accel->mdverm.len2);
        break;
    case ACCEL_MDSGVERM_NOCASE:
        DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
                                            accel->mdverm.len1, accel->mdverm.len2);
        break;
    case ACCEL_MLSHUFTI:
        DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                             accel->mshufti.len);
        break;
    case ACCEL_MLGSHUFTI:
        DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                                 accel->mshufti.len);
        break;
    case ACCEL_MSSHUFTI:
        DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                              accel->mshufti.len);
        break;
    case ACCEL_MSGSHUFTI:
        DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
                                  accel->mshufti.len);
        break;
    case ACCEL_MDSSHUFTI:
        DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
                                    accel->mdshufti.len1, accel->mdshufti.len2);
        break;
    case ACCEL_MDSGSHUFTI:
        DEBUG_PRINTF("accel mdsgshufti %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
                                        accel->mdshufti.len1, accel->mdshufti.len2);
        break;
    case ACCEL_MLTRUFFLE:
        DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                              c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MLGTRUFFLE:
        DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                                  c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MSTRUFFLE:
        DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                               c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MSGTRUFFLE:
        DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
                                   c, c_end, accel->mtruffle.len);
        break;
    case ACCEL_MDSTRUFFLE:
        DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
                                     accel->mdtruffle.mask2, c, c_end,
                                     accel->mdtruffle.len1,
                                     accel->mdtruffle.len2);
        break;
    case ACCEL_MDSGTRUFFLE:
        DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
        if (c + 15 >= c_end) {
            return c;
        }

        rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
                                         accel->mdtruffle.mask2, c, c_end,
                                         accel->mdtruffle.len1,
                                         accel->mdtruffle.len2);
        break;

    default:
        assert(!"not here");
        return c;
@ -127,5 +357,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
    rv = MAX(c + accel->generic.offset, rv);
    rv -= accel->generic.offset;

    DEBUG_PRINTF("advanced %zd\n", rv - c);

    return rv;
}
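
As a rough illustration of how one of the new schemes is driven (hypothetical
fragment, not from the patch; the AccelAux field layout is assumed from
accel.h and the character/mask values are invented):

    #include <string.h>
    #include "nfa/accel.h"

    /* Skip ahead to the first possible caseless "AZ" in [buf, end). */
    static const u8 *skip_to_az(const u8 *buf, const u8 *end) {
        union AccelAux aux;
        memset(&aux, 0, sizeof(aux));
        aux.dverm.accel_type = ACCEL_DVERM_MASKED;
        aux.dverm.c1 = 'A';  /* chars already reduced by the masks */
        aux.dverm.c2 = 'Z';
        aux.dverm.m1 = 0xdf; /* clear the 0x20 case bit */
        aux.dverm.m2 = 0xdf;
        return run_accel(&aux, buf, end);
    }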
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -60,7 +60,37 @@ enum AccelType {
    ACCEL_SHUFTI,
    ACCEL_DSHUFTI,
    ACCEL_TRUFFLE,
    ACCEL_RED_TAPE
    ACCEL_RED_TAPE,
    /* multibyte vermicellis */
    ACCEL_MLVERM,
    ACCEL_MLVERM_NOCASE,
    ACCEL_MLGVERM,
    ACCEL_MLGVERM_NOCASE,
    ACCEL_MSVERM,
    ACCEL_MSVERM_NOCASE,
    ACCEL_MSGVERM,
    ACCEL_MSGVERM_NOCASE,
    ACCEL_MDSVERM,
    ACCEL_MDSVERM_NOCASE,
    ACCEL_MDSGVERM,
    ACCEL_MDSGVERM_NOCASE,
    /* multibyte shuftis */
    ACCEL_MLSHUFTI,
    ACCEL_MLGSHUFTI,
    ACCEL_MSSHUFTI,
    ACCEL_MSGSHUFTI,
    ACCEL_MDSSHUFTI,
    ACCEL_MDSGSHUFTI,
    /* multibyte truffles */
    ACCEL_MLTRUFFLE,
    ACCEL_MLGTRUFFLE,
    ACCEL_MSTRUFFLE,
    ACCEL_MSGTRUFFLE,
    ACCEL_MDSTRUFFLE,
    ACCEL_MDSGTRUFFLE,
    /* masked dverm */
    ACCEL_DVERM_MASKED,

};

/** \brief Structure for accel framework. */
@ -80,7 +110,22 @@ union AccelAux {
        u8 offset;
        u8 c1; // uppercase if nocase
        u8 c2; // uppercase if nocase
        u8 m1; // masked variant
        u8 m2; // masked variant
    } dverm;
    struct {
        u8 accel_type;
        u8 offset;
        u8 c; // uppercase if nocase
        u8 len;
    } mverm;
    struct {
        u8 accel_type;
        u8 offset;
        u8 c; // uppercase if nocase
        u8 len1;
        u8 len2;
    } mdverm;
    struct {
        u8 accel_type;
        u8 offset;
@ -95,12 +140,42 @@ union AccelAux {
        m128 lo2;
        m128 hi2;
    } dshufti;
    struct {
        u8 accel_type;
        u8 offset;
        m128 lo;
        m128 hi;
        u8 len;
    } mshufti;
    struct {
        u8 accel_type;
        u8 offset;
        m128 lo;
        m128 hi;
        u8 len1;
        u8 len2;
    } mdshufti;
    struct {
        u8 accel_type;
        u8 offset;
        m128 mask1;
        m128 mask2;
    } truffle;
    struct {
        u8 accel_type;
        u8 offset;
        m128 mask1;
        m128 mask2;
        u8 len;
    } mtruffle;
    struct {
        u8 accel_type;
        u8 offset;
        m128 mask1;
        m128 mask2;
        u8 len1;
        u8 len2;
    } mdtruffle;
};

/**
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -37,16 +37,21 @@
#include "shufticompile.h"
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_mask.h"
#include "util/simd_utils.h"

#include <cstdio>
#include <vector>

#ifndef DUMP_SUPPORT
#error No dump support!
#endif

using namespace std;

namespace ue2 {

static
@ -62,6 +67,8 @@ const char *accelName(u8 accel_type) {
        return "double-vermicelli";
    case ACCEL_DVERM_NOCASE:
        return "double-vermicelli nocase";
    case ACCEL_DVERM_MASKED:
        return "double-vermicelli masked";
    case ACCEL_RVERM:
        return "reverse vermicelli";
    case ACCEL_RVERM_NOCASE:
@ -86,11 +93,144 @@ const char *accelName(u8 accel_type) {
        return "truffle";
    case ACCEL_RED_TAPE:
        return "red tape";
    case ACCEL_MLVERM:
        return "multibyte long vermicelli";
    case ACCEL_MLVERM_NOCASE:
        return "multibyte long vermicelli nocase";
    case ACCEL_MLGVERM:
        return "multibyte long-grab vermicelli";
    case ACCEL_MLGVERM_NOCASE:
        return "multibyte long-grab vermicelli nocase";
    case ACCEL_MSVERM:
        return "multibyte shift vermicelli";
    case ACCEL_MSVERM_NOCASE:
        return "multibyte shift vermicelli nocase";
    case ACCEL_MSGVERM:
        return "multibyte shift-grab vermicelli";
    case ACCEL_MSGVERM_NOCASE:
        return "multibyte shift-grab vermicelli nocase";
    case ACCEL_MDSVERM:
        return "multibyte doubleshift vermicelli";
    case ACCEL_MDSVERM_NOCASE:
        return "multibyte doubleshift vermicelli nocase";
    case ACCEL_MDSGVERM:
        return "multibyte doubleshift-grab vermicelli";
    case ACCEL_MDSGVERM_NOCASE:
        return "multibyte doubleshift-grab vermicelli nocase";
    case ACCEL_MLSHUFTI:
        return "multibyte long shufti";
    case ACCEL_MLGSHUFTI:
        return "multibyte long-grab shufti";
    case ACCEL_MSSHUFTI:
        return "multibyte shift shufti";
    case ACCEL_MSGSHUFTI:
        return "multibyte shift-grab shufti";
    case ACCEL_MDSSHUFTI:
        return "multibyte doubleshift shufti";
    case ACCEL_MDSGSHUFTI:
        return "multibyte doubleshift-grab shufti";
    case ACCEL_MLTRUFFLE:
        return "multibyte long truffle";
    case ACCEL_MLGTRUFFLE:
        return "multibyte long-grab truffle";
    case ACCEL_MSTRUFFLE:
        return "multibyte shift truffle";
    case ACCEL_MSGTRUFFLE:
        return "multibyte shift-grab truffle";
    case ACCEL_MDSTRUFFLE:
        return "multibyte doubleshift truffle";
    case ACCEL_MDSGTRUFFLE:
        return "multibyte doubleshift-grab truffle";
    default:
        return "unknown!";
    }
}

static
void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) {
    CharReach cr = shufti2cr(lo, hi);
    fprintf(f, "count %zu class %s\n", cr.count(),
            describeClass(cr).c_str());
}

static
vector<CharReach> shufti2cr_array(const m128 lo_in, const m128 hi_in) {
    const u8 *lo = (const u8 *)&lo_in;
    const u8 *hi = (const u8 *)&hi_in;
    vector<CharReach> crs(8);
    for (u32 i = 0; i < 256; i++) {
        u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4];
        while (combined) {
            u32 j = findAndClearLSB_32(&combined);
            crs.at(j).set(i);
        }
    }
    return crs;
}

static
void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1,
                          const m128 &lo2, const m128 &hi2) {
    vector<CharReach> cr1 = shufti2cr_array(not128(lo1), not128(hi1));
    vector<CharReach> cr2 = shufti2cr_array(not128(lo2), not128(hi2));
    map<CharReach, set<u32> > cr1_group;
    assert(cr1.size() == 8 && cr2.size() == 8);
    for (u32 i = 0; i < 8; i++) {
        if (!cr1[i].any()) {
            continue;
        }
        cr1_group[cr1[i]].insert(i);
    }
    map<CharReach, CharReach> rev;
    for (const auto &e : cr1_group) {
        CharReach rhs;
        for (u32 r : e.second) {
            rhs |= cr2.at(r);
        }

        rev[rhs] |= e.first;
    }
    fprintf(f, "escapes: {");
    for (auto it = rev.begin(); it != rev.end(); ++it) {
        const auto &e = *it;
        if (it != rev.begin()) {
            fprintf(f, ", ");
        }

        if (e.first.all()) {
            fprintf(f, "%s", describeClass(e.second).c_str());
        } else {
            fprintf(f, "%s%s", describeClass(e.second).c_str(),
                    describeClass(e.first).c_str());
        }
    }
    fprintf(f, "}\n");
}

static
void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) {
    fprintf(f, "lo %s\n",
            dumpMask((const u8 *)&lo, 128).c_str());
    fprintf(f, "hi %s\n",
            dumpMask((const u8 *)&hi, 128).c_str());
}

static
void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) {
    CharReach cr = truffle2cr(hiset, hiclear);
    fprintf(f, "count %zu class %s\n", cr.count(),
            describeClass(cr).c_str());
}

static
void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) {
    fprintf(f, "lo %s\n",
            dumpMask((const u8 *)&hiset, 128).c_str());
    fprintf(f, "hi %s\n",
            dumpMask((const u8 *)&hiclear, 128).c_str());
}


void dumpAccelInfo(FILE *f, const AccelAux &accel) {
    fprintf(f, " %s", accelName(accel.accel_type));
    if (accel.generic.offset) {
@ -110,39 +250,76 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
    case ACCEL_RDVERM_NOCASE:
        fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2);
        break;
    case ACCEL_DVERM_MASKED:
        fprintf(f, " [\\x%02hhx\\x%02hhx] & [\\x%02hhx\\x%02hhx]\n",
                accel.dverm.c1, accel.dverm.c2, accel.dverm.m1, accel.dverm.m2);
        break;
    case ACCEL_SHUFTI: {
        fprintf(f, "\n");
        fprintf(f, "lo %s\n",
                dumpMask((const u8 *)&accel.shufti.lo, 128).c_str());
        fprintf(f, "hi %s\n",
                dumpMask((const u8 *)&accel.shufti.hi, 128).c_str());
        CharReach cr = shufti2cr(accel.shufti.lo, accel.shufti.hi);
        fprintf(f, "count %zu class %s\n", cr.count(),
                describeClass(cr).c_str());
        dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi);
        dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi);
        break;
    }
    case ACCEL_DSHUFTI:
        fprintf(f, "\n");
        fprintf(f, "lo1 %s\n",
                dumpMask((const u8 *)&accel.dshufti.lo1, 128).c_str());
        fprintf(f, "hi1 %s\n",
                dumpMask((const u8 *)&accel.dshufti.hi1, 128).c_str());
        fprintf(f, "lo2 %s\n",
                dumpMask((const u8 *)&accel.dshufti.lo2, 128).c_str());
        fprintf(f, "hi2 %s\n",
                dumpMask((const u8 *)&accel.dshufti.hi2, 128).c_str());
        fprintf(f, "mask 1\n");
        dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1);
        fprintf(f, "mask 2\n");
        dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2);
        dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1,
                             accel.dshufti.lo2, accel.dshufti.hi2);
        break;
    case ACCEL_TRUFFLE: {
        fprintf(f, "\n");
        fprintf(f, "lo %s\n",
                dumpMask((const u8 *)&accel.truffle.mask1, 128).c_str());
        fprintf(f, "hi %s\n",
                dumpMask((const u8 *)&accel.truffle.mask2, 128).c_str());
        CharReach cr = truffle2cr(accel.truffle.mask1, accel.truffle.mask2);
        fprintf(f, "count %zu class %s\n", cr.count(),
                describeClass(cr).c_str());
        dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2);
        dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2);
        break;
    }
    case ACCEL_MLVERM:
    case ACCEL_MLVERM_NOCASE:
    case ACCEL_MLGVERM:
    case ACCEL_MLGVERM_NOCASE:
    case ACCEL_MSVERM:
    case ACCEL_MSVERM_NOCASE:
    case ACCEL_MSGVERM:
    case ACCEL_MSGVERM_NOCASE:
        fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
        break;
    case ACCEL_MDSVERM:
    case ACCEL_MDSVERM_NOCASE:
    case ACCEL_MDSGVERM:
    case ACCEL_MDSGVERM_NOCASE:
        fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
                accel.mdverm.len2);
        break;
    case ACCEL_MLSHUFTI:
    case ACCEL_MLGSHUFTI:
    case ACCEL_MSSHUFTI:
    case ACCEL_MSGSHUFTI:
        fprintf(f, " len:%u\n", accel.mshufti.len);
        dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi);
        dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi);
        break;
    case ACCEL_MDSSHUFTI:
    case ACCEL_MDSGSHUFTI:
        fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
        dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi);
        dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi);
        break;
    case ACCEL_MLTRUFFLE:
    case ACCEL_MLGTRUFFLE:
    case ACCEL_MSTRUFFLE:
    case ACCEL_MSGTRUFFLE:
        fprintf(f, " len:%u\n", accel.mtruffle.len);
        dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
        dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
        break;
    case ACCEL_MDSTRUFFLE:
    case ACCEL_MDSGTRUFFLE:
        fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
        dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
        dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
        break;
    default:
        fprintf(f, "\n");
        break;
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -94,6 +94,47 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
    DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
}

bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
                    u8 *m2_out) {
    u8 a1 = 0xff;
    u8 a2 = 0xff;
    u8 b1 = 0xff;
    u8 b2 = 0xff;

    for (const auto &e : escape_set) {
        DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second);
        a1 &= e.first;
        b1 &= ~e.first;
        a2 &= e.second;
        b2 &= ~e.second;
    }

    u8 m1 = a1 | b1;
    u8 m2 = a2 | b2;

    u32 holes1 = 8 - popcount32(m1);
    u32 holes2 = 8 - popcount32(m2);

    DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2);
    DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2);
    DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2);

    assert(holes1 <= 8 && holes2 <= 8);
    assert(escape_set.size() <= 1U << (holes1 + holes2));
    if (escape_set.size() != 1U << (holes1 + holes2)) {
        return false;
    }

    if (m1_out) {
        *m1_out = m1;
    }
    if (m2_out) {
        *m2_out = m2;
    }

    return true;
}
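
A worked instance of the construction above (illustrative only): for the
caseless escape set {<A,Z>, <A,z>, <a,Z>, <a,z>}, each byte position has a
single mask "hole" at the 0x20 case bit, so the set of size 4 == 1 << (1 + 1)
is accepted. A standalone check of the first position:

    #include <cassert>

    int main() {
        unsigned char a1 = 'A' & 'a';                                 // 0x41
        unsigned char b1 = (unsigned char)~'A' & (unsigned char)~'a'; // 0x9e
        unsigned char m1 = a1 | b1;            // 0xdf: one hole, at bit 0x20
        assert(m1 == 0xdf);
        assert(('a' & m1) == ('A' & m1));      // the case bit is masked away
        return 0;
    }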

static
bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
    // test for vector containing <A,Z> <A,z> <a,Z> <a,z>
@ -149,17 +190,31 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
        return;
    }

    if (outs1 + outs2 <= 8) {
        if (outs1 == 0) {
            u8 m1;
            u8 m2;

            if (buildDvermMask(info.double_stop2, &m1, &m2)) {
                aux->accel_type = ACCEL_DVERM_MASKED;
                aux->dverm.offset = offset;
                aux->dverm.c1 = info.double_stop2.begin()->first & m1;
                aux->dverm.c2 = info.double_stop2.begin()->second & m2;
                aux->dverm.m1 = m1;
                aux->dverm.m2 = m2;
                DEBUG_PRINTF("building masked double-vermicelli for 0x%02hhx%02hhx\n",
                             aux->dverm.c1, aux->dverm.c2);
                return;
            }
        }

        if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
            DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
                         " two-byte literals\n", outs1, outs2);
            aux->accel_type = ACCEL_DSHUFTI;
            aux->dshufti.offset = offset;
            shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
                                   &aux->dshufti.lo1,
                                   &aux->dshufti.hi1,
                                   &aux->dshufti.lo2,
                                   &aux->dshufti.hi2);
            if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
                                       &aux->dshufti.lo1, &aux->dshufti.hi1,
                                       &aux->dshufti.lo2, &aux->dshufti.hi2)) {
                return;
            }
        }
@ -169,13 +224,285 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
    aux->accel_type = ACCEL_NONE;
}

static
void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
    if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
        DEBUG_PRINTF("no multimatch for us :(");
        return;
    }

    u32 offset = info.multiaccel_offset;
    const CharReach &stops = info.multiaccel_stops;

    assert(aux->accel_type == ACCEL_NONE);
    if (stops.all()) {
        return;
    }

    size_t outs = stops.count();
    DEBUG_PRINTF("%zu outs\n", outs);
    assert(outs && outs < 256);

    switch (info.ma_type) {
    case MultibyteAccelInfo::MAT_LONG:
        if (outs == 1) {
            aux->accel_type = ACCEL_MLVERM;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first();
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MLVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    case MultibyteAccelInfo::MAT_LONGGRAB:
        if (outs == 1) {
            aux->accel_type = ACCEL_MLGVERM;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first();
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MLGVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    case MultibyteAccelInfo::MAT_SHIFT:
        if (outs == 1) {
            aux->accel_type = ACCEL_MSVERM;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first();
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MSVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    case MultibyteAccelInfo::MAT_SHIFTGRAB:
        if (outs == 1) {
            aux->accel_type = ACCEL_MSGVERM;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first();
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MSGVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mverm.len = info.ma_len1;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    case MultibyteAccelInfo::MAT_DSHIFT:
        if (outs == 1) {
            aux->accel_type = ACCEL_MDSVERM;
            aux->mdverm.offset = offset;
            aux->mdverm.c = stops.find_first();
            aux->mdverm.len1 = info.ma_len1;
            aux->mdverm.len2 = info.ma_len2;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MDSVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mdverm.len1 = info.ma_len1;
            aux->mdverm.len2 = info.ma_len2;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
        if (outs == 1) {
            aux->accel_type = ACCEL_MDSGVERM;
            aux->mdverm.offset = offset;
            aux->mdverm.c = stops.find_first();
            aux->mdverm.len1 = info.ma_len1;
            aux->mdverm.len2 = info.ma_len2;
            DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
            return;
        }
        if (outs == 2 && stops.isCaselessChar()) {
            aux->accel_type = ACCEL_MDSGVERM_NOCASE;
            aux->mverm.offset = offset;
            aux->mverm.c = stops.find_first() & CASE_CLEAR;
            aux->mdverm.len1 = info.ma_len1;
            aux->mdverm.len2 = info.ma_len2;
            DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
                         aux->verm.c);
            return;
        }
        break;
    default:
        // shouldn't happen
        assert(0);
        return;
    }

    DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);

    switch (info.ma_type) {
    case MultibyteAccelInfo::MAT_LONG:
        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
                             &aux->mshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MLSHUFTI;
        aux->mshufti.offset = offset;
        aux->mshufti.len = info.ma_len1;
        return;
    case MultibyteAccelInfo::MAT_LONGGRAB:
        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
                             &aux->mshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MLGSHUFTI;
        aux->mshufti.offset = offset;
        aux->mshufti.len = info.ma_len1;
        return;
    case MultibyteAccelInfo::MAT_SHIFT:
        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
                             &aux->mshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MSSHUFTI;
        aux->mshufti.offset = offset;
        aux->mshufti.len = info.ma_len1;
        return;
    case MultibyteAccelInfo::MAT_SHIFTGRAB:
        if (shuftiBuildMasks(stops, &aux->mshufti.lo,
                             &aux->mshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MSGSHUFTI;
        aux->mshufti.offset = offset;
        aux->mshufti.len = info.ma_len1;
        return;
    case MultibyteAccelInfo::MAT_DSHIFT:
        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
                             &aux->mdshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MDSSHUFTI;
        aux->mdshufti.offset = offset;
        aux->mdshufti.len1 = info.ma_len1;
        aux->mdshufti.len2 = info.ma_len2;
        return;
    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
        if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
                             &aux->mdshufti.hi) == -1) {
            break;
        }
        aux->accel_type = ACCEL_MDSGSHUFTI;
        aux->mdshufti.offset = offset;
        aux->mdshufti.len1 = info.ma_len1;
        aux->mdshufti.len2 = info.ma_len2;
        return;
    default:
        // shouldn't happen
        assert(0);
        return;
    }
    DEBUG_PRINTF("shufti build failed, falling through\n");

    if (outs <= ACCEL_MAX_STOP_CHAR) {
        DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
        switch (info.ma_type) {
        case MultibyteAccelInfo::MAT_LONG:
            aux->accel_type = ACCEL_MLTRUFFLE;
            aux->mtruffle.offset = offset;
            aux->mtruffle.len = info.ma_len1;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mtruffle.mask2);
            break;
        case MultibyteAccelInfo::MAT_LONGGRAB:
            aux->accel_type = ACCEL_MLGTRUFFLE;
            aux->mtruffle.offset = offset;
            aux->mtruffle.len = info.ma_len1;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mtruffle.mask2);
            break;
        case MultibyteAccelInfo::MAT_SHIFT:
            aux->accel_type = ACCEL_MSTRUFFLE;
            aux->mtruffle.offset = offset;
            aux->mtruffle.len = info.ma_len1;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mtruffle.mask2);
            break;
        case MultibyteAccelInfo::MAT_SHIFTGRAB:
            aux->accel_type = ACCEL_MSGTRUFFLE;
            aux->mtruffle.offset = offset;
            aux->mtruffle.len = info.ma_len1;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mtruffle.mask2);
            break;
        case MultibyteAccelInfo::MAT_DSHIFT:
            aux->accel_type = ACCEL_MDSTRUFFLE;
            aux->mdtruffle.offset = offset;
            aux->mdtruffle.len1 = info.ma_len1;
            aux->mdtruffle.len2 = info.ma_len2;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mdtruffle.mask2);
            break;
        case MultibyteAccelInfo::MAT_DSHIFTGRAB:
            aux->accel_type = ACCEL_MDSGTRUFFLE;
            aux->mdtruffle.offset = offset;
            aux->mdtruffle.len1 = info.ma_len1;
            aux->mdtruffle.len2 = info.ma_len2;
            truffleBuildMasks(stops, &aux->mtruffle.mask1,
                              &aux->mdtruffle.mask2);
            break;
        default:
            // shouldn't happen
            assert(0);
            return;
        }
        return;
    }

    DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
}

bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
    assert(aux->accel_type == ACCEL_NONE);
    if (info.single_stops.none()) {
        DEBUG_PRINTF("picked red tape\n");
        aux->accel_type = ACCEL_RED_TAPE;
        aux->generic.offset = info.single_offset;
    } else {
    }
    if (aux->accel_type == ACCEL_NONE) {
        buildAccelMulti(info, aux);
    }
    if (aux->accel_type == ACCEL_NONE) {
        buildAccelDouble(info, aux);
    }
    if (aux->accel_type == ACCEL_NONE) {
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -37,9 +37,30 @@ union AccelAux;

namespace ue2 {

struct MultibyteAccelInfo {
    /* multibyte accel schemes, ordered by strength */
    enum multiaccel_type {
        MAT_SHIFT,
        MAT_SHIFTGRAB,
        MAT_DSHIFT,
        MAT_DSHIFTGRAB,
        MAT_LONG,
        MAT_LONGGRAB,
        MAT_MAX,
        MAT_NONE = MAT_MAX
    };
    CharReach cr;
    u32 offset = 0;
    u32 len1 = 0;
    u32 len2 = 0;
    multiaccel_type type = MAT_NONE;
};

struct AccelInfo {
    AccelInfo() : single_offset(0U), double_offset(0U),
                  single_stops(CharReach::dot()) {}
                  single_stops(CharReach::dot()),
                  multiaccel_offset(0), ma_len1(0), ma_len2(0),
                  ma_type(MultibyteAccelInfo::MAT_NONE) {}
    u32 single_offset; /**< offset correction to apply to single schemes */
    u32 double_offset; /**< offset correction to apply to double schemes */
    CharReach double_stop1; /**< single-byte accel stop literals for double
@ -47,10 +68,19 @@ struct AccelInfo {
    flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
                                               * literals */
    CharReach single_stops; /**< escapes for single byte acceleration */
    u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
    CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
    u32 ma_len1; /**< multiaccel len1 */
    u32 ma_len2; /**< multiaccel len2 */
    MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
};

bool buildAccelAux(const AccelInfo &info, AccelAux *aux);

/* returns true if the escape set can be handled with a masked double_verm */
bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set,
                    u8 *m1_out = nullptr, u8 *m2_out = nullptr);

} // namespace ue2

#endif
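
For orientation, a hypothetical fragment showing how the new multibyte fields
might be populated before calling buildAccelAux() (values are invented; real
AccelInfo structures are built by the engine construction code):

    #include "nfa/accelcompile.h"

    using namespace ue2;

    static bool tryMultibyteAccel(AccelAux *aux) {
        AccelInfo info;                 // single_stops defaults to dot()
        info.multiaccel_offset = 0;
        info.multiaccel_stops = CharReach('x'); // a single stop character
        info.ma_len1 = 4;               // run length for the long scheme
        info.ma_type = MultibyteAccelInfo::MAT_LONG;
        return buildAccelAux(info, aux); // may pick ACCEL_MLVERM
    }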
548
src/nfa/castle.c
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -96,7 +96,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
        repeatHasMatch(info, rctrl, rstate, offset);
    DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
    if (match == REPEAT_MATCH) {
        DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, subIdx);
        DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
                     subIdx, sub->report);
        if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING;
        }
@ -111,17 +112,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(q->streamState,
                                               c->activeIdxSize);
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            if (activeIdx < c->numRepeats && subCastleReportCurrent(c, q,
            if (subCastleReportCurrent(c, q,
                    offset, activeIdx) == MO_HALT_MATCHING) {
                return MO_HALT_MATCHING;
            }
        }
    }

    if (!c->pureExclusive) {
        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
@ -162,11 +168,18 @@ static really_inline
char castleInAccept(const struct Castle *c, struct mq *q,
                    const ReportID report, const u64a offset) {
    DEBUG_PRINTF("offset=%llu\n", offset);
    /* ignore when just catching up due to full queue */
    if (report == MO_INVALID_IDX) {
        return 0;
    }

    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(q->streamState,
                                               c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            if (subCastleInAccept(c, q, report, offset, activeIdx)) {
                return 1;
@ -174,11 +187,10 @@ char castleInAccept(const struct Castle *c, struct mq *q,
            }
        }
    }

    if (!c->pureExclusive) {
        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID;
             i = mmbit_iterate(active, c->numRepeats, i)) {
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            if (subCastleInAccept(c, q, report, offset, i)) {
                return 1;
@ -193,7 +205,6 @@ static really_inline
void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                                  void *full_state, void *stream_state,
                                  const u32 subIdx) {
    u8 *active = (u8 *)stream_state;
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);

@ -203,10 +214,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,

    if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
        DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
        if (sub->exclusive) {
            partial_store_u32(stream_state, c->numRepeats, c->activeIdxSize);
        if (sub->exclusiveId < c->numRepeats) {
            u8 *active = (u8 *)stream_state;
            u8 *groups = active + c->groupIterOffset;
            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
        } else {
            mmbit_unset(active + c->activeIdxSize, c->numRepeats, subIdx);
            u8 *active = (u8 *)stream_state + c->activeOffset;
            mmbit_unset(active, c->numRepeats, subIdx);
        }
    }
}
@ -216,30 +230,47 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                               void *full_state, void *stream_state) {
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (!c->staleIterOffset) {
        DEBUG_PRINTF("{no repeats can go stale}\n");
        return; /* no subcastle can ever go stale */
    }

    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
        u8 *active = (u8 *)stream_state;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            subCastleDeactivateStaleSubs(c, offset, full_state,
                                         stream_state, activeIdx);
        }
    }

    if (!c->pureExclusive) {
        const u8 *active = (const u8 *)stream_state + c->activeIdxSize;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID;
             i = mmbit_iterate(active, c->numRepeats, i)) {
    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)stream_state + c->activeOffset;
        const struct mmbit_sparse_iter *it
            = (const void *)((const char *)c + c->staleIterOffset);

        struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
        u32 numRepeats = c->numRepeats;
        u32 idx = 0;

        u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state);
        while(i != MMB_INVALID) {
            DEBUG_PRINTF("subcastle %u\n", i);
            subCastleDeactivateStaleSubs(c, offset, full_state,
                                         stream_state, i);
            subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i);
            i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
                                       si_state);
        }
    }
}

static really_inline
void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
                      void *full_state, void *stream_state) {
                      void *full_state, void *stream_state,
                      UNUSED char stale_checked) {
    assert(top < c->numRepeats);

    const struct SubCastle *sub = getSubCastle(c, top);
@ -249,12 +280,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
                    info->packedCtrlSize;

    char is_alive = 0;
    if (sub->exclusive) {
        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
    u8 *active = (u8 *)stream_state;
    if (sub->exclusiveId < c->numRepeats) {
        u8 *groups = active + c->groupIterOffset;
        active += sub->exclusiveId * c->activeIdxSize;
        if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) {
            const u32 activeIdx = partial_load_u32(active, c->activeIdxSize);
            is_alive = (activeIdx == top);
        partial_store_u32(stream_state, top, c->activeIdxSize);
        }

        if (!is_alive) {
            partial_store_u32(active, top, c->activeIdxSize);
        }
    } else {
        u8 *active = (u8 *)stream_state + c->activeIdxSize;
        active += c->activeOffset;
        is_alive = mmbit_set(active, c->numRepeats, top);
    }

@ -263,8 +302,8 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
    } else {
        DEBUG_PRINTF("repeat %u is already alive\n", top);
        // Caller should ensure we're not stale.
        assert(repeatHasMatch(info, rctrl, rstate, offset) !=
               REPEAT_STALE);
        assert(!stale_checked
               || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);

        // Ignore duplicate top events.
        u64a last = repeatLastTop(info, rctrl, rstate);
@ -292,11 +331,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin,
    u64a match = repeatNextMatch(info, rctrl, rstate, begin);
    if (match == 0) {
        DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
        if (sub->exclusive) {
            partial_store_u32(stream_state, c->numRepeats,
                              c->activeIdxSize);
        if (sub->exclusiveId < c->numRepeats) {
            u8 *groups = (u8 *)stream_state + c->groupIterOffset;
            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
        } else {
            u8 *active = (u8 *)stream_state + c->activeIdxSize;
            u8 *active = (u8 *)stream_state + c->activeOffset;
            mmbit_unset(active, c->numRepeats, subIdx);
        }
        return;
@ -329,16 +368,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
    *mloc = 0;

    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
        u8 *active = (u8 *)stream_state;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
                               &found, activeIdx);
        }
    }

    if (!c->pureExclusive) {
        u8 *active = (u8 *)stream_state + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)stream_state + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID;
             i = mmbit_iterate(active, c->numRepeats, i)) {
@ -368,31 +411,38 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state,
}

static really_inline
void subCastleMatchLoop(const struct Castle *c, void *full_state,
                        void *stream_state, const u64a end,
                        const u64a loc, u64a *offset) {
    u8 *active = (u8 *)stream_state + c->activeIdxSize;
    u8 *matching = full_state;
    mmbit_clear(matching, c->numRepeats);
    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
        u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
void set_matching(const struct Castle *c, const u64a match, u8 *active,
                  u8 *matching, const u32 active_size, const u32 active_id,
                  const u32 matching_id, u64a *offset, const u64a end) {
    if (match == 0) {
        DEBUG_PRINTF("no more matches\n");
        mmbit_unset(active, c->numRepeats, i);
        mmbit_unset(active, active_size, active_id);
    } else if (match > end) {
        // If we had a local copy of the active mmbit, we could skip
        // looking at this repeat again. But we don't, so we just move
        // on.
    } else if (match == *offset) {
        mmbit_set(matching, c->numRepeats, i);
        mmbit_set(matching, c->numRepeats, matching_id);
    } else if (match < *offset) {
        // New minimum offset.
        *offset = match;
        mmbit_clear(matching, c->numRepeats);
        mmbit_set(matching, c->numRepeats, i);
        mmbit_set(matching, c->numRepeats, matching_id);
    }
}

static really_inline
void subCastleMatchLoop(const struct Castle *c, void *full_state,
                        void *stream_state, const u64a end,
                        const u64a loc, u64a *offset) {
    u8 *active = (u8 *)stream_state + c->activeOffset;
    u8 *matching = full_state;
    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
        u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
        set_matching(c, match, active, matching, c->numRepeats, i,
                     i, offset, end);
    }
}

static really_inline
@ -434,61 +484,37 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
    // full_state (scratch).

    u64a offset = end; // min offset of next match
    char found = 0;
    u32 activeIdx = 0;
    mmbit_clear(matching, c->numRepeats);
    if (c->exclusive) {
        activeIdx = partial_load_u32(stream_state, c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
            u32 i = activeIdx;
            DEBUG_PRINTF("subcastle %u\n", i);
        u8 *active = (u8 *)stream_state;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            activeIdx = partial_load_u32(cur, c->activeIdxSize);
            u64a match = subCastleNextMatch(c, full_state, stream_state,
                                            loc, i);

            if (match == 0) {
                DEBUG_PRINTF("no more matches\n");
                partial_store_u32(stream_state, c->numRepeats,
                                  c->activeIdxSize);
            } else if (match > end) {
                // If we had a local copy of the active mmbit, we could skip
                // looking at this repeat again. But we don't, so we just move
                // on.
            } else if (match <= offset) {
                if (match < offset) {
                    // New minimum offset.
                    offset = match;
                }
                found = 1;
            }
                                            loc, activeIdx);
            set_matching(c, match, groups, matching, c->numGroups, i,
                         activeIdx, &offset, end);
        }
    }

    const char hasMatch = found;
    u64a newOffset = offset;
    if (!c->pureExclusive) {
    if (c->exclusive != PURE_EXCLUSIVE) {
        subCastleMatchLoop(c, full_state, stream_state,
                           end, loc, &newOffset);

        DEBUG_PRINTF("offset=%llu\n", newOffset);
        if (mmbit_any(matching, c->numRepeats)) {
            found = 1;
            if (subCastleFireMatch(c, full_state, stream_state,
                                   cb, ctx, newOffset) == MO_HALT_MATCHING) {
                return MO_HALT_MATCHING;
                           end, loc, &offset);
            }
        }
    }

    if (!found) {
    DEBUG_PRINTF("offset=%llu\n", offset);
    if (!mmbit_any(matching, c->numRepeats)) {
        DEBUG_PRINTF("no more matches\n");
        break;
    } else if (hasMatch && offset == newOffset) {
        const struct SubCastle *sub = getSubCastle(c, activeIdx);
        DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, activeIdx);
        if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
            DEBUG_PRINTF("caller told us to halt\n");
        }

    if (subCastleFireMatch(c, full_state, stream_state,
                           cb, ctx, offset) == MO_HALT_MATCHING) {
        return MO_HALT_MATCHING;
    }
    }
    loc = newOffset;
    loc = offset;
    }

    return MO_CONTINUE_MATCHING;
@ -547,7 +573,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
@ -547,7 +573,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
|
||||
static really_inline
|
||||
char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
|
||||
const size_t end, size_t *loc) {
|
||||
const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end);
|
||||
const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
|
||||
buf + begin, buf + end);
|
||||
if (ptr == buf + end) {
|
||||
DEBUG_PRINTF("no escape found\n");
|
||||
return 0;
|
||||
@ -589,7 +616,103 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
|
||||
char castleRevScanVerm(const struct Castle *c, const u8 *buf,
|
||||
const size_t begin, const size_t end, size_t *loc) {
|
||||
const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
|
||||
if (ptr == buf + begin - 1) {
|
||||
DEBUG_PRINTF("no escape found\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(loc);
|
||||
assert(ptr >= buf && ptr < buf + end);
|
||||
*loc = (size_t)(ptr - buf);
|
||||
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
|
||||
const size_t begin, const size_t end, size_t *loc) {
|
||||
const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
|
||||
if (ptr == buf + begin - 1) {
|
||||
DEBUG_PRINTF("no escape found\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(loc);
|
||||
assert(ptr >= buf && ptr < buf + end);
|
||||
*loc = (size_t)(ptr - buf);
|
||||
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char castleRevScanShufti(const struct Castle *c, const u8 *buf,
|
||||
const size_t begin, const size_t end, size_t *loc) {
|
||||
const m128 mask_lo = c->u.shuf.mask_lo;
|
||||
const m128 mask_hi = c->u.shuf.mask_hi;
|
||||
const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
|
||||
if (ptr == buf + begin - 1) {
|
||||
DEBUG_PRINTF("no escape found\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(loc);
|
||||
assert(ptr >= buf && ptr < buf + end);
|
||||
*loc = (size_t)(ptr - buf);
|
||||
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char castleRevScanTruffle(const struct Castle *c, const u8 *buf,
|
||||
const size_t begin, const size_t end, size_t *loc) {
|
||||
const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
|
||||
buf + begin, buf + end);
|
||||
if (ptr == buf + begin - 1) {
|
||||
DEBUG_PRINTF("no escape found\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(loc);
|
||||
assert(ptr >= buf && ptr < buf + end);
|
||||
*loc = (size_t)(ptr - buf);
|
||||
DEBUG_PRINTF("escape found at offset %zu\n", *loc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
|
||||
const size_t end, size_t *loc) {
|
||||
assert(begin <= end);
|
||||
DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end);
|
||||
if (begin == end) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (c->type) {
|
||||
case CASTLE_DOT:
|
||||
// Nothing can stop a dot scan!
|
||||
return 0;
|
||||
case CASTLE_VERM:
|
||||
return castleRevScanVerm(c, buf, begin, end, loc);
|
||||
case CASTLE_NVERM:
|
||||
return castleRevScanNVerm(c, buf, begin, end, loc);
|
||||
case CASTLE_SHUFTI:
|
||||
return castleRevScanShufti(c, buf, begin, end, loc);
|
||||
case CASTLE_TRUFFLE:
|
||||
return castleRevScanTruffle(c, buf, begin, end, loc);
|
||||
default:
|
||||
DEBUG_PRINTF("unknown scan type!\n");
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
|
||||
char stale_checked) {
|
||||
const u32 event = q->items[q->cur].type;
|
||||
switch (event) {
|
||||
case MQE_TOP:
|
||||
@ -603,11 +726,24 @@ void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp) {
|
||||
assert(event < MQE_INVALID);
|
||||
u32 top = event - MQE_TOP_FIRST;
|
||||
DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
|
||||
castleProcessTop(c, top, sp, q->state, q->streamState);
|
||||
castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
|
||||
DEBUG_PRINTF("clearing active repeats due to escape\n");
|
||||
if (c->exclusive) {
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
mmbit_clear(groups, c->numGroups);
|
||||
}
|
||||
|
||||
if (c->exclusive != PURE_EXCLUSIVE) {
|
||||
mmbit_clear(active, c->numRepeats);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
enum MatchMode mode) {
|
||||
@ -630,7 +766,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
return 1;
|
||||
}
|
||||
|
||||
u8 *active = (u8 *)q->streamState + c->activeIdxSize; // active multibit
|
||||
u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit
|
||||
|
||||
assert(q->cur + 1 < q->end); // require at least two items
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
@ -644,14 +780,8 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
|
||||
char found = 0;
|
||||
if (c->exclusive) {
|
||||
const u32 activeIdx = partial_load_u32(q->streamState,
|
||||
c->activeIdxSize);
|
||||
if (activeIdx < c->numRepeats) {
|
||||
found = 1;
|
||||
} else if (c->pureExclusive) {
|
||||
DEBUG_PRINTF("castle is dead\n");
|
||||
goto scan_done;
|
||||
}
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
found = mmbit_any(groups, c->numGroups);
|
||||
}
|
||||
|
||||
if (!found && !mmbit_any(active, c->numRepeats)) {
|
||||
@ -698,15 +828,7 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
}
|
||||
|
||||
if (escape_found) {
|
||||
DEBUG_PRINTF("clearing active repeats due to escape\n");
|
||||
if (c->exclusive) {
|
||||
partial_store_u32(q->streamState, c->numRepeats,
|
||||
c->activeIdxSize);
|
||||
}
|
||||
|
||||
if (!c->pureExclusive) {
|
||||
mmbit_clear(active, c->numRepeats);
|
||||
}
|
||||
clear_repeats(c, q, active);
|
||||
}
|
||||
}
|
||||
|
||||
@ -720,15 +842,14 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
|
||||
}
|
||||
|
||||
sp = q_cur_offset(q);
|
||||
castleHandleEvent(c, q, sp);
|
||||
castleHandleEvent(c, q, sp, 1);
|
||||
q->cur++;
|
||||
}
|
||||
|
||||
if (c->exclusive) {
|
||||
const u32 activeIdx = partial_load_u32(q->streamState,
|
||||
c->activeIdxSize);
|
||||
if (c->pureExclusive || activeIdx < c->numRepeats) {
|
||||
return activeIdx < c->numRepeats;
|
||||
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
|
||||
if (mmbit_any_precise(groups, c->numGroups)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -745,28 +866,34 @@ char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void castleStreamSilent(const struct Castle *c, u8 *active, const u8 *buf,
|
||||
size_t length) {
|
||||
DEBUG_PRINTF("entry\n");
|
||||
static
|
||||
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
assert(q_last_type(q) == MQE_END);
|
||||
s64a sp = q_cur_loc(q);
|
||||
s64a ep = q_last_loc(q);
|
||||
|
||||
// This call doesn't produce matches, so we elide the castleMatchLoop call
|
||||
// entirely and just do escape scans to maintain the repeat.
|
||||
DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);
|
||||
|
||||
size_t eloc = 0;
|
||||
char escaped = castleScan(c, buf, 0, length, &eloc);
|
||||
if (escaped) {
|
||||
assert(eloc < length);
|
||||
DEBUG_PRINTF("escape found at %zu, clearing castle\n", eloc);
|
||||
if (c->exclusive) {
|
||||
partial_store_u32(active - c->activeIdxSize,
|
||||
c->numRepeats, c->activeIdxSize);
|
||||
size_t loc;
|
||||
|
||||
if (ep > 0) {
|
||||
if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
|
||||
return (s64a)loc;
|
||||
}
|
||||
ep = 0;
|
||||
}
|
||||
|
||||
if (!c->pureExclusive) {
|
||||
mmbit_clear(active, c->numRepeats);
|
||||
if (sp < 0) {
|
||||
s64a hlen = q->hlength;
|
||||
|
||||
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
|
||||
return (s64a)loc - hlen;
|
||||
}
|
||||
ep = 0;
|
||||
}
|
||||
|
||||
return sp - 1; /* the repeats are never killed */
|
||||
}
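
[Editor's note, not part of the diff.] The new QR path hinges on one idea: scan the queue's buffers backwards for the last escape, treat everything before it as dead, and replay only later events. The sketch below illustrates that under simplifying assumptions: a single escape byte stands in for the verm/shufti/truffle dispatch in castleRevScan(), and find_last_escape is a hypothetical helper, not a Hyperscan function.

// Sketch: find the last escape so all earlier repeat state can be
// squashed once, instead of scanning the stream forwards.
#include <cstdio>
#include <cstring>

// returns index of the last 'esc' in buf[0..len), or -1 if absent
static long find_last_escape(const char *buf, size_t len, char esc) {
    for (long i = static_cast<long>(len) - 1; i >= 0; i--) {
        if (buf[i] == esc) {
            return i;
        }
    }
    return -1;
}

int main() {
    const char buf[] = "aaaXaaaa"; // 'X' plays the escape character
    long kill = find_last_escape(buf, std::strlen(buf), 'X');
    // queue events at offsets <= kill are moot: every repeat died there,
    // so state can be cleared once and those events skipped entirely
    std::printf("last kill at %ld; replay only later events\n", kill);
    return 0;
}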
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {

@@ -780,85 +907,44 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
    assert(q->cur + 1 < q->end); /* require at least two items */
    assert(q_cur_type(q) == MQE_START);
    u64a sp = q_cur_offset(q);
    q->cur++;
    DEBUG_PRINTF("sp=%llu\n", sp);

    const struct Castle *c = getImplNfa(n);
    u8 *active = (u8 *)q->streamState + c->activeIdxSize;
    char found = 0;
    u8 *active = (u8 *)q->streamState + c->activeOffset;

    u64a end_offset = q_last_loc(q) + q->offset;
    s64a last_kill_loc = castleLastKillLoc(c, q);
    DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
                 last_kill_loc, q_cur_loc(q), q_last_loc(q));
    assert(last_kill_loc < q_last_loc(q));

    if (last_kill_loc != q_cur_loc(q) - 1) {
        clear_repeats(c, q, active);
    }

    q->cur++; /* skip start event */

    /* skip events prior to the repeats being squashed */
    while (q_cur_loc(q) <= last_kill_loc) {
        DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
        q->cur++;
        assert(q->cur < q->end);
    }

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));
        found = 0;
        if (c->exclusive) {
            const u32 activeIdx = partial_load_u32(q->streamState,
                                                   c->activeIdxSize);
            if (activeIdx < c->numRepeats) {
                found = 1;
            } else if (c->pureExclusive) {
                DEBUG_PRINTF("castle is dead\n");
                goto scan_done;
            }
        }

        if (!found && !mmbit_any(active, c->numRepeats)) {
            DEBUG_PRINTF("castle is dead\n");
            goto scan_done;
        }

        u64a ep = q_cur_offset(q);

        if (sp < q->offset) {
            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
            assert(q->offset - sp <= q->hlength);
            u64a local_ep = MIN(q->offset, ep);
            const u8 *ptr = q->history + q->hlength + sp - q->offset;
            castleStreamSilent(c, active, ptr, local_ep - sp);
            sp = local_ep;
        }

        found = 0;
        if (c->exclusive) {
            const u32 activeIdx = partial_load_u32(q->streamState,
                                                   c->activeIdxSize);
            if (activeIdx < c->numRepeats) {
                found = 1;
            } else if (c->pureExclusive) {
                DEBUG_PRINTF("castle is dead\n");
                goto scan_done;
            }
        }

        if (!found && !mmbit_any(active, c->numRepeats)) {
            DEBUG_PRINTF("castle is dead\n");
            goto scan_done;
        }

        if (sp < ep) {
            DEBUG_PRINTF("MAIN BUFFER SCAN\n");
            assert(ep - q->offset <= q->length);
            const u8 *ptr = q->buffer + sp - q->offset;
            castleStreamSilent(c, active, ptr, ep - sp);
        }

scan_done:
        sp = q_cur_offset(q);
        castleDeactivateStaleSubs(c, sp, q->state, q->streamState);
        castleHandleEvent(c, q, sp);
        u64a sp = q_cur_offset(q);
        castleHandleEvent(c, q, sp, 0);
        q->cur++;
    }

    found = 0;
    castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);

    char found = 0;
    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(q->streamState,
                                               c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
            found = 1;
        } else if (c->pureExclusive) {
            DEBUG_PRINTF("castle is dead\n");
            return 0;
        }
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        found = mmbit_any_precise(groups, c->numGroups);
    }

    if (!found && !mmbit_any_precise(active, c->numRepeats)) {

@@ -866,7 +952,7 @@ scan_done:
        return 0;
    }

    if (castleInAccept(c, q, report, sp)) {
    if (castleInAccept(c, q, report, end_offset)) {
        return MO_MATCHES_PENDING;
    }

@@ -901,11 +987,12 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
    const struct Castle *c = getImplNfa(n);
    assert(q->streamState);
    if (c->exclusive) {
        partial_store_u32(q->streamState, c->numRepeats, c->activeIdxSize);
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        mmbit_clear(groups, c->numGroups);
    }

    if (!c->pureExclusive) {
        u8 *active = (u8 *)q->streamState + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)q->streamState + c->activeOffset;
        mmbit_clear(active, c->numRepeats);
    }
    return 0;

@@ -919,11 +1006,12 @@ char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset,
    const struct Castle *c = getImplNfa(n);
    if (c->exclusive) {
        partial_store_u32(state, c->numRepeats, c->activeIdxSize);
        u8 *groups = (u8 *)state + c->groupIterOffset;
        mmbit_clear(groups, c->numGroups);
    }

    if (!c->pureExclusive) {
        u8 *active = (u8 *)state + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)state + c->activeOffset;
        mmbit_clear(active, c->numRepeats);
    }
    return 0;

@@ -954,16 +1042,19 @@ char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q,
    const u64a offset = q->offset + loc;
    DEBUG_PRINTF("offset=%llu\n", offset);
    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(q->streamState,
                                               c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
            subCastleQueueCompressState(c, activeIdx, q, offset);
        }
    }

    if (!c->pureExclusive) {
        const u8 *active = (const u8 *)q->streamState + c->activeIdxSize;
    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("packing state for sub %u\n", i);

@@ -997,15 +1088,19 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
    const struct Castle *c = getImplNfa(n);

    if (c->exclusive) {
        const u32 activeIdx = partial_load_u32(src, c->activeIdxSize);
        if (activeIdx < c->numRepeats) {
        const u8 *active = (const u8 *)src;
        const u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            const u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            subCastleExpandState(c, activeIdx, dest, src, offset);
        }
    }

    if (!c->pureExclusive) {
    if (c->exclusive != PURE_EXCLUSIVE) {
        // Unpack state for all active repeats.
        const u8 *active = (const u8 *)src + c->activeIdxSize;
        const u8 *active = (const u8 *)src + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            subCastleExpandState(c, i, dest, src, offset);

@@ -1013,4 +1108,3 @@ char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
    }
    return 0;
}
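[Editor's note, not part of the diff.] Several hunks above repeat the same addressing pattern over the reworked Castle stream state, so a compact sketch may help. It is illustrative only: partial_load_u32 below is a local stand-in for Hyperscan's helper of the same name, and the layout constants are assumed values chosen to match the new fields (numGroups, activeIdxSize, activeOffset, groupIterOffset).

// Sketch: per-group active subengine ids live at the front of stream
// state, one activeIdxSize-byte slot per exclusive group, with the
// non-exclusive "active repeats" multibit following at activeOffset.
#include <cstdio>
#include <cstring>

typedef unsigned char u8;
typedef unsigned int u32;

// stand-in: load 'size' little-endian bytes into a u32 (assumes LE host)
static u32 partial_load_u32(const void *ptr, u32 size) {
    u32 v = 0;
    std::memcpy(&v, ptr, size);
    return v;
}

int main() {
    const u32 numGroups = 2, activeIdxSize = 1;
    // buildCastle writes activeOffset = numGroups * activeIdxSize
    const u32 activeOffset = numGroups * activeIdxSize;

    u8 stream_state[8] = {3, 7}; // group 0 -> sub 3, group 1 -> sub 7
    for (u32 i = 0; i < numGroups; i++) {
        const u8 *cur = stream_state + i * activeIdxSize;
        std::printf("group %u holds sub %u\n", i,
                    partial_load_u32(cur, activeIdxSize));
    }
    // the non-exclusive multibit begins here
    std::printf("non-exclusive multibit at offset %u\n", activeOffset);
    return 0;
}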
@@ -100,6 +100,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
        fprintf(f, "unknown type %u\n", c->type);
        break;
    }
    fprintf(f, "Stale Iter Offset: %u\n", c->staleIterOffset);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -42,7 +42,9 @@ struct SubCastle {
    u32 streamStateOffset; //!< offset within stream state
    u32 repeatInfoOffset;  //!< offset of RepeatInfo structure
                           // relative to the start of SubCastle
    char exclusive;        //!< exclusive info of this SubCastle
    u32 exclusiveId;       //!< exclusive group id of this SubCastle,
                           // set to the number of SubCastles in Castle
                           // if it is not exclusive
};

#define CASTLE_DOT 0

@@ -51,6 +53,12 @@ struct SubCastle {
#define CASTLE_SHUFTI 3
#define CASTLE_TRUFFLE 4

enum ExclusiveType {
    NOT_EXCLUSIVE,  //!< no subcastles are exclusive
    EXCLUSIVE,      //!< a subset of subcastles are exclusive
    PURE_EXCLUSIVE  //!< all subcastles are exclusive
};

/**
 * \brief Castle engine structure.
 *

@@ -63,26 +71,60 @@ struct SubCastle {
 * - struct Castle
 * - struct SubCastle[numRepeats]
 * - tables for sparse model repeats
 * - sparse iterator for subcastles that may be stale
 *
 * Castle stores an "active repeats" multibit in stream state, followed by the
 * packed repeat state for each SubCastle. If all SubCastles are mutual
 * exclusive, we store current active SubCastle id instead of "active repeats"
 * multibit in stream state. If there are both exclusive and non-exclusive
 * SubCastle groups, we use an active id for the exclusive group and a multibit
 * for the non-exclusive group.
 * packed repeat state for each SubCastle. If there are both exclusive and
 * non-exclusive SubCastle groups, we use an active id for each exclusive group
 * and a multibit for the non-exclusive group. We also store an "active
 * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual
 * exclusive, we remove "active repeats" multibit from stream state.
 * * Castle stream state:
 * *
 * * |---|
 * * |   | active subengine id for exclusive group 1
 * * |---|
 * * |   | active subengine id for exclusive group 2 (if necessary)
 * * |---|
 * * ...
 * * |---|
 * * |   | "active repeats" multibit for non-exclusive subcastles
 * * |   | (if not all subcastles are exclusive)
 * * |---|
 * * |   | active multibit for exclusive groups
 * * |   |
 * * |---|
 * * ||-|| common pool of stream state for exclusive group 1
 * * ||-||
 * * |---|
 * * ||-|| common pool of stream state for exclusive group 2 (if necessary)
 * * ||-||
 * * |---|
 * * ...
 * * |---|
 * * |   | stream state for each non-exclusive subcastle
 * * ...
 * * |   |
 * * |---|
 *
 * In full state (stored in scratch space) it stores a temporary multibit over
 * the repeats (used by \ref castleMatchLoop), followed by the repeat control
 * blocks for each SubCastle. If all SubCastles are mutual exclusive, we only
 * need to store the repeat control blocks for each SubCastle.
 * blocks for each SubCastle.
 */
struct ALIGN_AVX_DIRECTIVE Castle {
    u32 numRepeats;
    u32 numRepeats;      //!< number of repeats in Castle
    u32 numGroups;       //!< number of exclusive groups
    u8 type;             //!< tells us which scanning mechanism (below) to use
    char exclusive;      //!< tells us if there are mutual exclusive SubCastles
    char pureExclusive;  //!< tells us if all SubCastles are mutual exclusive
    u8 exclusive;        //!< tells us if there are mutual exclusive SubCastles
    u8 activeIdxSize;    //!< number of bytes in stream state to store
                         // active SubCastle id for exclusive mode
    u32 activeOffset;    //!< offset to active multibit for non-exclusive
                         // SubCastles
    u32 staleIterOffset; //!< offset to a sparse iterator to check for stale
                         // sub castles
    u32 groupIterOffset; //!< offset to a iterator to check the aliveness of
                         // exclusive groups

    union {
        struct {
            char c;

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -32,6 +32,7 @@
#include "castlecompile.h"

#include "castle_internal.h"
#include "limex_limits.h"
#include "nfa_internal.h"
#include "repeatcompile.h"
#include "shufticompile.h"

@@ -47,7 +48,9 @@
#include "util/dump_charclass.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include "grey.h"

@@ -63,7 +66,6 @@ using boost::adaptors::map_values;

namespace ue2 {

#define CASTLE_MAX_TOPS 32
#define CLIQUE_GRAPH_MAX_SIZE 1000

static

@@ -204,7 +206,7 @@ bool graph_empty(const Graph &g) {
static
vector<u32> removeClique(CliqueGraph &cg) {
    vector<vector<u32>> cliquesVec(1);
    DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg));
    DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg));
    findCliqueGroup(cg, cliquesVec[0]);
    while (!graph_empty(cg)) {
        const vector<u32> &c = cliquesVec.back();

@@ -236,7 +238,7 @@ vector<u32> removeClique(CliqueGraph &cg) {
        }
    }

    DEBUG_PRINTF("clique size:%lu\n", cliquesVec[id].size());
    DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size());
    return cliquesVec[id];
}

@@ -244,17 +246,18 @@ vector<u32> removeClique(CliqueGraph &cg) {
// the end locations where it overlaps with other literals,
// then the literals are mutual exclusive
static
bool findExclusivePair(const u32 id1, const u32 id2,
bool findExclusivePair(const size_t id1, const size_t id2,
                       const size_t lower,
                       const vector<vector<size_t>> &min_reset_dist,
                       const vector<vector<vector<CharReach>>> &triggers) {
    const auto &triggers1 = triggers[id1];
    const auto &triggers2 = triggers[id2];
    for (u32 i = 0; i < triggers1.size(); ++i) {
        for (u32 j = 0; j < triggers2.size(); ++j) {
    for (size_t i = 0; i < triggers1.size(); ++i) {
        for (size_t j = 0; j < triggers2.size(); ++j) {
            if (!literalOverlap(triggers1[i], triggers2[j],
                                min_reset_dist[id2][j]) ||
                                min_reset_dist[id2 - lower][j]) ||
                !literalOverlap(triggers2[j], triggers1[i],
                                min_reset_dist[id1][i])) {
                                min_reset_dist[id1 - lower][i])) {
                return false;
            }
        }

@@ -263,40 +266,75 @@ bool findExclusivePair(const u32 id1, const u32 id2,
}

static
vector<u32> checkExclusion(const CharReach &cr,
                           const vector<vector<vector<CharReach>>> &triggers) {
    vector<u32> group;
    if (!triggers.size() || triggers.size() == 1) {
        return group;
    }
vector<vector<u32>> checkExclusion(u32 &streamStateSize,
                                   const CharReach &cr,
                                   const vector<vector<vector<CharReach>>> &triggers,
                                   enum ExclusiveType &exclusive,
                                   const size_t numRepeats) {
    vector<vector<u32>> groups;
    size_t trigSize = triggers.size();
    DEBUG_PRINTF("trigSize %zu\n", trigSize);

    size_t lower = 0;
    size_t total = 0;
    while (lower < trigSize) {
        vector<CliqueVertex> vertices;
        unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();

        vector<vector<size_t>> min_reset_dist;
        size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
        // get min reset distance for each repeat
        for (auto it = triggers.begin(); it != triggers.end(); it++) {
            const vector<size_t> &tmp_dist = minResetDistToEnd(*it, cr);
        for (size_t i = lower; i < upper; i++) {
            CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
            vertices.push_back(v);

            const vector<size_t> &tmp_dist =
                minResetDistToEnd(triggers[i], cr);
            min_reset_dist.push_back(tmp_dist);
        }

        vector<CliqueVertex> vertices;
        unique_ptr<CliqueGraph> cg = make_unique<CliqueGraph>();
        for (u32 i = 0; i < triggers.size(); ++i) {
            CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
            vertices.push_back(v);
        }

        // find exclusive pair for each repeat
        for (u32 i = 0; i < triggers.size(); ++i) {
            CliqueVertex s = vertices[i];
            for (u32 j = i + 1; j < triggers.size(); ++j) {
                if (findExclusivePair(i, j, min_reset_dist, triggers)) {
                    CliqueVertex d = vertices[j];
        for (size_t i = lower; i < upper; i++) {
            CliqueVertex s = vertices[i - lower];
            for (size_t j = i + 1; j < upper; j++) {
                if (findExclusivePair(i, j, lower, min_reset_dist,
                                      triggers)) {
                    CliqueVertex d = vertices[j - lower];
                    add_edge(s, d, *cg);
                }
            }
        }

        // find the largest exclusive group
        return removeClique(*cg);
        auto clique = removeClique(*cg);
        size_t cliqueSize = clique.size();
        if (cliqueSize > 1) {
            groups.push_back(clique);
            exclusive = EXCLUSIVE;
            total += cliqueSize;
        }

        lower += CLIQUE_GRAPH_MAX_SIZE;
    }
    DEBUG_PRINTF("clique size %zu, num of repeats %zu\n",
                 total, numRepeats);
    if (total == numRepeats) {
        exclusive = PURE_EXCLUSIVE;
        streamStateSize = 0;
    };

    return groups;
}
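
[Editor's note, not part of the diff.] The reworked checkExclusion() above no longer caps the analysis at a single clique graph: it slides a window of CLIQUE_GRAPH_MAX_SIZE candidates, keeps every clique larger than one, and only reports PURE_EXCLUSIVE when the cliques cover all repeats. A stripped-down sketch of that control flow, where largest_clique_in_window is a hypothetical stand-in for building a CliqueGraph over [lower, upper) and calling removeClique():

#include <algorithm>
#include <cstdio>
#include <vector>

static const size_t kWindow = 1000; // mirrors CLIQUE_GRAPH_MAX_SIZE

static std::vector<size_t> largest_clique_in_window(size_t lower,
                                                    size_t upper) {
    // dummy: pretend every candidate in the window is mutually exclusive
    std::vector<size_t> clique;
    for (size_t i = lower; i < upper; i++) {
        clique.push_back(i);
    }
    return clique;
}

int main() {
    const size_t numCandidates = 2500; // made-up input size
    std::vector<std::vector<size_t>> groups;
    size_t total = 0;
    for (size_t lower = 0; lower < numCandidates; lower += kWindow) {
        size_t upper = std::min(lower + kWindow, numCandidates);
        auto clique = largest_clique_in_window(lower, upper);
        if (clique.size() > 1) { // singleton cliques buy nothing
            total += clique.size();
            groups.push_back(std::move(clique));
        }
    }
    // total == numCandidates corresponds to the PURE_EXCLUSIVE case
    std::printf("%zu groups covering %zu candidates\n", groups.size(), total);
    return 0;
}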

namespace {
struct ExclusiveInfo {

    /** Mapping between top and exclusive group id */
    map<u32, u32> groupId;

    /** Number of exclusive groups */
    u32 numGroups = 0;
};
}

static
@@ -305,10 +343,15 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
                     const vector<pair<depth, bool>> &repeatInfoPair,
                     u32 &scratchStateSize, u32 &streamStateSize,
                     u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
                     const set<u32> &exclusiveGroup) {
                     const ExclusiveInfo &exclusiveInfo,
                     vector<u32> &may_stale, const ReportManager &rm) {
    const bool remap_reports = has_managed_reports(proto.kind);

    u32 i = 0;
    u32 maxStreamSize = 0;
    bool exclusive = exclusiveGroup.size() > 1;
    const auto &groupId = exclusiveInfo.groupId;
    const auto &numGroups = exclusiveInfo.numGroups;
    vector<u32> maxStreamSize(numGroups, 0);

    for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
         it != ite; ++it, ++i) {
        const PureRepeat &pr = it->second;

@@ -316,33 +359,35 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
        bool is_reset = repeatInfoPair[i].second;

        enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
                                                 min_period, is_reset);
                                                 min_period, is_reset, true);
        RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);

        DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
                     repeatTypeName(rtype), pr.bounds.str().c_str());

        u32 subScratchStateSize;
        u32 subStreamStateSize;

        SubCastle &sub = subs[i];
        RepeatInfo &info = infos[i];

        // handle exclusive case differently
        if (exclusive && exclusiveGroup.find(i) != exclusiveGroup.end()) {
            maxStreamSize = MAX(maxStreamSize, rsi.packedCtrlSize);
        } else {
            subScratchStateSize = verify_u32(sizeof(RepeatControl));
            subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);

            info.packedCtrlSize = rsi.packedCtrlSize;
        u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);

        // Handle stream/scratch space alloc for exclusive case differently.
        if (contains(groupId, i)) {
            u32 id = groupId.at(i);
            maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize);
            // SubCastle full/stream state offsets are written in for the group
            // below.
        } else {
            sub.fullStateOffset = scratchStateSize;
            sub.streamStateOffset = streamStateSize;

            scratchStateSize += subScratchStateSize;
            scratchStateSize += verify_u32(sizeof(RepeatControl));
            streamStateSize += subStreamStateSize;
        }

        if (pr.bounds.max.is_finite()) {
            may_stale.push_back(i);
        }

        info.type = verify_u8(rtype);
        info.repeatMin = depth_to_u32(pr.bounds.min);
        info.repeatMax = depth_to_u32(pr.bounds.max);

@@ -358,7 +403,9 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
        info.encodingSize = rsi.encodingSize;
        info.patchesOffset = rsi.patchesOffset;

        sub.report = *pr.reports.begin();
        assert(pr.reports.size() == 1);
        ReportID id = *pr.reports.begin();
        sub.report = remap_reports ? rm.getProgramOffset(id) : id;

        if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
            for (u32 j = 0; j < rsi.patchSize; j++) {

@@ -370,23 +417,30 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
        }
    }

    if (exclusive) {
        for (auto k : exclusiveGroup) {
            SubCastle &sub = subs[k];
            RepeatInfo &info = infos[k];
            info.packedCtrlSize = maxStreamSize;
    vector<u32> scratchOffset(numGroups, 0);
    vector<u32> streamOffset(numGroups, 0);
    for (const auto &j : groupId) {
        u32 top = j.first;
        u32 id = j.second;
        SubCastle &sub = subs[top];
        if (!scratchOffset[id]) {
            sub.fullStateOffset = scratchStateSize;
            sub.streamStateOffset = streamStateSize;
        }
            scratchOffset[id] = scratchStateSize;
            streamOffset[id] = streamStateSize;
            scratchStateSize += verify_u32(sizeof(RepeatControl));
            streamStateSize += maxStreamSize;
            streamStateSize += maxStreamSize[id];
        } else {
            sub.fullStateOffset = scratchOffset[id];
            sub.streamStateOffset = streamOffset[id];
        }
    }
}
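
[Editor's note, not part of the diff.] A point that is easy to miss in the hunk above: every member of an exclusive group records the same fullStateOffset/streamStateOffset, and the group's shared stream slot is sized to its largest member's packed state. A toy version of that bookkeeping, with made-up group ids and sizes and a -1 sentinel in place of the real code's zero-offset check:

#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

int main() {
    // top -> exclusive group id (as in ExclusiveInfo::groupId)
    std::map<unsigned, unsigned> groupId = {{0, 0}, {2, 0}, {5, 1}, {7, 1}};
    // assumed packed stream-state size of each top
    std::map<unsigned, unsigned> subSize = {{0, 4}, {2, 9}, {5, 6}, {7, 3}};

    const unsigned numGroups = 2;
    unsigned streamStateSize = 0;
    std::vector<unsigned> maxStreamSize(numGroups, 0);
    for (auto &g : groupId) { // first pass: size each group's shared slot
        maxStreamSize[g.second] =
            std::max(maxStreamSize[g.second], subSize[g.first]);
    }

    std::vector<int> streamOffset(numGroups, -1);
    for (auto &g : groupId) { // second pass: lay each slot out exactly once
        if (streamOffset[g.second] < 0) {
            streamOffset[g.second] = static_cast<int>(streamStateSize);
            streamStateSize += maxStreamSize[g.second];
        }
        std::printf("top %u -> stream offset %d\n", g.first,
                    streamOffset[g.second]);
    }
    return 0;
}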

aligned_unique_ptr<NFA>
buildCastle(const CastleProto &proto,
            const map<u32, vector<vector<CharReach>>> &triggers,
            const CompileContext &cc) {
            const CompileContext &cc, const ReportManager &rm) {
    assert(cc.grey.allowCastle);

    const size_t numRepeats = proto.repeats.size();

@@ -418,8 +472,9 @@ buildCastle(const CastleProto &proto,
    depth maxWidth(0);

    u32 i = 0;
    vector<u32> candidateRepeats;
    ExclusiveInfo exclusiveInfo;
    vector<vector<vector<CharReach>>> candidateTriggers;
    vector<u32> candidateRepeats;
    vector<pair<depth, bool>> repeatInfoPair;
    for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
         it != ite; ++it, ++i) {

@@ -454,49 +509,60 @@ buildCastle(const CastleProto &proto,

        repeatInfoPair.push_back(make_pair(min_period, is_reset));

        if (is_reset && candidateRepeats.size() < CLIQUE_GRAPH_MAX_SIZE) {
            candidateTriggers.push_back(triggers.at(top));
            candidateRepeats.push_back(i);
        }
    }

    // Case 1: exclusive repeats
    bool exclusive = false;
    bool pureExclusive = false;
    enum ExclusiveType exclusive = NOT_EXCLUSIVE;
    u32 activeIdxSize = 0;
    set<u32> exclusiveGroup;
    u32 groupIterOffset = 0;
    if (cc.grey.castleExclusive) {
        vector<u32> tmpGroup = checkExclusion(cr, candidateTriggers);
        const u32 exclusiveSize = tmpGroup.size();
        if (exclusiveSize > 1) {
            // Case 1: mutual exclusive repeats group found, initialize state
            // sizes
            exclusive = true;
        auto cliqueGroups =
            checkExclusion(streamStateSize, cr, candidateTriggers,
                           exclusive, numRepeats);
        for (const auto &group : cliqueGroups) {
            // mutual exclusive repeats group found,
            // update state sizes
            activeIdxSize = calcPackedBytes(numRepeats + 1);
            if (exclusiveSize == numRepeats) {
                pureExclusive = true;
                streamStateSize = 0;
                scratchStateSize = 0;
            }
            streamStateSize += activeIdxSize;

            // replace with top values
            for (const auto &val : tmpGroup) {
                exclusiveGroup.insert(candidateRepeats[val]);
            for (const auto &val : group) {
                const u32 top = candidateRepeats[val];
                exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
            }
            exclusiveInfo.numGroups++;
        }

        if (exclusive) {
            groupIterOffset = streamStateSize;
            streamStateSize += mmbit_size(exclusiveInfo.numGroups);
        }

        DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
    }
    candidateRepeats.clear();

    DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
                 exclusive);

    u32 tableSize = 0;
    u32 sparseRepeats = 0;
    vector<u32> may_stale; /* sub castles that may go stale */

    buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
                    scratchStateSize, streamStateSize, tableSize,
                    tables, sparseRepeats, exclusiveGroup);
                    tables, sparseRepeats, exclusiveInfo, may_stale, rm);

    const size_t total_size =
    DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
    vector<mmbit_sparse_iter> stale_iter;
    if (!may_stale.empty()) {
        mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
    }

    size_t total_size =
        sizeof(NFA) +                     // initial NFA structure
        sizeof(Castle) +                  // Castle structure
        sizeof(SubCastle) * subs.size() + // SubCastles themselves

@@ -506,6 +572,9 @@ buildCastle(const CastleProto &proto,
        sizeof(u64a) * sparseRepeats;     // paddings for
                                          // REPEAT_SPARSE_OPTIMAL_P tables

    total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
    total_size += byte_length(stale_iter); // stale sparse iter

    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
    nfa->type = verify_u8(CASTLE_NFA_0);
    nfa->length = verify_u32(total_size);

@@ -515,12 +584,15 @@ buildCastle(const CastleProto &proto,
    nfa->minWidth = verify_u32(minWidth);
    nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;

    char *ptr = (char *)nfa.get() + sizeof(NFA);
    char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
    char *ptr = base_ptr;
    Castle *c = (Castle *)ptr;
    c->numRepeats = verify_u32(subs.size());
    c->exclusive = exclusive;
    c->pureExclusive = pureExclusive;
    c->numGroups = exclusiveInfo.numGroups;
    c->exclusive = verify_s8(exclusive);
    c->activeIdxSize = verify_u8(activeIdxSize);
    c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
    c->groupIterOffset = groupIterOffset;

    writeCastleScanEngine(cr, c);

@@ -554,12 +626,22 @@ buildCastle(const CastleProto &proto,
    }

    // set exclusive group info
    if (exclusiveGroup.find(i) != exclusiveGroup.end()) {
        sub->exclusive = 1;
    if (contains(exclusiveInfo.groupId, i)) {
        sub->exclusiveId = exclusiveInfo.groupId[i];
    } else {
        sub->exclusive = 0;
        sub->exclusiveId = numRepeats;
    }
    }

    ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);

    assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA));
    if (!stale_iter.empty()) {
        c->staleIterOffset = verify_u32(ptr - base_ptr);
        copy_bytes(ptr, stale_iter);
        ptr += byte_length(stale_iter);
    }

    return nfa;
}
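
[Editor's note, not part of the diff.] The allocation arithmetic at the end of buildCastle deserves a second look: the stale sparse iterator is appended at the tail of the block after rounding the running size up to the iterator's alignment. In miniature, with arbitrary stand-in constants for alignof(mmbit_sparse_iter) and byte_length(stale_iter), and the offset simplified to be relative to the start of the region:

#include <cstddef>
#include <cstdio>

// round x up to a multiple of n (n a power of two), as ROUNDUP_N does
static size_t roundup_n(size_t x, size_t n) {
    return (x + n - 1) & ~(n - 1);
}

int main() {
    size_t total_size = 1234;    // bytes used by NFA/Castle/SubCastles...
    const size_t iter_align = 8; // stand-in for alignof(mmbit_sparse_iter)
    const size_t iter_bytes = 48; // stand-in for byte_length(stale_iter)

    total_size = roundup_n(total_size, iter_align);
    size_t staleIterOffset = total_size; // iterator copied in here
    total_size += iter_bytes;

    std::printf("stale iter at %zu, total %zu bytes\n", staleIterOffset,
                total_size);
    return 0;
}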

@@ -603,7 +685,7 @@ depth findMaxWidth(const CastleProto &proto, u32 top) {
    return proto.repeats.at(top).bounds.max;
}

CastleProto::CastleProto(const PureRepeat &pr) {
CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) {
    assert(pr.reach.any());
    assert(pr.reports.size() == 1);
    u32 top = 0;

@@ -665,6 +747,7 @@ u32 CastleProto::merge(const PureRepeat &pr) {
bool mergeCastle(CastleProto &c1, const CastleProto &c2,
                 map<u32, u32> &top_map) {
    assert(&c1 != &c2);
    assert(c1.kind == c2.kind);

    DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(),
                 c2.repeats.size());

@@ -738,6 +821,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
              ReportID report2) {
    assert(!c1.repeats.empty());
    assert(!c2.repeats.empty());
    assert(c1.kind == c2.kind);

    if (c1.reach() != c2.reach()) {
        DEBUG_PRINTF("different reach\n");

@@ -784,6 +868,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
bool is_equal(const CastleProto &c1, const CastleProto &c2) {
    assert(!c1.repeats.empty());
    assert(!c2.repeats.empty());
    assert(c1.kind == c2.kind);

    if (c1.reach() != c2.reach()) {
        DEBUG_PRINTF("different reach\n");

@@ -877,7 +962,7 @@ bool hasZeroMinBound(const CastleProto &proto) {
    return false;
}

unique_ptr<NGHolder> makeHolder(const CastleProto &proto, nfa_kind kind,
unique_ptr<NGHolder> makeHolder(const CastleProto &proto,
                                const CompileContext &cc) {
    assert(!proto.repeats.empty());

@@ -890,10 +975,10 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto, nfa_kind kind,
        }
    }

    unique_ptr<NGHolder> g = ue2::make_unique<NGHolder>(kind);
    auto g = ue2::make_unique<NGHolder>(proto.kind);

    for (const auto &m : proto.repeats) {
        if (m.first >= CASTLE_MAX_TOPS) {
        if (m.first >= NFA_MAX_TOP_MASKS) {
            DEBUG_PRINTF("top %u too big for an NFA\n", m.first);
            return nullptr;
        }

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -51,6 +51,7 @@ namespace ue2 {

class CharReach;
class NGHolder;
class ReportManager;
struct CompileContext;

/**

@@ -65,7 +66,7 @@ struct CompileContext;
 */
struct CastleProto {
    static constexpr size_t max_occupancy = 65536; // arbitrary limit
    explicit CastleProto(const PureRepeat &pr);
    CastleProto(nfa_kind k, const PureRepeat &pr);
    const CharReach &reach() const;

    /** \brief Add a new repeat. */

@@ -94,6 +95,9 @@ struct CastleProto {
     * so we track this explicitly instead of using repeats.size().
     */
    u32 next_top = 1;

    /** \brief Kind for this engine. */
    nfa_kind kind;
};

std::set<ReportID> all_reports(const CastleProto &proto);

@@ -119,7 +123,7 @@ void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
ue2::aligned_unique_ptr<NFA>
buildCastle(const CastleProto &proto,
            const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
            const CompileContext &cc);
            const CompileContext &cc, const ReportManager &rm);

/**
 * \brief Merge two CastleProto prototypes together, if possible.

@@ -155,7 +159,7 @@ bool requiresDedupe(const CastleProto &proto,
/**
 * \brief Build an NGHolder from a CastleProto.
 */
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, nfa_kind kind,
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle,
                                     const CompileContext &cc);

} // namespace ue2

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -1049,15 +1049,16 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
}

static
void goughCheckEOD(const struct NFA *nfa, u16 s,
char goughCheckEOD(const struct NFA *nfa, u16 s,
                   const struct gough_som_info *som,
                   u64a offset, SomNfaCallback cb, void *ctxt) {
    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
    const struct mstate_aux *aux = get_aux(m, s);

    if (aux->accept_eod) {
        doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL);
    if (!aux->accept_eod) {
        return MO_CONTINUE_MATCHING;
    }
    return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL);
}

char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,

@@ -1065,8 +1066,8 @@ char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
                           UNUSED NfaCallback callback,
                           SomNfaCallback som_callback, void *context) {
    const struct gough_som_info *som = getSomInfoConst(state);
    goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback, context);
    return 0;
    return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback,
                         context);
}

char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,

@@ -1075,8 +1076,8 @@ char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
                            SomNfaCallback som_callback, void *context) {
    assert(ISALIGNED_N(state, 8));
    const struct gough_som_info *som = getSomInfoConst(state);
    goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback, context);
    return 0;
    return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback,
                         context);
}

char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:

@@ -41,8 +41,9 @@
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/verify_types.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"

#include "ue2common.h"

@@ -77,18 +78,20 @@ namespace {

class gough_build_strat : public mcclellan_build_strat {
public:
    gough_build_strat(raw_som_dfa &r, const GoughGraph &g,
    gough_build_strat(
        raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm,
        const map<dstate_id_t, gough_accel_state_info> &accel_info)
        : mcclellan_build_strat(r), rdfa(r), gg(g),
        : mcclellan_build_strat(r, rm), rdfa(r), gg(g),
          accel_gough_info(accel_info) {}
    unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
                                              vector<u32> &reports_eod /* out */,
                                              u8 *isSingleReport /* out */,
                                              ReportID *arbReport /* out */) const override;
    void find_escape_strings(dstate_id_t this_idx,
                             escape_info *out) const override;
    AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
    size_t accelSize(void) const override { return sizeof(gough_accel); }
    void buildAccel(dstate_id_t this_idx, void *accel_out) override;
    void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
                    void *accel_out) override;
    u32 max_allowed_offset_accel() const override { return 0; }

    raw_som_dfa &rdfa;
    const GoughGraph &gg;

@@ -1034,7 +1037,8 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
}

aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
                                     const CompileContext &cc) {
                                     const CompileContext &cc,
                                     const ReportManager &rm) {
    assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
           || !cc.streaming);

@@ -1066,7 +1070,7 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,

    map<dstate_id_t, gough_accel_state_info> accel_allowed;
    find_allowed_accel_states(*cfg, blocks, &accel_allowed);
    gough_build_strat gbs(raw, *cfg, accel_allowed);
    gough_build_strat gbs(raw, *cfg, rm, accel_allowed);
    aligned_unique_ptr<NFA> basic_dfa = mcclellanCompile_i(raw, gbs, cc);
    assert(basic_dfa);
    if (!basic_dfa) {

@@ -1145,32 +1149,44 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
    return gough_dfa;
}

void gough_build_strat::find_escape_strings(dstate_id_t this_idx,
                                            escape_info *out) const {
AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
    AccelScheme rv;
    if (!contains(accel_gough_info, this_idx)) {
        out->outs = CharReach::dot();
        out->outs2_broken = true;
        return;
        rv.cr = CharReach::dot();
        rv.double_byte.clear();
        return rv;
    }

    mcclellan_build_strat::find_escape_strings(this_idx, out);
    rv = mcclellan_build_strat::find_escape_strings(this_idx);

    if (!accel_gough_info.at(this_idx).two_byte) {
        out->outs2_broken = true;
    }
    assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */
    if (rv.offset) {
        rv.cr = CharReach::dot();
        rv.double_byte.clear();
        return rv;
    }

void gough_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
    if (rv.double_offset
        || !accel_gough_info.at(this_idx).two_byte) {
        rv.double_byte.clear();
    }

    return rv;
}
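
[Editor's note, not part of the diff.] The pattern in the gough hunk above, out-parameters replaced by a returned AccelScheme that the subclass then narrows piecewise, is worth isolating. A minimal sketch with a cut-down, made-up AccelScheme and stand-in base/subclass functions:

#include <cstdio>
#include <vector>

struct AccelScheme {
    std::vector<char> double_byte; // double-byte accel stop literals
    unsigned offset = 0;
};

// stand-in for the base strategy's escape-string computation
static AccelScheme base_escape_strings() {
    AccelScheme rv;
    rv.double_byte = {'a', 'b'};
    return rv;
}

// stand-in for the gough override: take the base result, veto the parts
// the SOM machinery cannot support, and pass the rest through
static AccelScheme gough_escape_strings(bool two_byte_ok) {
    AccelScheme rv = base_escape_strings();
    if (!two_byte_ok) {
        rv.double_byte.clear(); // drop double-byte accel, keep the rest
    }
    return rv;
}

int main() {
    AccelScheme s = gough_escape_strings(false);
    std::printf("double-byte literals kept: %zu\n", s.double_byte.size());
    return 0;
}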
|
||||
|
||||
void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||
void *accel_out) {
|
||||
assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
|
||||
gough_accel *accel = (gough_accel *)accel_out;
|
||||
/* build a plain accelaux so we can work out where we can get to */
|
||||
mcclellan_build_strat::buildAccel(this_idx, &accel->accel);
|
||||
mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
|
||||
DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
|
||||
accel->accel.accel_type);
|
||||
if (accel->accel.accel_type == ACCEL_NONE) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!accel->accel.generic.offset);
|
||||
assert(contains(accel_gough_info, this_idx));
|
||||
accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin);
|
||||
built_accel[accel] = this_idx;
|
||||
@ -1182,10 +1198,11 @@ namespace {
|
||||
struct raw_gough_report_list {
|
||||
set<som_report> reports;
|
||||
|
||||
explicit raw_gough_report_list(
|
||||
const vector<pair<ReportID, GoughSSAVar *>> &raw_reports) {
|
||||
raw_gough_report_list(
|
||||
const vector<pair<ReportID, GoughSSAVar *>> &raw_reports,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
for (const auto &m : raw_reports) {
|
||||
ReportID r = m.first;
|
||||
ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first;
|
||||
u32 impl_slot = INVALID_SLOT;
|
||||
if (m.second) {
|
||||
impl_slot = m.second->slot;
|
||||
@ -1214,11 +1231,13 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports(
|
||||
vector<u32> &reports_eod,
|
||||
u8 *isSingleReport,
|
||||
ReportID *arbReport) const {
|
||||
unique_ptr<raw_gough_report_info_impl> ri =
|
||||
ue2::make_unique<raw_gough_report_info_impl>();
|
||||
map<raw_gough_report_list, u32> rev;
|
||||
DEBUG_PRINTF("gathering reports\n");
|
||||
|
||||
const bool remap_reports = has_managed_reports(rdfa.kind);
|
||||
|
||||
auto ri = ue2::make_unique<raw_gough_report_info_impl>();
|
||||
map<raw_gough_report_list, u32> rev;
|
||||
|
||||
assert(!rdfa.states.empty());
|
||||
|
||||
vector<GoughVertex> verts(rdfa.states.size());
|
||||
@ -1237,7 +1256,7 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports(
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_gough_report_list rrl(gg[v].reports);
|
||||
raw_gough_report_list rrl(gg[v].reports, rm, remap_reports);
|
||||
DEBUG_PRINTF("non empty r %zu\n", reports.size());
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports.push_back(rev[rrl]);
|
||||
@ -1256,7 +1275,7 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports(
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("non empty r eod\n");
|
||||
raw_gough_report_list rrl(gg[v].reports_eod);
|
||||
raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports);
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports_eod.push_back(rev[rrl]);
|
||||
continue;
|
||||
|
@ -89,7 +89,8 @@ struct raw_som_dfa : public raw_dfa {
|
||||
};
|
||||
|
||||
aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
const CompileContext &cc);
|
||||
const CompileContext &cc,
|
||||
const ReportManager &rm);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -130,6 +130,9 @@ char repeatIsDead(const struct RepeatInfo *info,
|
||||
return lstate->ctrl.ring.offset == REPEAT_DEAD;
|
||||
case REPEAT_TRAILER:
|
||||
return lstate->ctrl.trailer.offset == REPEAT_DEAD;
|
||||
case REPEAT_ALWAYS:
|
||||
assert(!"REPEAT_ALWAYS should only be used by Castle");
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -65,8 +65,7 @@ extern "C"
|
||||
void *state, u8 key); \
|
||||
char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \
|
||||
size_t buflen, const u8 *hbuf, size_t hlen, \
|
||||
struct hs_scratch *scratch, NfaCallback cb, \
|
||||
void *context); \
|
||||
NfaCallback cb, void *context); \
|
||||
char gf_name##_queueCompressState(const struct NFA *nfa, \
|
||||
const struct mq *q, s64a loc); \
|
||||
char gf_name##_expandState(const struct NFA *nfa, void *dest, \
|
||||
|
@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,6 +38,9 @@
#include "nfa_internal.h"
#include "shufti.h"
#include "truffle.h"
#include "multishufti.h"
#include "multitruffle.h"
#include "multivermicelli.h"
#include "ue2common.h"
#include "vermicelli.h"
#include "util/bitutils.h"
@ -46,74 +49,6 @@
#include "util/simd_utils_ssse3.h"
#include "util/shuffle_ssse3.h"

static
const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
assert(ISALIGNED(aux)); // must be SIMD aligned for shufti
assert(end > ptr);
assert(end - ptr >= 16); // must be at least 16 bytes to scan

const u8 *start = ptr;
u8 offset;
switch (aux->accel_type) {
case ACCEL_VERM:
DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
offset = aux->verm.offset;
ptr = vermicelliExec(aux->verm.c, 0, ptr, end);
break;
case ACCEL_VERM_NOCASE:
DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
offset = aux->verm.offset;
ptr = vermicelliExec(aux->verm.c, 1, ptr, end);
break;
case ACCEL_DVERM:
DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
offset = aux->dverm.offset;
ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
break;
case ACCEL_DVERM_NOCASE:
DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
offset = aux->dverm.offset;
ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
1, ptr, end);
break;
case ACCEL_SHUFTI:
DEBUG_PRINTF("single shufti\n");
offset = aux->shufti.offset;
ptr = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
break;
case ACCEL_DSHUFTI:
DEBUG_PRINTF("double shufti\n");
offset = aux->dshufti.offset;
ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1,
aux->dshufti.lo2, aux->dshufti.hi2, ptr, end);
break;
case ACCEL_TRUFFLE:
DEBUG_PRINTF("truffle shuffle\n");
offset = aux->truffle.offset;
ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
break;
case ACCEL_RED_TAPE:
ptr = end; /* there is no escape */
offset = aux->generic.offset;
break;
default:
/* no acceleration, fall through and return current ptr */
offset = 0;
break;
}

if (offset) {
ptr -= offset;
if (ptr < start) {
return start;
}
}

return ptr;
}

static really_inline
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
const u8 *input, u32 idx, size_t i, size_t end) {
@ -134,7 +69,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
}

aux = aux + aux_idx;
const u8 *ptr = accelScan(aux, &input[i], &input[end]);
const u8 *ptr = run_accel(aux, &input[i], &input[end]);
assert(ptr >= &input[i]);
size_t j = (size_t)(ptr - input);
DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i));

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -80,9 +80,11 @@ struct precalcAccel {
CharReach double_cr;
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
u32 double_offset;

MultibyteAccelInfo ma_info;
};

struct meteor_accel_info {
struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex> > friends;
@ -162,7 +164,7 @@ struct build_info {
bool stateCompression;
const CompileContext &cc;
u32 num_states;
meteor_accel_info accel;
limex_accel_info accel;
};

// Constants for scoring mechanism
@ -334,12 +336,16 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
}

struct AccelBuild {
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0) {}
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v;
u32 state;
u32 offset; // offset correction to apply
CharReach stop1; // single-byte accel stop literals
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
u32 ma_len1; // multiaccel len1
u32 ma_len2; // multiaccel len2
MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
};

static
@ -354,7 +360,12 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
build.stop1 = CharReach::dot();
} else {
const precalcAccel &precalc = bi.accel.precalc.at(ss);
if (precalc.double_lits.empty()) {
unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
if (ma_len >= MULTIACCEL_MIN_LEN) {
build.ma_len1 = precalc.ma_info.len1;
build.stop1 = precalc.ma_info.cr;
build.offset = precalc.ma_info.offset;
} else if (precalc.double_lits.empty()) {
build.stop1 = precalc.single_cr;
build.offset = precalc.single_offset;
} else {
@ -534,7 +545,7 @@ void filterAccelStates(NGHolder &g, const map<u32, NFAVertex> &tops,
}

static
bool containsBadSubset(const meteor_accel_info &accel,
bool containsBadSubset(const limex_accel_info &accel,
const NFAStateSet &state_set, const u32 effective_sds) {
NFAStateSet subset(state_set.size());
for (size_t j = state_set.find_first(); j != state_set.npos;
@ -555,11 +566,29 @@ bool containsBadSubset(const meteor_accel_info &accel,
}

static
void doAccelCommon(NGHolder &g,
ue2::unordered_map<NFAVertex, AccelScheme> &accel_map,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const u32 num_states, meteor_accel_info *accel) {
bool is_too_wide(const AccelScheme &as) {
return as.cr.count() > MAX_MERGED_ACCEL_STOPS;
}

static
void fillAccelInfo(build_info &bi) {
if (!bi.do_accel) {
return;
}

NGHolder &g = bi.h;
limex_accel_info &accel = bi.accel;
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
const CompileContext &cc = bi.cc;
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
const u32 num_states = bi.num_states;

nfaFindAccelSchemes(g, br_cyclic, &accel_map);
filterAccelStates(g, bi.tops, &accel_map);

assert(accel_map.size() <= NFA_MAX_ACCEL_STATES);

vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);

vector<NFAVertex> astates;
@ -590,7 +619,7 @@ void doAccelCommon(NGHolder &g,
}
}

if (containsBadSubset(*accel, state_set, effective_sds)) {
if (containsBadSubset(accel, state_set, effective_sds)) {
DEBUG_PRINTF("accel %u has bad subset\n", i);
continue; /* if a subset failed to build we would too */
}
@ -598,30 +627,37 @@ void doAccelCommon(NGHolder &g,
const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);

AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
allow_wide);
if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) {
allow_wide, true);
if (is_too_wide(as)) {
DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
as.cr.count(), MAX_MERGED_ACCEL_STOPS);
continue;
}

DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset);
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
as.double_offset);

precalcAccel &pa = accel->precalc[state_set];
pa.single_offset = as.offset;
pa.single_cr = as.cr;
// try multibyte acceleration first
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);

precalcAccel &pa = accel.precalc[state_set];
useful |= state_set;

if (states.size() == 1) {
DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front());
if (pa.single_cr.count() > b.stop1.count()) {
/* insert this information into the precalc accel info as it is
* better than the single scheme */
pa.double_offset = b.offset;
pa.double_lits = b.stop2;
pa.double_cr = b.stop1;
}
// if we successfully built a multibyte accel scheme, use that
if (mai.type != MultibyteAccelInfo::MAT_NONE) {
pa.ma_info = mai;

DEBUG_PRINTF("multibyte acceleration!\n");
continue;
}

pa.single_offset = as.offset;
pa.single_cr = as.cr;
if (as.double_byte.size() != 0) {
pa.double_offset = as.double_offset;
pa.double_lits = as.double_byte;
pa.double_cr = as.double_cr;
};
}

for (const auto &m : accel_map) {
@ -638,29 +674,20 @@ void doAccelCommon(NGHolder &g,
state_set.reset();
state_set.set(state_id);

auto p_it = accel->precalc.find(state_set);
if (p_it != accel->precalc.end()) {
bool is_multi = false;
auto p_it = accel.precalc.find(state_set);
if (p_it != accel.precalc.end()) {
const precalcAccel &pa = p_it->second;
offset = max(pa.double_offset, pa.single_offset);
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
assert(offset <= MAX_ACCEL_DEPTH);
}

accel->accelerable.insert(v);
findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]);
accel.accelerable.insert(v);
if (!is_multi) {
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
}
}

static
void fillAccelInfo(build_info &bi) {
if (!bi.do_accel) {
return;
}

nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);
filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map);
assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES);
doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic,
bi.num_states, &bi.accel);
}

/** The AccelAux structure has large alignment specified, and this makes some
@ -672,7 +699,7 @@ static
void buildAccel(const build_info &args, NFAStateSet &accelMask,
NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
vector<u8> &accelTable) {
const meteor_accel_info &accel = args.accel;
const limex_accel_info &accel = args.accel;

// Init, all zeroes.
accelMask.resize(args.num_states);
@ -737,9 +764,17 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,

if (contains(accel.precalc, states)) {
const precalcAccel &precalc = accel.precalc.at(states);
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
ainfo.ma_len1 = precalc.ma_info.len1;
ainfo.ma_len2 = precalc.ma_info.len2;
ainfo.multiaccel_offset = precalc.ma_info.offset;
ainfo.multiaccel_stops = precalc.ma_info.cr;
ainfo.ma_type = precalc.ma_info.type;
} else {
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
}
}

buildAccelAux(ainfo, &aux);

@ -2152,7 +2187,7 @@ u32 countAccelStates(NGHolder &h,

if (!cc.grey.allowLimExNFA) {
DEBUG_PRINTF("limex not allowed\n");
return NFA_MAX_ACCEL_STATES + 1;
return 0;
}

// Sanity check the input data.
@ -2166,11 +2201,11 @@ u32 countAccelStates(NGHolder &h,
do_accel, state_compression, cc, num_states);

// Acceleration analysis.
fillAccelInfo(bi);
nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);

u32 num_accel = verify_u32(bi.accel.accelerable.size());
u32 num_accel = verify_u32(bi.accel.accel_map.size());
DEBUG_PRINTF("found %u accel states\n", num_accel);
return min(num_accel, (u32)NFA_MAX_ACCEL_STATES);
return num_accel;
}

} // namespace ue2

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -79,11 +79,10 @@ aligned_unique_ptr<NFA> generate(NGHolder &g,
const CompileContext &cc);

/**
* \brief For a given graph, count the number of accel states it will have in
* an implementation.
* \brief For a given graph, count the number of accelerable states it has.
*
* \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
* implementation would not be constructible.
* Note that this number may be greater than the number that are actually
* implementable.
*/
u32 countAccelStates(NGHolder &h,
const ue2::unordered_map<NFAVertex, u32> &states,

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -42,7 +42,6 @@
#include "limex_internal.h"
#include "nfa_api_util.h"
#include "nfa_internal.h"
#include "scratch.h"
#include "util/uniform_ops.h"

////////////////////////////////////////////////////////////////////////////

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -176,8 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
const u32 *exceptionMap = limex->exceptionMap;
assert(ISALIGNED_CL(ctx));
assert(ISALIGNED_CL(&ctx->s));
STATE_T s = LOAD_STATE(&ctx->s);

/* assert(ISALIGNED_16(exceptions)); */
@ -533,17 +531,17 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {

assert(q->cur + 1 < q->end); /* require at least two items */

struct CONTEXT_T *ctx = q->scratch->nfaContext;
assert(ISALIGNED_CL(ctx));
ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx->repeat_state = q->streamState + limex->stateSize;
ctx->callback = q->cb;
ctx->context = q->context;
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_br = 0;

assert(q->items[q->cur].location >= 0);
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
assert(q->items[q->cur].type == MQE_START);

u64a offset = q->offset;
@ -565,7 +563,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
/* do main buffer region */
DEBUG_PRINTF("MAIN BUFFER SCAN\n");
assert(ep - offset <= q->length);
if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp)
if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp)
== MO_HALT_MATCHING) {
STORE_STATE(q->state, ZERO_STATE);
return 0;
@ -584,19 +582,19 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
return MO_ALIVE;
}

JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp);
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);

q->cur++;
}

EXPIRE_ESTATE_FN(limex, ctx, sp);
EXPIRE_ESTATE_FN(limex, &ctx, sp);

DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));

if (q->cur != q->end) {
q->cur--;
@ -605,7 +603,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}

return ISNONZERO_STATE(LOAD_STATE(&ctx->s));
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
}

/* used by suffix execution in Rose */
@ -628,16 +626,16 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {

assert(q->cur + 1 < q->end); /* require at least two items */

struct CONTEXT_T *ctx = q->scratch->nfaContext;
assert(ISALIGNED_CL(ctx));
ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx->repeat_state = q->streamState + limex->stateSize;
ctx->callback = q->cb;
ctx->context = q->context;
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_br = 0;

DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
assert(q->items[q->cur].type == MQE_START);

u64a offset = q->offset;
@ -661,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
/* do main buffer region */
u64a final_look = 0;
assert(ep - offset <= q->length);
if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp,
if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp,
&final_look) == MO_HALT_MATCHING) {
DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n",
final_look, sp, end_abs, offset);
@ -669,7 +667,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
return MO_MATCHES_PENDING;
}

@ -685,19 +683,19 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
return MO_ALIVE;
}

JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp);
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);

q->cur++;
}

EXPIRE_ESTATE_FN(limex, ctx, sp);
EXPIRE_ESTATE_FN(limex, &ctx, sp);

DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));

if (q->cur != q->end) {
q->cur--;
@ -706,7 +704,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}

return ISNONZERO_STATE(LOAD_STATE(&ctx->s));
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
}

// Used for execution Rose prefix/infixes.
@ -720,15 +718,16 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,

assert(q->cur + 1 < q->end); /* require at least two items */

struct CONTEXT_T *ctx = q->scratch->nfaContext;
ctx->repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx->repeat_state = q->streamState + limex->stateSize;
ctx->callback = NULL;
ctx->context = NULL;
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = NULL;
ctx.context = NULL;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_br = 0;

DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx->s, LOAD_STATE(q->state));
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
assert(q->items[q->cur].type == MQE_START);

u64a offset = q->offset;
@ -740,7 +739,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
if (n->maxWidth) {
if (ep - sp > n->maxWidth) {
sp = ep - n->maxWidth;
STORE_STATE(&ctx->s, INITIAL_FN(limex, !!sp));
STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp));
}
}
assert(ep >= sp);
@ -751,7 +750,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
u64a local_ep = MIN(offset, ep);
/* we are starting inside the history buffer */
STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset,
local_ep - sp, ctx, sp);
local_ep - sp, &ctx, sp);

sp = local_ep;
}
@ -763,30 +762,30 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
/* do main buffer region */
DEBUG_PRINTF("MAIN BUFFER SCAN\n");
assert(ep - offset <= q->length);
STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, ctx, sp);
STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp);

DEBUG_PRINTF("SCAN DONE\n");
scan_done:
sp = ep;

JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, ctx, sp);
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);

q->cur++;
}

EXPIRE_ESTATE_FN(limex, ctx, sp);
EXPIRE_ESTATE_FN(limex, &ctx, sp);

DEBUG_PRINTF("END, nfa is %s\n",
ISNONZERO_STATE(ctx->s) ? "still alive" : "dead");
ISNONZERO_STATE(ctx.s) ? "still alive" : "dead");

STORE_STATE(q->state, LOAD_STATE(&ctx->s));
STORE_STATE(q->state, LOAD_STATE(&ctx.s));

if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx->s), ctx->repeat_ctrl,
ctx->repeat_state, sp + 1, report)) {
if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl,
ctx.repeat_state, sp + 1, report)) {
return MO_MATCHES_PENDING;
}

return ISNONZERO_STATE(LOAD_STATE(&ctx->s));
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
}

char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
@ -815,40 +814,38 @@ char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
const u8 *buf, size_t buflen,
const u8 *hbuf, size_t hlen,
struct hs_scratch *scratch,
NfaCallback cb, void *context) {
assert(buf || hbuf);
assert(buflen || hlen);

/* This may be called INSIDE another NFA, so we need a separate
* context --> Hence the nfaContextSom */
struct CONTEXT_T *ctx = scratch->nfaContextSom;
ctx->repeat_ctrl = NULL;
ctx->repeat_state = NULL;
ctx->callback = cb;
ctx->context = context;
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
struct CONTEXT_T ctx;
ctx.repeat_ctrl = NULL;
ctx.repeat_state = NULL;
ctx.callback = cb;
ctx.context = context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_br = 0;

const IMPL_NFA_T *limex = getImplNfa(n);
STORE_STATE(&ctx->s, INITIAL_FN(limex, 0)); // always anchored
STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored

// 'buf' may be null, for example when we're scanning at EOD time.
if (buflen) {
assert(buf);
DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen);
offset -= buflen;
REV_STREAM_FN(limex, buf, buflen, ctx, offset);
REV_STREAM_FN(limex, buf, buflen, &ctx, offset);
}

if (hlen) {
assert(hbuf);
DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen);
offset -= hlen;
REV_STREAM_FN(limex, hbuf, hlen, ctx, offset);
REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset);
}

if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx->s))) {
TESTEOD_REV_FN(limex, &ctx->s, offset, cb, context);
if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) {
TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context);
}

// NOTE: return value is unused.

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -445,14 +445,15 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf,
}

static really_inline
void mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset,
char mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset,
NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
const struct mstate_aux *aux = get_aux(m, s);

if (aux->accept_eod) {
doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL);
if (!aux->accept_eod) {
return MO_CONTINUE_MATCHING;
}
return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL);
}

static really_inline
@ -1019,42 +1020,44 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
if (top) {
*(u8 *)state = m->start_anchored;
}

u8 s = top ? m->start_anchored : *(u8 *)state;

if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec8_i(m, (u8 *)state, buf + start_off, len - start_off,
mcclellanExec8_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
} else {
mcclellanExec8_i(m, (u8 *)state, buf + start_off, len - start_off,
mcclellanExec8_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
}

*(u8 *)state = s;
}

void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
if (top) {
*(u16 *)state = m->start_anchored;
}

u16 s = top ? m->start_anchored : unaligned_load_u16(state);

if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec16_i(m, (u16 *)state, buf + start_off, len - start_off,
mcclellanExec16_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
} else {
mcclellanExec16_i(m, (u16 *)state, buf + start_off, len - start_off,
mcclellanExec16_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
}

unaligned_store_u16(state, s);
}

char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, context);
return 0;
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
context);
}

char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
@ -1062,8 +1065,8 @@ char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
assert(ISALIGNED_N(state, 2));
mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, context);
return 0;
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
context);
}

char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -29,8 +29,11 @@
#include "mcclellancompile.h"

#include "accel.h"
#include "accelcompile.h"
#include "grey.h"
#include "mcclellan_internal.h"
#include "mcclellancompile_accel.h"
#include "mcclellancompile_util.h"
#include "nfa_internal.h"
#include "shufticompile.h"
#include "trufflecompile.h"
@ -43,6 +46,7 @@
#include "util/container.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/unaligned.h"
#include "util/verify_types.h"
@ -56,25 +60,18 @@
#include <set>
#include <vector>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_keys;

namespace ue2 {

/* compile time accel defs */
#define ACCEL_MAX_STOP_CHAR 160 /* larger than nfa, as we don't have a budget
and the nfa cheats on stop characters for
sets of states */
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */


namespace /* anon */ {

struct dstate_extra {
u16 daddytaken;
bool shermanState;
bool accelerable;
dstate_extra(void) : daddytaken(0), shermanState(false),
accelerable(false) {}
u16 daddytaken = 0;
bool shermanState = false;
};

struct dfa_info {
@ -105,10 +102,6 @@ struct dfa_info {
return extra[raw_id].shermanState;
}

bool is_accel(dstate_id_t raw_id) const {
return extra[raw_id].accelerable;
}

size_t size(void) const { return states.size(); }
};

@ -135,6 +128,13 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
return aux;
}

static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}

static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
assert((size_t)succ_table % 2 == 0);
@ -186,75 +186,45 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
}
}

void mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx,
escape_info *out) const {
const dstate &raw = rdfa.states[this_idx];
const auto &alpha_remap = rdfa.alpha_remap;

flat_set<pair<u8, u8>> outs2_local;
for (unsigned i = 0; i < N_CHARS; i++) {
outs2_local.clear();

if (raw.next[alpha_remap[i]] != this_idx) {
out->outs.set(i);

DEBUG_PRINTF("next is %hu\n", raw.next[alpha_remap[i]]);
const dstate &raw_next = rdfa.states[raw.next[alpha_remap[i]]];

if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
DEBUG_PRINTF("leads to report\n");
out->outs2_broken = true; /* cannot accelerate over reports */
u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH;
}

for (unsigned j = 0; !out->outs2_broken && j < N_CHARS; j++) {
if (raw_next.next[alpha_remap[j]] == raw.next[alpha_remap[j]]) {
continue;
}

DEBUG_PRINTF("adding %02x %02x -> %hu to 2 \n", i, j,
raw_next.next[alpha_remap[j]]);
outs2_local.emplace((u8)i, (u8)j);
}

if (outs2_local.size() > 8) {
DEBUG_PRINTF("adding %02x to outs2_single\n", i);
out->outs2_single.set(i);
} else {
insert(&out->outs2, outs2_local);
}
if (out->outs2.size() > 8) {
DEBUG_PRINTF("outs2 too big\n");
out->outs2_broken = true;
}
}
}
AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
const {
return find_mcclellan_escape_info(rdfa, this_idx,
max_allowed_offset_accel());
}

/** builds acceleration schemes for states */
void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
const AccelScheme &info,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;
escape_info out;

find_escape_strings(this_idx, &out);
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.double_offset);
accel->generic.offset = verify_u8(info.offset);

if (!out.outs2_broken && out.outs2_single.none()
&& out.outs2.size() == 1) {
if (double_byte_ok(info) && info.double_cr.none()
&& info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = out.outs2.begin()->first;
accel->dverm.c2 = out.outs2.begin()->second;
accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}

if (!out.outs2_broken && out.outs2_single.none()
&& (out.outs2.size() == 2 || out.outs2.size() == 4)) {
if (double_byte_ok(info) && info.double_cr.none()
&& (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true;

assert(!out.outs2.empty());
u8 firstC = out.outs2.begin()->first & CASE_CLEAR;
u8 secondC = out.outs2.begin()->second & CASE_CLEAR;
assert(!info.double_byte.empty());
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

for (const pair<u8, u8> &p : out.outs2) {
for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC
|| (p.second & CASE_CLEAR) != secondC) {
ok = false;
@ -266,185 +236,76 @@ void mcclellan_build_strat::buildAccel(dstate_id_t this_idx, void *accel_out) {
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}

u8 m1;
u8 m2;
if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1;
accel->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
accel->dverm.c1, accel->dverm.c2);
return;
}
}

if (!out.outs2_broken &&
(out.outs2_single.count() + out.outs2.size()) <= 8 &&
out.outs2_single.count() < out.outs2.size() &&
out.outs2_single.count() <= 2 && !out.outs2.empty()) {
if (double_byte_ok(info)
&& shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
shuftiBuildDoubleMasks(out.outs2_single, out.outs2,
&accel->dshufti.lo1,
&accel->dshufti.hi1,
&accel->dshufti.lo2,
&accel->dshufti.hi2);
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}

if (out.outs.none()) {
if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n", this_idx);
return;
}

if (out.outs.count() == 1) {
if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM;
accel->verm.c = out.outs.find_first();
accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return;
}

if (out.outs.count() == 2 && out.outs.isCaselessChar()) {
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = out.outs.find_first() & CASE_CLEAR;
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return;
}

if (out.outs.count() > ACCEL_MAX_FLOATING_STOP_CHAR) {
if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return;
}

accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(out.outs, &accel->shufti.lo,
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
&accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}

assert(!out.outs.none());
assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(out.outs, &accel->truffle.mask1, &accel->truffle.mask2);
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}

static
bool is_accel(const raw_dfa &raw, dstate_id_t sds_or_proxy,
dstate_id_t this_idx) {
if (!this_idx /* dead state is not accelerable */) {
return false;
}

/* Note on report acceleration states: While we can't accelerate while we
* are spamming out callbacks, the QR code paths don't raise reports
* during scanning so they can accelerate report states. */

if (generates_callbacks(raw.kind)
&& !raw.states[this_idx].reports.empty()) {
return false;
}

size_t single_limit = this_idx == sds_or_proxy ?
ACCEL_MAX_FLOATING_STOP_CHAR : ACCEL_MAX_STOP_CHAR;
DEBUG_PRINTF("inspecting %hu/%hu: %zu\n", this_idx, sds_or_proxy,
single_limit);

CharReach out;
for (u32 i = 0; i < N_CHARS; i++) {
if (raw.states[this_idx].next[raw.alpha_remap[i]] != this_idx) {
out.set(i);
}
}

if (out.count() <= single_limit) {
DEBUG_PRINTF("state %hu should be accelerable %zu\n", this_idx,
out.count());
return true;
}

DEBUG_PRINTF("state %hu is not accelerable has %zu\n", this_idx,
out.count());

return false;
}

static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
if (i != top_remap && raw.states[s].next[i] == s) {
return true;
}
}
return false;
}

static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
if (raw.start_floating != DEAD_STATE) {
DEBUG_PRINTF("has floating start\n");
return raw.start_floating;
}

DEBUG_PRINTF("looking for SDS proxy\n");

dstate_id_t s = raw.start_anchored;

if (has_self_loop(s, raw)) {
return s;
}

u16 top_remap = raw.alpha_remap[TOP];

ue2::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);

/* check if we are connected to a state with a self loop */
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t t = raw.states[s].next[i];
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
return t;
}
}

/* find a neighbour to use as a basis for looking for the sds proxy */
dstate_id_t t = DEAD_STATE;
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t tt = raw.states[s].next[i];
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
t = tt;
break;
}
}

if (t == DEAD_STATE) {
/* we were unable to find a state to use as a SDS proxy */
return DEAD_STATE;
}

s = t;
seen.insert(t);
}
}

static
void populateAccelerationInfo(dfa_info &info, u32 *ac, const Grey &grey) {
*ac = 0; /* number of accelerable states */

if (!grey.accelerateDFA) {
return;
}

dstate_id_t sds_proxy = get_sds_or_proxy(info.raw);
DEBUG_PRINTF("sds %hu\n", sds_proxy);

for (size_t i = 0; i < info.size(); i++) {
if (is_accel(info.raw, sds_proxy, i)) {
++*ac;
info.extra[i].accelerable = true;
}
}
}

static
void populateBasicInfo(size_t state_size, const dfa_info &info,
u32 total_size, u32 aux_offset, u32 accel_offset,
@ -496,8 +357,16 @@ namespace {
struct raw_report_list {
flat_set<ReportID> reports;

explicit raw_report_list(const flat_set<ReportID> &reports_in)
: reports(reports_in) {}
raw_report_list(const flat_set<ReportID> &reports_in,
const ReportManager &rm, bool do_remap) {
if (do_remap) {
for (auto &id : reports_in) {
reports.insert(rm.getProgramOffset(id));
}
} else {
reports = reports_in;
}
}

bool operator<(const raw_report_list &b) const {
return reports < b.reports;
@ -520,6 +389,8 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
ReportID *arbReport) const {
DEBUG_PRINTF("gathering reports\n");

const bool remap_reports = has_managed_reports(rdfa.kind);

auto ri = ue2::make_unique<raw_report_info_impl>();
map<raw_report_list, u32> rev;

@ -529,7 +400,7 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
continue;
}

raw_report_list rrl(s.reports);
raw_report_list rrl(s.reports, rm, remap_reports);
DEBUG_PRINTF("non empty r\n");
if (rev.find(rrl) != rev.end()) {
reports.push_back(rev[rrl]);
@ -548,7 +419,7 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
}

DEBUG_PRINTF("non empty r eod\n");
raw_report_list rrl(s.reports_eod);
raw_report_list rrl(s.reports_eod, rm, remap_reports);
if (rev.find(rrl) != rev.end()) {
reports_eod.push_back(rev[rrl]);
continue;
@ -625,6 +496,14 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
}
}

static
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
set<dstate_id_t> *accel_states) {
for (dstate_id_t i : accel_escape_info | map_keys) {
accel_states->insert(i);
}
}

static
size_t calcShermanRegionSize(const dfa_info &info) {
size_t rv = 0;
@ -692,14 +571,14 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {

static
aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
const CompileContext &cc) {
const CompileContext &cc,
set<dstate_id_t> *accel_states) {
DEBUG_PRINTF("building mcclellan 16\n");

vector<u32> reports; /* index in ri for the appropriate report list */
vector<u32> reports_eod; /* as above */
ReportID arb;
u8 single;
u32 accelCount;

u8 alphaShift = info.getAlphaShift();
assert(alphaShift <= 8);
@ -711,9 +590,9 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
return nullptr;
}

unique_ptr<raw_report_info> ri
= info.strat.gatherReports(reports, reports_eod, &single, &arb);
populateAccelerationInfo(info, &accelCount, cc.grey);
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);

size_t tran_size = (1 << info.getAlphaShift())
* sizeof(u16) * count_real_states;
@ -721,7 +600,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
size_t aux_size = sizeof(mstate_aux) * info.size();

size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
size_t accel_size = info.strat.accelSize() * accelCount;
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ ri->getReportListSize(), 32);
size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
@ -736,7 +615,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
char *nfa_base = (char *)nfa.get();

populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
accelCount, arb, single, nfa.get());
accel_escape_info.size(), arb, single, nfa.get());

vector<u32> reportOffsets;

@ -769,12 +648,12 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,

fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);

if (info.is_accel(i)) {
if (contains(accel_escape_info, i)) {
this_aux->accel_offset = accel_offset;
accel_offset += info.strat.accelSize();
assert(accel_offset + sizeof(NFA) <= sherman_offset);
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
info.strat.buildAccel(i,
info.strat.buildAccel(i, accel_escape_info.at(i),
(void *)((char *)m + this_aux->accel_offset));
}
}
@ -798,12 +677,12 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,

fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);

if (info.is_accel(i)) {
if (contains(accel_escape_info, i)) {
this_aux->accel_offset = accel_offset;
accel_offset += info.strat.accelSize();
assert(accel_offset + sizeof(NFA) <= sherman_offset);
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
info.strat.buildAccel(i,
info.strat.buildAccel(i, accel_escape_info.at(i),
(void *)((char *)m + this_aux->accel_offset));
}

@ -836,6 +715,10 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,

markEdges(nfa.get(), succ_table, info);

if (accel_states && nfa) {
fillAccelOut(accel_escape_info, accel_states);
}

return nfa;
}

@ -874,7 +757,9 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
}

static
void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {
void allocateFSN8(dfa_info &info,
const map<dstate_id_t, AccelScheme> &accel_escape_info,
u16 *accel_limit, u16 *accept_limit) {
info.states[0].impl_id = 0; /* dead is always 0 */

vector<dstate_id_t> norm;
@ -886,7 +771,7 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {
for (u32 i = 1; i < info.size(); i++) {
if (!info.states[i].reports.empty()) {
accept.push_back(i);
} else if (info.is_accel(i)) {
} else if (contains(accel_escape_info, i)) {
accel.push_back(i);
} else {
norm.push_back(i);
@ -915,23 +800,23 @@ void allocateFSN8(dfa_info &info, u16 *accel_limit, u16 *accept_limit) {

static
aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
const CompileContext &cc) {
const CompileContext &cc,
set<dstate_id_t> *accel_states) {
DEBUG_PRINTF("building mcclellan 8\n");

vector<u32> reports;
vector<u32> reports_eod;
ReportID arb;
u8 single;
u32 accelCount;

unique_ptr<raw_report_info> ri
= info.strat.gatherReports(reports, reports_eod, &single, &arb);
populateAccelerationInfo(info, &accelCount, cc.grey);
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);

size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
size_t aux_size = sizeof(mstate_aux) * info.size();
size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
size_t accel_size = info.strat.accelSize() * accelCount;
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ ri->getReportListSize(), 32);
size_t total_size = accel_offset + accel_size;
@ -951,9 +836,9 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,

mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());

allocateFSN8(info, &m->accel_limit_8, &m->accept_limit_8);
allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8);
populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
accelCount, arb, single, nfa.get());
accel_escape_info.size(), arb, single, nfa.get());

vector<u32> reportOffsets;

@ -964,13 +849,14 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);

for (size_t i = 0; i < info.size(); i++) {
if (info.is_accel(i)) {
if (contains(accel_escape_info, i)) {
u32 j = info.implId(i);

aux[j].accel_offset = accel_offset;
accel_offset += info.strat.accelSize();

info.strat.buildAccel(i, (void *)((char *)m + aux[j].accel_offset));
info.strat.buildAccel(i, accel_escape_info.at(i),
(void *)((char *)m + aux[j].accel_offset));
}

fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
@ -981,6 +867,10 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,

DEBUG_PRINTF("rl size %zu\n", ri->size());

if (accel_states && nfa) {
fillAccelOut(accel_escape_info, accel_states);
}

return nfa;
}

@ -1163,15 +1053,6 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
return false;
}

static
void fillAccelOut(const dfa_info &info, set<dstate_id_t> *accel_states) {
for (size_t i = 0; i < info.size(); i++) {
if (info.is_accel(i)) {
accel_states->insert(i);
}
}
}

aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
const CompileContext &cc,
set<dstate_id_t> *accel_states) {
@ -1200,26 +1081,23 @@ aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,

aligned_unique_ptr<NFA> nfa;
if (!using8bit) {
nfa = mcclellanCompile16(info, cc);
nfa = mcclellanCompile16(info, cc, accel_states);
} else {
nfa = mcclellanCompile8(info, cc);
nfa = mcclellanCompile8(info, cc, accel_states);
}

if (has_eod_reports) {
nfa->flags |= NFA_ACCEPTS_EOD;
}

if (accel_states && nfa) {
fillAccelOut(info, accel_states);
}

DEBUG_PRINTF("compile done\n");
return nfa;
}

aligned_unique_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm,
set<dstate_id_t> *accel_states) {
mcclellan_build_strat mbs(raw);
mcclellan_build_strat mbs(raw, rm);
return mcclellanCompile_i(raw, mbs, cc, accel_states);
}

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,6 +31,7 @@

#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
@ -43,6 +44,7 @@ struct NFA;

namespace ue2 {

class ReportManager;
struct CompileContext;

struct raw_report_info {
@ -54,15 +56,9 @@ struct raw_report_info {
std::vector<u32> &ro /* out */) const = 0;
};

struct escape_info {
CharReach outs;
CharReach outs2_single;
flat_set<std::pair<u8, u8>> outs2;
bool outs2_broken = false;
};

class dfa_build_strat {
public:
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
virtual ~dfa_build_strat();
virtual raw_dfa &get_raw() const = 0;
virtual std::unique_ptr<raw_report_info> gatherReports(
@ -70,25 +66,29 @@ public:
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0;
virtual void find_escape_strings(dstate_id_t this_idx,
escape_info *out) const = 0;
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
virtual size_t accelSize(void) const = 0;
virtual void buildAccel(dstate_id_t this_idx, void *accel_out) = 0;
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) = 0;
protected:
const ReportManager &rm;
};

class mcclellan_build_strat : public dfa_build_strat {
public:
explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {}
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
: dfa_build_strat(rm_in), rdfa(rdfa_in) {}
raw_dfa &get_raw() const override { return rdfa; }
std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override;
void find_escape_strings(dstate_id_t this_idx,
escape_info *out) const override;
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override;
void buildAccel(dstate_id_t this_idx, void *accel_out) override;
void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) override;
virtual u32 max_allowed_offset_accel() const;

private:
raw_dfa &rdfa;
@ -98,6 +98,7 @@ private:
* states */
ue2::aligned_unique_ptr<NFA>
mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm,
std::set<dstate_id_t> *accel_states = nullptr);

/* used internally by mcclellan/haig/gough compile process */

422 src/nfa/mcclellancompile_accel.cpp Normal file
@ -0,0 +1,422 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mcclellancompile_accel.h"

#include "mcclellancompile_util.h"

#include "grey.h"
#include "nfagraph/ng_limex_accel.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/dump_charclass.h"

#include <vector>
#include <sstream>

#define PATHS_LIMIT 500

using namespace std;

namespace ue2 {

namespace {

struct path {
    vector<CharReach> reach;
    dstate_id_t dest = DEAD_STATE;
    explicit path(dstate_id_t base) : dest(base) {}
};

}

static UNUSED
string describeClasses(const vector<CharReach> &v) {
    std::ostringstream oss;
    for (const auto &cr : v) {
        describeClass(oss, cr);
    }
    return oss.str();
}

static
void dump_paths(const vector<path> &paths) {
    for (UNUSED const auto &p : paths) {
        DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
    }
    DEBUG_PRINTF("%zu paths\n", paths.size());
}

static
bool is_useful_path(const vector<path> &good, const path &p) {
    for (const auto &g : good) {
        assert(g.dest == p.dest);
        assert(g.reach.size() <= p.reach.size());
        auto git = g.reach.rbegin();
        auto pit = p.reach.rbegin();

        for (; git != g.reach.rend(); ++git, ++pit) {
            if (!pit->isSubsetOf(*git)) {
                goto next;
            }
        }
        DEBUG_PRINTF("better: [%s] -> %u\n",
                     describeClasses(g.reach).c_str(), g.dest);

        return false;
    next:;
    }

    return true;
}

static
path append(const path &orig, const CharReach &cr, u32 new_dest) {
    path p(new_dest);
    p.reach = orig.reach;
    p.reach.push_back(cr);

    return p;
}

static
void extend(const raw_dfa &rdfa, const path &p,
            map<u32, vector<path> > &all,
            vector<path> &out) {
    dstate s = rdfa.states[p.dest];

    if (!p.reach.empty() && p.reach.back().none()) {
        out.push_back(p);
        return;
    }

    if (!s.reports.empty()) {
        if (generates_callbacks(rdfa.kind)) {
            out.push_back(p);
            return;
        } else {
            path pp = append(p, CharReach(), p.dest);
            all[p.dest].push_back(pp);
            out.push_back(pp);
        }
    }

    if (!s.reports_eod.empty()) {
        path pp = append(p, CharReach(), p.dest);
        all[p.dest].push_back(pp);
        out.push_back(pp);
    }

    map<u32, CharReach> dest;
    for (unsigned i = 0; i < N_CHARS; i++) {
        u32 succ = s.next[rdfa.alpha_remap[i]];
        dest[succ].set(i);
    }

    for (const auto &e : dest) {
        path pp = append(p, e.second, e.first);
        if (!is_useful_path(all[e.first], pp)) {
            DEBUG_PRINTF("not useful: [%s] -> %u\n",
                         describeClasses(pp.reach).c_str(), pp.dest);
            continue;
        }

        DEBUG_PRINTF("----good: [%s] -> %u\n",
                     describeClasses(pp.reach).c_str(), pp.dest);
        all[e.first].push_back(pp);
        out.push_back(pp);
    }
}

static
vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
                                          u32 len) {
    vector<path> paths{ path(base) };
    map<u32, vector<path> > all;
    all[base].push_back(path(base));
    for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
        vector<path> next_gen;
        for (const auto &p : paths) {
            extend(rdfa, p, all, next_gen);
        }

        paths = move(next_gen);
    }

    dump_paths(paths);

    vector<vector<CharReach> > rv;
    for (auto &p : paths) {
        rv.push_back(move(p.reach));
    }
    return rv;
}

static
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
                                  u32 max_allowed_accel_offset) {
    DEBUG_PRINTF("looking for accel for %hu\n", base);
    vector<vector<CharReach> > paths = generate_paths(rdfa, base,
                                                      max_allowed_accel_offset + 1);
    AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
    DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
    return as;
}

static
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
                                 const CharReach &escape) {
    set<u16> rv;
    CharReach nonexit = ~escape;
    for (auto i = nonexit.find_first(); i != CharReach::npos;
         i = nonexit.find_next(i)) {
        rv.insert(rdfa.alpha_remap[i]);
    }

    return vector<u16>(rv.begin(), rv.end());
}

static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
                             const AccelScheme &ei) {
    DEBUG_PRINTF("looking for region around %hu\n", base);

    set<dstate_id_t> region = {base};

    if (!ei.double_byte.empty()) {
        return region;
    }

    DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);

    const CharReach &escape = ei.cr;
    auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);

    vector<dstate_id_t> pending = {base};
    while (!pending.empty()) {
        dstate_id_t curr = pending.back();
        pending.pop_back();
        for (auto s : nonexit_symbols) {
            dstate_id_t t = rdfa.states[curr].next[s];
            if (contains(region, t)) {
                continue;
            }

            DEBUG_PRINTF("    %hu is in region\n", t);
            region.insert(t);
            pending.push_back(t);
        }
    }

    return region;
}

static
bool better(const AccelScheme &a, const AccelScheme &b) {
    if (!a.double_byte.empty() && b.double_byte.empty()) {
        return true;
    }

    if (!b.double_byte.empty()) {
        return false;
    }

    return a.cr.count() < b.cr.count();
}

static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
    vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */

    for (u32 i = 0; i < N_CHARS; i++) {
        rv.at(rdfa.alpha_remap[i]).set(i);
    }

    return rv;
}

map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
                                                       const dfa_build_strat &strat,
                                                       const Grey &grey) {
    map<dstate_id_t, AccelScheme> rv;
    if (!grey.accelerateDFA) {
        return rv;
    }

    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
    DEBUG_PRINTF("sds %hu\n", sds_proxy);

    for (size_t i = 0; i < rdfa.states.size(); i++) {
        if (i == DEAD_STATE) {
            continue;
        }

        /* Note on report acceleration states: While we can't accelerate while
         * we are spamming out callbacks, the QR code paths don't raise reports
         * during scanning so they can accelerate report states. */
        if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
            continue;
        }

        size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
                                             : ACCEL_DFA_MAX_STOP_CHAR;
        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);

        AccelScheme ei = strat.find_escape_strings(i);
        if (ei.cr.count() > single_limit) {
            DEBUG_PRINTF("state %zu is not accelerable, has %zu\n", i,
                         ei.cr.count());
            continue;
        }

        DEBUG_PRINTF("state %zu should be accelerable %zu\n",
                     i, ei.cr.count());

        rv[i] = ei;
    }

    /* provide acceleration states to states in the region of sds */
    if (contains(rv, sds_proxy)) {
        AccelScheme sds_ei = rv[sds_proxy];
        sds_ei.double_byte.clear(); /* region based on single byte scheme
                                     * may differ from double byte */
        DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
                     sds_ei.cr.count());
        auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
        for (auto s : sds_region) {
            if (!contains(rv, s) || better(sds_ei, rv[s])) {
                rv[s] = sds_ei;
            }
        }
    }

    return rv;
}

static
bool double_byte_ok(const AccelScheme &info) {
    return !info.double_byte.empty()
        && info.double_cr.count() < info.double_byte.size()
        && info.double_cr.count() <= 2;
}

AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
                                       u32 max_allowed_accel_offset) {
    AccelScheme rv;
    rv.cr.clear();
    rv.offset = 0;
    const dstate &raw = rdfa.states[this_idx];
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    bool outs2_broken = false;
    map<dstate_id_t, CharReach> succs;

    for (u32 i = 0; i < rev_map.size(); i++) {
        if (raw.next[i] == this_idx) {
            continue;
        }

        const CharReach &cr_i = rev_map.at(i);

        rv.cr |= cr_i;
        dstate_id_t next_id = raw.next[i];

        DEBUG_PRINTF("next is %hu\n", next_id);
        const dstate &raw_next = rdfa.states[next_id];

        if (outs2_broken) {
            continue;
        }

        if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
            DEBUG_PRINTF("leads to report\n");
            outs2_broken = true; /* cannot accelerate over reports */
            continue;
        }
        succs[next_id] |= cr_i;
    }

    if (!outs2_broken) {
        for (const auto &e : succs) {
            const CharReach &cr_i = e.second;
            const dstate &raw_next = rdfa.states[e.first];

            CharReach cr_all_j;
            for (u32 j = 0; j < rev_map.size(); j++) {
                if (raw_next.next[j] == raw.next[j]) {
                    continue;
                }

                DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first,
                             j, raw_next.next[j]);
                cr_all_j |= rev_map.at(j);
            }

            if (cr_i.count() * cr_all_j.count() > 8) {
                DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count());
                rv.double_cr |= cr_i;
            } else {
                for (auto ii = cr_i.find_first(); ii != CharReach::npos;
                     ii = cr_i.find_next(ii)) {
                    for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
                         jj = cr_all_j.find_next(jj)) {
                        rv.double_byte.emplace((u8)ii, (u8)jj);
                    }
                }
            }
        }

        if (rv.double_byte.size() > 8) {
            DEBUG_PRINTF("outs2 too big\n");
            outs2_broken = true;
        }

        if (outs2_broken) {
            rv.double_byte.clear();
        }
    }

    DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
    DEBUG_PRINTF("broken %d\n", outs2_broken);
    if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
        && this_idx == rdfa.start_floating
        && this_idx != DEAD_STATE) {
        DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
        auto offset = look_for_offset_accel(rdfa, this_idx,
                                            max_allowed_accel_offset);
        DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(),
                     rv.cr.count());
        if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
            DEBUG_PRINTF("using offset accel\n");
            rv = offset;
        }
    }

    return rv;
}

}
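The pair-building rule in find_mcclellan_escape_info caps the cross product: when cr_i.count() * cr_all_j.count() > 8, the contribution is folded into the double_cr class instead of emitting explicit pairs. A standalone sketch of that enumeration, using plain std containers in place of CharReach/flat_set:

    #include <set>
    #include <string>
    #include <utility>

    // Sketch: escapes {a, b} followed by {x} stay under the cap (2 * 1 <= 8),
    // so explicit pairs are produced, mirroring rv.double_byte.emplace(ii, jj).
    std::set<std::pair<char, char>> enumerate_pairs(const std::string &cr_i,
                                                    const std::string &cr_all_j) {
        std::set<std::pair<char, char>> pairs;
        if (cr_i.size() * cr_all_j.size() > 8) {
            return pairs; // too wide: caller falls back to the double_cr class
        }
        for (char ii : cr_i) {
            for (char jj : cr_all_j) {
                pairs.emplace(ii, jj);
            }
        }
        return pairs; // e.g. {(a,x), (b,x)} for cr_i = "ab", cr_all_j = "x"
    }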
61 src/nfa/mcclellancompile_accel.h Normal file
@ -0,0 +1,61 @@
/*
 * Copyright (c) 2016, Intel Corporation
 *
 * [BSD licence block identical to mcclellancompile_accel.cpp above]
 */

#ifndef MCCLELLANCOMPILE_ACCEL_H
#define MCCLELLANCOMPILE_ACCEL_H

#include "mcclellancompile.h"

#include <map>

namespace ue2 {

struct Grey;

#define ACCEL_DFA_MAX_OFFSET_DEPTH 4

/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than for the NFA case, as we don't have a budget and the NFA
 * cheats on stop characters for sets of states. */
#define ACCEL_DFA_MAX_STOP_CHAR 160

/** Maximum tolerated number of escape characters from an SDS accel state.
 * Larger than for normal states as accelerating SDS is important. Matches the
 * NFA value. */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192

std::map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
                                                            const dfa_build_strat &strat,
                                                            const Grey &grey);

AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa,
                                       dstate_id_t this_idx,
                                       u32 max_allowed_accel_offset);

}

#endif
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -228,13 +228,13 @@ void calc_min_dist_to_accept(const raw_dfa &raw,
    }
}

void prune_overlong(raw_dfa &raw, u32 max_offset) {
bool prune_overlong(raw_dfa &raw, u32 max_offset) {
    DEBUG_PRINTF("pruning to at most %u\n", max_offset);
    vector<u32> bob_dist;
    u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist);

    if (max_min_dist_bob <= max_offset) {
        return;
        return false;
    }

    vector<vector<dstate_id_t> > in_edges;
@ -282,6 +282,8 @@ void prune_overlong(raw_dfa &raw, u32 max_offset) {
    /* update specials */
    raw.start_floating = new_ids[raw.start_floating];
    raw.start_anchored = new_ids[raw.start_anchored];

    return true;
}

set<ReportID> all_reports(const raw_dfa &rdfa) {
@ -334,4 +336,63 @@ size_t hash_dfa(const raw_dfa &rdfa) {
    return v;
}

static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
    u16 top_remap = raw.alpha_remap[TOP];
    for (u32 i = 0; i < raw.states[s].next.size(); i++) {
        if (i != top_remap && raw.states[s].next[i] == s) {
            return true;
        }
    }
    return false;
}

dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
    if (raw.start_floating != DEAD_STATE) {
        DEBUG_PRINTF("has floating start\n");
        return raw.start_floating;
    }

    DEBUG_PRINTF("looking for SDS proxy\n");

    dstate_id_t s = raw.start_anchored;

    if (has_self_loop(s, raw)) {
        return s;
    }

    u16 top_remap = raw.alpha_remap[TOP];

    ue2::unordered_set<dstate_id_t> seen;
    while (true) {
        seen.insert(s);
        DEBUG_PRINTF("basis %hu\n", s);

        /* check if we are connected to a state with a self loop */
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t t = raw.states[s].next[i];
            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
                return t;
            }
        }

        /* find a neighbour to use as a basis for looking for the SDS proxy */
        dstate_id_t t = DEAD_STATE;
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t tt = raw.states[s].next[i];
            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
                t = tt;
                break;
            }
        }

        if (t == DEAD_STATE) {
            /* we were unable to find a state to use as an SDS proxy */
            return DEAD_STATE;
        }

        s = t;
    }
}

} // namespace ue2
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -29,16 +29,21 @@
#ifndef MCCLELLAN_COMPILE_UTIL_H
#define MCCLELLAN_COMPILE_UTIL_H

#include "rdfa.h"
#include "ue2common.h"

#include <set>

namespace ue2 {

struct raw_dfa;

u32 remove_leading_dots(raw_dfa &raw);
void prune_overlong(raw_dfa &raw, u32 max_offset);

/**
 * Prunes any states which cannot be reached within max_offset from the start
 * of the stream. Returns false if no changes are made to the rdfa.
 */
bool prune_overlong(raw_dfa &raw, u32 max_offset);

std::set<ReportID> all_reports(const raw_dfa &rdfa);
bool has_eod_accepts(const raw_dfa &rdfa);
bool has_non_eod_accepts(const raw_dfa &rdfa);
@ -50,6 +55,8 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
/** \brief Compute a simple hash of this raw_dfa, including its reports. */
size_t hash_dfa(const raw_dfa &rdfa);

dstate_id_t get_sds_or_proxy(const raw_dfa &raw);

} // namespace ue2

#endif
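Since prune_overlong now reports whether it modified the rdfa, callers can skip recomputing derived data when nothing changed. A hedged usage sketch (the max_offset of 64 is an arbitrary example value):

    // Only recompute derived properties when pruning actually removed states.
    if (prune_overlong(rdfa, 64)) {
        // start_anchored/start_floating have been remapped; cached values
        // such as hash_dfa(rdfa) are stale and must be recomputed.
        size_t new_hash = hash_dfa(rdfa);
        (void)new_hash;
    }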
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -124,7 +124,7 @@ char processReports(const struct mpv *m, u8 *reporters,
                    DEBUG_PRINTF("report %u at %llu\n", curr->report,
                                 report_offset);

                    if (curr->unbounded) {
                    if (curr->unbounded && !curr->simple_exhaust) {
                        assert(rl_count < m->puffette_count);
                        *rl = curr->report;
                        ++rl;
@ -176,7 +176,9 @@ char processReportsForRange(const struct mpv *m, u8 *reporters,
        return MO_CONTINUE_MATCHING;
    }

    for (u32 i = 2; i <= length; i++) {
    DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count);

    for (size_t i = 2; i <= length; i++) {
        for (u32 j = 0; j < rl_count; j++) {
            if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
                DEBUG_PRINTF("bailing\n");
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -108,6 +108,9 @@ void dumpKilo(FILE *f, const mpv *m, const mpv_kilopuff *k) {
        fprintf(f, "    Puffette %u\n", i);
        fprintf(f, "        repeats: %u%s\n", p[i].repeats,
                p[i].unbounded ? "," : "");
        if (p[i].simple_exhaust) {
            fprintf(f, "        simple exhaustible\n");
        }
        fprintf(f, "        report id: %u\n", p[i].report);
    }

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -40,6 +40,15 @@
struct mpv_puffette {
    u32 repeats;
    char unbounded;

    /**
     * \brief Report is simple-exhaustible.
     *
     * If this is true, we do best-effort suppression of runs of reports, only
     * delivering the first one.
     */
    char simple_exhaust;

    ReportID report;
};

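The runtime effect of simple_exhaust is visible in the mpv.c hunk earlier: an unbounded puffette is only queued for repeated delivery when it is not simple-exhaustible, so only the first report in a run is delivered. A toy, self-contained sketch of that gate (names are illustrative, not the real engine structures):

    #include <stdio.h>

    /* Toy model of the repeat-list gate in processReports() above. */
    struct toy_puffette {
        int unbounded;
        int simple_exhaust;
        unsigned report;
    };

    int main(void) {
        struct toy_puffette p = {1 /* unbounded */, 1 /* simple_exhaust */, 7};
        unsigned offsets[] = {4, 5, 6}; /* offsets where the puff keeps matching */
        for (int i = 0; i < 3; i++) {
            if (i == 0) {
                /* the first match is always delivered */
                printf("report %u at %u\n", p.report, offsets[i]);
                continue;
            }
            /* later matches are only re-raised when not simple-exhaustible */
            if (p.unbounded && !p.simple_exhaust) {
                printf("report %u at %u\n", p.report, offsets[i]);
            }
        }
        return 0; /* prints a single report when simple_exhaust is set */
    }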
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#include "util/alloc.h"
#include "util/multibit_internal.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/verify_types.h"

#include <algorithm>
@ -53,10 +54,8 @@ namespace ue2 {
namespace {
struct pcomp {
    bool operator()(const raw_puff &a, const raw_puff &b) const {
        ORDER_CHECK(repeats);
        ORDER_CHECK(unbounded);
        ORDER_CHECK(report);
        return false;
        return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) <
               tie(b.repeats, b.unbounded, b.simple_exhaust, b.report);
    }
};

@ -84,12 +83,21 @@ struct ClusterKey {
} // namespace

static
void writePuffette(mpv_puffette *out, const raw_puff &rp) {
void writePuffette(mpv_puffette *out, const raw_puff &rp,
                   const ReportManager &rm) {
    DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
                 rp.report, out);
    out->repeats = rp.repeats;
    out->unbounded = rp.unbounded;
    out->report = rp.report;
    out->simple_exhaust = rp.simple_exhaust;
    out->report = rm.getProgramOffset(rp.report);
}

static
void writeSentinel(mpv_puffette *out) {
    DEBUG_PRINTF("outputting sentinel to %p\n", out);
    memset(out, 0, sizeof(*out));
    out->report = INVALID_REPORT;
}

static
@ -148,8 +156,8 @@ void populateClusters(const vector<raw_puff> &puffs_in,

static
void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
                   u32 counter_offset, mpv *m, mpv_kilopuff *kp,
                   mpv_puffette **pa) {
                   const ReportManager &rm, u32 counter_offset, mpv *m,
                   mpv_kilopuff *kp, mpv_puffette **pa) {
    const CharReach &reach = it->first.reach;
    const vector<raw_puff> &puffs = it->second;

@ -182,11 +190,11 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
    kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
    for (size_t i = 0; i < puffs.size(); i++) {
        assert(!it->first.auto_restart || puffs[i].unbounded);
        writePuffette(*pa + i, puffs[i]);
        writePuffette(*pa + i, puffs[i], rm);
    }

    *pa += puffs.size();
    writePuffette(*pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
    writeSentinel(*pa);
    ++*pa;

    writeDeadPoint(kp, puffs);
@ -301,7 +309,8 @@ const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
}

aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
                                   const vector<raw_puff> &triggered_puffs) {
                                   const vector<raw_puff> &triggered_puffs,
                                   const ReportManager &rm) {
    assert(!puffs_in.empty() || !triggered_puffs.empty());
    u32 puffette_count = puffs_in.size() + triggered_puffs.size();

@ -341,7 +350,7 @@ aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
                               + sizeof(mpv_counter_info) * counters.size());
    mpv_puffette *pa = pa_base;

    writePuffette(pa, raw_puff(0U, false, INVALID_REPORT, CharReach()));
    writeSentinel(pa);

    ++pa; /* skip init sentinel */

@ -367,8 +376,9 @@ aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
    mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
    mpv_kilopuff *kp = kp_begin;
    for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
        writeKiloPuff(it, findCounter(counters, kp - kp_begin).counter_offset,
                      m, kp, &pa);
        writeKiloPuff(it, rm,
                      findCounter(counters, kp - kp_begin).counter_offset, m,
                      kp, &pa);
        ++kp;
    }
    assert((char *)pa == (char *)nfa.get() + len);
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -40,14 +40,19 @@ struct NFA;

namespace ue2 {

class ReportManager;

struct raw_puff {
    raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in,
             const CharReach &reach_in, bool auto_restart_in = false)
             const CharReach &reach_in, bool auto_restart_in = false,
             bool simple_exhaust_in = false)
        : repeats(repeats_in), unbounded(unbounded_in),
          auto_restart(auto_restart_in), report(report_in), reach(reach_in) {}
          auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in),
          report(report_in), reach(reach_in) {}
    u32 repeats; /**< report match after this many matching bytes */
    bool unbounded; /**< keep producing matches after repeats are reached */
    bool auto_restart; /**< for /[^X]{n}/ type patterns */
    bool simple_exhaust; /* first report will exhaust us */
    ReportID report;
    CharReach reach; /**< = ~escapes */
};
@ -56,9 +61,9 @@ struct raw_puff {
 * puffs in the triggered_puffs vector are enabled when a TOP_N event is
 * delivered corresponding to their index in the vector
 */
aligned_unique_ptr<NFA>
mpvCompile(const std::vector<raw_puff> &puffs,
           const std::vector<raw_puff> &triggered_puffs);
aligned_unique_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs,
                                   const std::vector<raw_puff> &triggered_puffs,
                                   const ReportManager &rm);

} // namespace ue2

265 src/nfa/multiaccel_common.h Normal file
@ -0,0 +1,265 @@
/*
 * Copyright (c) 2015, Intel Corporation
 *
 * [BSD licence block identical to mcclellancompile_accel.cpp above]
 */

#ifndef MULTIACCEL_COMMON_H_
#define MULTIACCEL_COMMON_H_

#include "config.h"
#include "ue2common.h"
#include "util/join.h"
#include "util/bitutils.h"

/*
 * When doing shifting, remember that the total number of shifts should be n-1
 */
#define VARISHIFT(src, dst, len) \
    do { \
        (dst) &= (src) >> (len); \
    } while (0)
#define STATIC_SHIFT1(x) \
    do { \
        (x) &= (x) >> 1; \
    } while (0)
#define STATIC_SHIFT2(x) \
    do { \
        (x) &= (x) >> 2; \
    } while (0)
#define STATIC_SHIFT4(x) \
    do { \
        (x) &= (x) >> 4; \
    } while (0)
#define STATIC_SHIFT8(x) \
    do { \
        (x) &= (x) >> 8; \
    } while (0)
#define SHIFT1(x) \
    do {} while (0)
#define SHIFT2(x) \
    do { \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT3(x) \
    do { \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT4(x) \
    do { \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT5(x) \
    do { \
        SHIFT4(x); \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT6(x) \
    do { \
        SHIFT4(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT7(x) \
    do { \
        SHIFT4(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT8(x) \
    do { \
        SHIFT4(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT9(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT10(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT11(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT12(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT13(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT14(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT15(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT16(x) \
    do { \
        SHIFT8(x); \
        STATIC_SHIFT8(x); \
    } while (0)
#define SHIFT17(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT18(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT19(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT20(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT21(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT22(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT23(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT24(x) \
    do { \
        SHIFT16(x); \
        STATIC_SHIFT8(x); \
    } while (0)
#define SHIFT25(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT1(x); \
    } while (0)
#define SHIFT26(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT27(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
    } while (0)
#define SHIFT28(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT29(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT30(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT31(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT1(x); \
        STATIC_SHIFT2(x); \
        STATIC_SHIFT4(x); \
    } while (0)
#define SHIFT32(x) \
    do { \
        SHIFT24(x); \
        STATIC_SHIFT8(x); \
    } while (0)

/*
 * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept
 * a 32-bit integer as a buffer, where the low 16 bits are the movemask result
 * and the high 16 bits are "don't care" values. this function is not expected
 * to return a result higher than 16.
 */
static really_inline
const u8 *match32(const u8 *buf, const u32 z) {
    if (unlikely(z != 0)) {
        u32 pos = ctz32(z);
        assert(pos < 16);
        return buf + pos;
    }
    return NULL;
}

/*
 * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept
 * a 64-bit integer as a buffer, where the low 32 bits are the movemask result
 * and the high 32 bits are "don't care" values. this function is not expected
 * to return a result higher than 32.
 */
static really_inline
const u8 *match64(const u8 *buf, const u64a z) {
    if (unlikely(z != 0)) {
        u32 pos = ctz64(z);
        assert(pos < 32);
        return buf + pos;
    }
    return NULL;
}

#endif /* MULTIACCEL_COMMON_H_ */
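To make the shift trick concrete: after SHIFTn(x), bit i of x survives only if bits i..i+n-1 were all set, and the total shift distance applied is n-1. A self-contained sketch using the SHIFT4 composition (a shift by 1, then by 2) and a count-trailing-zeros step in place of the ctz32()/match32() pair above (__builtin_ctz is a GCC/Clang builtin standing in for Hyperscan's ctz32):

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
        const char buf[16] = "xxaaaax xxxxxxx"; /* 'a' run at positions 2..5 */
        unsigned z = 0;

        /* hand-rolled movemask: bit i set iff buf[i] == 'a' */
        for (unsigned i = 0; i < 16; i++) {
            if (buf[i] == 'a') {
                z |= 1u << i;
            }
        }

        /* SHIFT4 composition: total shift 1 + 2 = 3 = n - 1 */
        z &= z >> 1; /* STATIC_SHIFT1: keeps starts of runs of >= 2 */
        z &= z >> 2; /* STATIC_SHIFT2: keeps starts of runs of >= 4 */

        if (z) {
            unsigned pos = (unsigned)__builtin_ctz(z); /* ctz32() in Hyperscan */
            assert(pos < 16);
            printf("run of 4 starts at buf + %u\n", pos); /* prints 2 */
        }
        return 0;
    }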
439 src/nfa/multiaccel_compilehelper.cpp Normal file
@ -0,0 +1,439 @@
/*
 * Copyright (c) 2015, Intel Corporation
 *
 * [BSD licence block identical to mcclellancompile_accel.cpp above]
 */

#include "multiaccel_compilehelper.h"

using namespace std;
using namespace ue2;

#ifdef DEBUG
static const char* state_to_str[] = {
    "FIRST_RUN",
    "SECOND_RUN",
    "WAITING_FOR_GRAB",
    "FIRST_TAIL",
    "SECOND_TAIL",
    "STOPPED",
    "INVALID"
};
static const char* type_to_str[] = {
    "SHIFT",
    "SHIFTGRAB",
    "DOUBLESHIFT",
    "DOUBLESHIFTGRAB",
    "LONG",
    "LONGGRAB",
    "NONE"
};

static
void dumpMultiaccelState(const accel_data &d) {
    DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
                 type_to_str[(unsigned) d.type],
                 state_to_str[(unsigned) d.state],
                 d.len1, d.tlen1, d.len2, d.tlen2);
}
#endif

/* stop all the matching. this may render most schemes invalid. */
static
void stop(accel_data &d) {
    switch (d.state) {
    case STATE_STOPPED:
    case STATE_INVALID:
        break;
    case STATE_FIRST_TAIL:
    case STATE_SECOND_RUN:
        /*
         * Shift matchers are a special case, because they have "tails".
         * When a shift matcher reaches a mid/endpoint, tail mode is
         * activated, which looks for more matches to extend the match.
         *
         * For example, consider the pattern /a{5}ba{3}/. Under normal
         * circumstances, the long-grab matcher will be picked for this pattern
         * (matching a run of a's, followed by a not-a), because the doubleshift
         * matcher would be confused by consecutive a's and would parse the
         * pattern as a.{0}a.{0}a (two shifts by 1) and throw out the rest of
         * the pattern.
         *
         * With tails, we defer ending the run until we actually run out of
         * matching characters, so the above pattern will now be parsed by the
         * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
         *
         * So if we are stopping shift matchers, we should check whether we are
         * in the process of matching the first tail or the second run. If we
         * are, we can't finish the second run as we are stopping, but we can
         * try and split the first tail instead to obtain a valid second run.
         */
        if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
             d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
            // can't split an empty void...
            d.state = STATE_INVALID;
            break;
        }
        d.len2 = 0;
        d.state = STATE_STOPPED;
        break;
    case STATE_SECOND_TAIL:
        d.state = STATE_STOPPED;
        break;
    case STATE_WAITING_FOR_GRAB:
    case STATE_FIRST_RUN:
        if (d.type == MultibyteAccelInfo::MAT_LONG) {
            d.state = STATE_STOPPED;
        } else {
            d.state = STATE_INVALID;
        }
        break;
    }
}

static
void validate(accel_data &d, unsigned max_len) {
    // try and fit in all our tails
    if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
        // case 1: everything fits in
        d.len1 += d.tlen1;
        d.len2 += d.tlen2;
        d.tlen1 = 0;
        d.tlen2 = 0;
    } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
        // case 2: everything but the second tail fits in
        d.len1 += d.tlen1;
        d.tlen1 = 0;
        // try going for a partial tail
        if (d.tlen2 != 0) {
            int new_tlen2 = max_len - 1 - d.len1 - d.len2;
            if (new_tlen2 > 0) {
                d.len2 += new_tlen2;
            }
            d.tlen2 = 0;
        }
    } else if (d.len1 + d.tlen1 < max_len) {
        // case 3: the first run and its tail fit in
        if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
            d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
            // split the tail into a second run
            d.len2 = d.tlen1;
        } else {
            d.len1 += d.tlen1;
            d.len2 = 0;
        }
        d.tlen1 = 0;
        d.tlen2 = 0;
    } else if (d.len1 < max_len) {
        // case 4: nothing but the first run fits in
        // try going for a partial tail
        if (d.tlen1 != 0) {
            int new_tlen1 = max_len - 1 - d.len1;
            if (new_tlen1 > 0) {
                d.len1 += new_tlen1;
            }
            d.tlen1 = 0;
        }
        d.len2 = 0;
        d.tlen2 = 0;
    }
    // if we removed our second run, doubleshift matchers are no longer valid
    if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
         d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
        d.state = STATE_INVALID;
    } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
        // long matchers can just stop whenever they want to
        d.len1 = max_len - 1;
    }

    // now, general sanity checks
    if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
        d.state = STATE_INVALID;
    }
    if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
        d.state = STATE_INVALID;
    }
}

static
void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
    switch (d.type) {
    case MultibyteAccelInfo::MAT_LONG:
    {
        /*
         * For the long matcher, we want lots of consecutive same-or-subset
         * char-reaches.
         */
        if ((ref_cr & cur_cr) == cur_cr) {
            d.len1++;
        } else {
            d.state = STATE_STOPPED;
        }
    }
    break;

    case MultibyteAccelInfo::MAT_LONGGRAB:
    {
        /*
         * For the long-grab matcher, we want lots of consecutive
         * same-or-subset char-reaches with a negative match at the end.
         */
        if ((ref_cr & cur_cr) == cur_cr) {
            d.len1++;
        } else if (!(ref_cr & cur_cr).any()) {
            /* we grabbed, stop immediately */
            d.state = STATE_STOPPED;
        } else {
            /* our run-n-grab was interrupted; mark as invalid */
            d.state = STATE_INVALID;
        }
    }
    break;

    case MultibyteAccelInfo::MAT_SHIFTGRAB:
    {
        /*
         * For the shift-grab matcher, we want two matches separated by
         * anything; however the second vertex *must* be a negative
         * (non-overlapping) match.
         *
         * The shiftgrab matcher is identical to shift except for the presence
         * of the grab.
         */
        if (d.state == STATE_WAITING_FOR_GRAB) {
            if ((ref_cr & cur_cr).any()) {
                d.state = STATE_INVALID;
            } else {
                d.state = STATE_FIRST_RUN;
                d.len1++;
            }
            return;
        }
    }
    /* no break, falling through */
    case MultibyteAccelInfo::MAT_SHIFT:
    {
        /*
         * For the shift matcher, we want two matches separated by anything.
         */
        if (ref_cr == cur_cr) {
            // keep matching the tail
            switch (d.state) {
            case STATE_FIRST_RUN:
                d.state = STATE_FIRST_TAIL;
                break;
            case STATE_FIRST_TAIL:
                d.tlen1++;
                break;
            default:
                // shouldn't happen
                assert(0);
            }
        } else {
            switch (d.state) {
            case STATE_FIRST_RUN:
                // simply advance
                d.len1++;
                break;
            case STATE_FIRST_TAIL:
                // we found a non-matching char after the tail, so stop
                d.state = STATE_STOPPED;
                break;
            default:
                // shouldn't happen
                assert(0);
            }
        }
    }
    break;

    case MultibyteAccelInfo::MAT_DSHIFTGRAB:
    {
        /*
         * For the double shift-grab matcher, we want two matches separated by
         * either negative matches or dots; however the second vertex *must*
         * be a negative match.
         *
         * The doubleshiftgrab matcher is identical to doubleshift except for
         * the presence of the grab.
         */
        if (d.state == STATE_WAITING_FOR_GRAB) {
            if ((ref_cr & cur_cr).any()) {
                d.state = STATE_INVALID;
            } else {
                d.state = STATE_FIRST_RUN;
                d.len1++;
            }
            return;
        }
    }
    /* no break, falling through */
    case MultibyteAccelInfo::MAT_DSHIFT:
    {
        /*
         * For the double shift matcher, we want three matches, each separated
         * by a lot of anything.
         *
         * The doubleshift matcher is complicated by the presence of tails.
         */
        if (ref_cr == cur_cr) {
            // decide if we are activating the second shift or matching tails
            switch (d.state) {
            case STATE_FIRST_RUN:
                d.state = STATE_FIRST_TAIL;
                d.len2 = 1; // we're now ready for our second run
                break;
            case STATE_FIRST_TAIL:
                d.tlen1++;
                break;
            case STATE_SECOND_RUN:
                d.state = STATE_SECOND_TAIL;
                break;
            case STATE_SECOND_TAIL:
                d.tlen2++;
                break;
            default:
                // shouldn't happen
                assert(0);
            }
        } else {
            switch (d.state) {
            case STATE_FIRST_RUN:
                d.len1++;
                break;
            case STATE_FIRST_TAIL:
                // start the second run
                d.state = STATE_SECOND_RUN;
                d.len2++;
                break;
            case STATE_SECOND_RUN:
                d.len2++;
                break;
            case STATE_SECOND_TAIL:
                // stop
                d.state = STATE_STOPPED;
                break;
            default:
                // shouldn't happen
                assert(0);
            }
        }
    }
    break;

    default:
        // shouldn't happen
        assert(0);
        break;
    }
}

MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
                                                 unsigned max_len) :
        cr(ref_cr), offset(off), max_len(max_len) {
    int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
    accels.resize(accel_num);

    // mark everything as valid
    for (int i = 0; i < accel_num; i++) {
        accel_data &ad = accels[i];
        ad.len1 = 1;
        ad.type = (MultibyteAccelInfo::multiaccel_type) i;

        /* for shift-grab matchers, we are waiting for the grab right at the start */
        if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB
            || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
            ad.state = STATE_WAITING_FOR_GRAB;
        } else {
            ad.state = STATE_FIRST_RUN;
        }
    }
}

bool MultiaccelCompileHelper::canAdvance() {
    for (const accel_data &ad : accels) {
        if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
            return true;
        }
    }
    return false;
}

void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
    for (accel_data &ad : accels) {
        if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
            continue;
        }
        match(ad, cr, cur_cr);
#ifdef DEBUG
        dumpMultiaccelState(ad);
#endif
    }
}

MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
    int best_len = 0;
    accel_data best;

    DEBUG_PRINTF("Stopping multiaccel compile\n");

    for (accel_data &ad : accels) {
        // stop our matching
        stop(ad);
        validate(ad, max_len);

#ifdef DEBUG
        dumpMultiaccelState(ad);
#endif

        // skip invalid schemes
        if (ad.state == STATE_INVALID) {
            continue;
        }
        DEBUG_PRINTF("Marking as viable\n");

        // TODO: relative strengths of accel schemes? maybe e.g. a shorter
        // long match would in some cases be preferable to a longer
        // double shift match (for example, depending on length)?
        int as_len = ad.len1 + ad.len2;
        if (as_len >= best_len) {
            DEBUG_PRINTF("Marking as best\n");
            best_len = as_len;
            best = ad;
        }
    }
    // if we found at least one accel scheme, return it
    if (best.state != STATE_INVALID) {
#ifdef DEBUG
        DEBUG_PRINTF("Picked best multiaccel state:\n");
        dumpMultiaccelState(best);
#endif
        MultibyteAccelInfo info;
        info.cr = cr;
        info.offset = offset;
        info.len1 = best.len1;
        info.len2 = best.len2;
        info.type = best.type;
        return info;
    }
    return MultibyteAccelInfo();
}
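A hedged usage sketch for the helper implemented above (and declared in the header that follows): feed per-position character reaches, then take the best surviving scheme. The CharReach single-character constructor and the winning scheme are assumptions; per the comment in stop(), /a{5}ba{3}/ is expected to favour a long-grab or tail-extended doubleshift scheme:

    // Hedged sketch: assumes CharReach(char) from util/charreach.h.
    CharReach a('a');
    CharReach b('b');

    // Reaches seen after the initial 'a' of /a{5}ba{3}/.
    const CharReach seq[] = {a, a, a, a, b, a, a, a};

    MultiaccelCompileHelper mac(a, 0 /* offset */, 15 /* max_len */);
    for (const CharReach &cur : seq) {
        if (!mac.canAdvance()) {
            break; // every candidate scheme has stopped or gone invalid
        }
        mac.advance(cur);
    }

    MultibyteAccelInfo info = mac.getBestScheme();
    // info.type picks the matcher family (default-constructed if nothing was
    // viable); info.len1/info.len2 are the run lengths the matcher verifies.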
@ -26,44 +26,50 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "internal_report.h"
#include "report.h"
#include "report_manager.h"
#ifndef MULTIACCELCOMPILE_H_
#define MULTIACCELCOMPILE_H_

#include "ue2common.h"

#include "nfagraph/ng_limex_accel.h"

#include <vector>

namespace ue2 {

void writeInternalReport(const Report &report, const ReportManager &rm,
                         internal_report *ir) {
    assert(ir);
    assert(ISALIGNED(ir));
/* accel scheme state machine */
enum accel_scheme_state {
    STATE_FIRST_RUN,
    STATE_SECOND_RUN,
    STATE_WAITING_FOR_GRAB,
    STATE_FIRST_TAIL,
    STATE_SECOND_TAIL,
    STATE_STOPPED,
    STATE_INVALID
};

    ir->type = report.type;
    ir->hasBounds = report.hasBounds() ? 1 : 0;
    ir->quashSom = report.quashSom ? 1 : 0;
    ir->minOffset = report.minOffset;
    ir->maxOffset = report.maxOffset;
    ir->minLength = report.minLength;
    ir->ekey = report.ekey;
    ir->offsetAdjust = report.offsetAdjust;
    ir->onmatch = report.onmatch;
struct accel_data {
    MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE;
    accel_scheme_state state = STATE_INVALID;
    unsigned len1 = 0; /* length of first run */
    unsigned len2 = 0; /* length of second run, if present */
    unsigned tlen1 = 0; /* first tail length */
    unsigned tlen2 = 0; /* second tail length */
};

    switch (report.type) {
    case INTERNAL_ROSE_CHAIN:
        ir->aux.topSquashDistance = report.topSquashDistance;
        break;
    case EXTERNAL_CALLBACK_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
        ir->aux.revNfaIndex = report.revNfaIndex;
        break;
    default:
        ir->aux.somDistance = report.somDistance;
        break;
    }
class MultiaccelCompileHelper {
private:
    const CharReach &cr;
    u32 offset;
    std::vector<accel_data> accels;
    unsigned max_len;
public:
    MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len);
    bool canAdvance();
    MultibyteAccelInfo getBestScheme();
    void advance(const ue2::CharReach &cr);
};

    // Dedupe keys are managed by ReportManager.
    ir->dkey = rm.getDkey(report);
}
}; // namespace

} // namespace ue2
#endif /* MULTIACCELCOMPILE_H_ */
149 src/nfa/multiaccel_doubleshift.h Normal file
@ -0,0 +1,149 @@
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef MULTIACCEL_DOUBLESHIFT_H_
|
||||
#define MULTIACCEL_DOUBLESHIFT_H_
|
||||
|
||||
#include "multiaccel_common.h"
|
||||
|
||||
#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \
|
||||
static really_inline \
|
||||
const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
|
||||
if (unlikely(z)) { \
|
||||
match_t tmp = z; \
|
||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
||||
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
|
||||
VARISHIFT(z, z, len); \
|
||||
VARISHIFT(tmp, tmp, len2); \
|
||||
VARISHIFT(tmp, z, len); \
|
||||
return JOIN(match, match_sz)(buf, z); \
|
||||
} \
|
||||
return NULL; \
|
||||
}

#define DOUBLESHIFT_MATCH_32_DEF(n) \
        DOUBLESHIFT_MATCH(n, u32, 32)
#define DOUBLESHIFT_MATCH_64_DEF(n) \
        DOUBLESHIFT_MATCH(n, u64a, 64)
#define DOUBLESHIFT_MATCH_DEF(n) \
    DOUBLESHIFT_MATCH_32_DEF(n) \
    DOUBLESHIFT_MATCH_64_DEF(n)

DOUBLESHIFT_MATCH_DEF(1)
DOUBLESHIFT_MATCH_DEF(2)
DOUBLESHIFT_MATCH_DEF(3)
DOUBLESHIFT_MATCH_DEF(4)
DOUBLESHIFT_MATCH_DEF(5)
DOUBLESHIFT_MATCH_DEF(6)
DOUBLESHIFT_MATCH_DEF(7)
DOUBLESHIFT_MATCH_DEF(8)
DOUBLESHIFT_MATCH_DEF(9)
DOUBLESHIFT_MATCH_DEF(10)
DOUBLESHIFT_MATCH_DEF(11)
DOUBLESHIFT_MATCH_DEF(12)
DOUBLESHIFT_MATCH_DEF(13)
DOUBLESHIFT_MATCH_DEF(14)
DOUBLESHIFT_MATCH_DEF(15)
DOUBLESHIFT_MATCH_64_DEF(16)
DOUBLESHIFT_MATCH_64_DEF(17)
DOUBLESHIFT_MATCH_64_DEF(18)
DOUBLESHIFT_MATCH_64_DEF(19)
DOUBLESHIFT_MATCH_64_DEF(20)
DOUBLESHIFT_MATCH_64_DEF(21)
DOUBLESHIFT_MATCH_64_DEF(22)
DOUBLESHIFT_MATCH_64_DEF(23)
DOUBLESHIFT_MATCH_64_DEF(24)
DOUBLESHIFT_MATCH_64_DEF(25)
DOUBLESHIFT_MATCH_64_DEF(26)
DOUBLESHIFT_MATCH_64_DEF(27)
DOUBLESHIFT_MATCH_64_DEF(28)
DOUBLESHIFT_MATCH_64_DEF(29)
DOUBLESHIFT_MATCH_64_DEF(30)
DOUBLESHIFT_MATCH_64_DEF(31)

static
const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
{
    // skip the first
    0,
    &doubleshiftMatch_32_1,
    &doubleshiftMatch_32_2,
    &doubleshiftMatch_32_3,
    &doubleshiftMatch_32_4,
    &doubleshiftMatch_32_5,
    &doubleshiftMatch_32_6,
    &doubleshiftMatch_32_7,
    &doubleshiftMatch_32_8,
    &doubleshiftMatch_32_9,
    &doubleshiftMatch_32_10,
    &doubleshiftMatch_32_11,
    &doubleshiftMatch_32_12,
    &doubleshiftMatch_32_13,
    &doubleshiftMatch_32_14,
    &doubleshiftMatch_32_15,
};

static
const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
{
    // skip the first
    0,
    &doubleshiftMatch_64_1,
    &doubleshiftMatch_64_2,
    &doubleshiftMatch_64_3,
    &doubleshiftMatch_64_4,
    &doubleshiftMatch_64_5,
    &doubleshiftMatch_64_6,
    &doubleshiftMatch_64_7,
    &doubleshiftMatch_64_8,
    &doubleshiftMatch_64_9,
    &doubleshiftMatch_64_10,
    &doubleshiftMatch_64_11,
    &doubleshiftMatch_64_12,
    &doubleshiftMatch_64_13,
    &doubleshiftMatch_64_14,
    &doubleshiftMatch_64_15,
    &doubleshiftMatch_64_16,
    &doubleshiftMatch_64_17,
    &doubleshiftMatch_64_18,
    &doubleshiftMatch_64_19,
    &doubleshiftMatch_64_20,
    &doubleshiftMatch_64_21,
    &doubleshiftMatch_64_22,
    &doubleshiftMatch_64_23,
    &doubleshiftMatch_64_24,
    &doubleshiftMatch_64_25,
    &doubleshiftMatch_64_26,
    &doubleshiftMatch_64_27,
    &doubleshiftMatch_64_28,
    &doubleshiftMatch_64_29,
    &doubleshiftMatch_64_30,
    &doubleshiftMatch_64_31,
};
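
The tables are indexed directly by run length; slot 0 is a deliberate placeholder, since a zero-length run is meaningless. A hypothetical call site (names and values illustrative) would dispatch like this:

/* Illustrative only: run_len must be 1..15 for the 32-bit table. */
const u8 *hit = (*doubleshift_match_funcs_32[run_len])(buf, z, run2_len);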

#endif /* MULTIACCEL_DOUBLESHIFT_H_ */
152 src/nfa/multiaccel_doubleshiftgrab.h Normal file
@@ -0,0 +1,152 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_
#define MULTIACCEL_DOUBLESHIFTGRAB_H_

#include "multiaccel_common.h"

#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \
    static really_inline \
    const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
        if (unlikely(z)) { \
            match_t neg = ~z; \
            match_t tmp = z; \
            z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
            tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
            neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
            VARISHIFT(z, z, len); \
            VARISHIFT(tmp, tmp, len2); \
            VARISHIFT(neg, z, 1); \
            VARISHIFT(tmp, z, len); \
            return JOIN(match, match_sz)(buf, z); \
        } \
        return NULL; \
    }

#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
        DOUBLESHIFTGRAB_MATCH(n, u32, 32)
#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \
        DOUBLESHIFTGRAB_MATCH(n, u64a, 64)
#define DOUBLESHIFTGRAB_MATCH_DEF(n) \
    DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
    DOUBLESHIFTGRAB_MATCH_64_DEF(n)

DOUBLESHIFTGRAB_MATCH_DEF(1)
DOUBLESHIFTGRAB_MATCH_DEF(2)
DOUBLESHIFTGRAB_MATCH_DEF(3)
DOUBLESHIFTGRAB_MATCH_DEF(4)
DOUBLESHIFTGRAB_MATCH_DEF(5)
DOUBLESHIFTGRAB_MATCH_DEF(6)
DOUBLESHIFTGRAB_MATCH_DEF(7)
DOUBLESHIFTGRAB_MATCH_DEF(8)
DOUBLESHIFTGRAB_MATCH_DEF(9)
DOUBLESHIFTGRAB_MATCH_DEF(10)
DOUBLESHIFTGRAB_MATCH_DEF(11)
DOUBLESHIFTGRAB_MATCH_DEF(12)
DOUBLESHIFTGRAB_MATCH_DEF(13)
DOUBLESHIFTGRAB_MATCH_DEF(14)
DOUBLESHIFTGRAB_MATCH_DEF(15)
DOUBLESHIFTGRAB_MATCH_64_DEF(16)
DOUBLESHIFTGRAB_MATCH_64_DEF(17)
DOUBLESHIFTGRAB_MATCH_64_DEF(18)
DOUBLESHIFTGRAB_MATCH_64_DEF(19)
DOUBLESHIFTGRAB_MATCH_64_DEF(20)
DOUBLESHIFTGRAB_MATCH_64_DEF(21)
DOUBLESHIFTGRAB_MATCH_64_DEF(22)
DOUBLESHIFTGRAB_MATCH_64_DEF(23)
DOUBLESHIFTGRAB_MATCH_64_DEF(24)
DOUBLESHIFTGRAB_MATCH_64_DEF(25)
DOUBLESHIFTGRAB_MATCH_64_DEF(26)
DOUBLESHIFTGRAB_MATCH_64_DEF(27)
DOUBLESHIFTGRAB_MATCH_64_DEF(28)
DOUBLESHIFTGRAB_MATCH_64_DEF(29)
DOUBLESHIFTGRAB_MATCH_64_DEF(30)
DOUBLESHIFTGRAB_MATCH_64_DEF(31)

static
const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
{
    // skip the first
    0,
    &doubleshiftgrabMatch_32_1,
    &doubleshiftgrabMatch_32_2,
    &doubleshiftgrabMatch_32_3,
    &doubleshiftgrabMatch_32_4,
    &doubleshiftgrabMatch_32_5,
    &doubleshiftgrabMatch_32_6,
    &doubleshiftgrabMatch_32_7,
    &doubleshiftgrabMatch_32_8,
    &doubleshiftgrabMatch_32_9,
    &doubleshiftgrabMatch_32_10,
    &doubleshiftgrabMatch_32_11,
    &doubleshiftgrabMatch_32_12,
    &doubleshiftgrabMatch_32_13,
    &doubleshiftgrabMatch_32_14,
    &doubleshiftgrabMatch_32_15,
};

static
const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
{
    // skip the first
    0,
    &doubleshiftgrabMatch_64_1,
    &doubleshiftgrabMatch_64_2,
    &doubleshiftgrabMatch_64_3,
    &doubleshiftgrabMatch_64_4,
    &doubleshiftgrabMatch_64_5,
    &doubleshiftgrabMatch_64_6,
    &doubleshiftgrabMatch_64_7,
    &doubleshiftgrabMatch_64_8,
    &doubleshiftgrabMatch_64_9,
    &doubleshiftgrabMatch_64_10,
    &doubleshiftgrabMatch_64_11,
    &doubleshiftgrabMatch_64_12,
    &doubleshiftgrabMatch_64_13,
    &doubleshiftgrabMatch_64_14,
    &doubleshiftgrabMatch_64_15,
    &doubleshiftgrabMatch_64_16,
    &doubleshiftgrabMatch_64_17,
    &doubleshiftgrabMatch_64_18,
    &doubleshiftgrabMatch_64_19,
    &doubleshiftgrabMatch_64_20,
    &doubleshiftgrabMatch_64_21,
    &doubleshiftgrabMatch_64_22,
    &doubleshiftgrabMatch_64_23,
    &doubleshiftgrabMatch_64_24,
    &doubleshiftgrabMatch_64_25,
    &doubleshiftgrabMatch_64_26,
    &doubleshiftgrabMatch_64_27,
    &doubleshiftgrabMatch_64_28,
    &doubleshiftgrabMatch_64_29,
    &doubleshiftgrabMatch_64_30,
    &doubleshiftgrabMatch_64_31,
};

#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */
145 src/nfa/multiaccel_long.h Normal file
@@ -0,0 +1,145 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#ifndef MULTIACCEL_LONG_H_
#define MULTIACCEL_LONG_H_

#include "multiaccel_common.h"

#define LONG_MATCH(len, match_t, match_sz) \
    static really_inline \
    const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
        if (unlikely(z)) { \
            z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
            JOIN(SHIFT, len)(z); \
            return JOIN(match, match_sz)(buf, z); \
        } \
        return NULL; \
    }

#define LONG_MATCH_32_DEF(n) \
        LONG_MATCH(n, u32, 32)
#define LONG_MATCH_64_DEF(n) \
        LONG_MATCH(n, u64a, 64)
#define LONG_MATCH_DEF(n) \
    LONG_MATCH_32_DEF(n) \
    LONG_MATCH_64_DEF(n)

LONG_MATCH_DEF(1)
LONG_MATCH_DEF(2)
LONG_MATCH_DEF(3)
LONG_MATCH_DEF(4)
LONG_MATCH_DEF(5)
LONG_MATCH_DEF(6)
LONG_MATCH_DEF(7)
LONG_MATCH_DEF(8)
LONG_MATCH_DEF(9)
LONG_MATCH_DEF(10)
LONG_MATCH_DEF(11)
LONG_MATCH_DEF(12)
LONG_MATCH_DEF(13)
LONG_MATCH_DEF(14)
LONG_MATCH_DEF(15)
LONG_MATCH_64_DEF(16)
LONG_MATCH_64_DEF(17)
LONG_MATCH_64_DEF(18)
LONG_MATCH_64_DEF(19)
LONG_MATCH_64_DEF(20)
LONG_MATCH_64_DEF(21)
LONG_MATCH_64_DEF(22)
LONG_MATCH_64_DEF(23)
LONG_MATCH_64_DEF(24)
LONG_MATCH_64_DEF(25)
LONG_MATCH_64_DEF(26)
LONG_MATCH_64_DEF(27)
LONG_MATCH_64_DEF(28)
LONG_MATCH_64_DEF(29)
LONG_MATCH_64_DEF(30)
LONG_MATCH_64_DEF(31)

static
const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) =
{
    // skip the first
    0,
    &longMatch_32_1,
    &longMatch_32_2,
    &longMatch_32_3,
    &longMatch_32_4,
    &longMatch_32_5,
    &longMatch_32_6,
    &longMatch_32_7,
    &longMatch_32_8,
    &longMatch_32_9,
    &longMatch_32_10,
    &longMatch_32_11,
    &longMatch_32_12,
    &longMatch_32_13,
    &longMatch_32_14,
    &longMatch_32_15,
};

static
const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) =
{
    // skip the first
    0,
    &longMatch_64_1,
    &longMatch_64_2,
    &longMatch_64_3,
    &longMatch_64_4,
    &longMatch_64_5,
    &longMatch_64_6,
    &longMatch_64_7,
    &longMatch_64_8,
    &longMatch_64_9,
    &longMatch_64_10,
    &longMatch_64_11,
    &longMatch_64_12,
    &longMatch_64_13,
    &longMatch_64_14,
    &longMatch_64_15,
    &longMatch_64_16,
    &longMatch_64_17,
    &longMatch_64_18,
    &longMatch_64_19,
    &longMatch_64_20,
    &longMatch_64_21,
    &longMatch_64_22,
    &longMatch_64_23,
    &longMatch_64_24,
    &longMatch_64_25,
    &longMatch_64_26,
    &longMatch_64_27,
    &longMatch_64_28,
    &longMatch_64_29,
    &longMatch_64_30,
    &longMatch_64_31,
};

#endif /* MULTIACCEL_LONG_H_ */
148 src/nfa/multiaccel_longgrab.h Normal file
@@ -0,0 +1,148 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */
||||
|
||||
#ifndef MULTIACCEL_LONGGRAB_H_
|
||||
#define MULTIACCEL_LONGGRAB_H_
|
||||
|
||||
#include "multiaccel_common.h"
|
||||
|
||||
#define LONGGRAB_MATCH(len, match_t, match_sz) \
|
||||
static really_inline \
|
||||
const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
|
||||
if (unlikely(z)) { \
|
||||
match_t tmp = ~z; \
|
||||
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
||||
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
|
||||
JOIN(SHIFT, len)(z); \
|
||||
VARISHIFT(tmp, z, len); \
|
||||
return JOIN(match, match_sz)(buf, z); \
|
||||
} \
|
||||
return NULL; \
|
||||
}
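
The "grab" variants additionally require a non-matching byte immediately after the run, which is why they fold the negated mask back in with VARISHIFT. A minimal sketch of that refinement on a plain 32-bit mask, assuming set bits mark matching bytes (the helper name is illustrative, not part of the library):

#include <stdint.h>

/* Illustrative only: a run start survives iff len matching bytes are
 * followed by one NON-matching byte (a set bit in the negated mask). */
static uint32_t find_run_then_gap32(uint32_t z, uint32_t len) {
    uint32_t neg = ~z;            /* bits set at non-matching bytes */
    for (uint32_t i = 1; i < len; i++) {
        z &= z >> 1;              /* run of len matching bytes ... */
    }
    z &= neg >> len;              /* ... followed by a non-match */
    return z;
}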

#define LONGGRAB_MATCH_32_DEF(n) \
        LONGGRAB_MATCH(n, u32, 32)
#define LONGGRAB_MATCH_64_DEF(n) \
        LONGGRAB_MATCH(n, u64a, 64)
#define LONGGRAB_MATCH_DEF(n) \
    LONGGRAB_MATCH_32_DEF(n) \
    LONGGRAB_MATCH_64_DEF(n)

LONGGRAB_MATCH_DEF(1)
LONGGRAB_MATCH_DEF(2)
LONGGRAB_MATCH_DEF(3)
LONGGRAB_MATCH_DEF(4)
LONGGRAB_MATCH_DEF(5)
LONGGRAB_MATCH_DEF(6)
LONGGRAB_MATCH_DEF(7)
LONGGRAB_MATCH_DEF(8)
LONGGRAB_MATCH_DEF(9)
LONGGRAB_MATCH_DEF(10)
LONGGRAB_MATCH_DEF(11)
LONGGRAB_MATCH_DEF(12)
LONGGRAB_MATCH_DEF(13)
LONGGRAB_MATCH_DEF(14)
LONGGRAB_MATCH_DEF(15)
LONGGRAB_MATCH_64_DEF(16)
LONGGRAB_MATCH_64_DEF(17)
LONGGRAB_MATCH_64_DEF(18)
LONGGRAB_MATCH_64_DEF(19)
LONGGRAB_MATCH_64_DEF(20)
LONGGRAB_MATCH_64_DEF(21)
LONGGRAB_MATCH_64_DEF(22)
LONGGRAB_MATCH_64_DEF(23)
LONGGRAB_MATCH_64_DEF(24)
LONGGRAB_MATCH_64_DEF(25)
LONGGRAB_MATCH_64_DEF(26)
LONGGRAB_MATCH_64_DEF(27)
LONGGRAB_MATCH_64_DEF(28)
LONGGRAB_MATCH_64_DEF(29)
LONGGRAB_MATCH_64_DEF(30)
LONGGRAB_MATCH_64_DEF(31)

static
const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) =
{
    // skip the first
    0,
    &longgrabMatch_32_1,
    &longgrabMatch_32_2,
    &longgrabMatch_32_3,
    &longgrabMatch_32_4,
    &longgrabMatch_32_5,
    &longgrabMatch_32_6,
    &longgrabMatch_32_7,
    &longgrabMatch_32_8,
    &longgrabMatch_32_9,
    &longgrabMatch_32_10,
    &longgrabMatch_32_11,
    &longgrabMatch_32_12,
    &longgrabMatch_32_13,
    &longgrabMatch_32_14,
    &longgrabMatch_32_15,
};

static
const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) =
{
    // skip the first
    0,
    &longgrabMatch_64_1,
    &longgrabMatch_64_2,
    &longgrabMatch_64_3,
    &longgrabMatch_64_4,
    &longgrabMatch_64_5,
    &longgrabMatch_64_6,
    &longgrabMatch_64_7,
    &longgrabMatch_64_8,
    &longgrabMatch_64_9,
    &longgrabMatch_64_10,
    &longgrabMatch_64_11,
    &longgrabMatch_64_12,
    &longgrabMatch_64_13,
    &longgrabMatch_64_14,
    &longgrabMatch_64_15,
    &longgrabMatch_64_16,
    &longgrabMatch_64_17,
    &longgrabMatch_64_18,
    &longgrabMatch_64_19,
    &longgrabMatch_64_20,
    &longgrabMatch_64_21,
    &longgrabMatch_64_22,
    &longgrabMatch_64_23,
    &longgrabMatch_64_24,
    &longgrabMatch_64_25,
    &longgrabMatch_64_26,
    &longgrabMatch_64_27,
    &longgrabMatch_64_28,
    &longgrabMatch_64_29,
    &longgrabMatch_64_30,
    &longgrabMatch_64_31,
};

#endif /* MULTIACCEL_LONGGRAB_H_ */
145 src/nfa/multiaccel_shift.h Normal file
@@ -0,0 +1,145 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#ifndef MULTIACCEL_SHIFT_H_
#define MULTIACCEL_SHIFT_H_

#include "multiaccel_common.h"

#define SHIFT_MATCH(len, match_t, match_sz) \
    static really_inline \
    const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
        if (unlikely(z)) { \
            z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
            VARISHIFT(z, z, len); \
            return JOIN(match, match_sz)(buf, z); \
        } \
        return NULL; \
    }

#define SHIFT_MATCH_32_DEF(n) \
        SHIFT_MATCH(n, u32, 32)
#define SHIFT_MATCH_64_DEF(n) \
        SHIFT_MATCH(n, u64a, 64)
#define SHIFT_MATCH_DEF(n) \
    SHIFT_MATCH_32_DEF(n) \
    SHIFT_MATCH_64_DEF(n)

SHIFT_MATCH_DEF(1)
SHIFT_MATCH_DEF(2)
SHIFT_MATCH_DEF(3)
SHIFT_MATCH_DEF(4)
SHIFT_MATCH_DEF(5)
SHIFT_MATCH_DEF(6)
SHIFT_MATCH_DEF(7)
SHIFT_MATCH_DEF(8)
SHIFT_MATCH_DEF(9)
SHIFT_MATCH_DEF(10)
SHIFT_MATCH_DEF(11)
SHIFT_MATCH_DEF(12)
SHIFT_MATCH_DEF(13)
SHIFT_MATCH_DEF(14)
SHIFT_MATCH_DEF(15)
SHIFT_MATCH_64_DEF(16)
SHIFT_MATCH_64_DEF(17)
SHIFT_MATCH_64_DEF(18)
SHIFT_MATCH_64_DEF(19)
SHIFT_MATCH_64_DEF(20)
SHIFT_MATCH_64_DEF(21)
SHIFT_MATCH_64_DEF(22)
SHIFT_MATCH_64_DEF(23)
SHIFT_MATCH_64_DEF(24)
SHIFT_MATCH_64_DEF(25)
SHIFT_MATCH_64_DEF(26)
SHIFT_MATCH_64_DEF(27)
SHIFT_MATCH_64_DEF(28)
SHIFT_MATCH_64_DEF(29)
SHIFT_MATCH_64_DEF(30)
SHIFT_MATCH_64_DEF(31)

static
const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) =
{
    // skip the first
    0,
    &shiftMatch_32_1,
    &shiftMatch_32_2,
    &shiftMatch_32_3,
    &shiftMatch_32_4,
    &shiftMatch_32_5,
    &shiftMatch_32_6,
    &shiftMatch_32_7,
    &shiftMatch_32_8,
    &shiftMatch_32_9,
    &shiftMatch_32_10,
    &shiftMatch_32_11,
    &shiftMatch_32_12,
    &shiftMatch_32_13,
    &shiftMatch_32_14,
    &shiftMatch_32_15,
};

static
const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) =
{
    // skip the first
    0,
    &shiftMatch_64_1,
    &shiftMatch_64_2,
    &shiftMatch_64_3,
    &shiftMatch_64_4,
    &shiftMatch_64_5,
    &shiftMatch_64_6,
    &shiftMatch_64_7,
    &shiftMatch_64_8,
    &shiftMatch_64_9,
    &shiftMatch_64_10,
    &shiftMatch_64_11,
    &shiftMatch_64_12,
    &shiftMatch_64_13,
    &shiftMatch_64_14,
    &shiftMatch_64_15,
    &shiftMatch_64_16,
    &shiftMatch_64_17,
    &shiftMatch_64_18,
    &shiftMatch_64_19,
    &shiftMatch_64_20,
    &shiftMatch_64_21,
    &shiftMatch_64_22,
    &shiftMatch_64_23,
    &shiftMatch_64_24,
    &shiftMatch_64_25,
    &shiftMatch_64_26,
    &shiftMatch_64_27,
    &shiftMatch_64_28,
    &shiftMatch_64_29,
    &shiftMatch_64_30,
    &shiftMatch_64_31,
};

#endif /* MULTIACCEL_SHIFT_H_ */
148 src/nfa/multiaccel_shiftgrab.h Normal file
@@ -0,0 +1,148 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#ifndef MULTIACCEL_SHIFTGRAB_H_
#define MULTIACCEL_SHIFTGRAB_H_

#include "multiaccel_common.h"

#define SHIFTGRAB_MATCH(len, match_t, match_sz) \
    static really_inline \
    const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\
        if (unlikely(z)) { \
            match_t tmp = ~z; \
            z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
            tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
            VARISHIFT(z, z, len); \
            VARISHIFT(tmp, z, 1); \
            return JOIN(match, match_sz)(buf, z); \
        } \
        return NULL; \
    }

#define SHIFTGRAB_MATCH_32_DEF(n) \
        SHIFTGRAB_MATCH(n, u32, 32)
#define SHIFTGRAB_MATCH_64_DEF(n) \
        SHIFTGRAB_MATCH(n, u64a, 64)
#define SHIFTGRAB_MATCH_DEF(n) \
    SHIFTGRAB_MATCH_32_DEF(n) \
    SHIFTGRAB_MATCH_64_DEF(n)

SHIFTGRAB_MATCH_DEF(1)
SHIFTGRAB_MATCH_DEF(2)
SHIFTGRAB_MATCH_DEF(3)
SHIFTGRAB_MATCH_DEF(4)
SHIFTGRAB_MATCH_DEF(5)
SHIFTGRAB_MATCH_DEF(6)
SHIFTGRAB_MATCH_DEF(7)
SHIFTGRAB_MATCH_DEF(8)
SHIFTGRAB_MATCH_DEF(9)
SHIFTGRAB_MATCH_DEF(10)
SHIFTGRAB_MATCH_DEF(11)
SHIFTGRAB_MATCH_DEF(12)
SHIFTGRAB_MATCH_DEF(13)
SHIFTGRAB_MATCH_DEF(14)
SHIFTGRAB_MATCH_DEF(15)
SHIFTGRAB_MATCH_64_DEF(16)
SHIFTGRAB_MATCH_64_DEF(17)
SHIFTGRAB_MATCH_64_DEF(18)
SHIFTGRAB_MATCH_64_DEF(19)
SHIFTGRAB_MATCH_64_DEF(20)
SHIFTGRAB_MATCH_64_DEF(21)
SHIFTGRAB_MATCH_64_DEF(22)
SHIFTGRAB_MATCH_64_DEF(23)
SHIFTGRAB_MATCH_64_DEF(24)
SHIFTGRAB_MATCH_64_DEF(25)
SHIFTGRAB_MATCH_64_DEF(26)
SHIFTGRAB_MATCH_64_DEF(27)
SHIFTGRAB_MATCH_64_DEF(28)
SHIFTGRAB_MATCH_64_DEF(29)
SHIFTGRAB_MATCH_64_DEF(30)
SHIFTGRAB_MATCH_64_DEF(31)

static
const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) =
{
    // skip the first
    0,
    &shiftgrabMatch_32_1,
    &shiftgrabMatch_32_2,
    &shiftgrabMatch_32_3,
    &shiftgrabMatch_32_4,
    &shiftgrabMatch_32_5,
    &shiftgrabMatch_32_6,
    &shiftgrabMatch_32_7,
    &shiftgrabMatch_32_8,
    &shiftgrabMatch_32_9,
    &shiftgrabMatch_32_10,
    &shiftgrabMatch_32_11,
    &shiftgrabMatch_32_12,
    &shiftgrabMatch_32_13,
    &shiftgrabMatch_32_14,
    &shiftgrabMatch_32_15,
};

static
const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) =
{
    // skip the first
    0,
    &shiftgrabMatch_64_1,
    &shiftgrabMatch_64_2,
    &shiftgrabMatch_64_3,
    &shiftgrabMatch_64_4,
    &shiftgrabMatch_64_5,
    &shiftgrabMatch_64_6,
    &shiftgrabMatch_64_7,
    &shiftgrabMatch_64_8,
    &shiftgrabMatch_64_9,
    &shiftgrabMatch_64_10,
    &shiftgrabMatch_64_11,
    &shiftgrabMatch_64_12,
    &shiftgrabMatch_64_13,
    &shiftgrabMatch_64_14,
    &shiftgrabMatch_64_15,
    &shiftgrabMatch_64_16,
    &shiftgrabMatch_64_17,
    &shiftgrabMatch_64_18,
    &shiftgrabMatch_64_19,
    &shiftgrabMatch_64_20,
    &shiftgrabMatch_64_21,
    &shiftgrabMatch_64_22,
    &shiftgrabMatch_64_23,
    &shiftgrabMatch_64_24,
    &shiftgrabMatch_64_25,
    &shiftgrabMatch_64_26,
    &shiftgrabMatch_64_27,
    &shiftgrabMatch_64_28,
    &shiftgrabMatch_64_29,
    &shiftgrabMatch_64_30,
    &shiftgrabMatch_64_31,
};

#endif /* MULTIACCEL_SHIFTGRAB_H_ */
114 src/nfa/multishufti.c Normal file
@@ -0,0 +1,114 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

/** \file
 * \brief Multishufti: multibyte version of Shufti.
 *
 * Utilises the SSSE3 pshufb shuffle instruction
 */

#include "config.h"
#include "ue2common.h"

#include "multishufti.h"

#include "multiaccel_common.h"

#if !defined(__AVX2__)

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multishufti_sse.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#else

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multishufti_avx2.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#endif
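
Each MATCH_ALGO/include pair above stamps out a complete copy of the generic scanner under a distinct name prefix, a common C substitute for templates. A minimal sketch of the pattern, assuming an indirect token-pasting macro like the library's JOIN helpers (PASTE and the function bodies here are illustrative only):

#define PASTE_(a, b) a ## b
#define PASTE(a, b) PASTE_(a, b)   /* extra level so MATCH_ALGO expands first */

#define MATCH_ALGO shift_
static int PASTE(MATCH_ALGO, scan)(void) { return 1; } /* defines shift_scan */
#undef MATCH_ALGO

#define MATCH_ALGO long_
static int PASTE(MATCH_ALGO, scan)(void) { return 2; } /* defines long_scan */
#undef MATCH_ALGO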

src/nfa/multishufti.h
@@ -26,46 +26,42 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef SIDECAR_H
-#define SIDECAR_H
+/** \file
+ * \brief Multishufti: multibyte version of Shufti
+ *
+ * Utilises the SSSE3 pshufb shuffle instruction
+ */
+
+#ifndef MULTISHUFTI_H
+#define MULTISHUFTI_H
 
 #include "ue2common.h"
+#include "util/simd_utils.h"
 
 #ifdef __cplusplus
-extern "C" {
+extern "C"
+{
 #endif
 
-struct sidecar;
-struct sidecar_enabled;
-struct sidecar_scratch;
+const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                          const u8 *buf_end, const u8 run_len);
 
-/*
- * Sidecar is guaranteed to return the first match of a given id. However, in
- * various cases later matches may also be returned, as may matches for disabled
- * ids
- */
-typedef void (*SidecarCallback)(u64a offset, u32 id, void *context);
+const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                              const u8 *buf_end, const u8 run_len);
 
-void sidecarExec(const struct sidecar *n, const u8 *buffer, size_t len,
-                 struct sidecar_enabled *enabled,
-                 struct sidecar_scratch *sidecar_scratch,
-                 u64a base_offset, SidecarCallback cb, void *context);
+const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                           const u8 *buf_end, const u8 run_len);
 
-u32 sidecarScratchSize(const struct sidecar *n);
+const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                               const u8 *buf_end, const u8 run_len);
 
-void sidecarEnabledInit(const struct sidecar *n,
-                        struct sidecar_enabled *enabled);
+const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                                 const u8 *buf_end, const u8 run_len,
+                                 const u8 run2_len);
 
-/* Note: sidecar literals need to be reenabled after they match.
- * This is purely because this behaviour is handy for rose.
- * In rose, they always set their roles when fired (never have to postpone due
- * to history) and if cleared their preds are also cleared so a pred would also
- * have to match again before we need to care about them again
- */
-void sidecarEnabledUnion(const struct sidecar *n, struct sidecar_enabled *dest,
-                         const struct sidecar_enabled *src);
-
-#define ID_TERMINATOR (~0U)
+const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+                                     const u8 *buf_end, const u8 run_len,
+                                     const u8 run2_len);
 
 #ifdef __cplusplus
 }
122 src/nfa/multishufti_avx2.h Normal file
@@ -0,0 +1,122 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#include "shufti_common.h"

#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
                                     const u8 *buf, const m256 low4bits,
                                     const m256 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                     , const u8 run_len2
#endif
                                     ) {
    u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z
#ifdef MULTIACCEL_DOUBLE
                                                             , run_len2
#endif
                                                             );
}

const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
                                       const u8 *buf,
                                       const u8 *buf_end, u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                       , u8 run_len2
#endif
                                       ) {
    assert(buf && buf_end);
    assert(buf < buf_end);

    // Slow path for small cases.
    if (buf_end - buf < 32) {
        return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
                             buf, buf_end);
    }

    const m256 zeroes = zeroes256();
    const m256 low4bits = set32x8(0xf);
    const m256 wide_mask_lo = set2x128(mask_lo);
    const m256 wide_mask_hi = set2x128(mask_hi);
    const u8 *rv;

    size_t min = (size_t)buf % 32;
    assert(buf_end - buf >= 32);

    // Preconditioning: most of the time our buffer won't be aligned.
    m256 chars = loadu256(buf);
    rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf,
                                    low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                    , run_len2
#endif
                                    );
    if (rv) {
        return rv;
    }
    buf += (32 - min);

    // Unrolling was here, but it wasn't doing anything but taking up space.
    // Reroll FTW.
    const u8 *last_block = buf_end - 32;
    while (buf < last_block) {
        m256 lchars = load256(buf);
        rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf,
                                        low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                        , run_len2
#endif
                                        );
        if (rv) {
            return rv;
        }
        buf += 32;
    }

    // Use an unaligned load to mop up the last 32 bytes and get an accurate
    // picture to buf_end.
    assert(buf <= buf_end && buf >= buf_end - 32);
    chars = loadu256(buf_end - 32);
    rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32,
                                    low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                    , run_len2
#endif
                                    );
    if (rv) {
        return rv;
    }

    return buf_end;
}
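
The scan follows a head/body/tail shape: one unaligned load covers the (likely misaligned) start, aligned 32-byte loads cover the body, and one final unaligned load ends exactly at buf_end, with the overlapping regions simply scanned twice. A minimal standalone sketch of the same shape, assuming at least 32 bytes of input (scan_block32 is an illustrative stand-in for the SIMD block test, not a library function):

#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-in for the SIMD block test: first NUL in 32 bytes. */
static const uint8_t *scan_block32(const uint8_t *p) {
    for (int i = 0; i < 32; i++) {
        if (p[i] == 0) {
            return p + i;
        }
    }
    return NULL;
}

/* Requires buf_end - buf >= 32; returns a match pointer or NULL. */
static const uint8_t *scan_all(const uint8_t *buf, const uint8_t *buf_end) {
    const uint8_t *rv;

    /* Head: unaligned block, may overlap the first aligned block. */
    if ((rv = scan_block32(buf)) != NULL) {
        return rv;
    }
    buf += 32 - (size_t)buf % 32;

    /* Body: aligned blocks only. */
    const uint8_t *last_block = buf_end - 32;
    while (buf < last_block) {
        if ((rv = scan_block32(buf)) != NULL) {
            return rv;
        }
        buf += 32;
    }

    /* Tail: unaligned block ending exactly at buf_end; overlaps the body. */
    return scan_block32(buf_end - 32);
}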

266 src/nfa/multishufti_sse.h Normal file
@@ -0,0 +1,266 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#include "shufti_common.h"

#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

/* Normal SSSE3 shufti */

static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars,
                                     const u8 *buf, const m128 low4bits,
                                     const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                     , const u8 run_len2
#endif
                                     ) {
    // negate first 16 bits
    u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF;
    return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z
#ifdef MULTIACCEL_DOUBLE
                                                             , run_len2
#endif
                                                             );
}

/*
 * 16-byte pipeline, for smaller scans
 */
static
const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi,
                                             const u8 *buf, const u8 *buf_end,
                                             const m128 low4bits,
                                             const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                             , const u8 run_len2
#endif
                                             ) {
    const u8 *ptr, *last_buf;
    u32 last_res;

    // pipeline prologue: scan first 16 bytes
    m128 data = load128(buf);
    u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;
    last_buf = buf;
    last_res = z;
    buf += 16;

    // now, start the pipeline!
    assert((size_t)buf % 16 == 0);
    for (; buf + 15 < buf_end; buf += 16) {
        // scan more data
        data = load128(buf);
        z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF;

        // do a comparison on previous result
        ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
              (last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
               , run_len2
#endif
               );
        if (unlikely(ptr)) {
            return ptr;
        }
        last_buf = buf;
        last_res = z;
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    // epilogue: compare final results
    ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])
          (last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
           , run_len2
#endif
           );
    if (unlikely(ptr)) {
        return ptr;
    }

    return NULL;
}
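
The pipeline interleaves two independent stages, computing the match mask for block i while the mask saved from block i-1 is being tested, so the shuffle work and the run-matching work can overlap in flight. A minimal standalone sketch of the same two-stage structure (compute_mask and test_mask are illustrative stand-ins for the SIMD block scan and the match_funcs call; the predicate is arbitrary):

#include <stddef.h>

/* Illustrative stand-in for the SIMD scan: bit i set iff block[i] == 'a'. */
static unsigned compute_mask(const unsigned char *block) {
    unsigned m = 0;
    for (int i = 0; i < 16; i++) {
        m |= (unsigned)(block[i] == 'a') << i;
    }
    return m;
}

/* Illustrative stand-in for the match_funcs call: first set bit, if any. */
static const unsigned char *test_mask(const unsigned char *block, unsigned m) {
    for (int i = 0; i < 16; i++) {
        if (m & (1u << i)) {
            return block + i;
        }
    }
    return NULL;
}

/* Two-stage pipeline: compute the mask for block i, then test the mask
 * saved from block i-1, mirroring shuftiPipeline16 above. */
static const unsigned char *pipelined_scan(const unsigned char *buf,
                                           size_t nblocks) {
    unsigned last_res = compute_mask(buf);     /* prologue */
    const unsigned char *last_buf = buf;
    for (size_t i = 1; i < nblocks; i++) {
        unsigned res = compute_mask(buf + i * 16);
        const unsigned char *ptr = test_mask(last_buf, last_res);
        if (ptr) {
            return ptr;
        }
        last_buf = buf + i * 16;
        last_res = res;
    }
    return test_mask(last_buf, last_res);      /* epilogue */
}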

/*
 * 32-byte pipeline, for bigger scans
 */
static
const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi,
                                             const u8 *buf, const u8 *buf_end,
                                             const m128 low4bits,
                                             const m128 zeroes, const u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                             , const u8 run_len2
#endif
                                             ) {
    const u8 *ptr, *last_buf;
    u32 res;

    // pipeline prologue: scan first 32 bytes
    m128 data1 = load128(buf);
    u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
    m128 data2 = load128(buf + 16);
    u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;

    // store the results
    u32 last_res = z1 | (z2 << 16);
    last_buf = buf;
    buf += 32;

    // now, start the pipeline!
    assert((size_t)buf % 16 == 0);
    for (; buf + 31 < buf_end; buf += 32) {
        // scan more data
        data1 = load128(buf);
        z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF;
        data2 = load128(buf + 16);
        z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF;
        res = z1 | (z2 << 16);

        // do a comparison on previous result
        ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
              (last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
               , run_len2
#endif
               );
        if (unlikely(ptr)) {
            return ptr;
        }
        last_res = res;
        last_buf = buf;
    }

    // epilogue: compare final results
    ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])
          (last_buf, last_res
#ifdef MULTIACCEL_DOUBLE
           , run_len2
#endif
           );
    if (unlikely(ptr)) {
        return ptr;
    }

    // if we still have some data left, scan it too
    for (; buf + 15 < buf_end; buf += 16) {
        m128 chars = load128(buf);
        ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
                                         low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                         , run_len2
#endif
                                         );
        if (unlikely(ptr)) {
            return ptr;
        }
    }
    assert(buf <= buf_end && buf >= buf_end - 16);

    return NULL;
}

const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi,
                                       const u8 *buf,
                                       const u8 *buf_end, u8 run_len
#ifdef MULTIACCEL_DOUBLE
                                       , u8 run_len2
#endif
                                       ) {
    assert(buf && buf_end);
    assert(buf < buf_end);

    // Slow path for small cases.
    if (buf_end - buf < 16) {
        return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
                             buf, buf_end);
    }

    const m128 zeroes = zeroes128();
    const m128 low4bits = _mm_set1_epi8(0xf);
    const u8 *rv;

    size_t min = (size_t)buf % 16;
    assert(buf_end - buf >= 16);

    // Preconditioning: most of the time our buffer won't be aligned.
    m128 chars = loadu128(buf);
    rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf,
                                    low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                    , run_len2
#endif
                                    );
    if (rv) {
        return rv;
    }
    buf += (16 - min);

    // if we have enough data, run bigger pipeline; otherwise run smaller one
    if (buf_end - buf >= 128) {
        rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi,
                                                buf, buf_end, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                                , run_len2
#endif
                                                );
        if (unlikely(rv)) {
            return rv;
        }
    } else if (buf_end - buf >= 16) {
        rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi,
                                                buf, buf_end, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                                , run_len2
#endif
                                                );
        if (unlikely(rv)) {
            return rv;
        }
    }

    // Use an unaligned load to mop up the last 16 bytes and get an accurate
    // picture to buf_end.
    chars = loadu128(buf_end - 16);
    rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars,
                                    buf_end - 16, low4bits, zeroes, run_len
#ifdef MULTIACCEL_DOUBLE
                                    , run_len2
#endif
                                    );
    if (rv) {
        return rv;
    }

    return buf_end;
}
111 src/nfa/multitruffle.c Normal file
@@ -0,0 +1,111 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#include "config.h"
#include "ue2common.h"

#include "multitruffle.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

#include "multiaccel_common.h"

#if !defined(__AVX2__)

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multitruffle_sse.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#else

#define MATCH_ALGO long_
#include "multiaccel_long.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO longgrab_
#include "multiaccel_longgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shift_
#include "multiaccel_shift.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO shiftgrab_
#include "multiaccel_shiftgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#define MULTIACCEL_DOUBLE

#define MATCH_ALGO doubleshift_
#include "multiaccel_doubleshift.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#define MATCH_ALGO doubleshiftgrab_
#include "multiaccel_doubleshiftgrab.h"
#include "multitruffle_avx2.h"
#undef MATCH_ALGO

#undef MULTIACCEL_DOUBLE

#endif
73 src/nfa/multitruffle.h Normal file
@@ -0,0 +1,73 @@
/* Copyright (c) 2015, Intel Corporation. (BSD licence header as above.) */

#ifndef MULTITRUFFLE_H
#define MULTITRUFFLE_H

/** \file
 * \brief Multitruffle: multibyte version of Truffle.
 *
 * Utilises the SSSE3 pshufb shuffle instruction
 */

#include "util/simd_types.h"

#ifdef __cplusplus
extern "C"
{
#endif

const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
                           const u8 *buf, const u8 *buf_end, const u8 run_len);

const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
                               const u8 *buf, const u8 *buf_end, const u8 run_len);

const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
                            const u8 *buf, const u8 *buf_end, const u8 run_len);

const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
                                m128 shuf_mask_lo_highset, const u8 *buf,
                                const u8 *buf_end, const u8 run_len);

const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear,
                                  m128 shuf_mask_lo_highset, const u8 *buf,
                                  const u8 *buf_end, const u8 run_len,
                                  const u8 run2_len);

const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear,
                                      m128 shuf_mask_lo_highset, const u8 *buf,
                                      const u8 *buf_end, const u8 run_len,
                                      const u8 run2_len);

#ifdef __cplusplus
}
#endif

#endif /* MULTITRUFFLE_H */
Some files were not shown because too many files have changed in this diff.