mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop into master
This commit is contained in:
commit
a00bd3167c
49
CHANGELOG.md
49
CHANGELOG.md
@ -2,9 +2,52 @@
|
|||||||
|
|
||||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [4.5.0] 2017-06-09
|
||||||
|
- New API feature: approximate matching using the "edit distance" extended
|
||||||
|
parameter. This allows the user to request all matches that are a given edit
|
||||||
|
distance from an exact match for a pattern.
|
||||||
|
- Initial support for Intel(R) Advanced Vector Extensions 512 (Intel(R)
|
||||||
|
AVX-512), disabled by default. To enable it, pass `-DBUILD_AVX512=1` to
|
||||||
|
`cmake`.
|
||||||
|
- Major compile time improvements in many subsystems, reducing compile time
|
||||||
|
significantly for many large pattern sets.
|
||||||
|
- Internal reworking of literal matchers to operate on literals of at
|
||||||
|
most eight characters, with subsequent confirmation done in the Rose
|
||||||
|
interpreter. This reduces complexity and bytecode size and improves
|
||||||
|
performance for many pattern sets.
|
||||||
|
- Improve performance of the FDR literal matcher front end.
|
||||||
|
- Improve bucket assignment and other heuristics governing the FDR literal
|
||||||
|
matcher.
|
||||||
|
- Improve optimisation passes that take advantage of extended parameter
|
||||||
|
constraints (`min_offset`, etc).
|
||||||
|
- Introduce further lookaround specialisations to improve scanning performance.
|
||||||
|
- Optimise Rose interpreter construction to reduce the length of programs
|
||||||
|
generated in some situations.
|
||||||
|
- Remove the old "Rose" pattern decomposition analysis pass in favour of the
|
||||||
|
new "Violet" pass introduced in Hyperscan 4.3.0.
|
||||||
|
- In streaming mode, allow exhaustion (where the stream can no longer produce
|
||||||
|
matches) to be detected in more situations, improving scanning performance.
|
||||||
|
- Improve parsing of control verbs (such as `(*UTF8)`) that can only occur at
|
||||||
|
the beginning of the pattern. Combinations of supported verbs in any order
|
||||||
|
are now permitted.
|
||||||
|
- Update version of PCRE used by testing tools as a syntax and semantic
|
||||||
|
reference to PCRE 8.40.
|
||||||
|
- Tuning support for Intel(R) microarchitecture code names Skylake, Skylake
|
||||||
|
Server, Goldmont.
|
||||||
|
- CMake: when building a native build with a version of GCC that doesn't
|
||||||
|
recognise the host CPU, tune for the microarch selected by
|
||||||
|
`-march=native`.
|
||||||
|
- CMake: don't fail if SQLite (which is only required to build the `hsbench`
|
||||||
|
tool) is not present.
|
||||||
|
- CMake: detect libc++ directly and use that to inform the Boost version
|
||||||
|
requirement.
|
||||||
|
- Bugfix for issue #51: make the fat runtime build wrapper less fragile.
|
||||||
|
- Bugfix for issues #46, #52: use `sqlite3_errmsg()` to allow SQLite 3.6.x to
|
||||||
|
be used. Thanks to @EaseTheWorld for the PR.
|
||||||
|
|
||||||
## [4.4.1] 2017-02-28
|
## [4.4.1] 2017-02-28
|
||||||
- Bugfixes to fix issues where stale data was being referenced in scratch
|
- Bugfixes to fix issues where stale data was being referenced in scratch
|
||||||
memory. In particular this may have resulted in hs_close_stream()
|
memory. In particular this may have resulted in `hs_close_stream()`
|
||||||
referencing data from other previously scanned streams. This may result in
|
referencing data from other previously scanned streams. This may result in
|
||||||
incorrect matches being reported.
|
incorrect matches being reported.
|
||||||
|
|
||||||
@ -142,9 +185,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
|
|||||||
supplied with a NULL scratch pointer if no matches are required. This is in
|
supplied with a NULL scratch pointer if no matches are required. This is in
|
||||||
line with the behaviour of `hs_close_stream()`.
|
line with the behaviour of `hs_close_stream()`.
|
||||||
- Disallow bounded repeats with a very large minimum repeat but no maximum,
|
- Disallow bounded repeats with a very large minimum repeat but no maximum,
|
||||||
i.e. {
|
i.e. {N,} for very large N.
|
||||||
N,
|
|
||||||
} for very large N.
|
|
||||||
- Reduce compile memory usage in literal set expansion for some large cases.
|
- Reduce compile memory usage in literal set expansion for some large cases.
|
||||||
|
|
||||||
## [4.0.0] 2015-10-20
|
## [4.0.0] 2015-10-20
|
||||||
|
318
CMakeLists.txt
318
CMakeLists.txt
@ -1,20 +1,22 @@
|
|||||||
cmake_minimum_required (VERSION 2.8.11)
|
cmake_minimum_required (VERSION 2.8.11)
|
||||||
project (Hyperscan C CXX)
|
project (hyperscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 4)
|
set (HS_MAJOR_VERSION 4)
|
||||||
set (HS_MINOR_VERSION 4)
|
set (HS_MINOR_VERSION 5)
|
||||||
set (HS_PATCH_VERSION 1)
|
set (HS_PATCH_VERSION 0)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
include(CheckCCompilerFlag)
|
include(CheckCCompilerFlag)
|
||||||
include(CheckCXXCompilerFlag)
|
include(CheckCXXCompilerFlag)
|
||||||
|
include(CheckCXXSymbolExists)
|
||||||
INCLUDE (CheckFunctionExists)
|
INCLUDE (CheckFunctionExists)
|
||||||
INCLUDE (CheckIncludeFiles)
|
INCLUDE (CheckIncludeFiles)
|
||||||
INCLUDE (CheckIncludeFileCXX)
|
INCLUDE (CheckIncludeFileCXX)
|
||||||
INCLUDE (CheckLibraryExists)
|
INCLUDE (CheckLibraryExists)
|
||||||
INCLUDE (CheckSymbolExists)
|
INCLUDE (CheckSymbolExists)
|
||||||
include (CMakeDependentOption)
|
include (CMakeDependentOption)
|
||||||
|
include (GNUInstallDirs)
|
||||||
include (${CMAKE_MODULE_PATH}/platform.cmake)
|
include (${CMAKE_MODULE_PATH}/platform.cmake)
|
||||||
include (${CMAKE_MODULE_PATH}/ragel.cmake)
|
include (${CMAKE_MODULE_PATH}/ragel.cmake)
|
||||||
|
|
||||||
@ -36,6 +38,7 @@ endif()
|
|||||||
|
|
||||||
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
|
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
|
||||||
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
|
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
|
||||||
|
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
|
||||||
|
|
||||||
# First for the generic no-config case
|
# First for the generic no-config case
|
||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}")
|
||||||
@ -59,31 +62,6 @@ include_directories(${PROJECT_SOURCE_DIR}/src)
|
|||||||
include_directories(${PROJECT_BINARY_DIR})
|
include_directories(${PROJECT_BINARY_DIR})
|
||||||
include_directories(SYSTEM include)
|
include_directories(SYSTEM include)
|
||||||
|
|
||||||
set(BOOST_USE_STATIC_LIBS OFF)
|
|
||||||
set(BOOST_USE_MULTITHREADED OFF)
|
|
||||||
set(BOOST_USE_STATIC_RUNTIME OFF)
|
|
||||||
if (CMAKE_SYSTEM_NAME MATCHES "Darwin"
|
|
||||||
OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD"
|
|
||||||
AND CMAKE_C_COMPILER_ID MATCHES "Clang"))
|
|
||||||
# we need a more recent boost for libc++ used by clang on OSX and FreeBSD
|
|
||||||
set(BOOST_MINVERSION 1.61.0)
|
|
||||||
else ()
|
|
||||||
set(BOOST_MINVERSION 1.57.0)
|
|
||||||
endif ()
|
|
||||||
set(BOOST_NO_BOOST_CMAKE ON)
|
|
||||||
|
|
||||||
# first check for Boost installed on the system
|
|
||||||
find_package(Boost ${BOOST_MINVERSION})
|
|
||||||
if(NOT Boost_FOUND)
|
|
||||||
# we might have boost in tree, so provide a hint and try again
|
|
||||||
message(STATUS "trying include dir for boost")
|
|
||||||
set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
|
|
||||||
find_package(Boost ${BOOST_MINVERSION})
|
|
||||||
if(NOT Boost_FOUND)
|
|
||||||
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
include (${CMAKE_MODULE_PATH}/boost.cmake)
|
include (${CMAKE_MODULE_PATH}/boost.cmake)
|
||||||
|
|
||||||
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
|
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
|
||||||
@ -132,6 +110,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (NOT BUILD_SHARED_LIBS)
|
||||||
|
# build static libs
|
||||||
|
set(BUILD_STATIC_LIBS ON)
|
||||||
|
mark_as_advanced(BUILD_STATIC_LIBS)
|
||||||
|
endif ()
|
||||||
|
|
||||||
#for config
|
#for config
|
||||||
if (OPTIMISE)
|
if (OPTIMISE)
|
||||||
set(HS_OPTIMIZE ON)
|
set(HS_OPTIMIZE ON)
|
||||||
@ -141,6 +125,9 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r
|
|||||||
|
|
||||||
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
|
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
|
||||||
|
|
||||||
|
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
|
||||||
|
OFF)
|
||||||
|
|
||||||
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
||||||
|
|
||||||
# TODO: per platform config files?
|
# TODO: per platform config files?
|
||||||
@ -148,16 +135,21 @@ option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC
|
|||||||
# TODO: windows generator on cmake always uses msvc, even if we plan to build with icc
|
# TODO: windows generator on cmake always uses msvc, even if we plan to build with icc
|
||||||
if(MSVC OR MSVC_IDE)
|
if(MSVC OR MSVC_IDE)
|
||||||
message(STATUS "Building for Windows")
|
message(STATUS "Building for Windows")
|
||||||
|
|
||||||
if (MSVC_VERSION LESS 1700)
|
if (MSVC_VERSION LESS 1700)
|
||||||
message(FATAL_ERROR "The project requires C++11 features.")
|
message(FATAL_ERROR "The project requires C++11 features.")
|
||||||
else()
|
else()
|
||||||
if (WINDOWS_ICC)
|
if (WINDOWS_ICC)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
|
set(ARCH_C_FLAGS "/QxHost")
|
||||||
|
set(ARCH_CXX_FLAGS "/QxHost")
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||||
else()
|
else()
|
||||||
#TODO: don't hardcode arch
|
# todo: change these as required
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267")
|
set(ARCH_C_FLAGS "/arch:AVX2")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
set(ARCH_CXX_FLAGS "/arch:AVX2")
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||||
endif()
|
endif()
|
||||||
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||||
string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
|
string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
|
||||||
@ -166,32 +158,58 @@ if(MSVC OR MSVC_IDE)
|
|||||||
set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}")
|
set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}")
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}")
|
set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
# flags only used to build hs libs
|
||||||
|
set(HS_C_FLAGS "/Gv")
|
||||||
|
set(HS_CXX_FLAGS "/Gv")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
else()
|
else()
|
||||||
|
|
||||||
# compiler version checks TODO: test more compilers
|
|
||||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
|
||||||
set (GNUCXX_MINVER "4.8.1")
|
|
||||||
exec_program(${CMAKE_CXX_COMPILER}
|
|
||||||
ARGS ${CMAKE_CXX_COMPILER_ARG1} --version
|
|
||||||
OUTPUT_VARIABLE _GXX_OUTPUT)
|
|
||||||
# is the following too fragile?
|
|
||||||
string(REGEX REPLACE ".* ([0-9]\\.[0-9](\\.[0-9])?)( |\n).*" "\\1"
|
|
||||||
GNUCXX_VERSION "${_GXX_OUTPUT}")
|
|
||||||
message(STATUS "g++ version ${GNUCXX_VERSION}")
|
|
||||||
if (GNUCXX_VERSION VERSION_LESS ${GNUCXX_MINVER})
|
|
||||||
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
|
|
||||||
endif()
|
|
||||||
unset(_GXX_OUTPUT)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# remove CMake's idea of optimisation
|
# remove CMake's idea of optimisation
|
||||||
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
|
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
|
||||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
|
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
|
||||||
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
|
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
if (CMAKE_COMPILER_IS_GNUCC)
|
||||||
|
message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}")
|
||||||
|
# If gcc doesn't recognise the host cpu, then mtune=native becomes
|
||||||
|
# generic, which isn't very good in some cases. march=native looks at
|
||||||
|
# cpuid info and then chooses the best microarch it can (and replaces
|
||||||
|
# the flag), so use that for tune.
|
||||||
|
|
||||||
|
# arg1 might exist if using ccache
|
||||||
|
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
|
||||||
|
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||||
|
OUTPUT_VARIABLE _GCC_OUTPUT)
|
||||||
|
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
|
||||||
|
GNUCC_ARCH "${_GCC_OUTPUT}")
|
||||||
|
|
||||||
|
# test the parsed flag
|
||||||
|
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
|
||||||
|
OUTPUT_QUIET ERROR_QUIET
|
||||||
|
INPUT_FILE /dev/null
|
||||||
|
RESULT_VARIABLE GNUCC_TUNE_TEST)
|
||||||
|
if (NOT GNUCC_TUNE_TEST EQUAL 0)
|
||||||
|
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
|
||||||
|
endif()
|
||||||
|
set(TUNE_FLAG ${GNUCC_ARCH})
|
||||||
|
else ()
|
||||||
|
set(TUNE_FLAG native)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# compiler version checks TODO: test more compilers
|
||||||
|
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||||
|
set(GNUCXX_MINVER "4.8.1")
|
||||||
|
message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}")
|
||||||
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER)
|
||||||
|
message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(OPTIMISE)
|
if(OPTIMISE)
|
||||||
set(OPT_C_FLAG "-O3")
|
set(OPT_C_FLAG "-O3")
|
||||||
set(OPT_CXX_FLAG "-O2")
|
set(OPT_CXX_FLAG "-O2")
|
||||||
@ -216,12 +234,12 @@ else()
|
|||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
|
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
|
||||||
set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native")
|
set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*)
|
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
|
||||||
set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native")
|
set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCC)
|
if(CMAKE_COMPILER_IS_GNUCC)
|
||||||
@ -244,6 +262,11 @@ else()
|
|||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||||
|
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
||||||
|
else ()
|
||||||
|
set(SKYLAKE_FLAG "-march=skylake-avx512")
|
||||||
|
endif ()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
|
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
|
||||||
@ -259,6 +282,9 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
|
|||||||
CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN)
|
CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN)
|
||||||
CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN)
|
CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN)
|
||||||
|
|
||||||
|
# are we using libc++
|
||||||
|
CHECK_CXX_SYMBOL_EXISTS(_LIBCPP_VERSION ciso646 HAVE_LIBCPP)
|
||||||
|
|
||||||
if (RELEASE_BUILD)
|
if (RELEASE_BUILD)
|
||||||
if (HAS_C_HIDDEN)
|
if (HAS_C_HIDDEN)
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden")
|
||||||
@ -294,13 +320,10 @@ endif ()
|
|||||||
|
|
||||||
include (${CMAKE_MODULE_PATH}/arch.cmake)
|
include (${CMAKE_MODULE_PATH}/arch.cmake)
|
||||||
|
|
||||||
if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3)
|
|
||||||
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
# testing a builtin takes a little more work
|
# testing a builtin takes a little more work
|
||||||
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
|
||||||
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
||||||
|
CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
|
||||||
|
|
||||||
if (NOT WIN32)
|
if (NOT WIN32)
|
||||||
set(C_FLAGS_TO_CHECK
|
set(C_FLAGS_TO_CHECK
|
||||||
@ -404,13 +427,13 @@ endif()
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT FAT_RUNTIME)
|
if (NOT FAT_RUNTIME)
|
||||||
message(STATUS "Building for current host CPU")
|
message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}")
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
|
||||||
else()
|
else()
|
||||||
message(STATUS "Building runtime for multiple microarchitectures")
|
message(STATUS "Building runtime for multiple microarchitectures")
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(util)
|
add_subdirectory(util)
|
||||||
@ -435,19 +458,18 @@ if (NOT WIN32)
|
|||||||
|
|
||||||
configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars
|
configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars
|
||||||
install(FILES ${CMAKE_BINARY_DIR}/libhs.pc
|
install(FILES ${CMAKE_BINARY_DIR}/libhs.pc
|
||||||
DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig")
|
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# only set these after all tests are done
|
# only set these after all tests are done
|
||||||
if (NOT FAT_RUNTIME)
|
if (NOT FAT_RUNTIME)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${HS_C_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS} ${HS_CXX_FLAGS}")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
if(NOT WIN32)
|
if(NOT WIN32)
|
||||||
set(RAGEL_C_FLAGS "-Wno-unused")
|
set(RAGEL_C_FLAGS "-Wno-unused")
|
||||||
endif()
|
endif()
|
||||||
@ -459,13 +481,20 @@ set_source_files_properties(
|
|||||||
|
|
||||||
ragelmaker(src/parser/Parser.rl)
|
ragelmaker(src/parser/Parser.rl)
|
||||||
|
|
||||||
|
set_source_files_properties(
|
||||||
|
${CMAKE_BINARY_DIR}/src/parser/control_verbs.cpp
|
||||||
|
PROPERTIES
|
||||||
|
COMPILE_FLAGS "${RAGEL_C_FLAGS}")
|
||||||
|
|
||||||
|
ragelmaker(src/parser/control_verbs.rl)
|
||||||
|
|
||||||
SET(hs_HEADERS
|
SET(hs_HEADERS
|
||||||
src/hs.h
|
src/hs.h
|
||||||
src/hs_common.h
|
src/hs_common.h
|
||||||
src/hs_compile.h
|
src/hs_compile.h
|
||||||
src/hs_runtime.h
|
src/hs_runtime.h
|
||||||
)
|
)
|
||||||
install(FILES ${hs_HEADERS} DESTINATION include/hs)
|
install(FILES ${hs_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/hs")
|
||||||
|
|
||||||
set (hs_exec_common_SRCS
|
set (hs_exec_common_SRCS
|
||||||
src/alloc.c
|
src/alloc.c
|
||||||
@ -541,25 +570,6 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/mpv.h
|
src/nfa/mpv.h
|
||||||
src/nfa/mpv.c
|
src/nfa/mpv.c
|
||||||
src/nfa/mpv_internal.h
|
src/nfa/mpv_internal.h
|
||||||
src/nfa/multiaccel_common.h
|
|
||||||
src/nfa/multiaccel_doubleshift.h
|
|
||||||
src/nfa/multiaccel_doubleshiftgrab.h
|
|
||||||
src/nfa/multiaccel_long.h
|
|
||||||
src/nfa/multiaccel_longgrab.h
|
|
||||||
src/nfa/multiaccel_shift.h
|
|
||||||
src/nfa/multiaccel_shiftgrab.h
|
|
||||||
src/nfa/multishufti.c
|
|
||||||
src/nfa/multishufti_avx2.h
|
|
||||||
src/nfa/multishufti_sse.h
|
|
||||||
src/nfa/multishufti.h
|
|
||||||
src/nfa/multitruffle.c
|
|
||||||
src/nfa/multitruffle_avx2.h
|
|
||||||
src/nfa/multitruffle_sse.h
|
|
||||||
src/nfa/multitruffle.h
|
|
||||||
src/nfa/multivermicelli.c
|
|
||||||
src/nfa/multivermicelli.h
|
|
||||||
src/nfa/multivermicelli_sse.h
|
|
||||||
src/nfa/multivermicelli_avx2.h
|
|
||||||
src/nfa/nfa_api.h
|
src/nfa/nfa_api.h
|
||||||
src/nfa/nfa_api_dispatch.c
|
src/nfa/nfa_api_dispatch.c
|
||||||
src/nfa/nfa_internal.h
|
src/nfa/nfa_internal.h
|
||||||
@ -573,13 +583,11 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/sheng_impl.h
|
src/nfa/sheng_impl.h
|
||||||
src/nfa/sheng_impl4.h
|
src/nfa/sheng_impl4.h
|
||||||
src/nfa/sheng_internal.h
|
src/nfa/sheng_internal.h
|
||||||
src/nfa/shufti_common.h
|
|
||||||
src/nfa/shufti.c
|
src/nfa/shufti.c
|
||||||
src/nfa/shufti.h
|
src/nfa/shufti.h
|
||||||
src/nfa/tamarama.c
|
src/nfa/tamarama.c
|
||||||
src/nfa/tamarama.h
|
src/nfa/tamarama.h
|
||||||
src/nfa/tamarama_internal.h
|
src/nfa/tamarama_internal.h
|
||||||
src/nfa/truffle_common.h
|
|
||||||
src/nfa/truffle.c
|
src/nfa/truffle.c
|
||||||
src/nfa/truffle.h
|
src/nfa/truffle.h
|
||||||
src/nfa/vermicelli.h
|
src/nfa/vermicelli.h
|
||||||
@ -662,6 +670,7 @@ SET (hs_SRCS
|
|||||||
src/compiler/compiler.h
|
src/compiler/compiler.h
|
||||||
src/compiler/error.cpp
|
src/compiler/error.cpp
|
||||||
src/compiler/error.h
|
src/compiler/error.h
|
||||||
|
src/compiler/expression_info.h
|
||||||
src/fdr/engine_description.cpp
|
src/fdr/engine_description.cpp
|
||||||
src/fdr/engine_description.h
|
src/fdr/engine_description.h
|
||||||
src/fdr/fdr_compile.cpp
|
src/fdr/fdr_compile.cpp
|
||||||
@ -719,8 +728,6 @@ SET (hs_SRCS
|
|||||||
src/nfa/mpv_internal.h
|
src/nfa/mpv_internal.h
|
||||||
src/nfa/mpvcompile.cpp
|
src/nfa/mpvcompile.cpp
|
||||||
src/nfa/mpvcompile.h
|
src/nfa/mpvcompile.h
|
||||||
src/nfa/multiaccel_compilehelper.cpp
|
|
||||||
src/nfa/multiaccel_compilehelper.h
|
|
||||||
src/nfa/nfa_api.h
|
src/nfa/nfa_api.h
|
||||||
src/nfa/nfa_api_queue.h
|
src/nfa/nfa_api_queue.h
|
||||||
src/nfa/nfa_api_util.h
|
src/nfa/nfa_api_util.h
|
||||||
@ -775,6 +782,8 @@ SET (hs_SRCS
|
|||||||
src/nfagraph/ng_extparam.h
|
src/nfagraph/ng_extparam.h
|
||||||
src/nfagraph/ng_fixed_width.cpp
|
src/nfagraph/ng_fixed_width.cpp
|
||||||
src/nfagraph/ng_fixed_width.h
|
src/nfagraph/ng_fixed_width.h
|
||||||
|
src/nfagraph/ng_fuzzy.cpp
|
||||||
|
src/nfagraph/ng_fuzzy.h
|
||||||
src/nfagraph/ng_haig.cpp
|
src/nfagraph/ng_haig.cpp
|
||||||
src/nfagraph/ng_haig.h
|
src/nfagraph/ng_haig.h
|
||||||
src/nfagraph/ng_holder.cpp
|
src/nfagraph/ng_holder.cpp
|
||||||
@ -820,8 +829,6 @@ SET (hs_SRCS
|
|||||||
src/nfagraph/ng_restructuring.h
|
src/nfagraph/ng_restructuring.h
|
||||||
src/nfagraph/ng_revacc.cpp
|
src/nfagraph/ng_revacc.cpp
|
||||||
src/nfagraph/ng_revacc.h
|
src/nfagraph/ng_revacc.h
|
||||||
src/nfagraph/ng_rose.cpp
|
|
||||||
src/nfagraph/ng_rose.h
|
|
||||||
src/nfagraph/ng_sep.cpp
|
src/nfagraph/ng_sep.cpp
|
||||||
src/nfagraph/ng_sep.h
|
src/nfagraph/ng_sep.h
|
||||||
src/nfagraph/ng_small_literal_set.cpp
|
src/nfagraph/ng_small_literal_set.cpp
|
||||||
@ -893,6 +900,8 @@ SET (hs_SRCS
|
|||||||
src/parser/buildstate.h
|
src/parser/buildstate.h
|
||||||
src/parser/check_refs.cpp
|
src/parser/check_refs.cpp
|
||||||
src/parser/check_refs.h
|
src/parser/check_refs.h
|
||||||
|
src/parser/control_verbs.cpp
|
||||||
|
src/parser/control_verbs.h
|
||||||
src/parser/parse_error.cpp
|
src/parser/parse_error.cpp
|
||||||
src/parser/parse_error.h
|
src/parser/parse_error.h
|
||||||
src/parser/parser_util.cpp
|
src/parser/parser_util.cpp
|
||||||
@ -928,6 +937,8 @@ SET (hs_SRCS
|
|||||||
src/rose/rose_build_compile.cpp
|
src/rose/rose_build_compile.cpp
|
||||||
src/rose/rose_build_convert.cpp
|
src/rose/rose_build_convert.cpp
|
||||||
src/rose/rose_build_convert.h
|
src/rose/rose_build_convert.h
|
||||||
|
src/rose/rose_build_dedupe.cpp
|
||||||
|
src/rose/rose_build_engine_blob.cpp
|
||||||
src/rose/rose_build_engine_blob.h
|
src/rose/rose_build_engine_blob.h
|
||||||
src/rose/rose_build_exclusive.cpp
|
src/rose/rose_build_exclusive.cpp
|
||||||
src/rose/rose_build_exclusive.h
|
src/rose/rose_build_exclusive.h
|
||||||
@ -936,6 +947,10 @@ SET (hs_SRCS
|
|||||||
src/rose/rose_build_impl.h
|
src/rose/rose_build_impl.h
|
||||||
src/rose/rose_build_infix.cpp
|
src/rose/rose_build_infix.cpp
|
||||||
src/rose/rose_build_infix.h
|
src/rose/rose_build_infix.h
|
||||||
|
src/rose/rose_build_instructions.cpp
|
||||||
|
src/rose/rose_build_instructions.h
|
||||||
|
src/rose/rose_build_lit_accel.cpp
|
||||||
|
src/rose/rose_build_lit_accel.h
|
||||||
src/rose/rose_build_long_lit.cpp
|
src/rose/rose_build_long_lit.cpp
|
||||||
src/rose/rose_build_long_lit.h
|
src/rose/rose_build_long_lit.h
|
||||||
src/rose/rose_build_lookaround.cpp
|
src/rose/rose_build_lookaround.cpp
|
||||||
@ -947,6 +962,7 @@ SET (hs_SRCS
|
|||||||
src/rose/rose_build_misc.cpp
|
src/rose/rose_build_misc.cpp
|
||||||
src/rose/rose_build_program.cpp
|
src/rose/rose_build_program.cpp
|
||||||
src/rose/rose_build_program.h
|
src/rose/rose_build_program.h
|
||||||
|
src/rose/rose_build_resources.h
|
||||||
src/rose/rose_build_role_aliasing.cpp
|
src/rose/rose_build_role_aliasing.cpp
|
||||||
src/rose/rose_build_scatter.cpp
|
src/rose/rose_build_scatter.cpp
|
||||||
src/rose/rose_build_scatter.h
|
src/rose/rose_build_scatter.h
|
||||||
@ -982,8 +998,12 @@ SET (hs_SRCS
|
|||||||
src/util/fatbit_build.h
|
src/util/fatbit_build.h
|
||||||
src/util/graph.h
|
src/util/graph.h
|
||||||
src/util/hash.h
|
src/util/hash.h
|
||||||
|
src/util/hash_dynamic_bitset.h
|
||||||
|
src/util/math.h
|
||||||
src/util/multibit_build.cpp
|
src/util/multibit_build.cpp
|
||||||
src/util/multibit_build.h
|
src/util/multibit_build.h
|
||||||
|
src/util/noncopyable.h
|
||||||
|
src/util/operators.h
|
||||||
src/util/order_check.h
|
src/util/order_check.h
|
||||||
src/util/partial_store.h
|
src/util/partial_store.h
|
||||||
src/util/partitioned_set.h
|
src/util/partitioned_set.h
|
||||||
@ -993,6 +1013,7 @@ SET (hs_SRCS
|
|||||||
src/util/report_manager.cpp
|
src/util/report_manager.cpp
|
||||||
src/util/report_manager.h
|
src/util/report_manager.h
|
||||||
src/util/simd_utils.h
|
src/util/simd_utils.h
|
||||||
|
src/util/small_vector.h
|
||||||
src/util/target_info.cpp
|
src/util/target_info.cpp
|
||||||
src/util/target_info.h
|
src/util/target_info.h
|
||||||
src/util/ue2_containers.h
|
src/util/ue2_containers.h
|
||||||
@ -1048,8 +1069,6 @@ set(hs_dump_SRCS
|
|||||||
src/rose/rose_build_dump.h
|
src/rose/rose_build_dump.h
|
||||||
src/rose/rose_in_dump.cpp
|
src/rose/rose_in_dump.cpp
|
||||||
src/rose/rose_in_dump.h
|
src/rose/rose_in_dump.h
|
||||||
src/rose/rose_dump.cpp
|
|
||||||
src/rose/rose_dump.h
|
|
||||||
src/util/dump_charclass.cpp
|
src/util/dump_charclass.cpp
|
||||||
src/util/dump_charclass.h
|
src/util/dump_charclass.h
|
||||||
src/util/dump_util.cpp
|
src/util/dump_util.cpp
|
||||||
@ -1074,62 +1093,113 @@ if (NOT FAT_RUNTIME)
|
|||||||
set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (BUILD_STATIC_LIBS)
|
||||||
add_library(hs_exec OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec OBJECT ${hs_exec_SRCS})
|
||||||
|
|
||||||
add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
||||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||||
|
|
||||||
|
add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
||||||
|
endif (BUILD_STATIC_LIBS)
|
||||||
|
|
||||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||||
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
|
||||||
set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
|
set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
else (FAT_RUNTIME)
|
else (FAT_RUNTIME)
|
||||||
|
|
||||||
set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh")
|
set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh")
|
||||||
|
if (NOT BUILD_AVX512)
|
||||||
|
set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH")
|
||||||
|
endif (NOT BUILD_AVX512)
|
||||||
|
set_source_files_properties(src/dispatcher.c PROPERTIES
|
||||||
|
COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}")
|
||||||
|
|
||||||
|
if (BUILD_STATIC_LIBS)
|
||||||
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
|
||||||
|
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_core2>)
|
||||||
set_target_properties(hs_exec_core2 PROPERTIES
|
set_target_properties(hs_exec_core2 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=core2"
|
COMPILE_FLAGS "-march=core2"
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
|
||||||
|
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_corei7>)
|
||||||
set_target_properties(hs_exec_corei7 PROPERTIES
|
set_target_properties(hs_exec_corei7 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=corei7"
|
COMPILE_FLAGS "-march=corei7"
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_avx2>)
|
||||||
set_target_properties(hs_exec_avx2 PROPERTIES
|
set_target_properties(hs_exec_avx2 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=core-avx2"
|
COMPILE_FLAGS "-march=core-avx2"
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
|
if (BUILD_AVX512)
|
||||||
|
add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_LIBS $<TARGET_OBJECTS:hs_exec_avx512>)
|
||||||
|
set_target_properties(hs_exec_avx512 PROPERTIES
|
||||||
|
COMPILE_FLAGS "${SKYLAKE_FLAG}"
|
||||||
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
|
)
|
||||||
|
endif (BUILD_AVX512)
|
||||||
|
|
||||||
add_library(hs_exec_common OBJECT
|
add_library(hs_exec_common OBJECT
|
||||||
${hs_exec_common_SRCS}
|
${hs_exec_common_SRCS}
|
||||||
src/dispatcher.c
|
src/dispatcher.c
|
||||||
)
|
)
|
||||||
set_source_files_properties(src/dispatcher.c PROPERTIES
|
|
||||||
COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function")
|
# hs_version.c is added explicitly to avoid some build systems that refuse to
|
||||||
|
# create a lib without any src (I'm looking at you Xcode)
|
||||||
|
|
||||||
|
add_library(hs_runtime STATIC src/hs_version.c
|
||||||
|
$<TARGET_OBJECTS:hs_exec_common>
|
||||||
|
${RUNTIME_LIBS})
|
||||||
|
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||||
|
|
||||||
|
# we want the static lib for testing
|
||||||
|
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
|
||||||
|
${hs_SRCS}
|
||||||
|
$<TARGET_OBJECTS:hs_exec_common>
|
||||||
|
${RUNTIME_LIBS})
|
||||||
|
|
||||||
|
endif (BUILD_STATIC_LIBS)
|
||||||
|
|
||||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||||
|
# build shared libs
|
||||||
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
|
||||||
|
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
|
||||||
set_target_properties(hs_exec_shared_core2 PROPERTIES
|
set_target_properties(hs_exec_shared_core2 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=core2"
|
COMPILE_FLAGS "-march=core2"
|
||||||
POSITION_INDEPENDENT_CODE TRUE
|
POSITION_INDEPENDENT_CODE TRUE
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
|
||||||
|
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>)
|
||||||
set_target_properties(hs_exec_shared_corei7 PROPERTIES
|
set_target_properties(hs_exec_shared_corei7 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=corei7"
|
COMPILE_FLAGS "-march=corei7"
|
||||||
POSITION_INDEPENDENT_CODE TRUE
|
POSITION_INDEPENDENT_CODE TRUE
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_avx2>)
|
||||||
set_target_properties(hs_exec_shared_avx2 PROPERTIES
|
set_target_properties(hs_exec_shared_avx2 PROPERTIES
|
||||||
COMPILE_FLAGS "-march=core-avx2"
|
COMPILE_FLAGS "-march=core-avx2"
|
||||||
POSITION_INDEPENDENT_CODE TRUE
|
POSITION_INDEPENDENT_CODE TRUE
|
||||||
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (BUILD_AVX512)
|
||||||
|
add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
|
||||||
|
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_avx512>)
|
||||||
|
set_target_properties(hs_exec_shared_avx512 PROPERTIES
|
||||||
|
COMPILE_FLAGS "${SKYLAKE_FLAG}"
|
||||||
|
POSITION_INDEPENDENT_CODE TRUE
|
||||||
|
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in"
|
||||||
|
)
|
||||||
|
endif (BUILD_AVX512)
|
||||||
add_library(hs_exec_common_shared OBJECT
|
add_library(hs_exec_common_shared OBJECT
|
||||||
${hs_exec_common_SRCS}
|
${hs_exec_common_SRCS}
|
||||||
src/dispatcher.c
|
src/dispatcher.c
|
||||||
@ -1140,31 +1210,21 @@ else (FAT_RUNTIME)
|
|||||||
endif() # SHARED
|
endif() # SHARED
|
||||||
|
|
||||||
|
|
||||||
# hs_version.c is added explicitly to avoid some build systems that refuse to
|
|
||||||
# create a lib without any src (I'm looking at you Xcode)
|
|
||||||
|
|
||||||
add_library(hs_runtime STATIC src/hs_version.c
|
|
||||||
$<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2>
|
|
||||||
$<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
|
|
||||||
endif (NOT FAT_RUNTIME)
|
endif (NOT FAT_RUNTIME)
|
||||||
|
|
||||||
|
|
||||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
|
||||||
if (NOT BUILD_SHARED_LIBS)
|
if (NOT BUILD_SHARED_LIBS)
|
||||||
install(TARGETS hs_runtime DESTINATION lib)
|
install(TARGETS hs_runtime DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||||
if (NOT FAT_RUNTIME)
|
if (NOT FAT_RUNTIME)
|
||||||
add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c
|
add_library(hs_runtime_shared SHARED src/hs_version.c
|
||||||
$<TARGET_OBJECTS:hs_exec_shared>)
|
src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec_shared>)
|
||||||
else()
|
else()
|
||||||
add_library(hs_runtime_shared SHARED src/hs_version.c
|
add_library(hs_runtime_shared SHARED src/hs_version.c
|
||||||
src/hs_valid_platform.c
|
src/hs_valid_platform.c
|
||||||
$<TARGET_OBJECTS:hs_exec_common_shared>
|
$<TARGET_OBJECTS:hs_exec_common_shared>
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_core2>
|
${RUNTIME_SHLIBS})
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_corei7>
|
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_avx2>)
|
|
||||||
endif()
|
endif()
|
||||||
set_target_properties(hs_runtime_shared PROPERTIES
|
set_target_properties(hs_runtime_shared PROPERTIES
|
||||||
VERSION ${LIB_VERSION}
|
VERSION ${LIB_VERSION}
|
||||||
@ -1173,24 +1233,17 @@ $<TARGET_OBJECTS:hs_exec_shared>)
|
|||||||
MACOSX_RPATH ON
|
MACOSX_RPATH ON
|
||||||
LINKER_LANGUAGE C)
|
LINKER_LANGUAGE C)
|
||||||
install(TARGETS hs_runtime_shared
|
install(TARGETS hs_runtime_shared
|
||||||
RUNTIME DESTINATION bin
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||||
ARCHIVE DESTINATION lib
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
LIBRARY DESTINATION lib)
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT FAT_RUNTIME)
|
if (BUILD_STATIC_LIBS)
|
||||||
add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
add_dependencies(hs ragel_Parser)
|
||||||
else()
|
endif ()
|
||||||
# we want the static lib for testing
|
|
||||||
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
|
|
||||||
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2>
|
|
||||||
$<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_dependencies(hs ragel_Parser)
|
|
||||||
|
|
||||||
if (NOT BUILD_SHARED_LIBS)
|
if (NOT BUILD_SHARED_LIBS)
|
||||||
install(TARGETS hs DESTINATION lib)
|
install(TARGETS hs DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||||
@ -1200,9 +1253,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
|||||||
else()
|
else()
|
||||||
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
|
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
|
||||||
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared>
|
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared>
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_core2>
|
${RUNTIME_SHLIBS})
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_corei7>
|
|
||||||
$<TARGET_OBJECTS:hs_exec_shared_avx2>)
|
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
add_dependencies(hs_shared ragel_Parser)
|
add_dependencies(hs_shared ragel_Parser)
|
||||||
@ -1212,11 +1263,18 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
|||||||
SOVERSION ${LIB_SOVERSION}
|
SOVERSION ${LIB_SOVERSION}
|
||||||
MACOSX_RPATH ON)
|
MACOSX_RPATH ON)
|
||||||
install(TARGETS hs_shared
|
install(TARGETS hs_shared
|
||||||
RUNTIME DESTINATION bin
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||||
ARCHIVE DESTINATION lib
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
LIBRARY DESTINATION lib)
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# used by tools and other targets
|
||||||
|
if (NOT BUILD_STATIC_LIBS)
|
||||||
|
# use shared lib without having to change all the targets
|
||||||
|
add_library(hs ALIAS hs_shared)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
if(NOT WIN32)
|
if(NOT WIN32)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
@ -10,8 +10,24 @@ else ()
|
|||||||
message (FATAL_ERROR "No intrinsics header found")
|
message (FATAL_ERROR "No intrinsics header found")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (BUILD_AVX512)
|
||||||
|
CHECK_C_COMPILER_FLAG(${SKYLAKE_FLAG} HAS_ARCH_SKYLAKE)
|
||||||
|
if (NOT HAS_ARCH_SKYLAKE)
|
||||||
|
message (FATAL_ERROR "AVX512 not supported by compiler")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}")
|
if (FAT_RUNTIME)
|
||||||
|
# test the highest level microarch to make sure everything works
|
||||||
|
if (BUILD_AVX512)
|
||||||
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
|
||||||
|
else ()
|
||||||
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2")
|
||||||
|
endif ()
|
||||||
|
else (NOT FAT_RUNTIME)
|
||||||
|
# if not fat runtime, then test given cflags
|
||||||
|
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}")
|
||||||
|
endif ()
|
||||||
|
|
||||||
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
|
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
|
||||||
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||||
@ -31,5 +47,39 @@ int main(){
|
|||||||
(void)_mm256_xor_si256(z, z);
|
(void)_mm256_xor_si256(z, z);
|
||||||
}" HAVE_AVX2)
|
}" HAVE_AVX2)
|
||||||
|
|
||||||
|
# and now for AVX512
|
||||||
|
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
|
||||||
|
#if !defined(__AVX512BW__)
|
||||||
|
#error no avx512bw
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main(){
|
||||||
|
__m512i z = _mm512_setzero_si512();
|
||||||
|
(void)_mm512_abs_epi8(z);
|
||||||
|
}" HAVE_AVX512)
|
||||||
|
|
||||||
|
if (FAT_RUNTIME)
|
||||||
|
if (NOT HAVE_SSSE3)
|
||||||
|
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
|
||||||
|
endif ()
|
||||||
|
if (NOT HAVE_AVX2)
|
||||||
|
message(FATAL_ERROR "AVX2 support required to build fat runtime")
|
||||||
|
endif ()
|
||||||
|
if (BUILD_AVX512 AND NOT HAVE_AVX512)
|
||||||
|
message(FATAL_ERROR "AVX512 support requested but not supported")
|
||||||
|
endif ()
|
||||||
|
else (NOT FAT_RUNTIME)
|
||||||
|
if (NOT HAVE_AVX2)
|
||||||
|
message(STATUS "Building without AVX2 support")
|
||||||
|
endif ()
|
||||||
|
if (NOT HAVE_AVX512)
|
||||||
|
message(STATUS "Building without AVX512 support")
|
||||||
|
endif ()
|
||||||
|
else (NOT FAT_RUNTIME)
|
||||||
|
if (NOT HAVE_SSSE3)
|
||||||
|
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
unset (CMAKE_REQUIRED_FLAGS)
|
unset (CMAKE_REQUIRED_FLAGS)
|
||||||
unset (INTRIN_INC_H)
|
unset (INTRIN_INC_H)
|
||||||
|
@ -45,10 +45,12 @@ if(HAVE_BACKTRACE)
|
|||||||
if(HAS_RDYNAMIC)
|
if(HAS_RDYNAMIC)
|
||||||
list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic)
|
list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic)
|
||||||
endif()
|
endif()
|
||||||
# cmake scope fun
|
|
||||||
set(HAVE_BACKTRACE ${HAVE_BACKTRACE} PARENT_SCOPE)
|
|
||||||
else()
|
else()
|
||||||
set(BACKTRACE_CFLAGS "")
|
set(BACKTRACE_CFLAGS "")
|
||||||
set(BACKTRACE_LDFLAGS "")
|
set(BACKTRACE_LDFLAGS "")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# cmake scope fun
|
||||||
|
set(HAVE_BACKTRACE ${HAVE_BACKTRACE} CACHE BOOL INTERNAL)
|
||||||
|
set(BACKTRACE_CFLAGS ${BACKTRACE_CFLAGS} CACHE STRING INTERNAL)
|
||||||
|
set(BACKTRACE_LDFLAGS ${BACKTRACE_LDFLAGS} CACHE STRING INTERNAL)
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
# Various checks related to Boost
|
||||||
|
|
||||||
|
set(BOOST_USE_STATIC_LIBS OFF)
|
||||||
|
set(BOOST_USE_MULTITHREADED OFF)
|
||||||
|
set(BOOST_USE_STATIC_RUNTIME OFF)
|
||||||
|
if (HAVE_LIBCPP)
|
||||||
|
# we need a more recent boost for libc++
|
||||||
|
set(BOOST_MINVERSION 1.61.0)
|
||||||
|
else ()
|
||||||
|
set(BOOST_MINVERSION 1.57.0)
|
||||||
|
endif ()
|
||||||
|
set(BOOST_NO_BOOST_CMAKE ON)
|
||||||
|
|
||||||
|
unset(Boost_INCLUDE_DIR CACHE)
|
||||||
|
# we might have boost in tree, so provide a hint and try again
|
||||||
|
set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include")
|
||||||
|
find_package(Boost ${BOOST_MINVERSION} QUIET)
|
||||||
|
if(NOT Boost_FOUND)
|
||||||
|
# otherwise check for Boost installed on the system
|
||||||
|
unset(BOOST_INCLUDEDIR)
|
||||||
|
find_package(Boost ${BOOST_MINVERSION} QUIET)
|
||||||
|
if(NOT Boost_FOUND)
|
||||||
|
message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}")
|
||||||
|
|
||||||
# Boost 1.62 has a bug that we've patched around, check if it is required
|
# Boost 1.62 has a bug that we've patched around, check if it is required
|
||||||
if (Boost_VERSION EQUAL 106200)
|
if (Boost_VERSION EQUAL 106200)
|
||||||
set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include")
|
set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include")
|
||||||
@ -38,4 +66,7 @@ ${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
unset (CMAKE_REQUIRED_INCLUDES)
|
unset (CMAKE_REQUIRED_INCLUDES)
|
||||||
|
else ()
|
||||||
|
unset(BOOST_REVGRAPH_OK CACHE)
|
||||||
|
unset(BOOST_REVGRAPH_PATCH CACHE)
|
||||||
endif () # Boost 1.62.0
|
endif () # Boost 1.62.0
|
||||||
|
@ -1,27 +1,28 @@
|
|||||||
#!/bin/sh -e
|
#!/bin/sh -e
|
||||||
# This is used for renaming symbols for the fat runtime, don't call directly
|
# This is used for renaming symbols for the fat runtime, don't call directly
|
||||||
# TODO: make this a lot less fragile!
|
# TODO: make this a lot less fragile!
|
||||||
|
cleanup () {
|
||||||
|
rm -f ${SYMSFILE} ${KEEPSYMS}
|
||||||
|
}
|
||||||
|
|
||||||
PREFIX=$1
|
PREFIX=$1
|
||||||
KEEPSYMS_IN=$2
|
KEEPSYMS_IN=$2
|
||||||
shift 2
|
shift 2
|
||||||
BUILD=$@
|
# $@ contains the actual build command
|
||||||
OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/')
|
OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/')
|
||||||
SYMSFILE=/tmp/${PREFIX}_rename.syms.$$
|
trap cleanup INT QUIT EXIT
|
||||||
KEEPSYMS=/tmp/keep.syms.$$
|
SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX)
|
||||||
# grab the command without the target obj or src file flags
|
KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX)
|
||||||
# we don't just call gcc directly as there may be flags modifying the arch
|
# find the libc used by gcc
|
||||||
CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;')
|
LIBC_SO=$("$@" --print-file-name=libc.so.6)
|
||||||
# find me a libc
|
|
||||||
LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6)
|
|
||||||
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
cp ${KEEPSYMS_IN} ${KEEPSYMS}
|
||||||
# get all symbols from libc and turn them into patterns
|
# get all symbols from libc and turn them into patterns
|
||||||
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
|
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
|
||||||
# build the object
|
# build the object
|
||||||
${BUILD}
|
"$@"
|
||||||
# rename the symbols in the object
|
# rename the symbols in the object
|
||||||
nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE}
|
nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE}
|
||||||
if test -s ${SYMSFILE}
|
if test -s ${SYMSFILE}
|
||||||
then
|
then
|
||||||
objcopy --redefine-syms=${SYMSFILE} ${OUT}
|
objcopy --redefine-syms=${SYMSFILE} ${OUT}
|
||||||
fi
|
fi
|
||||||
rm -f ${SYMSFILE} ${KEEPSYMS}
|
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
/* used by cmake */
|
/* used by cmake */
|
||||||
|
|
||||||
|
#ifndef CONFIG_H_
|
||||||
|
#define CONFIG_H_
|
||||||
|
|
||||||
/* "Define if the build is 32 bit" */
|
/* "Define if the build is 32 bit" */
|
||||||
#cmakedefine ARCH_32_BIT
|
#cmakedefine ARCH_32_BIT
|
||||||
|
|
||||||
@ -43,6 +46,8 @@
|
|||||||
0 if you don't. */
|
0 if you don't. */
|
||||||
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||||
|
|
||||||
|
#cmakedefine HAVE_PTHREAD_NP_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `malloc_info' function. */
|
/* Define to 1 if you have the `malloc_info' function. */
|
||||||
#cmakedefine HAVE_MALLOC_INFO
|
#cmakedefine HAVE_MALLOC_INFO
|
||||||
|
|
||||||
@ -76,6 +81,9 @@
|
|||||||
/* Define to 1 if you have the `_aligned_malloc' function. */
|
/* Define to 1 if you have the `_aligned_malloc' function. */
|
||||||
#cmakedefine HAVE__ALIGNED_MALLOC
|
#cmakedefine HAVE__ALIGNED_MALLOC
|
||||||
|
|
||||||
|
/* Define if compiler has __builtin_constant_p */
|
||||||
|
#cmakedefine HAVE__BUILTIN_CONSTANT_P
|
||||||
|
|
||||||
/* Optimize, inline critical functions */
|
/* Optimize, inline critical functions */
|
||||||
#cmakedefine HS_OPTIMIZE
|
#cmakedefine HS_OPTIMIZE
|
||||||
|
|
||||||
@ -91,3 +99,5 @@
|
|||||||
|
|
||||||
/* define if reverse_graph requires patch for boost 1.62.0 */
|
/* define if reverse_graph requires patch for boost 1.62.0 */
|
||||||
#cmakedefine BOOST_REVGRAPH_PATCH
|
#cmakedefine BOOST_REVGRAPH_PATCH
|
||||||
|
|
||||||
|
#endif /* CONFIG_H_ */
|
||||||
|
@ -22,7 +22,7 @@ if (NOT SQLITE3_FOUND)
|
|||||||
set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
|
set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
|
||||||
set(SQLITE3_LDFLAGS sqlite3_static)
|
set(SQLITE3_LDFLAGS sqlite3_static)
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR " no sqlite3 in source tree")
|
message(STATUS " no sqlite3 in source tree")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in
|
|||||||
compilation errors.
|
compilation errors.
|
||||||
|
|
||||||
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
||||||
is 8.38.
|
is 8.40.
|
||||||
|
|
||||||
====================
|
====================
|
||||||
Supported Constructs
|
Supported Constructs
|
||||||
@ -171,6 +171,8 @@ The following regex constructs are not supported by Hyperscan:
|
|||||||
* Callouts and embedded code.
|
* Callouts and embedded code.
|
||||||
* Atomic grouping and possessive quantifiers.
|
* Atomic grouping and possessive quantifiers.
|
||||||
|
|
||||||
|
.. _semantics:
|
||||||
|
|
||||||
*********
|
*********
|
||||||
Semantics
|
Semantics
|
||||||
*********
|
*********
|
||||||
@ -284,16 +286,24 @@ which provides the following fields:
|
|||||||
expression should match successfully.
|
expression should match successfully.
|
||||||
* ``min_length``: The minimum match length (from start to end) required to
|
* ``min_length``: The minimum match length (from start to end) required to
|
||||||
successfully match this expression.
|
successfully match this expression.
|
||||||
|
* ``edit_distance``: Match this expression within a given Levenshtein distance.
|
||||||
|
|
||||||
These parameters allow the set of matches produced by a pattern to be
|
These parameters either allow the set of matches produced by a pattern to be
|
||||||
constrained at compile time, rather than relying on the application to process
|
constrained at compile time (rather than relying on the application to process
|
||||||
unwanted matches at runtime.
|
unwanted matches at runtime), or allow matching a pattern approximately (within
|
||||||
|
a given edit distance) to produce more matches.
|
||||||
|
|
||||||
For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of 10
|
For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of 10
|
||||||
and a ``max_offset`` of 15 will not produce matches when scanned against
|
and a ``max_offset`` of 15 will not produce matches when scanned against
|
||||||
``foobar`` or ``foo0123456789bar`` but will produce a match against the data
|
``foobar`` or ``foo0123456789bar`` but will produce a match against the data
|
||||||
streams ``foo0123bar`` or ``foo0123456bar``.
|
streams ``foo0123bar`` or ``foo0123456bar``.
|
||||||
|
|
||||||
|
Similarly, the pattern :regexp:`/foobar/` when given an ``edit_distance`` of 2
|
||||||
|
will produce matches when scanned against ``foobar``, ``fooba``, ``fobr``,
|
||||||
|
``fo_baz``, ``foooobar``, and anything else that lies within edit distance of 2
|
||||||
|
(as defined by Levenshtein distance). For more details, see the
|
||||||
|
:ref:`approximate_matching` section.
|
||||||
|
|
||||||
=================
|
=================
|
||||||
Prefiltering Mode
|
Prefiltering Mode
|
||||||
=================
|
=================
|
||||||
@ -375,3 +385,74 @@ An :c:type:`hs_platform_info_t` structure targeted at the current host can be
|
|||||||
built with the :c:func:`hs_populate_platform` function.
|
built with the :c:func:`hs_populate_platform` function.
|
||||||
|
|
||||||
See :ref:`api_constants` for the full list of CPU tuning and feature flags.
|
See :ref:`api_constants` for the full list of CPU tuning and feature flags.
|
||||||
|
|
||||||
|
.. _approximate_matching:
|
||||||
|
|
||||||
|
********************
|
||||||
|
Approximate matching
|
||||||
|
********************
|
||||||
|
|
||||||
|
Hyperscan provides an experimental approximate matching mode, which will match
|
||||||
|
patterns within a given edit distance. The exact matching behavior is defined as
|
||||||
|
follows:
|
||||||
|
|
||||||
|
#. **Edit distance** is defined as Levenshtein distance. That is, there are
|
||||||
|
three possible edit types considered: insertion, removal and substitution.
|
||||||
|
A more formal description can be found on
|
||||||
|
`Wikipedia <https://en.wikipedia.org/wiki/Levenshtein_distance>`_.
|
||||||
|
|
||||||
|
#. **Approximate matching** will match all *corpora* within a given edit
|
||||||
|
distance. That is, given a pattern, approximate matching will match anything
|
||||||
|
that can be edited to arrive at a corpus that exactly matches the original
|
||||||
|
pattern.
|
||||||
|
|
||||||
|
#. **Matching semantics** are exactly the same as described in :ref:`semantics`.
|
||||||
|
|
||||||
|
Here are a few examples of approximate matching:
|
||||||
|
|
||||||
|
* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan
|
||||||
|
matching behavior. With approximate matching within edit distance 2, the
|
||||||
|
pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``,
|
||||||
|
``f``, and anything else that lies within edit distance 2 of matching corpora
|
||||||
|
for the original pattern (``foo`` in this case).
|
||||||
|
|
||||||
|
* Pattern :regexp:`/foo(bar)+/` with edit distance 1 will match ``foobarbar``,
|
||||||
|
``foobarb0r``, ``fooarbar``, ``foobarba``, ``f0obarbar``, ``fobarbar`` and
|
||||||
|
anything else that lies within edit distance 1 of matching corpora for the
|
||||||
|
original pattern (``foobarbar`` in this case).
|
||||||
|
|
||||||
|
* Pattern :regexp:`/foob?ar/` with edit distance 2 will match ``fooar``,
|
||||||
|
``foo``, ``fabar``, ``oar`` and anything else that lies within edit distance 2
|
||||||
|
of matching corpora for the original pattern (``fooar`` in this case).
|
||||||
|
|
||||||
|
Currently, there are trade-offs and limitations that come with approximate
|
||||||
|
matching support. Here they are, in a nutshell:
|
||||||
|
|
||||||
|
* Reduced pattern support:
|
||||||
|
|
||||||
|
* For many patterns, approximate matching is complex and can result in
|
||||||
|
Hyperscan failing to compile a pattern with a "Pattern too large" error,
|
||||||
|
even if the pattern is supported in normal operation.
|
||||||
|
* Additionally, some patterns cannot be approximately matched because they
|
||||||
|
reduce to so-called "vacuous" patterns (patterns that match everything). For
|
||||||
|
example, pattern :regexp:`/foo/` with edit distance 3, if implemented,
|
||||||
|
would reduce to matching zero-length buffers. Such patterns will result in a
|
||||||
|
"Pattern cannot be approximately matched" compile error.
|
||||||
|
* Finally, due to the inherent complexities of defining matching behavior,
|
||||||
|
approximate matching implements a reduced subset of regular expression
|
||||||
|
syntax. Approximate matching does not support UTF-8 (and other
|
||||||
|
multibyte character encodings), and word boundaries (that is, ``\b``, ``\B``
|
||||||
|
and other equivalent constructs). Patterns containing unsupported constructs
|
||||||
|
will result in a "Pattern cannot be approximately matched" compile error.
|
||||||
|
* When using approximate matching in conjunction with SOM, all of the
|
||||||
|
restrictions of SOM also apply. See :ref:`som` for more
|
||||||
|
details.
|
||||||
|
* Increased stream state/byte code size requirements: due to approximate
|
||||||
|
matching byte code being inherently larger and more complex than exact
|
||||||
|
matching, the corresponding requirements also increase.
|
||||||
|
* Performance overhead: similarly, there is generally a performance cost
|
||||||
|
associated with approximate matching, both due to increased matching
|
||||||
|
complexity, and due to the fact that it will produce more matches.
|
||||||
|
|
||||||
|
Approximate matching is always disabled by default, and can be enabled on a
|
||||||
|
per-pattern basis by using an extended parameter described in :ref:`extparam`.
|
||||||
|
@ -44,7 +44,7 @@ master_doc = 'index'
|
|||||||
|
|
||||||
# General information about the project.
|
# General information about the project.
|
||||||
project = u'Hyperscan'
|
project = u'Hyperscan'
|
||||||
copyright = u'2015-2016, Intel Corporation'
|
copyright = u'2015-2017, Intel Corporation'
|
||||||
|
|
||||||
# The version info for the project you're documenting, acts as replacement for
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
# |version| and |release|, also used in various other places throughout the
|
# |version| and |release|, also used in various other places throughout the
|
||||||
|
@ -30,4 +30,4 @@ and/or other countries.
|
|||||||
|
|
||||||
\*Other names and brands may be claimed as the property of others.
|
\*Other names and brands may be claimed as the property of others.
|
||||||
|
|
||||||
Copyright |copy| 2015-2016, Intel Corporation. All rights reserved.
|
Copyright |copy| 2015-2017, Intel Corporation. All rights reserved.
|
||||||
|
@ -254,18 +254,32 @@ the current platform is supported by Hyperscan.
|
|||||||
As of this release, the variants of the runtime that are built, and the CPU
|
As of this release, the variants of the runtime that are built, and the CPU
|
||||||
capability that is required, are the following:
|
capability that is required, are the following:
|
||||||
|
|
||||||
+----------+-------------------------------+---------------------+
|
+----------+-------------------------------+---------------------------+
|
||||||
| Variant | CPU Feature Flag(s) Required | gcc arch flag |
|
| Variant | CPU Feature Flag(s) Required | gcc arch flag |
|
||||||
+==========+===============================+=====================+
|
+==========+===============================+===========================+
|
||||||
| Core 2 | ``SSSE3`` | ``-march=core2`` |
|
| Core 2 | ``SSSE3`` | ``-march=core2`` |
|
||||||
+----------+-------------------------------+---------------------+
|
+----------+-------------------------------+---------------------------+
|
||||||
| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` |
|
| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` |
|
||||||
+----------+-------------------------------+---------------------+
|
+----------+-------------------------------+---------------------------+
|
||||||
| AVX 2 | ``AVX2`` | ``-march=avx2`` |
|
| AVX 2 | ``AVX2`` | ``-march=core-avx2`` |
|
||||||
+----------+-------------------------------+---------------------+
|
+----------+-------------------------------+---------------------------+
|
||||||
|
| AVX 512 | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` |
|
||||||
|
+----------+-------------------------------+---------------------------+
|
||||||
|
|
||||||
As this requires compiler, libc, and binutils support, at this time the fat
|
.. note::
|
||||||
runtime will only be enabled for Linux builds where the compiler supports the
|
|
||||||
|
Hyperscan v4.5 adds support for AVX-512 instructions - in particular the
|
||||||
|
``AVX-512BW`` instruction set that was introduced on Intel "Skylake" Xeon
|
||||||
|
processors - however the AVX-512 runtime variant is **not** enabled by
|
||||||
|
default in fat runtime builds as not all toolchains support AVX-512
|
||||||
|
instruction sets. To build an AVX-512 runtime, the CMake variable
|
||||||
|
``BUILD_AVX512`` must be enabled manually during configuration. For
|
||||||
|
example: ::
|
||||||
|
|
||||||
|
cmake -DBUILD_AVX512=on <...>
|
||||||
|
|
||||||
|
As the fat runtime requires compiler, libc, and binutils support, at this time
|
||||||
|
it will only be enabled for Linux builds where the compiler supports the
|
||||||
`indirect function "ifunc" function attribute
|
`indirect function "ifunc" function attribute
|
||||||
<https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-indirect-functions-3321>`_.
|
<https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-indirect-functions-3321>`_.
|
||||||
|
|
||||||
|
@ -17,5 +17,6 @@ Hyperscan |version| Developer's Reference Guide
|
|||||||
runtime
|
runtime
|
||||||
serialization
|
serialization
|
||||||
performance
|
performance
|
||||||
|
tools
|
||||||
api_constants
|
api_constants
|
||||||
api_files
|
api_files
|
||||||
|
@ -70,6 +70,13 @@ For a given database, Hyperscan provides several guarantees:
|
|||||||
|
|
||||||
See :ref:`runtime` for more detail.
|
See :ref:`runtime` for more detail.
|
||||||
|
|
||||||
|
*****
|
||||||
|
Tools
|
||||||
|
*****
|
||||||
|
|
||||||
|
Some utilities for testing and benchmarking Hyperscan are included with the
|
||||||
|
library. See :ref:`tools` for more information.
|
||||||
|
|
||||||
************
|
************
|
||||||
Example Code
|
Example Code
|
||||||
************
|
************
|
||||||
|
@ -333,3 +333,13 @@ Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be
|
|||||||
used to specify a lower bound on the length of the matches for a pattern. Using
|
used to specify a lower bound on the length of the matches for a pattern. Using
|
||||||
this facility may be more lightweight in some circumstances than using the SOM
|
this facility may be more lightweight in some circumstances than using the SOM
|
||||||
flag and post-confirming match length in the calling application.
|
flag and post-confirming match length in the calling application.
|
||||||
|
|
||||||
|
********************
|
||||||
|
Approximate matching
|
||||||
|
********************
|
||||||
|
|
||||||
|
.. tip:: Approximate matching is an experimental feature.
|
||||||
|
|
||||||
|
There is generally a performance impact associated with approximate matching due
|
||||||
|
to the reduced specificity of the matches. This impact may vary significantly
|
||||||
|
depending on the pattern and edit distance.
|
||||||
|
116
doc/dev-reference/tools.rst
Normal file
116
doc/dev-reference/tools.rst
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
.. _tools:
|
||||||
|
|
||||||
|
#####
|
||||||
|
Tools
|
||||||
|
#####
|
||||||
|
|
||||||
|
This section describes the set of utilities included with the Hyperscan library.
|
||||||
|
|
||||||
|
********************
|
||||||
|
Benchmarker: hsbench
|
||||||
|
********************
|
||||||
|
|
||||||
|
The ``hsbench`` tool provides an easy way to measure Hyperscan's performance
|
||||||
|
for a particular set of patterns and corpus of data to be scanned.
|
||||||
|
|
||||||
|
Patterns are supplied in the format described below in
|
||||||
|
:ref:`tools_pattern_format`, while the corpus must be provided in the form of a
|
||||||
|
`corpus database`: this is a simple SQLite database format intended to allow for
|
||||||
|
easy control of how a corpus is broken into blocks and streams.
|
||||||
|
|
||||||
|
.. note:: A group of Python scripts for constructing corpora databases from
|
||||||
|
various input types, such as PCAP network traffic captures or text files, can
|
||||||
|
be found in the Hyperscan source tree in ``tools/hsbench/scripts``.
|
||||||
|
|
||||||
|
Running hsbench
|
||||||
|
===============
|
||||||
|
|
||||||
|
Given a file full of patterns specified with ``-e`` and a corpus database
|
||||||
|
specified with ``-c``, ``hsbench`` will perform a single-threaded benchmark and
|
||||||
|
produce output like this::
|
||||||
|
|
||||||
|
$ hsbench -e /tmp/patterns -c /tmp/corpus.db
|
||||||
|
|
||||||
|
Signatures: /tmp/patterns
|
||||||
|
Hyperscan info: Version: 4.3.1 Features: AVX2 Mode: STREAM
|
||||||
|
Expression count: 200
|
||||||
|
Bytecode size: 342,540 bytes
|
||||||
|
Database CRC: 0x6cd6b67c
|
||||||
|
Stream state size: 252 bytes
|
||||||
|
Scratch size: 18,406 bytes
|
||||||
|
Compile time: 0.153 seconds
|
||||||
|
Peak heap usage: 78,073,856 bytes
|
||||||
|
|
||||||
|
Time spent scanning: 0.600 seconds
|
||||||
|
Corpus size: 72,138,183 bytes (63,946 blocks in 8,891 streams)
|
||||||
|
Scan matches: 81 (0.001 matches/kilobyte)
|
||||||
|
Overall block rate: 2,132,004.45 blocks/sec
|
||||||
|
Overall throughput: 19,241.10 Mbit/sec
|
||||||
|
|
||||||
|
By default, the corpus is scanned twenty times, and the overall performance
|
||||||
|
reported is computed based on the total number of bytes scanned in the time it
|
||||||
|
takes to perform all twenty scans. The number of repeats can be changed with the
|
||||||
|
``-n`` argument, and the results of each scan will be displayed if the
|
||||||
|
``--per-scan`` argument is specified.
|
||||||
|
|
||||||
|
To benchmark Hyperscan on more than one core, you can supply a list of cores
|
||||||
|
with the ``-T`` argument, which will instruct ``hsbench`` to start one
|
||||||
|
benchmark thread per core given and compute the throughput from the time taken
|
||||||
|
to complete all of them.
|
||||||
|
|
||||||
|
.. tip:: For single-threaded benchmarks on multi-processor systems, we recommend
|
||||||
|
using a utility like ``taskset`` to lock the hsbench process to one core and
|
||||||
|
minimize jitter due to the operating system's scheduler.
|
||||||
|
|
||||||
|
.. _tools_pattern_format:
|
||||||
|
|
||||||
|
**************
|
||||||
|
Pattern Format
|
||||||
|
**************
|
||||||
|
|
||||||
|
All of the Hyperscan tools accept patterns in the same format, read from plain
|
||||||
|
text files with one pattern per line. Each line looks like this:
|
||||||
|
|
||||||
|
* ``<integer id>:/<regex>/<flags>``
|
||||||
|
|
||||||
|
For example::
|
||||||
|
|
||||||
|
1:/hatstand.*teakettle/s
|
||||||
|
2:/(hatstand|teakettle)/iH
|
||||||
|
3:/^.{10,20}hatstand/m
|
||||||
|
|
||||||
|
The integer ID is the value that will be reported when a match is found by
|
||||||
|
Hyperscan and must be unique.
|
||||||
|
|
||||||
|
The pattern itself is a regular expression in PCRE syntax; see
|
||||||
|
:ref:`compilation` for more information on supported features.
|
||||||
|
|
||||||
|
The flags are single characters that map to Hyperscan flags as follows:
|
||||||
|
|
||||||
|
========= ================================= ===========
|
||||||
|
Character API Flag Description
|
||||||
|
========= ================================= ===========
|
||||||
|
``i`` :c:member:`HS_FLAG_CASELESS` Case-insensitive matching
|
||||||
|
``s`` :c:member:`HS_FLAG_DOTALL` Dot (``.``) will match newlines
|
||||||
|
``m`` :c:member:`HS_FLAG_MULTILINE` Multi-line anchoring
|
||||||
|
``H`` :c:member:`HS_FLAG_SINGLEMATCH` Report match ID at most once
|
||||||
|
``V`` :c:member:`HS_FLAG_ALLOWEMPTY` Allow patterns that can match against empty buffers
|
||||||
|
``8`` :c:member:`HS_FLAG_UTF8` UTF-8 mode
|
||||||
|
``W`` :c:member:`HS_FLAG_UCP` Unicode property support
|
||||||
|
``P`` :c:member:`HS_FLAG_PREFILTER` Prefiltering mode
|
||||||
|
``L`` :c:member:`HS_FLAG_SOM_LEFTMOST` Leftmost start of match reporting
|
||||||
|
========= ================================= ===========
|
||||||
|
|
||||||
|
In addition to the set of flags above, :ref:`extparam` can be supplied
|
||||||
|
for each pattern. These are supplied after the flags as ``key=value`` pairs
|
||||||
|
between braces, separated by commas. For example::
|
||||||
|
|
||||||
|
1:/hatstand.*teakettle/s{min_offset=50,max_offset=100}
|
||||||
|
|
||||||
|
All Hyperscan tools will accept a pattern file (or a directory containing
|
||||||
|
pattern files) with the ``-e`` argument. If no further arguments constraining
|
||||||
|
the pattern set are given, all patterns in those files are used.
|
||||||
|
|
||||||
|
To select a subset of the patterns, a single ID can be supplied with the ``-z``
|
||||||
|
argument, or a file containing a set of IDs can be supplied with the ``-s``
|
||||||
|
argument.
|
@ -22,3 +22,6 @@ set_source_files_properties(patbench.cc PROPERTIES COMPILE_FLAGS
|
|||||||
"-Wall -Wno-unused-parameter")
|
"-Wall -Wno-unused-parameter")
|
||||||
target_link_libraries(patbench hs pcap)
|
target_link_libraries(patbench hs pcap)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
install(FILES simplegrep.c pcapscan.cc patbench.cc README.md
|
||||||
|
DESTINATION ${CMAKE_INSTALL_DOCDIR}/examples)
|
||||||
|
16
src/alloc.c
16
src/alloc.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -67,7 +67,7 @@ hs_free_t normalise_free(hs_free_t f) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||||
hs_set_database_allocator(allocfunc, freefunc);
|
hs_set_database_allocator(allocfunc, freefunc);
|
||||||
hs_set_misc_allocator(allocfunc, freefunc);
|
hs_set_misc_allocator(allocfunc, freefunc);
|
||||||
hs_set_stream_allocator(allocfunc, freefunc);
|
hs_set_stream_allocator(allocfunc, freefunc);
|
||||||
@ -77,7 +77,8 @@ hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t allocfunc,
|
||||||
|
hs_free_t freefunc) {
|
||||||
hs_database_alloc = normalise_alloc(allocfunc);
|
hs_database_alloc = normalise_alloc(allocfunc);
|
||||||
hs_database_free = normalise_free(freefunc);
|
hs_database_free = normalise_free(freefunc);
|
||||||
|
|
||||||
@ -85,7 +86,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t allocfunc,
|
||||||
|
hs_free_t freefunc) {
|
||||||
hs_misc_alloc = normalise_alloc(allocfunc);
|
hs_misc_alloc = normalise_alloc(allocfunc);
|
||||||
hs_misc_free = normalise_free(freefunc);
|
hs_misc_free = normalise_free(freefunc);
|
||||||
|
|
||||||
@ -93,7 +95,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t allocfunc,
|
||||||
|
hs_free_t freefunc) {
|
||||||
hs_scratch_alloc = normalise_alloc(allocfunc);
|
hs_scratch_alloc = normalise_alloc(allocfunc);
|
||||||
hs_scratch_free = normalise_free(freefunc);
|
hs_scratch_free = normalise_free(freefunc);
|
||||||
|
|
||||||
@ -101,7 +104,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t allocfunc,
|
||||||
|
hs_free_t freefunc) {
|
||||||
hs_stream_alloc = normalise_alloc(allocfunc);
|
hs_stream_alloc = normalise_alloc(allocfunc);
|
||||||
hs_stream_free = normalise_free(freefunc);
|
hs_stream_free = normalise_free(freefunc);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,6 +42,8 @@
|
|||||||
* word-to-word and word-to-nonword) are dropped.
|
* word-to-word and word-to-nonword) are dropped.
|
||||||
*/
|
*/
|
||||||
#include "asserts.h"
|
#include "asserts.h"
|
||||||
|
|
||||||
|
#include "compiler/compiler.h"
|
||||||
#include "nfagraph/ng.h"
|
#include "nfagraph/ng.h"
|
||||||
#include "nfagraph/ng_prune.h"
|
#include "nfagraph/ng_prune.h"
|
||||||
#include "nfagraph/ng_redundancy.h"
|
#include "nfagraph/ng_redundancy.h"
|
||||||
@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) {
|
|||||||
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
|
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
|
||||||
|
|
||||||
static
|
static
|
||||||
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
|
void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
|
||||||
u32 &assert_edge_count) {
|
edge_cache_t &edge_cache, u32 &assert_edge_count) {
|
||||||
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
|
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
|
||||||
|
|
||||||
const u32 flags = g[t].assert_flags;
|
const u32 flags = g[t].assert_flags;
|
||||||
@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
|
|||||||
edge_cache.emplace(cache_key, e);
|
edge_cache.emplace(cache_key, e);
|
||||||
g[e].assert_flags = flags;
|
g[e].assert_flags = flags;
|
||||||
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
||||||
throw CompileError(g.expressionIndex,
|
throw CompileError(expr.index, "Pattern is too large.");
|
||||||
"Pattern is too large.");
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
NFAEdge e = ecit->second;
|
NFAEdge e = ecit->second;
|
||||||
@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||||
|
NFAVertex v, s32 adj) {
|
||||||
// Don't try and set the report ID of a special vertex.
|
// Don't try and set the report ID of a special vertex.
|
||||||
assert(!is_special(v, g));
|
assert(!is_special(v, g));
|
||||||
|
|
||||||
// There should be no reports set already.
|
// There should be no reports set already.
|
||||||
assert(g[v].reports.empty());
|
assert(g[v].reports.empty());
|
||||||
|
|
||||||
Report r = rm.getBasicInternalReport(g, adj);
|
Report r = rm.getBasicInternalReport(expr, adj);
|
||||||
|
|
||||||
g[v].reports.insert(rm.getInternalId(r));
|
g[v].reports.insert(rm.getInternalId(r));
|
||||||
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
|
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
|
void checkForMultilineStart(ReportManager &rm, NGHolder &g,
|
||||||
|
const ExpressionInfo &expr) {
|
||||||
vector<NFAEdge> dead;
|
vector<NFAEdge> dead;
|
||||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||||
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
|
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
|
||||||
@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
|
|||||||
for (const auto &e : dead) {
|
for (const auto &e : dead) {
|
||||||
NFAVertex dummy = add_vertex(g);
|
NFAVertex dummy = add_vertex(g);
|
||||||
g[dummy].char_reach.setall();
|
g[dummy].char_reach.setall();
|
||||||
setReportId(rm, g, dummy, -1);
|
setReportId(rm, g, expr, dummy, -1);
|
||||||
add_edge(source(e, g), dummy, g[e], g);
|
add_edge(source(e, g), dummy, g[e], g);
|
||||||
add_edge(dummy, g.accept, g);
|
add_edge(dummy, g.accept, g);
|
||||||
}
|
}
|
||||||
@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) {
|
|||||||
* Remove the horrors that are the temporary assert vertices which arise from
|
* Remove the horrors that are the temporary assert vertices which arise from
|
||||||
* our construction method. Allows the rest of our code base to live in
|
* our construction method. Allows the rest of our code base to live in
|
||||||
* blissful ignorance of their existence. */
|
* blissful ignorance of their existence. */
|
||||||
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
|
void removeAssertVertices(ReportManager &rm, NGHolder &g,
|
||||||
|
const ExpressionInfo &expr) {
|
||||||
size_t num = 0;
|
size_t num = 0;
|
||||||
|
|
||||||
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
|
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
|
||||||
@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
|
|||||||
|
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
|
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
|
||||||
replaceAssertVertex(g, v, edge_cache, assert_edge_count);
|
replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
|
||||||
num++;
|
num++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
checkForMultilineStart(rm, g);
|
checkForMultilineStart(rm, g, expr);
|
||||||
|
|
||||||
if (num) {
|
if (num) {
|
||||||
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
|
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -35,8 +35,9 @@
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
|
class ExpressionInfo;
|
||||||
class ReportManager;
|
class ReportManager;
|
||||||
class NGWrapper;
|
class NGHolder;
|
||||||
|
|
||||||
/** \brief Convert temporary assert vertices (from construction method) to
|
/** \brief Convert temporary assert vertices (from construction method) to
|
||||||
* edge-based flags.
|
* edge-based flags.
|
||||||
@ -44,7 +45,8 @@ class NGWrapper;
|
|||||||
* Remove the horrors that are the temporary assert vertices which arise from
|
* Remove the horrors that are the temporary assert vertices which arise from
|
||||||
* our construction method. Allows the rest of our code base to live in
|
* our construction method. Allows the rest of our code base to live in
|
||||||
* blissful ignorance of their existence. */
|
* blissful ignorance of their existence. */
|
||||||
void removeAssertVertices(ReportManager &rm, NGWrapper &g);
|
void removeAssertVertices(ReportManager &rm, NGHolder &g,
|
||||||
|
const ExpressionInfo &expr);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -55,9 +55,8 @@
|
|||||||
#include "parser/unsupported.h"
|
#include "parser/unsupported.h"
|
||||||
#include "parser/utf8_validate.h"
|
#include "parser/utf8_validate.h"
|
||||||
#include "rose/rose_build.h"
|
#include "rose/rose_build.h"
|
||||||
#include "rose/rose_build_dump.h"
|
|
||||||
#include "som/slot_manager_dump.h"
|
#include "som/slot_manager_dump.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/compile_error.h"
|
#include "util/compile_error.h"
|
||||||
#include "util/target_info.h"
|
#include "util/target_info.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
@ -74,12 +73,12 @@ using namespace std;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void validateExt(const hs_expr_ext &ext) {
|
void validateExt(const hs_expr_ext &ext) {
|
||||||
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
|
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
|
||||||
HS_EXT_FLAG_MAX_OFFSET |
|
HS_EXT_FLAG_MAX_OFFSET |
|
||||||
HS_EXT_FLAG_MIN_LENGTH;
|
HS_EXT_FLAG_MIN_LENGTH |
|
||||||
|
HS_EXT_FLAG_EDIT_DISTANCE;
|
||||||
if (ext.flags & ~ALL_EXT_FLAGS) {
|
if (ext.flags & ~ALL_EXT_FLAGS) {
|
||||||
throw CompileError("Invalid hs_expr_ext flag set.");
|
throw CompileError("Invalid hs_expr_ext flag set.");
|
||||||
}
|
}
|
||||||
@ -100,25 +99,18 @@ void validateExt(const hs_expr_ext &ext) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
||||||
unsigned flags, ReportID actionId,
|
unsigned flags, ReportID report,
|
||||||
const hs_expr_ext *ext)
|
const hs_expr_ext *ext)
|
||||||
: utf8(false),
|
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
|
||||||
allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
|
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
|
||||||
highlander(flags & HS_FLAG_SINGLEMATCH),
|
0, 0) {
|
||||||
prefilter(flags & HS_FLAG_PREFILTER),
|
|
||||||
som(SOM_NONE),
|
|
||||||
index(index_in),
|
|
||||||
id(actionId),
|
|
||||||
min_offset(0),
|
|
||||||
max_offset(MAX_OFFSET),
|
|
||||||
min_length(0) {
|
|
||||||
ParseMode mode(flags);
|
ParseMode mode(flags);
|
||||||
|
|
||||||
component = parse(expression, mode);
|
component = parse(expression, mode);
|
||||||
|
|
||||||
utf8 = mode.utf8; /* utf8 may be set by parse() */
|
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
|
||||||
|
|
||||||
if (utf8 && !isValidUtf8(expression)) {
|
if (expr.utf8 && !isValidUtf8(expression)) {
|
||||||
throw ParseError("Expression is not valid UTF-8.");
|
throw ParseError("Expression is not valid UTF-8.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -146,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|||||||
|
|
||||||
// Set SOM type.
|
// Set SOM type.
|
||||||
if (flags & HS_FLAG_SOM_LEFTMOST) {
|
if (flags & HS_FLAG_SOM_LEFTMOST) {
|
||||||
som = SOM_LEFT;
|
expr.som = SOM_LEFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set extended parameters, if we have them.
|
// Set extended parameters, if we have them.
|
||||||
@ -155,26 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|||||||
validateExt(*ext);
|
validateExt(*ext);
|
||||||
|
|
||||||
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
|
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
|
||||||
min_offset = ext->min_offset;
|
expr.min_offset = ext->min_offset;
|
||||||
}
|
}
|
||||||
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
|
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
|
||||||
max_offset = ext->max_offset;
|
expr.max_offset = ext->max_offset;
|
||||||
}
|
}
|
||||||
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
|
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
|
||||||
min_length = ext->min_length;
|
expr.min_length = ext->min_length;
|
||||||
|
}
|
||||||
|
if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
|
||||||
|
expr.edit_distance = ext->edit_distance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// These are validated in validateExt, so an error will already have been
|
// These are validated in validateExt, so an error will already have been
|
||||||
// thrown if these conditions don't hold.
|
// thrown if these conditions don't hold.
|
||||||
assert(max_offset >= min_offset);
|
assert(expr.max_offset >= expr.min_offset);
|
||||||
assert(max_offset >= min_length);
|
assert(expr.max_offset >= expr.min_length);
|
||||||
|
|
||||||
// Since prefiltering and SOM aren't supported together, we must squash any
|
// Since prefiltering and SOM aren't supported together, we must squash any
|
||||||
// min_length constraint as well.
|
// min_length constraint as well.
|
||||||
if (flags & HS_FLAG_PREFILTER && min_length) {
|
if (flags & HS_FLAG_PREFILTER && expr.min_length) {
|
||||||
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
|
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
|
||||||
min_length = 0;
|
expr.min_length = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
|
|||||||
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
|
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
|
||||||
* mode.
|
* mode.
|
||||||
*/
|
*/
|
||||||
void dumpExpression(UNUSED const ParsedExpression &expr,
|
void dumpExpression(UNUSED const ParsedExpression &pe,
|
||||||
UNUSED const char *stage, UNUSED const Grey &grey) {
|
UNUSED const char *stage, UNUSED const Grey &grey) {
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
|
DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
|
||||||
expr.index);
|
pe.expr.report, pe.expr.index);
|
||||||
ostringstream debug_tree;
|
ostringstream debug_tree;
|
||||||
dumpTree(debug_tree, expr.component.get());
|
dumpTree(debug_tree, pe.component.get());
|
||||||
printf("%s\n", debug_tree.str().c_str());
|
printf("%s\n", debug_tree.str().c_str());
|
||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
|
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
if (grey.dumpFlags & Grey::DUMP_PARSE) {
|
if (grey.dumpFlags & Grey::DUMP_PARSE) {
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
|
ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
|
||||||
<< stage << ".txt";
|
<< stage << ".txt";
|
||||||
ofstream out(ss.str().c_str());
|
ofstream out(ss.str().c_str());
|
||||||
out << "Component Tree for " << expr.id << endl;
|
out << "Component Tree for " << pe.expr.report << endl;
|
||||||
dumpTree(out, expr.component.get());
|
dumpTree(out, pe.component.get());
|
||||||
if (expr.utf8) {
|
if (pe.expr.utf8) {
|
||||||
out << "UTF8 mode" << endl;
|
out << "UTF8 mode" << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -211,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr,
|
|||||||
|
|
||||||
/** \brief Run Component tree optimisations on \a expr. */
|
/** \brief Run Component tree optimisations on \a pe. */
|
||||||
static
|
static
|
||||||
void optimise(ParsedExpression &expr) {
|
void optimise(ParsedExpression &pe) {
|
||||||
if (expr.min_length || expr.som) {
|
if (pe.expr.min_length || pe.expr.som) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("optimising\n");
|
DEBUG_PRINTF("optimising\n");
|
||||||
expr.component->optimise(true /* root is connected to sds */);
|
pe.component->optimise(true /* root is connected to sds */);
|
||||||
}
|
}
|
||||||
|
|
||||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||||
@ -234,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
|||||||
|
|
||||||
// Do per-expression processing: errors here will result in an exception
|
// Do per-expression processing: errors here will result in an exception
|
||||||
// being thrown up to our caller
|
// being thrown up to our caller
|
||||||
ParsedExpression expr(index, expression, flags, id, ext);
|
ParsedExpression pe(index, expression, flags, id, ext);
|
||||||
dumpExpression(expr, "orig", cc.grey);
|
dumpExpression(pe, "orig", cc.grey);
|
||||||
|
|
||||||
// Apply prefiltering transformations if desired.
|
// Apply prefiltering transformations if desired.
|
||||||
if (expr.prefilter) {
|
if (pe.expr.prefilter) {
|
||||||
prefilterTree(expr.component, ParseMode(flags));
|
prefilterTree(pe.component, ParseMode(flags));
|
||||||
dumpExpression(expr, "prefiltered", cc.grey);
|
dumpExpression(pe, "prefiltered", cc.grey);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expressions containing zero-width assertions and other extended pcre
|
// Expressions containing zero-width assertions and other extended pcre
|
||||||
// types aren't supported yet. This call will throw a ParseError exception
|
// types aren't supported yet. This call will throw a ParseError exception
|
||||||
// if the component tree contains such a construct.
|
// if the component tree contains such a construct.
|
||||||
checkUnsupported(*expr.component);
|
checkUnsupported(*pe.component);
|
||||||
|
|
||||||
expr.component->checkEmbeddedStartAnchor(true);
|
pe.component->checkEmbeddedStartAnchor(true);
|
||||||
expr.component->checkEmbeddedEndAnchor(true);
|
pe.component->checkEmbeddedEndAnchor(true);
|
||||||
|
|
||||||
if (cc.grey.optimiseComponentTree) {
|
if (cc.grey.optimiseComponentTree) {
|
||||||
optimise(expr);
|
optimise(pe);
|
||||||
dumpExpression(expr, "opt", cc.grey);
|
dumpExpression(pe, "opt", cc.grey);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
|
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
|
||||||
expr.component.get(), expr.index, expr.id);
|
pe.component.get(), pe.expr.index, pe.expr.report);
|
||||||
|
|
||||||
// You can only use the SOM flags if you've also specified an SOM
|
// You can only use the SOM flags if you've also specified an SOM
|
||||||
// precision mode.
|
// precision mode.
|
||||||
if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
|
if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
|
||||||
throw CompileError("To use a SOM expression flag in streaming mode, "
|
throw CompileError("To use a SOM expression flag in streaming mode, "
|
||||||
"an SOM precision mode (e.g. "
|
"an SOM precision mode (e.g. "
|
||||||
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
|
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
|
||||||
@ -269,32 +264,31 @@ void addExpression(NG &ng, unsigned index, const char *expression,
|
|||||||
|
|
||||||
// If this expression is a literal, we can feed it directly to Rose rather
|
// If this expression is a literal, we can feed it directly to Rose rather
|
||||||
// than building the NFA graph.
|
// than building the NFA graph.
|
||||||
if (shortcutLiteral(ng, expr)) {
|
if (shortcutLiteral(ng, pe)) {
|
||||||
DEBUG_PRINTF("took literal short cut\n");
|
DEBUG_PRINTF("took literal short cut\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr);
|
auto built_expr = buildGraph(ng.rm, cc, pe);
|
||||||
|
if (!built_expr.g) {
|
||||||
if (!g) {
|
|
||||||
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
|
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
|
||||||
"thrown.\n", expr.id);
|
"thrown.\n", pe.expr.report);
|
||||||
throw CompileError("Internal error.");
|
throw CompileError("Internal error.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!expr.allow_vacuous && matches_everywhere(*g)) {
|
if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
|
||||||
throw CompileError("Pattern matches empty buffer; use "
|
throw CompileError("Pattern matches empty buffer; use "
|
||||||
"HS_FLAG_ALLOWEMPTY to enable support.");
|
"HS_FLAG_ALLOWEMPTY to enable support.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ng.addGraph(*g)) {
|
if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
|
||||||
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
|
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
|
||||||
throw CompileError("Error compiling expression.");
|
throw CompileError("Error compiling expression.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||||
const u32 minWidth =
|
const u32 minWidth =
|
||||||
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
|
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
|
||||||
auto rose = ng.rose->buildRose(minWidth);
|
auto rose = ng.rose->buildRose(minWidth);
|
||||||
@ -305,7 +299,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
|
|
||||||
dumpReportManager(ng.rm, ng.cc.grey);
|
dumpReportManager(ng.rm, ng.cc.grey);
|
||||||
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
||||||
dumpSmallWrite(rose.get(), ng.cc.grey);
|
dumpSmallWrite(rose.get(), ng.cc.grey);
|
||||||
@ -320,6 +313,9 @@ platform_t target_to_platform(const target_t &target_info) {
|
|||||||
if (!target_info.has_avx2()) {
|
if (!target_info.has_avx2()) {
|
||||||
p |= HS_PLATFORM_NOAVX2;
|
p |= HS_PLATFORM_NOAVX2;
|
||||||
}
|
}
|
||||||
|
if (!target_info.has_avx512()) {
|
||||||
|
p |= HS_PLATFORM_NOAVX512;
|
||||||
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,7 +365,7 @@ struct hs_database *build(NG &ng, unsigned int *length) {
|
|||||||
if (!rose) {
|
if (!rose) {
|
||||||
throw CompileError("Unable to generate bytecode.");
|
throw CompileError("Unable to generate bytecode.");
|
||||||
}
|
}
|
||||||
*length = roseSize(rose.get());
|
*length = rose.size();
|
||||||
if (!*length) {
|
if (!*length) {
|
||||||
DEBUG_PRINTF("RoseEngine has zero length\n");
|
DEBUG_PRINTF("RoseEngine has zero length\n");
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -450,41 +446,42 @@ bool isSupported(const Component &c) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
|
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
|
||||||
const ParsedExpression &expr) {
|
const ParsedExpression &pe) {
|
||||||
assert(isSupported(*expr.component));
|
assert(isSupported(*pe.component));
|
||||||
|
|
||||||
const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
|
const auto builder = makeNFABuilder(rm, cc, pe);
|
||||||
assert(builder);
|
assert(builder);
|
||||||
|
|
||||||
// Set up START and ACCEPT states; retrieve the special states
|
// Set up START and ACCEPT states; retrieve the special states
|
||||||
const auto bs = makeGlushkovBuildState(*builder, expr.prefilter);
|
const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
|
||||||
|
|
||||||
// Map position IDs to characters/components
|
// Map position IDs to characters/components
|
||||||
expr.component->notePositions(*bs);
|
pe.component->notePositions(*bs);
|
||||||
|
|
||||||
// Wire the start dotstar state to the firsts
|
// Wire the start dotstar state to the firsts
|
||||||
connectInitialStates(*bs, expr);
|
connectInitialStates(*bs, pe);
|
||||||
|
|
||||||
DEBUG_PRINTF("wire up body of expr\n");
|
DEBUG_PRINTF("wire up body of expr\n");
|
||||||
// Build the rest of the FOLLOW set
|
// Build the rest of the FOLLOW set
|
||||||
vector<PositionInfo> initials = {builder->getStartDotStar(),
|
vector<PositionInfo> initials = {builder->getStartDotStar(),
|
||||||
builder->getStart()};
|
builder->getStart()};
|
||||||
expr.component->buildFollowSet(*bs, initials);
|
pe.component->buildFollowSet(*bs, initials);
|
||||||
|
|
||||||
// Wire the lasts to the accept state
|
// Wire the lasts to the accept state
|
||||||
connectFinalStates(*bs, expr);
|
connectFinalStates(*bs, pe);
|
||||||
|
|
||||||
// Create our edges
|
// Create our edges
|
||||||
bs->buildEdges();
|
bs->buildEdges();
|
||||||
|
|
||||||
auto g = builder->getGraph();
|
BuiltExpression built_expr = builder->getGraph();
|
||||||
assert(g);
|
assert(built_expr.g);
|
||||||
|
|
||||||
dumpDotWrapper(*g, "00_before_asserts", cc.grey);
|
dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
|
||||||
removeAssertVertices(rm, *g);
|
cc.grey);
|
||||||
|
removeAssertVertices(rm, *built_expr.g, built_expr.expr);
|
||||||
|
|
||||||
return g;
|
return built_expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -35,11 +35,11 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "database.h"
|
#include "database.h"
|
||||||
|
#include "compiler/expression_info.h"
|
||||||
#include "parser/Component.h"
|
#include "parser/Component.h"
|
||||||
#include "som/som.h"
|
#include "util/noncopyable.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <boost/core/noncopyable.hpp>
|
|
||||||
|
|
||||||
struct hs_database;
|
struct hs_database;
|
||||||
struct hs_expr_ext;
|
struct hs_expr_ext;
|
||||||
@ -50,34 +50,32 @@ struct CompileContext;
|
|||||||
struct Grey;
|
struct Grey;
|
||||||
struct target_t;
|
struct target_t;
|
||||||
class NG;
|
class NG;
|
||||||
|
class NGHolder;
|
||||||
class ReportManager;
|
class ReportManager;
|
||||||
class NGWrapper;
|
|
||||||
|
|
||||||
/** Class gathering together the pieces of a parsed expression.
|
/** \brief Class gathering together the pieces of a parsed expression. */
|
||||||
* Note: Owns the provided component.
|
class ParsedExpression : noncopyable {
|
||||||
*/
|
|
||||||
class ParsedExpression : boost::noncopyable {
|
|
||||||
public:
|
public:
|
||||||
ParsedExpression(unsigned index, const char *expression, unsigned flags,
|
ParsedExpression(unsigned index, const char *expression, unsigned flags,
|
||||||
ReportID actionId, const hs_expr_ext *ext = nullptr);
|
ReportID report, const hs_expr_ext *ext = nullptr);
|
||||||
|
|
||||||
bool utf8; //!< UTF-8 mode flag specified
|
/** \brief Expression information (from flags, extparam etc) */
|
||||||
|
ExpressionInfo expr;
|
||||||
|
|
||||||
/** \brief root node of parsed component tree. */
|
/** \brief Root node of parsed component tree. */
|
||||||
std::unique_ptr<ue2::Component> component;
|
std::unique_ptr<Component> component;
|
||||||
|
};
|
||||||
|
|
||||||
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
|
/**
|
||||||
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
|
* \brief Class gathering together the pieces of an expression that has been
|
||||||
const bool prefilter; //!< HS_FLAG_PREFILTER specified
|
* built into an NFA graph.
|
||||||
som_type som; //!< chosen SOM mode, or SOM_NONE
|
*/
|
||||||
|
struct BuiltExpression {
|
||||||
|
/** \brief Expression information (from flags, extparam etc) */
|
||||||
|
ExpressionInfo expr;
|
||||||
|
|
||||||
/** \brief index in expressions array passed to \ref hs_compile_multi */
|
/** \brief Built Glushkov NFA graph. */
|
||||||
const unsigned index;
|
std::unique_ptr<NGHolder> g;
|
||||||
|
|
||||||
const ReportID id; //!< user-specified pattern ID
|
|
||||||
u64a min_offset; //!< 0 if not used
|
|
||||||
u64a max_offset; //!< MAX_OFFSET if not used
|
|
||||||
u64a min_length; //!< 0 if not used
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -94,12 +92,12 @@ public:
|
|||||||
* @param ext
|
* @param ext
|
||||||
* Struct containing extra parameters for this expression, or NULL if
|
* Struct containing extra parameters for this expression, or NULL if
|
||||||
* none.
|
* none.
|
||||||
* @param actionId
|
* @param report
|
||||||
* The identifier to associate with the expression; returned by engine on
|
* The identifier to associate with the expression; returned by engine on
|
||||||
* match.
|
* match.
|
||||||
*/
|
*/
|
||||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||||
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
|
unsigned flags, const hs_expr_ext *ext, ReportID report);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a Hyperscan database out of the expressions we've been given. A
|
* Build a Hyperscan database out of the expressions we've been given. A
|
||||||
@ -127,8 +125,7 @@ struct hs_database *build(NG &ng, unsigned int *length);
|
|||||||
* @return
|
* @return
|
||||||
* nullptr on error.
|
* nullptr on error.
|
||||||
*/
|
*/
|
||||||
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
|
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
|
||||||
const CompileContext &cc,
|
|
||||||
const ParsedExpression &expr);
|
const ParsedExpression &expr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
102
src/compiler/expression_info.h
Normal file
102
src/compiler/expression_info.h
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
* \brief ExpressionInfo class for storing the properties of an expression.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COMPILER_EXPRESSION_INFO_H
|
||||||
|
#define COMPILER_EXPRESSION_INFO_H
|
||||||
|
|
||||||
|
#include "ue2common.h"
|
||||||
|
#include "som/som.h"
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
/** \brief Properties of an expression. */
|
||||||
|
class ExpressionInfo {
|
||||||
|
public:
|
||||||
|
ExpressionInfo(unsigned int index_in, bool allow_vacuous_in,
|
||||||
|
bool highlander_in, bool utf8_in, bool prefilter_in,
|
||||||
|
som_type som_in, ReportID report_in, u64a min_offset_in,
|
||||||
|
u64a max_offset_in, u64a min_length_in, u32 edit_distance_in)
|
||||||
|
: index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
|
||||||
|
highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
|
||||||
|
som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
|
||||||
|
min_length(min_length_in), edit_distance(edit_distance_in) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Index of the expression represented by this graph.
|
||||||
|
*
|
||||||
|
* Used:
|
||||||
|
* - down the track in error handling;
|
||||||
|
* - for identifying parts of an expression in highlander mode.
|
||||||
|
*/
|
||||||
|
unsigned int index;
|
||||||
|
|
||||||
|
/** \brief Report ID specified by the user. */
|
||||||
|
ReportID report;
|
||||||
|
|
||||||
|
/** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */
|
||||||
|
bool allow_vacuous;
|
||||||
|
|
||||||
|
/** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */
|
||||||
|
bool highlander;
|
||||||
|
|
||||||
|
/** \brief UTF-8 pattern. (HS_FLAG_UTF8) */
|
||||||
|
bool utf8;
|
||||||
|
|
||||||
|
/** \brief Prefiltering pattern. (HS_FLAG_PREFILTER) */
|
||||||
|
bool prefilter;
|
||||||
|
|
||||||
|
/** \brief Start-of-match type requested, or SOM_NONE. */
|
||||||
|
som_type som;
|
||||||
|
|
||||||
|
/** \brief Minimum match offset extended parameter. 0 if not used. */
|
||||||
|
u64a min_offset;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Maximum match offset extended parameter.
|
||||||
|
* MAX_OFFSET if not used.
|
||||||
|
*/
|
||||||
|
u64a max_offset;
|
||||||
|
|
||||||
|
/** \brief Minimum match length extended parameter. 0 if not used. */
|
||||||
|
u64a min_length;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Approximate matching edit distance extended parameter.
|
||||||
|
* 0 if not used.
|
||||||
|
*/
|
||||||
|
u32 edit_distance;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // COMPILER_EXPRESSION_INFO_H
|
16
src/crc32.c
16
src/crc32.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -29,14 +29,10 @@
|
|||||||
#include "crc32.h"
|
#include "crc32.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/arch.h"
|
||||||
|
#include "util/intrinsics.h"
|
||||||
|
|
||||||
#if defined(HAVE_C_X86INTRIN_H)
|
#if !defined(HAVE_SSE42)
|
||||||
#include <x86intrin.h>
|
|
||||||
#elif defined(HAVE_C_INTRIN_H)
|
|
||||||
#include <intrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __SSE4_2__
|
|
||||||
|
|
||||||
/***
|
/***
|
||||||
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
|
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
|
||||||
@ -582,7 +578,7 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
|
|||||||
return crc;
|
return crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // __SSE4_2__
|
#else // HAVE_SSE42
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CRC_WORD 8
|
#define CRC_WORD 8
|
||||||
@ -638,7 +634,7 @@ u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
|
|||||||
|
|
||||||
// Externally visible function
|
// Externally visible function
|
||||||
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
|
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
|
||||||
#ifdef __SSE4_2__
|
#if defined(HAVE_SSE42)
|
||||||
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
|
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
|
||||||
#else
|
#else
|
||||||
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
|
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -49,7 +49,7 @@ int db_correctly_aligned(const void *db) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_free_database(hs_database_t *db) {
|
hs_error_t HS_CDECL hs_free_database(hs_database_t *db) {
|
||||||
if (db && db->magic != HS_DB_MAGIC) {
|
if (db && db->magic != HS_DB_MAGIC) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
}
|
}
|
||||||
@ -59,7 +59,7 @@ hs_error_t hs_free_database(hs_database_t *db) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||||
size_t *serialized_length) {
|
size_t *serialized_length) {
|
||||||
if (!db || !bytes || !serialized_length) {
|
if (!db || !bytes || !serialized_length) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
@ -114,7 +114,8 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
|||||||
static
|
static
|
||||||
hs_error_t db_check_platform(const u64a p) {
|
hs_error_t db_check_platform(const u64a p) {
|
||||||
if (p != hs_current_platform
|
if (p != hs_current_platform
|
||||||
&& p != hs_current_platform_no_avx2) {
|
&& p != hs_current_platform_no_avx2
|
||||||
|
&& p != hs_current_platform_no_avx512) {
|
||||||
return HS_DB_PLATFORM_ERROR;
|
return HS_DB_PLATFORM_ERROR;
|
||||||
}
|
}
|
||||||
// passed all checks
|
// passed all checks
|
||||||
@ -195,7 +196,8 @@ void db_copy_bytecode(const char *serialized, hs_database_t *db) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
hs_database_t *db) {
|
hs_database_t *db) {
|
||||||
if (!bytes || !db) {
|
if (!bytes || !db) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
@ -237,7 +239,8 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_deserialize_database(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
hs_database_t **db) {
|
hs_database_t **db) {
|
||||||
if (!bytes || !db) {
|
if (!bytes || !db) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
@ -286,7 +289,7 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
|
hs_error_t HS_CDECL hs_database_size(const hs_database_t *db, size_t *size) {
|
||||||
if (!size) {
|
if (!size) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
}
|
}
|
||||||
@ -301,7 +304,8 @@ hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
size_t *size) {
|
size_t *size) {
|
||||||
// Decode and check the header
|
// Decode and check the header
|
||||||
hs_database_t header;
|
hs_database_t header;
|
||||||
@ -366,7 +370,9 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
|||||||
u8 minor = (version >> 16) & 0xff;
|
u8 minor = (version >> 16) & 0xff;
|
||||||
u8 major = (version >> 24) & 0xff;
|
u8 major = (version >> 24) & 0xff;
|
||||||
|
|
||||||
const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2";
|
const char *features = (plat & HS_PLATFORM_NOAVX512)
|
||||||
|
? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2"
|
||||||
|
: "AVX512";
|
||||||
|
|
||||||
const char *mode = NULL;
|
const char *mode = NULL;
|
||||||
|
|
||||||
@ -395,7 +401,7 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
|||||||
// that don't have snprintf but have a workalike.
|
// that don't have snprintf but have a workalike.
|
||||||
int p_len = SNPRINTF_COMPAT(
|
int p_len = SNPRINTF_COMPAT(
|
||||||
buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
|
buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
|
||||||
major, minor, release, avx2, mode);
|
major, minor, release, features, mode);
|
||||||
if (p_len < 0) {
|
if (p_len < 0) {
|
||||||
DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
|
DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
|
||||||
hs_misc_free(buf);
|
hs_misc_free(buf);
|
||||||
@ -414,8 +420,8 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes,
|
||||||
char **info) {
|
size_t length, char **info) {
|
||||||
if (!info) {
|
if (!info) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
}
|
}
|
||||||
@ -434,7 +440,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_database_info(const hs_database_t *db, char **info) {
|
hs_error_t HS_CDECL hs_database_info(const hs_database_t *db, char **info) {
|
||||||
if (!info) {
|
if (!info) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,6 +41,7 @@ extern "C"
|
|||||||
#include "hs_compile.h" // for HS_MODE_ flags
|
#include "hs_compile.h" // for HS_MODE_ flags
|
||||||
#include "hs_version.h"
|
#include "hs_version.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/arch.h"
|
||||||
|
|
||||||
#define HS_DB_VERSION HS_VERSION_32BIT
|
#define HS_DB_VERSION HS_VERSION_32BIT
|
||||||
#define HS_DB_MAGIC (0xdbdbdbdbU)
|
#define HS_DB_MAGIC (0xdbdbdbdbU)
|
||||||
@ -53,14 +54,18 @@ extern "C"
|
|||||||
#define HS_PLATFORM_CPU_MASK 0x3F
|
#define HS_PLATFORM_CPU_MASK 0x3F
|
||||||
|
|
||||||
#define HS_PLATFORM_NOAVX2 (4<<13)
|
#define HS_PLATFORM_NOAVX2 (4<<13)
|
||||||
|
#define HS_PLATFORM_NOAVX512 (8<<13)
|
||||||
|
|
||||||
/** \brief Platform features bitmask. */
|
/** \brief Platform features bitmask. */
|
||||||
typedef u64a platform_t;
|
typedef u64a platform_t;
|
||||||
|
|
||||||
static UNUSED
|
static UNUSED
|
||||||
const platform_t hs_current_platform = {
|
const platform_t hs_current_platform = {
|
||||||
#if !defined(__AVX2__)
|
#if !defined(HAVE_AVX2)
|
||||||
HS_PLATFORM_NOAVX2 |
|
HS_PLATFORM_NOAVX2 |
|
||||||
|
#endif
|
||||||
|
#if !defined(HAVE_AVX512)
|
||||||
|
HS_PLATFORM_NOAVX512 |
|
||||||
#endif
|
#endif
|
||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
@ -68,6 +73,13 @@ const platform_t hs_current_platform = {
|
|||||||
static UNUSED
|
static UNUSED
|
||||||
const platform_t hs_current_platform_no_avx2 = {
|
const platform_t hs_current_platform_no_avx2 = {
|
||||||
HS_PLATFORM_NOAVX2 |
|
HS_PLATFORM_NOAVX2 |
|
||||||
|
HS_PLATFORM_NOAVX512 |
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
|
||||||
|
static UNUSED
|
||||||
|
const platform_t hs_current_platform_no_avx512 = {
|
||||||
|
HS_PLATFORM_NOAVX512 |
|
||||||
0,
|
0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,8 +33,14 @@
|
|||||||
#include "util/cpuid_flags.h"
|
#include "util/cpuid_flags.h"
|
||||||
#include "util/join.h"
|
#include "util/join.h"
|
||||||
|
|
||||||
|
#if defined(DISABLE_AVX512_DISPATCH)
|
||||||
|
#define avx512_ disabled_
|
||||||
|
#define check_avx512() (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CREATE_DISPATCH(RTYPE, NAME, ...) \
|
#define CREATE_DISPATCH(RTYPE, NAME, ...) \
|
||||||
/* create defns */ \
|
/* create defns */ \
|
||||||
|
RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \
|
||||||
RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \
|
RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \
|
||||||
@ -46,6 +52,9 @@
|
|||||||
\
|
\
|
||||||
/* resolver */ \
|
/* resolver */ \
|
||||||
static void(*JOIN(resolve_, NAME)(void)) { \
|
static void(*JOIN(resolve_, NAME)(void)) { \
|
||||||
|
if (check_avx512()) { \
|
||||||
|
return JOIN(avx512_, NAME); \
|
||||||
|
} \
|
||||||
if (check_avx2()) { \
|
if (check_avx2()) { \
|
||||||
return JOIN(avx2_, NAME); \
|
return JOIN(avx2_, NAME); \
|
||||||
} \
|
} \
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,29 +38,19 @@ class EngineDescription {
|
|||||||
u32 id;
|
u32 id;
|
||||||
target_t code_target; // the target that we built this code for
|
target_t code_target; // the target that we built this code for
|
||||||
u32 numBuckets;
|
u32 numBuckets;
|
||||||
u32 confirmPullBackDistance;
|
|
||||||
u32 confirmTopLevelSplit;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
EngineDescription(u32 id_in, const target_t &code_target_in,
|
EngineDescription(u32 id_in, const target_t &code_target_in,
|
||||||
u32 numBuckets_in, u32 confirmPullBackDistance_in,
|
u32 numBuckets_in)
|
||||||
u32 confirmTopLevelSplit_in)
|
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {}
|
||||||
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
|
|
||||||
confirmPullBackDistance(confirmPullBackDistance_in),
|
|
||||||
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
|
|
||||||
|
|
||||||
virtual ~EngineDescription();
|
virtual ~EngineDescription();
|
||||||
|
|
||||||
u32 getID() const { return id; }
|
u32 getID() const { return id; }
|
||||||
u32 getNumBuckets() const { return numBuckets; }
|
u32 getNumBuckets() const { return numBuckets; }
|
||||||
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
|
|
||||||
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
|
|
||||||
void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; }
|
|
||||||
|
|
||||||
bool isValidOnTarget(const target_t &target_in) const;
|
bool isValidOnTarget(const target_t &target_in) const;
|
||||||
virtual u32 getDefaultFloodSuffixLength() const = 0;
|
virtual u32 getDefaultFloodSuffixLength() const = 0;
|
||||||
|
|
||||||
virtual bool typicallyHoldsOneCharLits() const { return true; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Returns a target given a CPU feature set value. */
|
/** Returns a target given a CPU feature set value. */
|
||||||
|
315
src/fdr/fdr.c
315
src/fdr/fdr.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,7 +34,9 @@
|
|||||||
#include "flood_runtime.h"
|
#include "flood_runtime.h"
|
||||||
#include "teddy.h"
|
#include "teddy.h"
|
||||||
#include "teddy_internal.h"
|
#include "teddy_internal.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
#include "util/uniform_ops.h"
|
||||||
|
|
||||||
/** \brief number of bytes processed in each iteration */
|
/** \brief number of bytes processed in each iteration */
|
||||||
#define ITER_BYTES 16
|
#define ITER_BYTES 16
|
||||||
@ -51,7 +53,7 @@
|
|||||||
*
|
*
|
||||||
* The incoming buffer is split into multiple zones to ensure two properties:
|
* The incoming buffer is split into multiple zones to ensure two properties:
|
||||||
* 1: that we can read 8? bytes behind to generate a hash safely
|
* 1: that we can read 8? bytes behind to generate a hash safely
|
||||||
* 2: that we can read the byte after the current byte (domain > 8)
|
* 2: that we can read the 3 bytes after the current byte (domain > 8)
|
||||||
*/
|
*/
|
||||||
struct zone {
|
struct zone {
|
||||||
/** \brief copied buffer, used only when it is a boundary zone. */
|
/** \brief copied buffer, used only when it is a boundary zone. */
|
||||||
@ -116,20 +118,34 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = {
|
|||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* compilers don't reliably synthesize the 32-bit ANDN instruction here,
|
||||||
|
* so we force its generation.
|
||||||
|
*/
|
||||||
|
static really_inline
|
||||||
|
u64a andn(const u32 a, const u8 *b) {
|
||||||
|
u64a r;
|
||||||
|
#if defined(HAVE_BMI) && !defined(NO_ASM)
|
||||||
|
__asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
|
||||||
|
#else
|
||||||
|
r = unaligned_load_u32(b) & ~a;
|
||||||
|
#endif
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/* generates an initial state mask based on the last byte-ish of history rather
|
/* generates an initial state mask based on the last byte-ish of history rather
|
||||||
* than being all accepting. If there is no history to consider, the state is
|
* than being all accepting. If there is no history to consider, the state is
|
||||||
* generated based on the minimum length of each bucket in order to prevent
|
* generated based on the minimum length of each bucket in order to prevent
|
||||||
* confirms.
|
* confirms.
|
||||||
*/
|
*/
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
|
m128 getInitState(const struct FDR *fdr, u8 len_history, const u64a *ft,
|
||||||
const struct zone *z) {
|
const struct zone *z) {
|
||||||
m128 s;
|
m128 s;
|
||||||
if (len_history) {
|
if (len_history) {
|
||||||
/* +1: the zones ensure that we can read the byte at z->end */
|
/* +1: the zones ensure that we can read the byte at z->end */
|
||||||
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
|
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
|
||||||
tmp &= fdr->domainMask;
|
tmp &= fdr->domainMask;
|
||||||
s = *((const m128 *)ft + tmp);
|
s = load_m128_from_u64a(ft + tmp);
|
||||||
s = rshiftbyte_m128(s, 1);
|
s = rshiftbyte_m128(s, 1);
|
||||||
} else {
|
} else {
|
||||||
s = fdr->start;
|
s = fdr->start;
|
||||||
@ -138,51 +154,30 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr,
|
||||||
u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
|
UNUSED const u8 *end_ptr, u32 domain_mask_flipped,
|
||||||
u64a *conf8, m128 *s) {
|
const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) {
|
||||||
/* +1: the zones ensure that we can read the byte at z->end */
|
/* +1: the zones ensure that we can read the byte at z->end */
|
||||||
|
assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr);
|
||||||
|
u64a reach0 = andn(domain_mask_flipped, itPtr);
|
||||||
|
u64a reach1 = andn(domain_mask_flipped, itPtr + 1);
|
||||||
|
u64a reach2 = andn(domain_mask_flipped, itPtr + 2);
|
||||||
|
u64a reach3 = andn(domain_mask_flipped, itPtr + 3);
|
||||||
|
|
||||||
u64a current_data_0;
|
m128 st0 = load_m128_from_u64a(ft + reach0);
|
||||||
u64a current_data_8;
|
m128 st1 = load_m128_from_u64a(ft + reach1);
|
||||||
|
m128 st2 = load_m128_from_u64a(ft + reach2);
|
||||||
|
m128 st3 = load_m128_from_u64a(ft + reach3);
|
||||||
|
|
||||||
current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
|
u64a reach4 = andn(domain_mask_flipped, itPtr + 4);
|
||||||
u64a v7 = (lv_u16(itPtr + 7, start_ptr, end_ptr + 1) << 1) &
|
u64a reach5 = andn(domain_mask_flipped, itPtr + 5);
|
||||||
domain_mask_adjusted;
|
u64a reach6 = andn(domain_mask_flipped, itPtr + 6);
|
||||||
u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
|
u64a reach7 = andn(domain_mask_flipped, itPtr + 7);
|
||||||
u64a v1 = (current_data_0 >> 7) & domain_mask_adjusted;
|
|
||||||
u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
|
|
||||||
u64a v3 = (current_data_0 >> 23) & domain_mask_adjusted;
|
|
||||||
u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
|
|
||||||
u64a v5 = (current_data_0 >> 39) & domain_mask_adjusted;
|
|
||||||
u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
|
|
||||||
current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
|
|
||||||
u64a v15 = (lv_u16(itPtr + 15, start_ptr, end_ptr + 1) << 1) &
|
|
||||||
domain_mask_adjusted;
|
|
||||||
u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
|
|
||||||
u64a v9 = (current_data_8 >> 7) & domain_mask_adjusted;
|
|
||||||
u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
|
|
||||||
u64a v11 = (current_data_8 >> 23) & domain_mask_adjusted;
|
|
||||||
u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
|
|
||||||
u64a v13 = (current_data_8 >> 39) & domain_mask_adjusted;
|
|
||||||
u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;
|
|
||||||
|
|
||||||
m128 st0 = *(const m128 *)(ft + v0*8);
|
m128 st4 = load_m128_from_u64a(ft + reach4);
|
||||||
m128 st1 = *(const m128 *)(ft + v1*8);
|
m128 st5 = load_m128_from_u64a(ft + reach5);
|
||||||
m128 st2 = *(const m128 *)(ft + v2*8);
|
m128 st6 = load_m128_from_u64a(ft + reach6);
|
||||||
m128 st3 = *(const m128 *)(ft + v3*8);
|
m128 st7 = load_m128_from_u64a(ft + reach7);
|
||||||
m128 st4 = *(const m128 *)(ft + v4*8);
|
|
||||||
m128 st5 = *(const m128 *)(ft + v5*8);
|
|
||||||
m128 st6 = *(const m128 *)(ft + v6*8);
|
|
||||||
m128 st7 = *(const m128 *)(ft + v7*8);
|
|
||||||
m128 st8 = *(const m128 *)(ft + v8*8);
|
|
||||||
m128 st9 = *(const m128 *)(ft + v9*8);
|
|
||||||
m128 st10 = *(const m128 *)(ft + v10*8);
|
|
||||||
m128 st11 = *(const m128 *)(ft + v11*8);
|
|
||||||
m128 st12 = *(const m128 *)(ft + v12*8);
|
|
||||||
m128 st13 = *(const m128 *)(ft + v13*8);
|
|
||||||
m128 st14 = *(const m128 *)(ft + v14*8);
|
|
||||||
m128 st15 = *(const m128 *)(ft + v15*8);
|
|
||||||
|
|
||||||
st1 = lshiftbyte_m128(st1, 1);
|
st1 = lshiftbyte_m128(st1, 1);
|
||||||
st2 = lshiftbyte_m128(st2, 2);
|
st2 = lshiftbyte_m128(st2, 2);
|
||||||
@ -191,6 +186,40 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
st5 = lshiftbyte_m128(st5, 5);
|
st5 = lshiftbyte_m128(st5, 5);
|
||||||
st6 = lshiftbyte_m128(st6, 6);
|
st6 = lshiftbyte_m128(st6, 6);
|
||||||
st7 = lshiftbyte_m128(st7, 7);
|
st7 = lshiftbyte_m128(st7, 7);
|
||||||
|
|
||||||
|
st0 = or128(st0, st1);
|
||||||
|
st2 = or128(st2, st3);
|
||||||
|
st4 = or128(st4, st5);
|
||||||
|
st6 = or128(st6, st7);
|
||||||
|
st0 = or128(st0, st2);
|
||||||
|
st4 = or128(st4, st6);
|
||||||
|
st0 = or128(st0, st4);
|
||||||
|
*s = or128(*s, st0);
|
||||||
|
|
||||||
|
*conf0 = movq(*s);
|
||||||
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
|
*conf0 ^= ~0ULL;
|
||||||
|
|
||||||
|
u64a reach8 = andn(domain_mask_flipped, itPtr + 8);
|
||||||
|
u64a reach9 = andn(domain_mask_flipped, itPtr + 9);
|
||||||
|
u64a reach10 = andn(domain_mask_flipped, itPtr + 10);
|
||||||
|
u64a reach11 = andn(domain_mask_flipped, itPtr + 11);
|
||||||
|
|
||||||
|
m128 st8 = load_m128_from_u64a(ft + reach8);
|
||||||
|
m128 st9 = load_m128_from_u64a(ft + reach9);
|
||||||
|
m128 st10 = load_m128_from_u64a(ft + reach10);
|
||||||
|
m128 st11 = load_m128_from_u64a(ft + reach11);
|
||||||
|
|
||||||
|
u64a reach12 = andn(domain_mask_flipped, itPtr + 12);
|
||||||
|
u64a reach13 = andn(domain_mask_flipped, itPtr + 13);
|
||||||
|
u64a reach14 = andn(domain_mask_flipped, itPtr + 14);
|
||||||
|
u64a reach15 = andn(domain_mask_flipped, itPtr + 15);
|
||||||
|
|
||||||
|
m128 st12 = load_m128_from_u64a(ft + reach12);
|
||||||
|
m128 st13 = load_m128_from_u64a(ft + reach13);
|
||||||
|
m128 st14 = load_m128_from_u64a(ft + reach14);
|
||||||
|
m128 st15 = load_m128_from_u64a(ft + reach15);
|
||||||
|
|
||||||
st9 = lshiftbyte_m128(st9, 1);
|
st9 = lshiftbyte_m128(st9, 1);
|
||||||
st10 = lshiftbyte_m128(st10, 2);
|
st10 = lshiftbyte_m128(st10, 2);
|
||||||
st11 = lshiftbyte_m128(st11, 3);
|
st11 = lshiftbyte_m128(st11, 3);
|
||||||
@ -199,100 +228,86 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
st14 = lshiftbyte_m128(st14, 6);
|
st14 = lshiftbyte_m128(st14, 6);
|
||||||
st15 = lshiftbyte_m128(st15, 7);
|
st15 = lshiftbyte_m128(st15, 7);
|
||||||
|
|
||||||
*s = or128(*s, st0);
|
st8 = or128(st8, st9);
|
||||||
*s = or128(*s, st1);
|
st10 = or128(st10, st11);
|
||||||
*s = or128(*s, st2);
|
st12 = or128(st12, st13);
|
||||||
*s = or128(*s, st3);
|
st14 = or128(st14, st15);
|
||||||
*s = or128(*s, st4);
|
st8 = or128(st8, st10);
|
||||||
*s = or128(*s, st5);
|
st12 = or128(st12, st14);
|
||||||
*s = or128(*s, st6);
|
st8 = or128(st8, st12);
|
||||||
*s = or128(*s, st7);
|
|
||||||
*conf0 = movq(*s);
|
|
||||||
*s = rshiftbyte_m128(*s, 8);
|
|
||||||
*conf0 ^= ~0ULL;
|
|
||||||
|
|
||||||
*s = or128(*s, st8);
|
*s = or128(*s, st8);
|
||||||
*s = or128(*s, st9);
|
|
||||||
*s = or128(*s, st10);
|
|
||||||
*s = or128(*s, st11);
|
|
||||||
*s = or128(*s, st12);
|
|
||||||
*s = or128(*s, st13);
|
|
||||||
*s = or128(*s, st14);
|
|
||||||
*s = or128(*s, st15);
|
|
||||||
*conf8 = movq(*s);
|
*conf8 = movq(*s);
|
||||||
*s = rshiftbyte_m128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf8 ^= ~0ULL;
|
*conf8 ^= ~0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
void get_conf_stride_2(const u8 *itPtr, UNUSED const u8 *start_ptr,
|
||||||
u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
|
UNUSED const u8 *end_ptr, u32 domain_mask_flipped,
|
||||||
u64a *conf8, m128 *s) {
|
const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) {
|
||||||
u64a current_data_0;
|
assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr);
|
||||||
u64a current_data_8;
|
u64a reach0 = andn(domain_mask_flipped, itPtr);
|
||||||
|
u64a reach2 = andn(domain_mask_flipped, itPtr + 2);
|
||||||
|
u64a reach4 = andn(domain_mask_flipped, itPtr + 4);
|
||||||
|
u64a reach6 = andn(domain_mask_flipped, itPtr + 6);
|
||||||
|
|
||||||
current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
|
m128 st0 = load_m128_from_u64a(ft + reach0);
|
||||||
u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
|
m128 st2 = load_m128_from_u64a(ft + reach2);
|
||||||
u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted;
|
m128 st4 = load_m128_from_u64a(ft + reach4);
|
||||||
u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
|
m128 st6 = load_m128_from_u64a(ft + reach6);
|
||||||
u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted;
|
|
||||||
current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
|
|
||||||
u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
|
|
||||||
u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted;
|
|
||||||
u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
|
|
||||||
u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted;
|
|
||||||
|
|
||||||
m128 st0 = *(const m128 *)(ft + v0*8);
|
u64a reach8 = andn(domain_mask_flipped, itPtr + 8);
|
||||||
m128 st2 = *(const m128 *)(ft + v2*8);
|
u64a reach10 = andn(domain_mask_flipped, itPtr + 10);
|
||||||
m128 st4 = *(const m128 *)(ft + v4*8);
|
u64a reach12 = andn(domain_mask_flipped, itPtr + 12);
|
||||||
m128 st6 = *(const m128 *)(ft + v6*8);
|
u64a reach14 = andn(domain_mask_flipped, itPtr + 14);
|
||||||
m128 st8 = *(const m128 *)(ft + v8*8);
|
|
||||||
m128 st10 = *(const m128 *)(ft + v10*8);
|
m128 st8 = load_m128_from_u64a(ft + reach8);
|
||||||
m128 st12 = *(const m128 *)(ft + v12*8);
|
m128 st10 = load_m128_from_u64a(ft + reach10);
|
||||||
m128 st14 = *(const m128 *)(ft + v14*8);
|
m128 st12 = load_m128_from_u64a(ft + reach12);
|
||||||
|
m128 st14 = load_m128_from_u64a(ft + reach14);
|
||||||
|
|
||||||
st2 = lshiftbyte_m128(st2, 2);
|
st2 = lshiftbyte_m128(st2, 2);
|
||||||
st4 = lshiftbyte_m128(st4, 4);
|
st4 = lshiftbyte_m128(st4, 4);
|
||||||
st6 = lshiftbyte_m128(st6, 6);
|
st6 = lshiftbyte_m128(st6, 6);
|
||||||
st10 = lshiftbyte_m128(st10, 2);
|
|
||||||
st12 = lshiftbyte_m128(st12, 4);
|
|
||||||
st14 = lshiftbyte_m128(st14, 6);
|
|
||||||
|
|
||||||
*s = or128(*s, st0);
|
*s = or128(*s, st0);
|
||||||
*s = or128(*s, st2);
|
*s = or128(*s, st2);
|
||||||
*s = or128(*s, st4);
|
*s = or128(*s, st4);
|
||||||
*s = or128(*s, st6);
|
*s = or128(*s, st6);
|
||||||
|
|
||||||
*conf0 = movq(*s);
|
*conf0 = movq(*s);
|
||||||
*s = rshiftbyte_m128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf0 ^= ~0ULL;
|
*conf0 ^= ~0ULL;
|
||||||
|
|
||||||
|
st10 = lshiftbyte_m128(st10, 2);
|
||||||
|
st12 = lshiftbyte_m128(st12, 4);
|
||||||
|
st14 = lshiftbyte_m128(st14, 6);
|
||||||
|
|
||||||
*s = or128(*s, st8);
|
*s = or128(*s, st8);
|
||||||
*s = or128(*s, st10);
|
*s = or128(*s, st10);
|
||||||
*s = or128(*s, st12);
|
*s = or128(*s, st12);
|
||||||
*s = or128(*s, st14);
|
*s = or128(*s, st14);
|
||||||
|
|
||||||
*conf8 = movq(*s);
|
*conf8 = movq(*s);
|
||||||
*s = rshiftbyte_m128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf8 ^= ~0ULL;
|
*conf8 ^= ~0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr,
|
||||||
u64a domain_mask_adjusted, const u8 *ft, u64a *conf0,
|
UNUSED const u8 *end_ptr, u32 domain_mask_flipped,
|
||||||
u64a *conf8, m128 *s) {
|
const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) {
|
||||||
u64a current_data_0;
|
assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr);
|
||||||
u64a current_data_8;
|
u64a reach0 = andn(domain_mask_flipped, itPtr);
|
||||||
|
u64a reach4 = andn(domain_mask_flipped, itPtr + 4);
|
||||||
|
u64a reach8 = andn(domain_mask_flipped, itPtr + 8);
|
||||||
|
u64a reach12 = andn(domain_mask_flipped, itPtr + 12);
|
||||||
|
|
||||||
current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr);
|
m128 st0 = load_m128_from_u64a(ft + reach0);
|
||||||
u64a v0 = (current_data_0 << 1) & domain_mask_adjusted;
|
m128 st4 = load_m128_from_u64a(ft + reach4);
|
||||||
u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted;
|
m128 st8 = load_m128_from_u64a(ft + reach8);
|
||||||
current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr);
|
m128 st12 = load_m128_from_u64a(ft + reach12);
|
||||||
u64a v8 = (current_data_8 << 1) & domain_mask_adjusted;
|
|
||||||
u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted;
|
|
||||||
|
|
||||||
m128 st0 = *(const m128 *)(ft + v0*8);
|
|
||||||
m128 st4 = *(const m128 *)(ft + v4*8);
|
|
||||||
m128 st8 = *(const m128 *)(ft + v8*8);
|
|
||||||
m128 st12 = *(const m128 *)(ft + v12*8);
|
|
||||||
|
|
||||||
st4 = lshiftbyte_m128(st4, 4);
|
st4 = lshiftbyte_m128(st4, 4);
|
||||||
st12 = lshiftbyte_m128(st12, 4);
|
st12 = lshiftbyte_m128(st12, 4);
|
||||||
@ -315,7 +330,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
|||||||
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
||||||
const u8 *ptr, u32 *last_match_id, struct zone *z) {
|
const u8 *ptr, u32 *last_match_id, struct zone *z) {
|
||||||
const u8 bucket = 8;
|
const u8 bucket = 8;
|
||||||
const u8 pullback = 1;
|
|
||||||
|
|
||||||
if (likely(!*conf)) {
|
if (likely(!*conf)) {
|
||||||
return;
|
return;
|
||||||
@ -332,8 +346,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
|||||||
u32 bit = findAndClearLSB_64(conf);
|
u32 bit = findAndClearLSB_64(conf);
|
||||||
u32 byte = bit / bucket + offset;
|
u32 byte = bit / bucket + offset;
|
||||||
u32 bitRem = bit % bucket;
|
u32 bitRem = bit % bucket;
|
||||||
u32 confSplit = *(ptr + byte);
|
u32 idx = bitRem;
|
||||||
u32 idx = confSplit * bucket + bitRem;
|
|
||||||
u32 cf = confBase[idx];
|
u32 cf = confBase[idx];
|
||||||
if (!cf) {
|
if (!cf) {
|
||||||
continue;
|
continue;
|
||||||
@ -343,18 +356,8 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
|||||||
if (!(fdrc->groups & *control)) {
|
if (!(fdrc->groups & *control)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!fdrc->mult) {
|
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
|
||||||
u32 id = fdrc->nBitsOrSoleID;
|
confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
|
||||||
if ((*last_match_id == id) && (fdrc->flags & NoRepeat)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
*last_match_id = id;
|
|
||||||
*control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
|
|
||||||
id, a->ctxt);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
|
|
||||||
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
|
|
||||||
last_match_id, confVal);
|
last_match_id, confVal);
|
||||||
} while (unlikely(!!*conf));
|
} while (unlikely(!!*conf));
|
||||||
}
|
}
|
||||||
@ -496,6 +499,7 @@ void createShortZone(const u8 *buf, const u8 *hend, const u8 *begin,
|
|||||||
|
|
||||||
/* copy the post-padding byte; this is required for domain > 8 due to
|
/* copy the post-padding byte; this is required for domain > 8 due to
|
||||||
* overhang */
|
* overhang */
|
||||||
|
assert(ZONE_SHORT_DATA_OFFSET + copy_len + 3 < 64);
|
||||||
*z_end = 0;
|
*z_end = 0;
|
||||||
|
|
||||||
z->end = z_end;
|
z->end = z_end;
|
||||||
@ -566,15 +570,19 @@ void createStartZone(const u8 *buf, const u8 *hend, const u8 *begin,
|
|||||||
storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
|
storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
|
||||||
|
|
||||||
z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
|
z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
|
||||||
|
|
||||||
|
assert(ZONE_START_BEGIN + copy_len + 3 < 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Create a zone for the end region.
|
* \brief Create a zone for the end region.
|
||||||
*
|
*
|
||||||
* This function requires that there is > ITER_BYTES of data in the buffer to
|
* This function requires that there is > ITER_BYTES of data in the buffer to
|
||||||
* scan. The end zone, however, is only responsible for a scanning the <=
|
* scan. The end zone is responsible for scanning the <= ITER_BYTES rump of
|
||||||
* ITER_BYTES rump of data. The end zone is required to handle a full ITER_BYTES
|
* data and optional ITER_BYTES. The main zone cannot handle the last 3 bytes
|
||||||
* iteration as the main loop cannot handle the last byte of the buffer.
|
* of the buffer. The end zone is required to handle an optional full
|
||||||
|
* ITER_BYTES from the main zone when there are fewer than 3 bytes to scan. The
|
||||||
|
* main zone size is reduced by ITER_BYTES in this case.
|
||||||
*
|
*
|
||||||
* This zone ensures that the byte at z->end can be read by filling it with a
|
* This zone ensures that the byte at z->end can be read by filling it with a
|
||||||
* padding character.
|
* padding character.
|
||||||
@ -592,31 +600,45 @@ void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
|
|||||||
|
|
||||||
ptrdiff_t z_len = end - begin;
|
ptrdiff_t z_len = end - begin;
|
||||||
assert(z_len > 0);
|
assert(z_len > 0);
|
||||||
assert(z_len <= ITER_BYTES);
|
size_t iter_bytes_second = 0;
|
||||||
|
size_t z_len_first = z_len;
|
||||||
|
if (z_len > ITER_BYTES) {
|
||||||
|
z_len_first = z_len - ITER_BYTES;
|
||||||
|
iter_bytes_second = ITER_BYTES;
|
||||||
|
}
|
||||||
|
z->shift = ITER_BYTES - z_len_first;
|
||||||
|
|
||||||
z->shift = ITER_BYTES - z_len;
|
const u8 *end_first = end - iter_bytes_second;
|
||||||
|
/* The amount of data we have to copy from main buffer for the
|
||||||
/* The amount of data we have to copy from main buffer. */
|
* first iteration. */
|
||||||
size_t copy_len = MIN((size_t)(end - buf),
|
size_t copy_len_first = MIN((size_t)(end_first - buf),
|
||||||
ITER_BYTES + sizeof(CONF_TYPE));
|
ITER_BYTES + sizeof(CONF_TYPE));
|
||||||
assert(copy_len >= 16);
|
assert(copy_len_first >= 16);
|
||||||
|
|
||||||
|
size_t total_copy_len = copy_len_first + iter_bytes_second;
|
||||||
|
assert(total_copy_len + 3 < 64);
|
||||||
|
|
||||||
/* copy the post-padding byte; this is required for domain > 8 due to
|
/* copy the post-padding byte; this is required for domain > 8 due to
|
||||||
* overhang */
|
* overhang */
|
||||||
z->buf[copy_len] = 0;
|
z->buf[total_copy_len] = 0;
|
||||||
|
|
||||||
/* set the start and end location of the zone buf
|
/* set the start and end location of the zone buf
|
||||||
* to be scanned */
|
* to be scanned */
|
||||||
u8 *z_end = z->buf + copy_len;
|
u8 *z_end = z->buf + total_copy_len;
|
||||||
z->end = z_end;
|
z->end = z_end;
|
||||||
z->start = z_end - ITER_BYTES;
|
z->start = z_end - ITER_BYTES - iter_bytes_second;
|
||||||
assert(z->start + z->shift == z_end - z_len);
|
assert(z->start + z->shift == z_end - z_len);
|
||||||
|
|
||||||
|
u8 *z_end_first = z_end - iter_bytes_second;
|
||||||
/* copy the first 8 bytes of the valid region */
|
/* copy the first 8 bytes of the valid region */
|
||||||
unaligned_store_u64a(z->buf, unaligned_load_u64a(end - copy_len));
|
unaligned_store_u64a(z->buf,
|
||||||
|
unaligned_load_u64a(end_first - copy_len_first));
|
||||||
|
|
||||||
/* copy the last 16 bytes, may overlap with the previous 8 byte write */
|
/* copy the last 16 bytes, may overlap with the previous 8 byte write */
|
||||||
|
storeu128(z_end_first - sizeof(m128), loadu128(end_first - sizeof(m128)));
|
||||||
|
if (iter_bytes_second) {
|
||||||
storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
|
storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
|
||||||
|
}
|
||||||
|
|
||||||
z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
|
z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
|
||||||
}
|
}
|
||||||
@ -651,13 +673,13 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
|
|||||||
|
|
||||||
/* find maximum buffer location that the main zone can scan
|
/* find maximum buffer location that the main zone can scan
|
||||||
* - must be a multiple of ITER_BYTES, and
|
* - must be a multiple of ITER_BYTES, and
|
||||||
* - cannot contain the last byte (due to overhang)
|
* - cannot contain the last 3 bytes (due to 3 bytes read past the
|
||||||
|
end of buffer in FDR main loop)
|
||||||
*/
|
*/
|
||||||
const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 1, ITER_BYTES);
|
const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 3, ITER_BYTES);
|
||||||
assert(main_end >= ptr);
|
|
||||||
|
|
||||||
/* create a zone if multiple of ITER_BYTES are found */
|
/* create a zone if multiple of ITER_BYTES are found */
|
||||||
if (main_end != ptr) {
|
if (main_end > ptr) {
|
||||||
createMainZone(flood, ptr, main_end, &zoneArr[numZone++]);
|
createMainZone(flood, ptr, main_end, &zoneArr[numZone++]);
|
||||||
ptr = main_end;
|
ptr = main_end;
|
||||||
}
|
}
|
||||||
@ -684,10 +706,10 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
|
|||||||
return HWLM_TERMINATED; \
|
return HWLM_TERMINATED; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
__builtin_prefetch(itPtr + (ITER_BYTES*4)); \
|
__builtin_prefetch(itPtr + ITER_BYTES); \
|
||||||
u64a conf0; \
|
u64a conf0; \
|
||||||
u64a conf8; \
|
u64a conf8; \
|
||||||
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \
|
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_flipped, \
|
||||||
ft, &conf0, &conf8, &s); \
|
ft, &conf0, &conf8, &s); \
|
||||||
do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \
|
do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \
|
||||||
&last_match_id, zz); \
|
&last_match_id, zz); \
|
||||||
@ -705,10 +727,11 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
|||||||
hwlm_group_t control) {
|
hwlm_group_t control) {
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
u32 last_match_id = INVALID_MATCH_ID;
|
u32 last_match_id = INVALID_MATCH_ID;
|
||||||
u64a domain_mask_adjusted = fdr->domainMask << 1;
|
u32 domain_mask_flipped = ~fdr->domainMask;
|
||||||
u8 stride = fdr->stride;
|
u8 stride = fdr->stride;
|
||||||
const u8 *ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
|
const u64a *ft =
|
||||||
const u32 *confBase = (const u32 *)(ft + fdr->tabSize);
|
(const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)));
|
||||||
|
const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize);
|
||||||
struct zone zones[ZONE_MAX];
|
struct zone zones[ZONE_MAX];
|
||||||
assert(fdr->domain > 8 && fdr->domain < 16);
|
assert(fdr->domain > 8 && fdr->domain < 16);
|
||||||
|
|
||||||
@ -761,7 +784,7 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
|||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(HAVE_AVX2)
|
||||||
#define ONLY_AVX2(func) func
|
#define ONLY_AVX2(func) func
|
||||||
#else
|
#else
|
||||||
#define ONLY_AVX2(func) NULL
|
#define ONLY_AVX2(func) NULL
|
||||||
@ -773,8 +796,8 @@ typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
|
|||||||
|
|
||||||
static const FDRFUNCTYPE funcs[] = {
|
static const FDRFUNCTYPE funcs[] = {
|
||||||
fdr_engine_exec,
|
fdr_engine_exec,
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
|
NULL, /* old: fast teddy */
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fast),
|
NULL, /* old: fast teddy */
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat),
|
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat),
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat),
|
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat),
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat),
|
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat),
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,8 +30,9 @@
|
|||||||
* \brief FDR literal matcher: build API.
|
* \brief FDR literal matcher: build API.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdr_internal.h"
|
|
||||||
#include "fdr_compile.h"
|
#include "fdr_compile.h"
|
||||||
|
|
||||||
|
#include "fdr_internal.h"
|
||||||
#include "fdr_confirm.h"
|
#include "fdr_confirm.h"
|
||||||
#include "fdr_compile_internal.h"
|
#include "fdr_compile_internal.h"
|
||||||
#include "fdr_engine_description.h"
|
#include "fdr_engine_description.h"
|
||||||
@ -40,9 +41,10 @@
|
|||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "hwlm/hwlm_build.h"
|
#include "hwlm/hwlm_build.h"
|
||||||
#include "util/alloc.h"
|
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/dump_mask.h"
|
#include "util/dump_mask.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "util/noncopyable.h"
|
||||||
#include "util/target_info.h"
|
#include "util/target_info.h"
|
||||||
#include "util/ue2string.h"
|
#include "util/ue2string.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
@ -53,13 +55,15 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <numeric>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/core/noncopyable.hpp>
|
#include <boost/multi_array.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -67,31 +71,31 @@ namespace ue2 {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class FDRCompiler : boost::noncopyable {
|
class FDRCompiler : noncopyable {
|
||||||
private:
|
private:
|
||||||
const FDREngineDescription ŋ
|
const FDREngineDescription ŋ
|
||||||
|
const Grey &grey;
|
||||||
vector<u8> tab;
|
vector<u8> tab;
|
||||||
const vector<hwlmLiteral> &lits;
|
vector<hwlmLiteral> lits;
|
||||||
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
||||||
bool make_small;
|
bool make_small;
|
||||||
|
|
||||||
u8 *tabIndexToMask(u32 indexInTable);
|
u8 *tabIndexToMask(u32 indexInTable);
|
||||||
void assignStringToBucket(LiteralIndex l, BucketIndex b);
|
|
||||||
void assignStringsToBuckets();
|
void assignStringsToBuckets();
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
void dumpMasks(const u8 *defaultMask);
|
void dumpMasks(const u8 *defaultMask);
|
||||||
#endif
|
#endif
|
||||||
void setupTab();
|
void setupTab();
|
||||||
aligned_unique_ptr<FDR> setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link);
|
bytecode_ptr<FDR> setupFDR();
|
||||||
void createInitialState(FDR *fdr);
|
void createInitialState(FDR *fdr);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FDRCompiler(const vector<hwlmLiteral> &lits_in,
|
FDRCompiler(vector<hwlmLiteral> lits_in, const FDREngineDescription &eng_in,
|
||||||
const FDREngineDescription &eng_in, bool make_small_in)
|
bool make_small_in, const Grey &grey_in)
|
||||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
: eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
|
||||||
make_small(make_small_in) {}
|
lits(move(lits_in)), make_small(make_small_in) {}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
|
bytecode_ptr<FDR> build();
|
||||||
};
|
};
|
||||||
|
|
||||||
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
|
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
|
||||||
@ -140,27 +144,25 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> FDRCompiler::setupFDR() {
|
||||||
FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|
||||||
size_t tabSize = eng.getTabSizeBytes();
|
size_t tabSize = eng.getTabSizeBytes();
|
||||||
|
|
||||||
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
|
||||||
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||||
|
|
||||||
assert(ISALIGNED_16(tabSize));
|
assert(ISALIGNED_16(tabSize));
|
||||||
assert(ISALIGNED_16(confirmTmp.second));
|
assert(ISALIGNED_16(confirmTmp.size()));
|
||||||
assert(ISALIGNED_16(floodControlTmp.second));
|
assert(ISALIGNED_16(floodControlTmp.size()));
|
||||||
assert(ISALIGNED_16(link.second));
|
|
||||||
size_t headerSize = ROUNDUP_16(sizeof(FDR));
|
size_t headerSize = ROUNDUP_16(sizeof(FDR));
|
||||||
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second +
|
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() +
|
||||||
floodControlTmp.second + link.second);
|
floodControlTmp.size());
|
||||||
|
|
||||||
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
|
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
|
||||||
"total=%zu\n",
|
"total=%zu\n",
|
||||||
headerSize, tabSize, confirmTmp.second, floodControlTmp.second,
|
headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(),
|
||||||
size);
|
size);
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
|
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
fdr->size = size;
|
fdr->size = size;
|
||||||
@ -169,16 +171,16 @@ FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|||||||
createInitialState(fdr.get());
|
createInitialState(fdr.get());
|
||||||
|
|
||||||
u8 *fdr_base = (u8 *)fdr.get();
|
u8 *fdr_base = (u8 *)fdr.get();
|
||||||
u8 * ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
|
u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
|
||||||
copy(tab.begin(), tab.end(), ptr);
|
copy(tab.begin(), tab.end(), ptr);
|
||||||
ptr += tabSize;
|
ptr += tabSize;
|
||||||
|
|
||||||
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
|
||||||
ptr += confirmTmp.second;
|
ptr += confirmTmp.size();
|
||||||
|
|
||||||
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
||||||
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
|
||||||
ptr += floodControlTmp.second;
|
ptr += floodControlTmp.size();
|
||||||
|
|
||||||
/* we are allowing domains 9 to 15 only */
|
/* we are allowing domains 9 to 15 only */
|
||||||
assert(eng.bits > 8 && eng.bits < 16);
|
assert(eng.bits > 8 && eng.bits < 16);
|
||||||
@ -187,76 +189,124 @@ FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|||||||
fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
|
fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
|
||||||
fdr->stride = eng.stride;
|
fdr->stride = eng.stride;
|
||||||
|
|
||||||
if (link.first) {
|
|
||||||
fdr->link = verify_u32(ptr - fdr_base);
|
|
||||||
memcpy(ptr, link.first.get(), link.second);
|
|
||||||
} else {
|
|
||||||
fdr->link = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return fdr;
|
return fdr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
|
//#define DEBUG_ASSIGNMENT
|
||||||
bucketToLits[b].push_back(l);
|
|
||||||
|
static
|
||||||
|
double getScoreUtil(u32 len, u32 count) {
|
||||||
|
return len == 0 ? numeric_limits<double>::max()
|
||||||
|
: our_pow(count, 1.05) * our_pow(len, -3.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LitOrder {
|
/**
|
||||||
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
|
* Returns true if the two given literals should be placed in the same chunk as
|
||||||
bool operator()(const u32 &i1, const u32 &i2) const {
|
* they are identical except for a difference in caselessness.
|
||||||
const string &i1s = vl[i1].s;
|
*/
|
||||||
const string &i2s = vl[i2].s;
|
static
|
||||||
|
bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b,
|
||||||
|
const hwlmLiteral *last_nocase_lit) {
|
||||||
|
const size_t a_len = a.s.size();
|
||||||
|
const size_t b_len = b.s.size();
|
||||||
|
|
||||||
size_t len1 = i1s.size(), len2 = i2s.size();
|
if (a_len != b_len) {
|
||||||
|
|
||||||
if (len1 != len2) {
|
|
||||||
return len1 < len2;
|
|
||||||
} else {
|
|
||||||
auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
|
||||||
if (p.first == i1s.rend()) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return *p.first < *p.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() &&
|
||||||
const vector<hwlmLiteral> &vl;
|
!cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true);
|
||||||
};
|
return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase);
|
||||||
|
|
||||||
static u64a getScoreUtil(u32 len, u32 count) {
|
|
||||||
if (len == 0) {
|
|
||||||
return (u64a)-1;
|
|
||||||
}
|
|
||||||
const u32 LEN_THRESH = 128;
|
|
||||||
const u32 elen = (len > LEN_THRESH) ? LEN_THRESH : len;
|
|
||||||
const u64a lenScore =
|
|
||||||
(LEN_THRESH * LEN_THRESH * LEN_THRESH) / (elen * elen * elen);
|
|
||||||
return count * lenScore; // deemphasize count - possibly more than needed
|
|
||||||
// this might be overkill in the other direction
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//#define DEBUG_ASSIGNMENT
|
struct Chunk {
|
||||||
void FDRCompiler::assignStringsToBuckets() {
|
Chunk(u32 first_id_in, u32 count_in, u32 length_in)
|
||||||
typedef u64a SCORE; // 'Score' type
|
: first_id(first_id_in), count(count_in), length(length_in) {}
|
||||||
const SCORE MAX_SCORE = (SCORE)-1;
|
u32 first_id; //!< first id in this chunk
|
||||||
|
u32 count; //!< how many are in this chunk
|
||||||
|
u32 length; //!< how long things in the chunk are
|
||||||
|
};
|
||||||
|
|
||||||
|
static
|
||||||
|
vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
|
||||||
|
const map<u32, u32> &lenCounts) {
|
||||||
const u32 CHUNK_MAX = 512;
|
const u32 CHUNK_MAX = 512;
|
||||||
const u32 BUCKET_MAX = 16;
|
const u32 MAX_CONSIDERED_LENGTH = 16;
|
||||||
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
|
||||||
|
|
||||||
u32 ls = verify_u32(lits.size());
|
// TODO: detailed early stage literal analysis for v. small cases (actually
|
||||||
assert(ls); // Shouldn't be called with no literals.
|
// look at lits) yes - after we factor this out and merge in the Teddy
|
||||||
|
// style of building we can look at this, although the teddy merge
|
||||||
|
// modelling is quite different. It's still probably adaptable to some
|
||||||
|
// extent for this class of problem.
|
||||||
|
|
||||||
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
vector<Chunk> chunks;
|
||||||
vector<LiteralIndex> vli;
|
chunks.reserve(CHUNK_MAX);
|
||||||
vli.resize(ls);
|
|
||||||
map<u32, u32> lenCounts;
|
const u32 maxPerChunk = lits.size() /
|
||||||
for (LiteralIndex l = 0; l < ls; l++) {
|
(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
|
||||||
vli[l] = l;
|
|
||||||
lenCounts[lits[l].s.size()]++;
|
u32 currentSize = 0;
|
||||||
|
u32 chunkStartID = 0;
|
||||||
|
const hwlmLiteral *last_nocase_lit = nullptr;
|
||||||
|
|
||||||
|
for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) {
|
||||||
|
const auto &lit = lits[i];
|
||||||
|
|
||||||
|
DEBUG_PRINTF("i=%u, lit=%s%s\n", i, escapeString(lit.s).c_str(),
|
||||||
|
lit.nocase ? " (nocase)" : "");
|
||||||
|
|
||||||
|
// If this literal is identical to the last one (aside from differences
|
||||||
|
// in caselessness), keep going even if we will "overfill" a chunk; we
|
||||||
|
// don't want to split identical literals into different buckets.
|
||||||
|
if (i != 0 && isEquivLit(lit, lits[i - 1], last_nocase_lit)) {
|
||||||
|
DEBUG_PRINTF("identical lit\n");
|
||||||
|
goto next_literal;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((currentSize < MAX_CONSIDERED_LENGTH &&
|
||||||
|
(lit.s.size() != currentSize)) ||
|
||||||
|
(currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
|
||||||
|
currentSize = lit.s.size();
|
||||||
|
if (!chunks.empty()) {
|
||||||
|
chunks.back().count = i - chunkStartID;
|
||||||
|
}
|
||||||
|
chunkStartID = i;
|
||||||
|
chunks.emplace_back(i, 0, currentSize);
|
||||||
|
}
|
||||||
|
next_literal:
|
||||||
|
if (lit.nocase) {
|
||||||
|
last_nocase_lit = &lit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(!chunks.empty());
|
||||||
|
chunks.back().count = lits.size() - chunkStartID;
|
||||||
|
// close off chunks with an empty row
|
||||||
|
chunks.emplace_back(lits.size(), 0, 0);
|
||||||
|
|
||||||
|
#ifdef DEBUG_ASSIGNMENT
|
||||||
|
for (size_t j = 0; j < chunks.size(); j++) {
|
||||||
|
const auto &chunk = chunks[j];
|
||||||
|
printf("chunk %zu first_id=%u count=%u length=%u\n", j, chunk.first_id,
|
||||||
|
chunk.count, chunk.length);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
DEBUG_PRINTF("built %zu chunks (%zu lits)\n", chunks.size(), lits.size());
|
||||||
|
assert(chunks.size() <= CHUNK_MAX);
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FDRCompiler::assignStringsToBuckets() {
|
||||||
|
const double MAX_SCORE = numeric_limits<double>::max();
|
||||||
|
|
||||||
|
assert(!lits.empty()); // Shouldn't be called with no literals.
|
||||||
|
|
||||||
|
// Count the number of literals for each length.
|
||||||
|
map<u32, u32> lenCounts;
|
||||||
|
for (const auto &lit : lits) {
|
||||||
|
lenCounts[lit.s.size()]++;
|
||||||
}
|
}
|
||||||
// sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
|
|
||||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
|
||||||
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
#ifdef DEBUG_ASSIGNMENT
|
||||||
for (const auto &m : lenCounts) {
|
for (const auto &m : lenCounts) {
|
||||||
@ -265,103 +315,94 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
|
// Sort literals by literal length. If tied on length, use lexicographic
|
||||||
// yes - after we factor this out and merge in the Teddy style of building we can look
|
// ordering (of the reversed literals).
|
||||||
// at this, although the teddy merge modelling is quite different. It's still probably
|
stable_sort(lits.begin(), lits.end(),
|
||||||
// adaptable to some extent for this class of problem
|
[](const hwlmLiteral &a, const hwlmLiteral &b) {
|
||||||
|
if (a.s.size() != b.s.size()) {
|
||||||
u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound)
|
return a.s.size() < b.s.size();
|
||||||
u32 count[CHUNK_MAX]; // how many are in this chunk
|
|
||||||
u32 length[CHUNK_MAX]; // how long things in the chunk are
|
|
||||||
|
|
||||||
const u32 MAX_CONSIDERED_LENGTH = 16;
|
|
||||||
u32 currentChunk = 0;
|
|
||||||
u32 currentSize = 0;
|
|
||||||
u32 chunkStartID = 0;
|
|
||||||
u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
|
|
||||||
|
|
||||||
for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
|
|
||||||
LiteralIndex l = vli[i];
|
|
||||||
if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
|
|
||||||
(currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
|
|
||||||
currentSize = lits[l].s.size();
|
|
||||||
if (currentChunk) {
|
|
||||||
count[currentChunk - 1 ] = i - chunkStartID;
|
|
||||||
}
|
|
||||||
chunkStartID = firstIds[currentChunk] = i;
|
|
||||||
length[currentChunk] = currentSize;
|
|
||||||
currentChunk++;
|
|
||||||
}
|
}
|
||||||
|
auto p = mismatch(a.s.rbegin(), a.s.rend(), b.s.rbegin());
|
||||||
|
if (p.first != a.s.rend()) {
|
||||||
|
return *p.first < *p.second;
|
||||||
}
|
}
|
||||||
|
// Sort caseless variants first.
|
||||||
|
return a.nocase > b.nocase;
|
||||||
|
});
|
||||||
|
|
||||||
assert(currentChunk > 0);
|
vector<Chunk> chunks = assignChunks(lits, lenCounts);
|
||||||
count[currentChunk - 1] = ls - chunkStartID;
|
|
||||||
// close off chunks with an empty row
|
|
||||||
firstIds[currentChunk] = ls;
|
|
||||||
length[currentChunk] = 0;
|
|
||||||
count[currentChunk] = 0;
|
|
||||||
u32 nChunks = currentChunk + 1;
|
|
||||||
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
const u32 numChunks = chunks.size();
|
||||||
for (u32 j = 0; j < nChunks; j++) {
|
const u32 numBuckets = eng.getNumBuckets();
|
||||||
printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
|
// 2D array of (score, chunk index) pairs, indexed by
|
||||||
u32 nb = eng.getNumBuckets();
|
// [chunk_index][bucket_index].
|
||||||
|
boost::multi_array<pair<double, u32>, 2> t(
|
||||||
|
boost::extents[numChunks][numBuckets]);
|
||||||
|
|
||||||
for (u32 j = 0; j < nChunks; j++) {
|
for (u32 j = 0; j < numChunks; j++) {
|
||||||
u32 cnt = 0;
|
u32 cnt = 0;
|
||||||
for (u32 k = j; k < nChunks; ++k) {
|
for (u32 k = j; k < numChunks; ++k) {
|
||||||
cnt += count[k];
|
cnt += chunks[k].count;
|
||||||
}
|
}
|
||||||
t[j][0] = {getScoreUtil(length[j], cnt), 0};
|
t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = 1; i < nb; i++) {
|
for (u32 i = 1; i < numBuckets; i++) {
|
||||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row
|
||||||
SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
|
pair<double, u32> best = {MAX_SCORE, 0};
|
||||||
u32 cnt = count[j];
|
u32 cnt = chunks[j].count;
|
||||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
for (u32 k = j + 1; k < numChunks - 1; k++) {
|
||||||
SCORE score = getScoreUtil(length[j], cnt);
|
auto score = getScoreUtil(chunks[j].length, cnt);
|
||||||
if (score > best.first) {
|
if (score > best.first) {
|
||||||
break; // if we're now worse locally than our best score, give up
|
break; // now worse locally than our best score, give up
|
||||||
}
|
}
|
||||||
score += t[k][i-1].first;
|
score += t[k][i-1].first;
|
||||||
if (score < best.first) {
|
if (score < best.first) {
|
||||||
best = {score, k};
|
best = {score, k};
|
||||||
}
|
}
|
||||||
|
cnt += chunks[k].count;
|
||||||
}
|
}
|
||||||
t[j][i] = best;
|
t[j][i] = best;
|
||||||
}
|
}
|
||||||
t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
|
t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
#ifdef DEBUG_ASSIGNMENT
|
||||||
for (u32 j = 0; j < nChunks; j++) {
|
for (u32 j = 0; j < numChunks; j++) {
|
||||||
for (u32 i = 0; i < nb; i++) {
|
printf("%03u: ", j);
|
||||||
SCORE_INDEX_PAIR v = t[j][i];
|
for (u32 i = 0; i < numBuckets; i++) {
|
||||||
printf("<%7lld,%3d>", v.first, v.second);
|
const auto &v = t[j][i];
|
||||||
|
printf("<%0.3f,%3d> ", v.first, v.second);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// our best score is in best[0][N_BUCKETS-1] and we can follow the links
|
// our best score is in t[0][N_BUCKETS-1] and we can follow the links
|
||||||
// to find where our buckets should start and what goes into them
|
// to find where our buckets should start and what goes into them
|
||||||
for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
|
for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
|
||||||
u32 j = t[i][n - 1].second;
|
u32 j = t[i][n - 1].second;
|
||||||
if (j == 0) {
|
if (j == 0) {
|
||||||
j = nChunks - 1;
|
j = numChunks - 1;
|
||||||
}
|
}
|
||||||
// put chunks between i - j into bucket (NBUCKETS-1) - n
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
// put chunks between i - j into bucket (numBuckets - n).
|
||||||
printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
|
u32 first_id = chunks[i].first_id;
|
||||||
nb - n);
|
u32 last_id = chunks[j].first_id;
|
||||||
#endif
|
assert(first_id < last_id);
|
||||||
for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
|
u32 bucket = numBuckets - n;
|
||||||
assignStringToBucket((LiteralIndex)vli[k], nb - n);
|
UNUSED const auto &first_lit = lits[first_id];
|
||||||
|
UNUSED const auto &last_lit = lits[last_id - 1];
|
||||||
|
DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, "
|
||||||
|
"score %0.4f)\n",
|
||||||
|
first_id, last_id, bucket, last_id - first_id,
|
||||||
|
first_lit.s.length(), last_lit.s.length(),
|
||||||
|
getScoreUtil(first_lit.s.length(), last_id - first_id));
|
||||||
|
|
||||||
|
auto &bucket_lits = bucketToLits[bucket];
|
||||||
|
for (u32 k = first_id; k < last_id; k++) {
|
||||||
|
bucket_lits.push_back(k);
|
||||||
}
|
}
|
||||||
i = j;
|
i = j;
|
||||||
}
|
}
|
||||||
@ -487,49 +528,22 @@ void FDRCompiler::setupTab() {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> FDRCompiler::build() {
|
||||||
FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|
||||||
assignStringsToBuckets();
|
assignStringsToBuckets();
|
||||||
setupTab();
|
setupTab();
|
||||||
return setupFDR(link);
|
return setupFDR();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
static
|
static
|
||||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
|
||||||
size_t rv = 0;
|
bool make_small, const target_t &target,
|
||||||
for (const auto &lit : lits) {
|
const Grey &grey, u32 hint) {
|
||||||
rv = max(rv, lit.msk.size());
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void setHistoryRequired(hwlmStreamingControl &stream_ctl,
|
|
||||||
const vector<hwlmLiteral> &lits) {
|
|
||||||
size_t max_mask_len = maxMaskLen(lits);
|
|
||||||
|
|
||||||
// we want enough history to manage the longest literal and the longest
|
|
||||||
// mask.
|
|
||||||
stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
aligned_unique_ptr<FDR>
|
|
||||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
|
||||||
const target_t &target, const Grey &grey, u32 hint,
|
|
||||||
hwlmStreamingControl *stream_control) {
|
|
||||||
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
|
|
||||||
|
|
||||||
if (stream_control) {
|
|
||||||
setHistoryRequired(*stream_control, lits);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||||
|
|
||||||
if (grey.fdrAllowTeddy) {
|
if (grey.fdrAllowTeddy) {
|
||||||
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link);
|
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey);
|
||||||
if (fdr) {
|
if (fdr) {
|
||||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||||
return fdr;
|
return fdr;
|
||||||
@ -538,10 +552,8 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const unique_ptr<FDREngineDescription> des =
|
auto des = (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
|
||||||
(hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
|
|
||||||
: getFdrDescription(hint);
|
: getFdrDescription(hint);
|
||||||
|
|
||||||
if (!des) {
|
if (!des) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -552,27 +564,23 @@ fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
|||||||
des->stride = 1;
|
des->stride = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
FDRCompiler fc(lits, *des, make_small);
|
FDRCompiler fc(lits, *des, make_small, grey);
|
||||||
return fc.build(link);
|
return fc.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
|
bytecode_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
|
||||||
bool make_small, const target_t &target,
|
bool make_small, const target_t &target,
|
||||||
const Grey &grey,
|
const Grey &grey) {
|
||||||
hwlmStreamingControl *stream_control) {
|
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
|
||||||
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID,
|
|
||||||
stream_control);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(RELEASE_BUILD)
|
#if !defined(RELEASE_BUILD)
|
||||||
|
|
||||||
aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
|
||||||
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
|
bool make_small, u32 hint,
|
||||||
const target_t &target, const Grey &grey,
|
const target_t &target,
|
||||||
hwlmStreamingControl *stream_control) {
|
const Grey &grey) {
|
||||||
pair<u8 *, size_t> link(nullptr, 0);
|
return fdrBuildTableInternal(lits, make_small, target, grey, hint);
|
||||||
return fdrBuildTableInternal(lits, make_small, target, grey, hint,
|
|
||||||
stream_control);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,7 +34,7 @@
|
|||||||
#define FDR_COMPILE_H
|
#define FDR_COMPILE_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -43,21 +43,18 @@ struct FDR;
|
|||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
struct hwlmLiteral;
|
struct hwlmLiteral;
|
||||||
struct hwlmStreamingControl;
|
|
||||||
struct Grey;
|
struct Grey;
|
||||||
struct target_t;
|
struct target_t;
|
||||||
|
|
||||||
ue2::aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> fdrBuildTable(const std::vector<hwlmLiteral> &lits,
|
||||||
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
|
bool make_small, const target_t &target,
|
||||||
const target_t &target, const Grey &grey,
|
const Grey &grey);
|
||||||
hwlmStreamingControl *stream_control = nullptr);
|
|
||||||
|
|
||||||
#if !defined(RELEASE_BUILD)
|
#if !defined(RELEASE_BUILD)
|
||||||
|
|
||||||
ue2::aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits,
|
||||||
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
bool make_small, u32 hint,
|
||||||
u32 hint, const target_t &target, const Grey &grey,
|
const target_t &target, const Grey &grey);
|
||||||
hwlmStreamingControl *stream_control = nullptr);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -31,7 +31,7 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "hwlm/hwlm_literal.h"
|
#include "hwlm/hwlm_literal.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -55,9 +55,10 @@ typedef u32 PositionInBucket; // zero is 'we are matching right now!",
|
|||||||
class EngineDescription;
|
class EngineDescription;
|
||||||
class FDREngineDescription;
|
class FDREngineDescription;
|
||||||
struct hwlmStreamingControl;
|
struct hwlmStreamingControl;
|
||||||
|
struct Grey;
|
||||||
|
|
||||||
std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
|
bytecode_ptr<u8> setupFullConfs(const std::vector<hwlmLiteral> &lits,
|
||||||
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
const EngineDescription &eng,
|
||||||
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
|
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
|
||||||
bool make_small);
|
bool make_small);
|
||||||
|
|
||||||
@ -65,11 +66,11 @@ std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
|
|||||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||||
// state; if we don't have a flood that's long enough we won't be in the
|
// state; if we don't have a flood that's long enough we won't be in the
|
||||||
// right state yet to allow blindly advancing
|
// right state yet to allow blindly advancing
|
||||||
std::pair<aligned_unique_ptr<u8>, size_t>
|
bytecode_ptr<u8> setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
||||||
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
const EngineDescription &eng,
|
||||||
const EngineDescription &eng);
|
const Grey &grey);
|
||||||
|
|
||||||
std::pair<aligned_unique_ptr<u8>, size_t>
|
bytecode_ptr<u8>
|
||||||
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
||||||
hwlmStreamingControl &stream_control);
|
hwlmStreamingControl &stream_control);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -45,10 +45,7 @@ using namespace std;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
using ConfSplitType = u8;
|
using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;
|
||||||
using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
|
|
||||||
using BC2CONF = map<BucketSplitPair,
|
|
||||||
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
|
|
||||||
|
|
||||||
// return the number of bytes beyond a length threshold in all strings in lits
|
// return the number of bytes beyond a length threshold in all strings in lits
|
||||||
static
|
static
|
||||||
@ -150,8 +147,8 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
|||||||
|
|
||||||
//#define FDR_CONFIRM_DUMP 1
|
//#define FDR_CONFIRM_DUMP 1
|
||||||
|
|
||||||
static pair<aligned_unique_ptr<FDRConfirm>, size_t>
|
static
|
||||||
getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||||
bool make_small, bool make_confirm) {
|
bool make_small, bool make_confirm) {
|
||||||
vector<LitInfo> tmpLitInfo(lits.size());
|
vector<LitInfo> tmpLitInfo(lits.size());
|
||||||
CONF_TYPE andmsk;
|
CONF_TYPE andmsk;
|
||||||
@ -166,7 +163,7 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
|||||||
if (make_small) {
|
if (make_small) {
|
||||||
nBits = min(10U, lg2(lits.size()) + 1);
|
nBits = min(10U, lg2(lits.size()) + 1);
|
||||||
} else {
|
} else {
|
||||||
nBits = min(13U, lg2(lits.size()) + 4);
|
nBits = lg2(lits.size() + 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
||||||
@ -177,8 +174,7 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
|||||||
u32 soleLitCmp = 0;
|
u32 soleLitCmp = 0;
|
||||||
u32 soleLitMsk = 0;
|
u32 soleLitMsk = 0;
|
||||||
|
|
||||||
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
|
if (!make_confirm) {
|
||||||
lits[0].msk.empty()) || make_confirm == false) {
|
|
||||||
flags = FDRC_FLAG_NO_CONFIRM;
|
flags = FDRC_FLAG_NO_CONFIRM;
|
||||||
if (lits[0].noruns) {
|
if (lits[0].noruns) {
|
||||||
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
||||||
@ -288,7 +284,7 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
|||||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||||
|
|
||||||
auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
|
auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
|
||||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
fdrc->andmsk = andmsk;
|
fdrc->andmsk = andmsk;
|
||||||
@ -322,32 +318,15 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
|||||||
LiteralIndex litIdx = *i;
|
LiteralIndex litIdx = *i;
|
||||||
|
|
||||||
// Write LitInfo header.
|
// Write LitInfo header.
|
||||||
u8 *oldPtr = ptr;
|
|
||||||
LitInfo &finalLI = *(LitInfo *)ptr;
|
LitInfo &finalLI = *(LitInfo *)ptr;
|
||||||
finalLI = tmpLitInfo[litIdx];
|
finalLI = tmpLitInfo[litIdx];
|
||||||
|
|
||||||
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
||||||
|
assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE));
|
||||||
// Write literal prefix (everything before the last N characters,
|
|
||||||
// as the last N are already confirmed).
|
|
||||||
const string &t = lits[litIdx].s;
|
|
||||||
if (t.size() > sizeof(CONF_TYPE)) {
|
|
||||||
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
|
|
||||||
memcpy(ptr, t.c_str(), prefix_len);
|
|
||||||
ptr += prefix_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
|
||||||
if (next(i) == e) {
|
if (next(i) == e) {
|
||||||
finalLI.next = 0;
|
finalLI.next = 0;
|
||||||
} else {
|
} else {
|
||||||
// our next field represents an adjustment on top of
|
finalLI.next = 1;
|
||||||
// current address + the actual size of the literal
|
|
||||||
// so we track any rounding up done for alignment and
|
|
||||||
// add this in - that way we don't have to use bigger
|
|
||||||
// than a u8 (for now)
|
|
||||||
assert((size_t)(ptr - oldPtr) > t.size());
|
|
||||||
finalLI.next = verify_u8(ptr - oldPtr - t.size());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert((size_t)(ptr - fdrc_base) <= size);
|
assert((size_t)(ptr - fdrc_base) <= size);
|
||||||
@ -358,19 +337,16 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
|||||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||||
alignof(FDRConfirm));
|
alignof(FDRConfirm));
|
||||||
assert(actual_size <= size);
|
assert(actual_size <= size);
|
||||||
|
fdrc.shrink(actual_size);
|
||||||
|
|
||||||
return {move(fdrc), actual_size};
|
return fdrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
bytecode_ptr<u8>
|
||||||
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
setupFullConfs(const vector<hwlmLiteral> &lits,
|
||||||
const EngineDescription &eng, BC2CONF &bc2Conf,
|
const EngineDescription &eng,
|
||||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||||
bool make_small) {
|
bool make_small) {
|
||||||
u32 pullBack = eng.getConfirmPullBackDistance();
|
|
||||||
u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
|
|
||||||
bool splitHasCase = splitMask & 0x20;
|
|
||||||
|
|
||||||
bool makeConfirm = true;
|
bool makeConfirm = true;
|
||||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
unique_ptr<TeddyEngineDescription> teddyDescr =
|
||||||
getTeddyDescription(eng.getID());
|
getTeddyDescription(eng.getID());
|
||||||
@ -378,101 +354,43 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
|||||||
makeConfirm = teddyDescr->needConfirm(lits);
|
makeConfirm = teddyDescr->needConfirm(lits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BC2CONF bc2Conf;
|
||||||
u32 totalConfirmSize = 0;
|
u32 totalConfirmSize = 0;
|
||||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||||
if (!bucketToLits[b].empty()) {
|
if (!bucketToLits[b].empty()) {
|
||||||
vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
|
vector<hwlmLiteral> vl;
|
||||||
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
||||||
hwlmLiteral lit = lits[lit_idx]; // copy
|
vl.push_back(lits[lit_idx]);
|
||||||
// c is last char of this literal
|
|
||||||
u8 c = *(lit.s.rbegin());
|
|
||||||
|
|
||||||
bool suppressSplit = false;
|
|
||||||
if (pullBack) {
|
|
||||||
// make a shorter string to work over if we're pulling back
|
|
||||||
// getFDRConfirm doesn't know about that stuff
|
|
||||||
assert(lit.s.size() >= pullBack);
|
|
||||||
lit.s.resize(lit.s.size() - pullBack);
|
|
||||||
|
|
||||||
u8 c_sub, c_sub_msk;
|
|
||||||
if (lit.msk.empty()) {
|
|
||||||
c_sub = 0;
|
|
||||||
c_sub_msk = 0;
|
|
||||||
} else {
|
|
||||||
c_sub = *(lit.cmp.rbegin());
|
|
||||||
c_sub_msk = *(lit.msk.rbegin());
|
|
||||||
size_t len = lit.msk.size() -
|
|
||||||
min(lit.msk.size(), (size_t)pullBack);
|
|
||||||
lit.msk.resize(len);
|
|
||||||
lit.cmp.resize(len);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if c_sub_msk is 0xff and lit.nocase
|
DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
|
||||||
// resteer 'c' to an exact value and set suppressSplit
|
auto fc = getFDRConfirm(vl, make_small, makeConfirm);
|
||||||
if ((c_sub_msk == 0xff) && (lit.nocase)) {
|
totalConfirmSize += fc.size();
|
||||||
suppressSplit = true;
|
bc2Conf.emplace(b, move(fc));
|
||||||
c = c_sub;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!suppressSplit && splitHasCase && lit.nocase &&
|
|
||||||
ourisalpha(c)) {
|
|
||||||
vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
|
|
||||||
vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
|
|
||||||
} else {
|
|
||||||
vl[c & splitMask].push_back(lit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
|
||||||
if (vl[c].empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
|
||||||
auto key = make_pair(b, c);
|
|
||||||
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
|
|
||||||
make_small, makeConfirm);
|
|
||||||
totalConfirmSize += fc.second;
|
|
||||||
assert(bc2Conf.find(key) == end(bc2Conf));
|
|
||||||
bc2Conf.emplace(key, move(fc));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return totalConfirmSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
pair<aligned_unique_ptr<u8>, size_t>
|
|
||||||
setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
|
||||||
const EngineDescription &eng,
|
|
||||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
|
||||||
bool make_small) {
|
|
||||||
BC2CONF bc2Conf;
|
|
||||||
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
|
|
||||||
make_small);
|
|
||||||
|
|
||||||
u32 primarySwitch = eng.getConfirmTopLevelSplit();
|
|
||||||
u32 nBuckets = eng.getNumBuckets();
|
u32 nBuckets = eng.getNumBuckets();
|
||||||
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
|
u32 totalConfSwitchSize = nBuckets * sizeof(u32);
|
||||||
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
||||||
|
|
||||||
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *confBase = (u32 *)buf.get();
|
u32 *confBase = (u32 *)buf.get();
|
||||||
u8 *ptr = buf.get() + totalConfSwitchSize;
|
u8 *ptr = buf.get() + totalConfSwitchSize;
|
||||||
|
|
||||||
for (const auto &m : bc2Conf) {
|
for (const auto &m : bc2Conf) {
|
||||||
const BucketIndex &b = m.first.first;
|
const BucketIndex &idx = m.first;
|
||||||
const u8 &c = m.first.second;
|
const bytecode_ptr<FDRConfirm> &p = m.second;
|
||||||
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
|
|
||||||
// confirm offset is relative to the base of this structure, now
|
// confirm offset is relative to the base of this structure, now
|
||||||
u32 confirm_offset = verify_u32(ptr - buf.get());
|
u32 confirm_offset = verify_u32(ptr - buf.get());
|
||||||
memcpy(ptr, p.first.get(), p.second);
|
memcpy(ptr, p.get(), p.size());
|
||||||
ptr += p.second;
|
ptr += p.size();
|
||||||
u32 idx = c * nBuckets + b;
|
|
||||||
confBase[idx] = confirm_offset;
|
confBase[idx] = confirm_offset;
|
||||||
}
|
}
|
||||||
return {move(buf), totalSize};
|
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -40,8 +40,8 @@
|
|||||||
// the whole confirmation procedure
|
// the whole confirmation procedure
|
||||||
static really_inline
|
static really_inline
|
||||||
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
|
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
|
||||||
size_t i, u32 pullBackAmount, hwlmcb_rv_t *control,
|
size_t i, hwlmcb_rv_t *control, u32 *last_match,
|
||||||
u32 *last_match, u64a conf_key) {
|
u64a conf_key) {
|
||||||
assert(i < a->len);
|
assert(i < a->len);
|
||||||
assert(ISALIGNED(fdrc));
|
assert(ISALIGNED(fdrc));
|
||||||
|
|
||||||
@ -68,13 +68,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8 *loc = buf + i - li->size + 1 - pullBackAmount;
|
const u8 *loc = buf + i - li->size + 1;
|
||||||
|
|
||||||
u8 caseless = li->flags & Caseless;
|
|
||||||
if (loc < buf) {
|
if (loc < buf) {
|
||||||
u32 full_overhang = buf - loc;
|
u32 full_overhang = buf - loc;
|
||||||
|
|
||||||
const u8 *history = a->buf_history;
|
|
||||||
size_t len_history = a->len_history;
|
size_t len_history = a->len_history;
|
||||||
|
|
||||||
// can't do a vectored confirm either if we don't have
|
// can't do a vectored confirm either if we don't have
|
||||||
@ -82,44 +79,15 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
|||||||
if (full_overhang > len_history) {
|
if (full_overhang > len_history) {
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
// as for the regular case, no need to do a full confirm if
|
|
||||||
// we're a short literal
|
|
||||||
if (unlikely(li->size > sizeof(CONF_TYPE))) {
|
|
||||||
const u8 *s1 = (const u8 *)li + sizeof(*li);
|
|
||||||
const u8 *s2 = s1 + full_overhang;
|
|
||||||
const u8 *loc1 = history + len_history - full_overhang;
|
|
||||||
const u8 *loc2 = buf;
|
|
||||||
size_t size1 = MIN(full_overhang, li->size - sizeof(CONF_TYPE));
|
|
||||||
size_t wind_size2_back = sizeof(CONF_TYPE) + full_overhang;
|
|
||||||
size_t size2 = wind_size2_back > li->size ?
|
|
||||||
0 : li->size - wind_size2_back;
|
|
||||||
|
|
||||||
if (cmpForward(loc1, s1, size1, caseless)) {
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
if (cmpForward(loc2, s2, size2, caseless)) {
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // NON-VECTORING PATH
|
|
||||||
|
|
||||||
// if string < conf_type we don't need regular string cmp
|
|
||||||
if (unlikely(li->size > sizeof(CONF_TYPE))) {
|
|
||||||
const u8 *s = (const u8 *)li + sizeof(*li);
|
|
||||||
if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE),
|
|
||||||
caseless)) {
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
assert(li->size <= sizeof(CONF_TYPE));
|
||||||
|
|
||||||
if (unlikely(!(li->groups & *control))) {
|
if (unlikely(!(li->groups & *control))) {
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(li->flags & ComplexConfirm)) {
|
if (unlikely(li->flags & ComplexConfirm)) {
|
||||||
const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
|
const u8 *loc2 = buf + i - li->extended_size + 1;
|
||||||
if (loc2 < buf) {
|
if (loc2 < buf) {
|
||||||
u32 full_overhang = buf - loc2;
|
u32 full_overhang = buf - loc2;
|
||||||
size_t len_history = a->len_history;
|
size_t len_history = a->len_history;
|
||||||
@ -133,7 +101,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
|||||||
*control = a->cb(loc - buf, i, li->id, a->ctxt);
|
*control = a->cb(loc - buf, i, li->id, a->ctxt);
|
||||||
out:
|
out:
|
||||||
oldNext = li->next; // oldNext is either 0 or an 'adjust' value
|
oldNext = li->next; // oldNext is either 0 or an 'adjust' value
|
||||||
li = (const struct LitInfo *)((const u8 *)li + oldNext + li->size);
|
li++;
|
||||||
} while (oldNext);
|
} while (oldNext);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,7 +116,7 @@ void confWithBit1(const struct FDRConfirm *fdrc,
|
|||||||
assert(ISALIGNED(fdrc));
|
assert(ISALIGNED(fdrc));
|
||||||
|
|
||||||
if (unlikely(fdrc->mult)) {
|
if (unlikely(fdrc->mult)) {
|
||||||
confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
|
confWithBit(fdrc, a, i, control, last_match, conf_key);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
u32 id = fdrc->nBitsOrSoleID;
|
u32 id = fdrc->nBitsOrSoleID;
|
||||||
@ -176,7 +144,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(fdrc->mult)) {
|
if (unlikely(fdrc->mult)) {
|
||||||
confWithBit(fdrc, a, i, 0, control, last_match, conf_key);
|
confWithBit(fdrc, a, i, control, last_match, conf_key);
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
const u32 id = fdrc->nBitsOrSoleID;
|
const u32 id = fdrc->nBitsOrSoleID;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -44,8 +44,7 @@ namespace ue2 {
|
|||||||
|
|
||||||
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
|
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
|
||||||
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
||||||
def.numBuckets, def.confirmPullBackDistance,
|
def.numBuckets),
|
||||||
def.confirmTopLevelSplit),
|
|
||||||
schemeWidth(def.schemeWidth), stride(0), bits(0) {}
|
schemeWidth(def.schemeWidth), stride(0), bits(0) {}
|
||||||
|
|
||||||
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
|
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
|
||||||
@ -55,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void getFdrDescriptions(vector<FDREngineDescription> *out) {
|
void getFdrDescriptions(vector<FDREngineDescription> *out) {
|
||||||
static const FDREngineDef def = {0, 128, 8, 0, 1, 256};
|
static const FDREngineDef def = {0, 64, 8, 0};
|
||||||
out->clear();
|
out->clear();
|
||||||
out->emplace_back(def);
|
out->emplace_back(def);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -43,8 +43,6 @@ struct FDREngineDef {
|
|||||||
u32 schemeWidth;
|
u32 schemeWidth;
|
||||||
u32 numBuckets;
|
u32 numBuckets;
|
||||||
u64a cpu_features;
|
u64a cpu_features;
|
||||||
u32 confirmPullBackDistance;
|
|
||||||
u32 confirmTopLevelSplit;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class FDREngineDescription : public EngineDescription {
|
class FDREngineDescription : public EngineDescription {
|
||||||
@ -64,7 +62,6 @@ public:
|
|||||||
explicit FDREngineDescription(const FDREngineDef &def);
|
explicit FDREngineDescription(const FDREngineDef &def);
|
||||||
|
|
||||||
u32 getDefaultFloodSuffixLength() const override;
|
u32 getDefaultFloodSuffixLength() const override;
|
||||||
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<FDREngineDescription>
|
std::unique_ptr<FDREngineDescription>
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -71,11 +71,6 @@ struct FDR {
|
|||||||
u32 maxStringLen;
|
u32 maxStringLen;
|
||||||
u32 floodOffset;
|
u32 floodOffset;
|
||||||
|
|
||||||
/** link is the relative offset of a secondary included FDR table for
|
|
||||||
* stream handling if we're a primary FDR table or the subsidiary tertiary
|
|
||||||
* structures (spillover strings and hash table) if we're a secondary
|
|
||||||
* structure. */
|
|
||||||
u32 link;
|
|
||||||
u8 stride; /* stride - how frequently the data is consulted by the first
|
u8 stride; /* stride - how frequently the data is consulted by the first
|
||||||
* stage matcher */
|
* stage matcher */
|
||||||
u8 domain; /* number of bits used to index into main FDR table. This value
|
u8 domain; /* number of bits used to index into main FDR table. This value
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,6 +30,7 @@
|
|||||||
#include "fdr_confirm.h"
|
#include "fdr_confirm.h"
|
||||||
#include "fdr_compile_internal.h"
|
#include "fdr_compile_internal.h"
|
||||||
#include "fdr_engine_description.h"
|
#include "fdr_engine_description.h"
|
||||||
|
#include "grey.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
@ -90,9 +91,9 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<aligned_unique_ptr<u8>, size_t>
|
bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||||
setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
const EngineDescription &eng,
|
||||||
const EngineDescription &eng) {
|
const Grey &grey) {
|
||||||
vector<FDRFlood> tmpFlood(N_CHARS);
|
vector<FDRFlood> tmpFlood(N_CHARS);
|
||||||
u32 default_suffix = eng.getDefaultFloodSuffixLength();
|
u32 default_suffix = eng.getDefaultFloodSuffixLength();
|
||||||
|
|
||||||
@ -187,6 +188,14 @@ setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// If flood detection has been switched off in the grey box, we comply by
|
||||||
|
// setting idCount too high for all floods.
|
||||||
|
if (!grey.fdrAllowFlood) {
|
||||||
|
for (auto &fl : tmpFlood) {
|
||||||
|
fl.idCount = FDR_FLOOD_MAX_IDS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
map<FDRFlood, CharReach, FloodComparator> flood2chars;
|
map<FDRFlood, CharReach, FloodComparator> flood2chars;
|
||||||
for (u32 i = 0; i < N_CHARS; i++) {
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
FDRFlood fl = tmpFlood[i];
|
FDRFlood fl = tmpFlood[i];
|
||||||
@ -198,7 +207,7 @@ setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
|
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
|
||||||
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
|
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
|
||||||
|
|
||||||
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *floodHeader = (u32 *)buf.get();
|
u32 *floodHeader = (u32 *)buf.get();
|
||||||
@ -218,7 +227,7 @@ setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
|
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
|
||||||
floodHeaderSize, floodStructSize, totalSize);
|
floodHeaderSize, floodStructSize, totalSize);
|
||||||
|
|
||||||
return {move(buf), totalSize};
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -100,7 +100,7 @@ const u8 * floodDetect(const struct FDR * fdr,
|
|||||||
// tryFloodDetect is never put in places where unconditional
|
// tryFloodDetect is never put in places where unconditional
|
||||||
// reads a short distance forward or backward here
|
// reads a short distance forward or backward here
|
||||||
// TODO: rationale for this line needs to be rediscovered!!
|
// TODO: rationale for this line needs to be rediscovered!!
|
||||||
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
|
size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0;
|
||||||
const u32 i = ptr - buf;
|
const u32 i = ptr - buf;
|
||||||
u32 j = i;
|
u32 j = i;
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -129,7 +129,8 @@ m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
|
|||||||
m128 mask = set16x8(0xf);
|
m128 mask = set16x8(0xf);
|
||||||
m128 lo = and128(val, mask);
|
m128 lo = and128(val, mask);
|
||||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
|
return and128(pshufb_m128(maskBase[0 * 2], lo),
|
||||||
|
pshufb_m128(maskBase[0 * 2 + 1], hi));
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -139,8 +140,8 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
|
|||||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m1(maskBase, val);
|
m128 r = prep_conf_teddy_m1(maskBase, val);
|
||||||
|
|
||||||
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
m128 res_1 = and128(pshufb_m128(maskBase[1*2], lo),
|
||||||
pshufb(maskBase[1*2+1], hi));
|
pshufb_m128(maskBase[1*2+1], hi));
|
||||||
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
||||||
*old_1 = res_1;
|
*old_1 = res_1;
|
||||||
return and128(r, res_shifted_1);
|
return and128(r, res_shifted_1);
|
||||||
@ -154,8 +155,8 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
|||||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
|
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
|
||||||
|
|
||||||
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
m128 res_2 = and128(pshufb_m128(maskBase[2*2], lo),
|
||||||
pshufb(maskBase[2*2+1], hi));
|
pshufb_m128(maskBase[2*2+1], hi));
|
||||||
m128 res_shifted_2 = palignr(res_2, *old_2, 16-2);
|
m128 res_shifted_2 = palignr(res_2, *old_2, 16-2);
|
||||||
*old_2 = res_2;
|
*old_2 = res_2;
|
||||||
return and128(r, res_shifted_2);
|
return and128(r, res_shifted_2);
|
||||||
@ -169,8 +170,8 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
|||||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
|
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
|
||||||
|
|
||||||
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
|
m128 res_3 = and128(pshufb_m128(maskBase[3*2], lo),
|
||||||
pshufb(maskBase[3*2+1], hi));
|
pshufb_m128(maskBase[3*2+1], hi));
|
||||||
m128 res_shifted_3 = palignr(res_3, *old_3, 16-3);
|
m128 res_shifted_3 = palignr(res_3, *old_3, 16-3);
|
||||||
*old_3 = res_3;
|
*old_3 = res_3;
|
||||||
return and128(r, res_shifted_3);
|
return and128(r, res_shifted_3);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,6 +34,7 @@
|
|||||||
#define TEDDY_H_
|
#define TEDDY_H_
|
||||||
|
|
||||||
#include "hwlm/hwlm.h" // for hwlm_group_t
|
#include "hwlm/hwlm.h" // for hwlm_group_t
|
||||||
|
#include "util/arch.h"
|
||||||
|
|
||||||
struct FDR; // forward declaration from fdr_internal.h
|
struct FDR; // forward declaration from fdr_internal.h
|
||||||
struct FDR_Runtime_Args;
|
struct FDR_Runtime_Args;
|
||||||
@ -70,7 +71,7 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
|||||||
const struct FDR_Runtime_Args *a,
|
const struct FDR_Runtime_Args *a,
|
||||||
hwlm_group_t control);
|
hwlm_group_t control);
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(HAVE_AVX2)
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a,
|
const struct FDR_Runtime_Args *a,
|
||||||
@ -104,15 +105,6 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
|||||||
const struct FDR_Runtime_Args *a,
|
const struct FDR_Runtime_Args *a,
|
||||||
hwlm_group_t control);
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
#endif /* HAVE_AVX2 */
|
||||||
const struct FDR_Runtime_Args *a,
|
|
||||||
hwlm_group_t control);
|
|
||||||
|
|
||||||
hwlm_error_t
|
|
||||||
fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
|
||||||
const struct FDR_Runtime_Args *a,
|
|
||||||
hwlm_group_t control);
|
|
||||||
|
|
||||||
#endif /* __AVX2__ */
|
|
||||||
|
|
||||||
#endif /* TEDDY_H_ */
|
#endif /* TEDDY_H_ */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -35,78 +35,10 @@
|
|||||||
#include "teddy.h"
|
#include "teddy.h"
|
||||||
#include "teddy_internal.h"
|
#include "teddy_internal.h"
|
||||||
#include "teddy_runtime_common.h"
|
#include "teddy_runtime_common.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(HAVE_AVX2)
|
||||||
|
|
||||||
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \
|
||||||
@ -199,22 +131,6 @@ do { \
|
|||||||
} while (0);
|
} while (0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CONFIRM_FAST_TEDDY(var, offset, reason, conf_fn) \
|
|
||||||
do { \
|
|
||||||
if (unlikely(isnonzero256(var))) { \
|
|
||||||
u32 arrCnt = 0; \
|
|
||||||
m128 lo = cast256to128(var); \
|
|
||||||
m128 hi = movdq_hi(var); \
|
|
||||||
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
|
|
||||||
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
|
|
||||||
for (u32 i = 0; i < arrCnt; i++) { \
|
|
||||||
conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \
|
|
||||||
&last_match); \
|
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} while (0);
|
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||||
const u8 *buf_history, size_t len_history,
|
const u8 *buf_history, size_t len_history,
|
||||||
@ -226,193 +142,13 @@ m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* \brief Copy a block of [0,31] bytes efficiently.
|
|
||||||
*
|
|
||||||
* This function is a workaround intended to stop some compilers from
|
|
||||||
* synthesizing a memcpy function call out of the copy of a small number of
|
|
||||||
* bytes that we do in vectoredLoad128.
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) {
|
|
||||||
switch (len) {
|
|
||||||
case 0:
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
*dst = *src;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
unaligned_store_u16(dst, unaligned_load_u16(src));
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
unaligned_store_u16(dst, unaligned_load_u16(src));
|
|
||||||
dst[2] = src[2];
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
unaligned_store_u32(dst, unaligned_load_u32(src));
|
|
||||||
break;
|
|
||||||
case 5:
|
|
||||||
case 6:
|
|
||||||
case 7:
|
|
||||||
/* Perform copy with two overlapping 4-byte chunks. */
|
|
||||||
unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
|
|
||||||
unaligned_store_u32(dst, unaligned_load_u32(src));
|
|
||||||
break;
|
|
||||||
case 8:
|
|
||||||
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
|
||||||
break;
|
|
||||||
case 9:
|
|
||||||
case 10:
|
|
||||||
case 11:
|
|
||||||
case 12:
|
|
||||||
case 13:
|
|
||||||
case 14:
|
|
||||||
case 15:
|
|
||||||
/* Perform copy with two overlapping 8-byte chunks. */
|
|
||||||
unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
|
|
||||||
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
|
||||||
break;
|
|
||||||
case 16:
|
|
||||||
storeu128(dst, loadu128(src));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
/* Perform copy with two overlapping 16-byte chunks. */
|
|
||||||
assert(len < 32);
|
|
||||||
storeu128(dst + len - 16, loadu128(src + len - 16));
|
|
||||||
storeu128(dst, loadu128(src));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
|
||||||
const u8 *buf_history, size_t len_history) {
|
|
||||||
union {
|
|
||||||
u8 val8[32];
|
|
||||||
m256 val256;
|
|
||||||
} u;
|
|
||||||
|
|
||||||
uintptr_t copy_start;
|
|
||||||
uintptr_t copy_len;
|
|
||||||
|
|
||||||
if (ptr >= lo) {
|
|
||||||
uintptr_t avail = (uintptr_t)(hi - ptr);
|
|
||||||
if (avail >= 32) {
|
|
||||||
*p_mask = load256(p_mask_arr256[32] + 32);
|
|
||||||
return loadu256(ptr);
|
|
||||||
}
|
|
||||||
*p_mask = load256(p_mask_arr256[avail] + 32);
|
|
||||||
copy_start = 0;
|
|
||||||
copy_len = avail;
|
|
||||||
} else {
|
|
||||||
// need contains "how many chars to pull from history"
|
|
||||||
// calculate based on what we need, what we have in the buffer
|
|
||||||
// and only what we need to make primary confirm work
|
|
||||||
uintptr_t start = (uintptr_t)(lo - ptr);
|
|
||||||
uintptr_t i;
|
|
||||||
for (i = start; ptr + i < lo; i++) {
|
|
||||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
|
||||||
}
|
|
||||||
uintptr_t end = MIN(32, (uintptr_t)(hi - ptr));
|
|
||||||
*p_mask = loadu256(p_mask_arr256[end - start] + 32 - start);
|
|
||||||
copy_start = i;
|
|
||||||
copy_len = end - i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Runt block from the buffer.
|
|
||||||
copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len);
|
|
||||||
|
|
||||||
return u.val256;
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
void do_confWithBit1_fast_teddy(u16 bits, const u32 *confBase,
|
|
||||||
CautionReason reason,
|
|
||||||
const struct FDR_Runtime_Args *a,
|
|
||||||
const u8 *ptr, hwlmcb_rv_t *control,
|
|
||||||
u32 *last_match) {
|
|
||||||
u32 byte = bits / 8;
|
|
||||||
u32 cf = confBase[bits % 8];
|
|
||||||
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
|
|
||||||
((const u8 *)confBase + cf);
|
|
||||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
|
||||||
confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, confVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase,
|
|
||||||
CautionReason reason,
|
|
||||||
const struct FDR_Runtime_Args *a, const u8 *ptr,
|
|
||||||
hwlmcb_rv_t *control, u32 *last_match) {
|
|
||||||
u32 byte = bits / 8;
|
|
||||||
u32 bitRem = bits % 8;
|
|
||||||
u32 confSplit = *(ptr+byte) & 0x1f;
|
|
||||||
u32 idx = confSplit * 8 + bitRem;
|
|
||||||
u32 cf = confBase[idx];
|
|
||||||
if (!cf) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
|
|
||||||
((const u8 *)confBase + cf);
|
|
||||||
if (!(fdrc->groups & *control)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
|
||||||
confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
|
||||||
void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
|
||||||
if (unlikely(isnonzero128(var))) {
|
|
||||||
#ifdef ARCH_64_BIT
|
|
||||||
u64a part_0 = movq(var);
|
|
||||||
while (unlikely(part_0)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) +
|
|
||||||
64 * (offset);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
u64a part_1 = movq(rshiftbyte_m128(var, 8));
|
|
||||||
while (unlikely(part_1)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
|
||||||
64 * (offset + 1);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
u32 part_0 = movd(var);
|
|
||||||
while (unlikely(part_0)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) +
|
|
||||||
32 * (offset * 2);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
u32 part_1 = movd(rshiftbyte_m128(var, 4));
|
|
||||||
while (unlikely(part_1)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
|
||||||
32 * (offset * 2 + 1);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
u32 part_2 = movd(rshiftbyte_m128(var, 8));
|
|
||||||
while (unlikely(part_2)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
|
|
||||||
32 * (offset * 2 + 2);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
u32 part_3 = movd(rshiftbyte_m128(var, 12));
|
|
||||||
while (unlikely(part_3)) {
|
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
|
|
||||||
32 * (offset * 2 + 3);
|
|
||||||
*arrCnt += 1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
|
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
|
||||||
m256 mask = set32x8(0xf);
|
m256 mask = set32x8(0xf);
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
return and256(vpshufb(maskBase[0*2], lo),
|
return and256(pshufb_m256(maskBase[0*2], lo),
|
||||||
vpshufb(maskBase[0*2+1], hi));
|
pshufb_m256(maskBase[0*2+1], hi));
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -422,8 +158,8 @@ m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) {
|
|||||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m1(maskBase, val);
|
m256 r = prep_conf_fat_teddy_m1(maskBase, val);
|
||||||
|
|
||||||
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
|
m256 res_1 = and256(pshufb_m256(maskBase[1*2], lo),
|
||||||
vpshufb(maskBase[1*2+1], hi));
|
pshufb_m256(maskBase[1*2+1], hi));
|
||||||
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
|
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
|
||||||
*old_1 = res_1;
|
*old_1 = res_1;
|
||||||
return and256(r, res_shifted_1);
|
return and256(r, res_shifted_1);
|
||||||
@ -437,8 +173,8 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
|||||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
|
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
|
||||||
|
|
||||||
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
|
m256 res_2 = and256(pshufb_m256(maskBase[2*2], lo),
|
||||||
vpshufb(maskBase[2*2+1], hi));
|
pshufb_m256(maskBase[2*2+1], hi));
|
||||||
m256 res_shifted_2 = vpalignr(res_2, *old_2, 16-2);
|
m256 res_shifted_2 = vpalignr(res_2, *old_2, 16-2);
|
||||||
*old_2 = res_2;
|
*old_2 = res_2;
|
||||||
return and256(r, res_shifted_2);
|
return and256(r, res_shifted_2);
|
||||||
@ -452,20 +188,13 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
|||||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
|
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
|
||||||
|
|
||||||
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
|
m256 res_3 = and256(pshufb_m256(maskBase[3*2], lo),
|
||||||
vpshufb(maskBase[3*2+1], hi));
|
pshufb_m256(maskBase[3*2+1], hi));
|
||||||
m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3);
|
m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3);
|
||||||
*old_3 = res_3;
|
*old_3 = res_3;
|
||||||
return and256(r, res_shifted_3);
|
return and256(r, res_shifted_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
|
||||||
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) {
|
|
||||||
m256 lo = and256(val, mask);
|
|
||||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
|
||||||
return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
|
|
||||||
}
|
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const m256 * getMaskBase_avx2(const struct Teddy *teddy) {
|
const m256 * getMaskBase_avx2(const struct Teddy *teddy) {
|
||||||
return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy));
|
return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy));
|
||||||
@ -959,136 +688,4 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
|||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
#endif // HAVE_AVX2
|
||||||
const struct FDR_Runtime_Args *a,
|
|
||||||
hwlm_group_t control) {
|
|
||||||
const u8 *buf_end = a->buf + a->len;
|
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
|
||||||
u32 last_match = (u32)-1;
|
|
||||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
|
||||||
const size_t iterBytes = 64;
|
|
||||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
|
||||||
a->buf, a->len, a->start_offset);
|
|
||||||
|
|
||||||
const m128 *maskBase = getMaskBase(teddy);
|
|
||||||
const u32 *confBase = getConfBase(teddy, 1);
|
|
||||||
|
|
||||||
const m256 maskLo = set2x128(maskBase[0]);
|
|
||||||
const m256 maskHi = set2x128(maskBase[1]);
|
|
||||||
const m256 mask = set32x8(0xf);
|
|
||||||
u16 bitArr[512];
|
|
||||||
|
|
||||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 32);
|
|
||||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
|
||||||
if (ptr < mainStart) {
|
|
||||||
ptr = mainStart - 32;
|
|
||||||
m256 p_mask;
|
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
|
||||||
buf_end, a->buf_history, a->len_history);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
res_0 = and256(res_0, p_mask);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
|
||||||
ptr += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr + 32 < buf_end) {
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
|
||||||
ptr += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
|
||||||
CHECK_FLOOD;
|
|
||||||
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
|
||||||
|
|
||||||
m256 val_1 = load256(ptr + 32);
|
|
||||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; ptr < buf_end; ptr += 32) {
|
|
||||||
m256 p_mask;
|
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
|
||||||
buf_end, a->buf_history, a->len_history);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
res_0 = and256(res_0, p_mask);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
|
||||||
}
|
|
||||||
|
|
||||||
return HWLM_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
|
||||||
const struct FDR_Runtime_Args *a,
|
|
||||||
hwlm_group_t control) {
|
|
||||||
const u8 *buf_end = a->buf + a->len;
|
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
|
||||||
u32 last_match = (u32)-1;
|
|
||||||
const struct Teddy *teddy = (const struct Teddy *)fdr;
|
|
||||||
const size_t iterBytes = 64;
|
|
||||||
DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",
|
|
||||||
a->buf, a->len, a->start_offset);
|
|
||||||
|
|
||||||
const m128 *maskBase = getMaskBase(teddy);
|
|
||||||
const u32 *confBase = getConfBase(teddy, 1);
|
|
||||||
|
|
||||||
const m256 maskLo = set2x128(maskBase[0]);
|
|
||||||
const m256 maskHi = set2x128(maskBase[1]);
|
|
||||||
const m256 mask = set32x8(0xf);
|
|
||||||
u16 bitArr[512];
|
|
||||||
|
|
||||||
const u8 *mainStart = ROUNDUP_PTR(ptr, 32);
|
|
||||||
DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);
|
|
||||||
if (ptr < mainStart) {
|
|
||||||
ptr = mainStart - 32;
|
|
||||||
m256 p_mask;
|
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
|
||||||
buf_end, a->buf_history, a->len_history);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
res_0 = and256(res_0, p_mask);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
|
||||||
ptr += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr + 32 < buf_end) {
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
|
||||||
ptr += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
|
||||||
CHECK_FLOOD;
|
|
||||||
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
|
||||||
|
|
||||||
m256 val_1 = load256(ptr + 32);
|
|
||||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
|
||||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; ptr < buf_end; ptr += 32) {
|
|
||||||
m256 p_mask;
|
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
|
||||||
buf_end, a->buf_history, a->len_history);
|
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
|
||||||
res_0 = and256(res_0, p_mask);
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
|
||||||
}
|
|
||||||
|
|
||||||
return HWLM_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // __AVX2__
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,22 +26,29 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
* \brief FDR literal matcher: Teddy build code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "teddy_compile.h"
|
||||||
|
|
||||||
#include "fdr.h"
|
#include "fdr.h"
|
||||||
#include "fdr_internal.h"
|
#include "fdr_internal.h"
|
||||||
#include "fdr_compile_internal.h"
|
#include "fdr_compile_internal.h"
|
||||||
#include "fdr_confirm.h"
|
#include "fdr_confirm.h"
|
||||||
#include "fdr_engine_description.h"
|
#include "fdr_engine_description.h"
|
||||||
|
#include "teddy_internal.h"
|
||||||
|
#include "teddy_engine_description.h"
|
||||||
|
#include "grey.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
|
#include "util/noncopyable.h"
|
||||||
#include "util/popcount.h"
|
#include "util/popcount.h"
|
||||||
#include "util/target_info.h"
|
#include "util/target_info.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
|
|
||||||
#include "teddy_compile.h"
|
|
||||||
#include "teddy_internal.h"
|
|
||||||
#include "teddy_engine_description.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
@ -54,8 +61,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/core/noncopyable.hpp>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -64,17 +69,20 @@ namespace {
|
|||||||
|
|
||||||
//#define TEDDY_DEBUG
|
//#define TEDDY_DEBUG
|
||||||
|
|
||||||
class TeddyCompiler : boost::noncopyable {
|
class TeddyCompiler : noncopyable {
|
||||||
const TeddyEngineDescription &eng;
|
const TeddyEngineDescription &eng;
|
||||||
|
const Grey &grey;
|
||||||
const vector<hwlmLiteral> &lits;
|
const vector<hwlmLiteral> &lits;
|
||||||
bool make_small;
|
bool make_small;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
||||||
const TeddyEngineDescription &eng_in, bool make_small_in)
|
const TeddyEngineDescription &eng_in, bool make_small_in,
|
||||||
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
const Grey &grey_in)
|
||||||
|
: eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) {
|
||||||
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
|
bytecode_ptr<FDR> build();
|
||||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -274,8 +282,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> TeddyCompiler::build() {
|
||||||
TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|
||||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||||
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -308,16 +315,16 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|||||||
|
|
||||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||||
|
|
||||||
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
|
||||||
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||||
|
|
||||||
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
||||||
maskLen +
|
maskLen +
|
||||||
confirmTmp.second +
|
confirmTmp.size() +
|
||||||
floodControlTmp.second +
|
floodControlTmp.size(),
|
||||||
link.second, 16 * maskWidth);
|
16 * maskWidth);
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
|
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||||
Teddy *teddy = (Teddy *)fdr.get(); // ugly
|
Teddy *teddy = (Teddy *)fdr.get(); // ugly
|
||||||
u8 *teddy_base = (u8 *)teddy;
|
u8 *teddy_base = (u8 *)teddy;
|
||||||
@ -327,19 +334,12 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|||||||
teddy->maxStringLen = verify_u32(maxLen(lits));
|
teddy->maxStringLen = verify_u32(maxLen(lits));
|
||||||
|
|
||||||
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
||||||
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
|
||||||
ptr += confirmTmp.second;
|
ptr += confirmTmp.size();
|
||||||
|
|
||||||
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
||||||
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
|
||||||
ptr += floodControlTmp.second;
|
ptr += floodControlTmp.size();
|
||||||
|
|
||||||
if (link.first) {
|
|
||||||
teddy->link = verify_u32(ptr - teddy_base);
|
|
||||||
memcpy(ptr, link.first.get(), link.second);
|
|
||||||
} else {
|
|
||||||
teddy->link = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
||||||
|
|
||||||
@ -423,10 +423,10 @@ TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
|
||||||
teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
|
bool make_small, u32 hint,
|
||||||
u32 hint, const target_t &target,
|
const target_t &target,
|
||||||
pair<aligned_unique_ptr<u8>, size_t> &link) {
|
const Grey &grey) {
|
||||||
unique_ptr<TeddyEngineDescription> des;
|
unique_ptr<TeddyEngineDescription> des;
|
||||||
if (hint == HINT_INVALID) {
|
if (hint == HINT_INVALID) {
|
||||||
des = chooseTeddyEngine(target, lits);
|
des = chooseTeddyEngine(target, lits);
|
||||||
@ -436,8 +436,8 @@ teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
|
|||||||
if (!des) {
|
if (!des) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
TeddyCompiler tc(lits, *des, make_small);
|
TeddyCompiler tc(lits, *des, make_small, grey);
|
||||||
return tc.build(link);
|
return tc.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,7 +26,8 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
|
* \file
|
||||||
* \brief FDR literal matcher: Teddy build API.
|
* \brief FDR literal matcher: Teddy build API.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -34,22 +35,22 @@
|
|||||||
#define TEDDY_COMPILE_H
|
#define TEDDY_COMPILE_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <utility> // std::pair
|
|
||||||
|
|
||||||
struct FDR;
|
struct FDR;
|
||||||
struct target_t;
|
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
|
struct Grey;
|
||||||
struct hwlmLiteral;
|
struct hwlmLiteral;
|
||||||
|
struct target_t;
|
||||||
|
|
||||||
ue2::aligned_unique_ptr<FDR>
|
bytecode_ptr<FDR> teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits,
|
||||||
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
bool make_small, u32 hint,
|
||||||
u32 hint, const target_t &target,
|
const target_t &target,
|
||||||
std::pair<aligned_unique_ptr<u8>, size_t> &link);
|
const Grey &grey);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -44,8 +44,7 @@ namespace ue2 {
|
|||||||
|
|
||||||
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
|
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
|
||||||
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
||||||
def.numBuckets, def.confirmPullBackDistance,
|
def.numBuckets),
|
||||||
def.confirmTopLevelSplit),
|
|
||||||
numMasks(def.numMasks), packed(def.packed) {}
|
numMasks(def.numMasks), packed(def.packed) {}
|
||||||
|
|
||||||
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
||||||
@ -66,24 +65,22 @@ bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const
|
|||||||
|
|
||||||
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
||||||
static const TeddyEngineDef defns[] = {
|
static const TeddyEngineDef defns[] = {
|
||||||
{ 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 },
|
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
|
||||||
{ 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 },
|
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true },
|
||||||
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 },
|
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false },
|
||||||
{ 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 },
|
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true },
|
||||||
{ 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 },
|
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false },
|
||||||
{ 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 },
|
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true },
|
||||||
{ 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 },
|
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false },
|
||||||
{ 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 },
|
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true },
|
||||||
{ 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 },
|
{ 11, 0, 1, 8, false },
|
||||||
{ 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 },
|
{ 12, 0, 1, 8, true },
|
||||||
{ 11, 0, 1, 8, false, 0, 1 },
|
{ 13, 0, 2, 8, false },
|
||||||
{ 12, 0, 1, 8, true, 0, 32 },
|
{ 14, 0, 2, 8, true },
|
||||||
{ 13, 0, 2, 8, false, 0, 1 },
|
{ 15, 0, 3, 8, false },
|
||||||
{ 14, 0, 2, 8, true, 0, 32 },
|
{ 16, 0, 3, 8, true },
|
||||||
{ 15, 0, 3, 8, false, 0, 1 },
|
{ 17, 0, 4, 8, false },
|
||||||
{ 16, 0, 3, 8, true, 0, 32 },
|
{ 18, 0, 4, 8, true },
|
||||||
{ 17, 0, 4, 8, false, 0, 1 },
|
|
||||||
{ 18, 0, 4, 8, true, 0, 32 },
|
|
||||||
};
|
};
|
||||||
out->clear();
|
out->clear();
|
||||||
for (const auto &def : defns) {
|
for (const auto &def : defns) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -45,8 +45,6 @@ struct TeddyEngineDef {
|
|||||||
u32 numMasks;
|
u32 numMasks;
|
||||||
u32 numBuckets;
|
u32 numBuckets;
|
||||||
bool packed;
|
bool packed;
|
||||||
u32 confirmPullBackDistance;
|
|
||||||
u32 confirmTopLevelSplit;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class TeddyEngineDescription : public EngineDescription {
|
class TeddyEngineDescription : public EngineDescription {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -180,9 +180,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
|
|||||||
do {
|
do {
|
||||||
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
|
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
|
||||||
u32 byte = bit / bucket + offset;
|
u32 byte = bit / bucket + offset;
|
||||||
u32 bitRem = bit % bucket;
|
u32 idx = bit % bucket;
|
||||||
u32 confSplit = *(ptr+byte) & 0x1f;
|
|
||||||
u32 idx = confSplit * bucket + bitRem;
|
|
||||||
u32 cf = confBase[idx];
|
u32 cf = confBase[idx];
|
||||||
if (!cf) {
|
if (!cf) {
|
||||||
continue;
|
continue;
|
||||||
@ -193,7 +191,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
u64a confVal = getConfVal(a, ptr, byte, reason);
|
||||||
confWithBit(fdrc, a, ptr - a->buf + byte, 0, control,
|
confWithBit(fdrc, a, ptr - a->buf + byte, control,
|
||||||
last_match, confVal);
|
last_match, confVal);
|
||||||
} while (unlikely(*conf));
|
} while (unlikely(*conf));
|
||||||
}
|
}
|
||||||
|
25
src/grey.cpp
25
src/grey.cpp
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,6 +42,7 @@ namespace ue2 {
|
|||||||
|
|
||||||
Grey::Grey(void) :
|
Grey::Grey(void) :
|
||||||
optimiseComponentTree(true),
|
optimiseComponentTree(true),
|
||||||
|
calcComponents(true),
|
||||||
performGraphSimplification(true),
|
performGraphSimplification(true),
|
||||||
prefilterReductions(true),
|
prefilterReductions(true),
|
||||||
removeEdgeRedundancy(true),
|
removeEdgeRedundancy(true),
|
||||||
@ -54,7 +55,6 @@ Grey::Grey(void) :
|
|||||||
allowMcSheng(true),
|
allowMcSheng(true),
|
||||||
allowPuff(true),
|
allowPuff(true),
|
||||||
allowLiteral(true),
|
allowLiteral(true),
|
||||||
allowRose(true),
|
|
||||||
allowViolet(true),
|
allowViolet(true),
|
||||||
allowExtendedNFA(true), /* bounded repeats of course */
|
allowExtendedNFA(true), /* bounded repeats of course */
|
||||||
allowLimExNFA(true),
|
allowLimExNFA(true),
|
||||||
@ -62,8 +62,10 @@ Grey::Grey(void) :
|
|||||||
allowSmallLiteralSet(true),
|
allowSmallLiteralSet(true),
|
||||||
allowCastle(true),
|
allowCastle(true),
|
||||||
allowDecoratedLiteral(true),
|
allowDecoratedLiteral(true),
|
||||||
|
allowApproximateMatching(true),
|
||||||
allowNoodle(true),
|
allowNoodle(true),
|
||||||
fdrAllowTeddy(true),
|
fdrAllowTeddy(true),
|
||||||
|
fdrAllowFlood(true),
|
||||||
violetAvoidSuffixes(true),
|
violetAvoidSuffixes(true),
|
||||||
violetAvoidWeakInfixes(true),
|
violetAvoidWeakInfixes(true),
|
||||||
violetDoubleCut(true),
|
violetDoubleCut(true),
|
||||||
@ -98,6 +100,7 @@ Grey::Grey(void) :
|
|||||||
minRoseLiteralLength(3),
|
minRoseLiteralLength(3),
|
||||||
minRoseNetflowLiteralLength(2),
|
minRoseNetflowLiteralLength(2),
|
||||||
maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
|
maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
|
||||||
|
maxEditDistance(16),
|
||||||
minExtBoundedRepeatSize(32),
|
minExtBoundedRepeatSize(32),
|
||||||
goughCopyPropagate(true),
|
goughCopyPropagate(true),
|
||||||
goughRegisterAllocate(true),
|
goughRegisterAllocate(true),
|
||||||
@ -105,8 +108,6 @@ Grey::Grey(void) :
|
|||||||
roseGraphReduction(true),
|
roseGraphReduction(true),
|
||||||
roseRoleAliasing(true),
|
roseRoleAliasing(true),
|
||||||
roseMasks(true),
|
roseMasks(true),
|
||||||
roseMaxBadLeafLength(5),
|
|
||||||
roseConvertInfBadLeaves(true),
|
|
||||||
roseConvertFloodProneSuffixes(true),
|
roseConvertFloodProneSuffixes(true),
|
||||||
roseMergeRosesDuringAliasing(true),
|
roseMergeRosesDuringAliasing(true),
|
||||||
roseMultiTopRoses(true),
|
roseMultiTopRoses(true),
|
||||||
@ -116,7 +117,6 @@ Grey::Grey(void) :
|
|||||||
roseMcClellanSuffix(1),
|
roseMcClellanSuffix(1),
|
||||||
roseMcClellanOutfix(2),
|
roseMcClellanOutfix(2),
|
||||||
roseTransformDelay(true),
|
roseTransformDelay(true),
|
||||||
roseDesiredSplit(4),
|
|
||||||
earlyMcClellanPrefix(true),
|
earlyMcClellanPrefix(true),
|
||||||
earlyMcClellanInfix(true),
|
earlyMcClellanInfix(true),
|
||||||
earlyMcClellanSuffix(true),
|
earlyMcClellanSuffix(true),
|
||||||
@ -157,7 +157,8 @@ Grey::Grey(void) :
|
|||||||
limitEngineSize(1073741824), // 1 GB
|
limitEngineSize(1073741824), // 1 GB
|
||||||
limitDFASize(1073741824), // 1 GB
|
limitDFASize(1073741824), // 1 GB
|
||||||
limitNFASize(1048576), // 1 MB
|
limitNFASize(1048576), // 1 MB
|
||||||
limitLBRSize(1048576) // 1 MB
|
limitLBRSize(1048576), // 1 MB
|
||||||
|
limitApproxMatchingVertices(5000)
|
||||||
{
|
{
|
||||||
assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
|
assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
|
||||||
}
|
}
|
||||||
@ -209,6 +210,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
G_UPDATE(optimiseComponentTree);
|
G_UPDATE(optimiseComponentTree);
|
||||||
|
G_UPDATE(calcComponents);
|
||||||
G_UPDATE(performGraphSimplification);
|
G_UPDATE(performGraphSimplification);
|
||||||
G_UPDATE(prefilterReductions);
|
G_UPDATE(prefilterReductions);
|
||||||
G_UPDATE(removeEdgeRedundancy);
|
G_UPDATE(removeEdgeRedundancy);
|
||||||
@ -221,7 +223,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(allowMcSheng);
|
G_UPDATE(allowMcSheng);
|
||||||
G_UPDATE(allowPuff);
|
G_UPDATE(allowPuff);
|
||||||
G_UPDATE(allowLiteral);
|
G_UPDATE(allowLiteral);
|
||||||
G_UPDATE(allowRose);
|
|
||||||
G_UPDATE(allowViolet);
|
G_UPDATE(allowViolet);
|
||||||
G_UPDATE(allowExtendedNFA);
|
G_UPDATE(allowExtendedNFA);
|
||||||
G_UPDATE(allowLimExNFA);
|
G_UPDATE(allowLimExNFA);
|
||||||
@ -230,7 +231,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(allowCastle);
|
G_UPDATE(allowCastle);
|
||||||
G_UPDATE(allowDecoratedLiteral);
|
G_UPDATE(allowDecoratedLiteral);
|
||||||
G_UPDATE(allowNoodle);
|
G_UPDATE(allowNoodle);
|
||||||
|
G_UPDATE(allowApproximateMatching);
|
||||||
G_UPDATE(fdrAllowTeddy);
|
G_UPDATE(fdrAllowTeddy);
|
||||||
|
G_UPDATE(fdrAllowFlood);
|
||||||
G_UPDATE(violetAvoidSuffixes);
|
G_UPDATE(violetAvoidSuffixes);
|
||||||
G_UPDATE(violetAvoidWeakInfixes);
|
G_UPDATE(violetAvoidWeakInfixes);
|
||||||
G_UPDATE(violetDoubleCut);
|
G_UPDATE(violetDoubleCut);
|
||||||
@ -265,6 +268,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(minRoseLiteralLength);
|
G_UPDATE(minRoseLiteralLength);
|
||||||
G_UPDATE(minRoseNetflowLiteralLength);
|
G_UPDATE(minRoseNetflowLiteralLength);
|
||||||
G_UPDATE(maxRoseNetflowEdges);
|
G_UPDATE(maxRoseNetflowEdges);
|
||||||
|
G_UPDATE(maxEditDistance);
|
||||||
G_UPDATE(minExtBoundedRepeatSize);
|
G_UPDATE(minExtBoundedRepeatSize);
|
||||||
G_UPDATE(goughCopyPropagate);
|
G_UPDATE(goughCopyPropagate);
|
||||||
G_UPDATE(goughRegisterAllocate);
|
G_UPDATE(goughRegisterAllocate);
|
||||||
@ -272,8 +276,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(roseGraphReduction);
|
G_UPDATE(roseGraphReduction);
|
||||||
G_UPDATE(roseRoleAliasing);
|
G_UPDATE(roseRoleAliasing);
|
||||||
G_UPDATE(roseMasks);
|
G_UPDATE(roseMasks);
|
||||||
G_UPDATE(roseMaxBadLeafLength);
|
|
||||||
G_UPDATE(roseConvertInfBadLeaves);
|
|
||||||
G_UPDATE(roseConvertFloodProneSuffixes);
|
G_UPDATE(roseConvertFloodProneSuffixes);
|
||||||
G_UPDATE(roseMergeRosesDuringAliasing);
|
G_UPDATE(roseMergeRosesDuringAliasing);
|
||||||
G_UPDATE(roseMultiTopRoses);
|
G_UPDATE(roseMultiTopRoses);
|
||||||
@ -283,7 +285,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(roseMcClellanSuffix);
|
G_UPDATE(roseMcClellanSuffix);
|
||||||
G_UPDATE(roseMcClellanOutfix);
|
G_UPDATE(roseMcClellanOutfix);
|
||||||
G_UPDATE(roseTransformDelay);
|
G_UPDATE(roseTransformDelay);
|
||||||
G_UPDATE(roseDesiredSplit);
|
|
||||||
G_UPDATE(earlyMcClellanPrefix);
|
G_UPDATE(earlyMcClellanPrefix);
|
||||||
G_UPDATE(earlyMcClellanInfix);
|
G_UPDATE(earlyMcClellanInfix);
|
||||||
G_UPDATE(earlyMcClellanSuffix);
|
G_UPDATE(earlyMcClellanSuffix);
|
||||||
@ -319,6 +320,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(limitDFASize);
|
G_UPDATE(limitDFASize);
|
||||||
G_UPDATE(limitNFASize);
|
G_UPDATE(limitNFASize);
|
||||||
G_UPDATE(limitLBRSize);
|
G_UPDATE(limitLBRSize);
|
||||||
|
G_UPDATE(limitApproxMatchingVertices);
|
||||||
|
|
||||||
#undef G_UPDATE
|
#undef G_UPDATE
|
||||||
if (key == "simple_som") {
|
if (key == "simple_som") {
|
||||||
@ -340,7 +342,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowMcClellan = false;
|
g->allowMcClellan = false;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
g->allowLiteral = false;
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
|
||||||
g->allowViolet = false;
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
@ -358,7 +359,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowMcClellan = true;
|
g->allowMcClellan = true;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
g->allowLiteral = false;
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
|
||||||
g->allowViolet = false;
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
@ -376,7 +376,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowMcClellan = true;
|
g->allowMcClellan = true;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
g->allowLiteral = false;
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
|
||||||
g->allowViolet = false;
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
|
13
src/grey.h
13
src/grey.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,6 +41,7 @@ struct Grey {
|
|||||||
|
|
||||||
bool optimiseComponentTree;
|
bool optimiseComponentTree;
|
||||||
|
|
||||||
|
bool calcComponents;
|
||||||
bool performGraphSimplification;
|
bool performGraphSimplification;
|
||||||
bool prefilterReductions;
|
bool prefilterReductions;
|
||||||
bool removeEdgeRedundancy;
|
bool removeEdgeRedundancy;
|
||||||
@ -54,7 +55,6 @@ struct Grey {
|
|||||||
bool allowMcSheng;
|
bool allowMcSheng;
|
||||||
bool allowPuff;
|
bool allowPuff;
|
||||||
bool allowLiteral;
|
bool allowLiteral;
|
||||||
bool allowRose;
|
|
||||||
bool allowViolet;
|
bool allowViolet;
|
||||||
bool allowExtendedNFA;
|
bool allowExtendedNFA;
|
||||||
bool allowLimExNFA;
|
bool allowLimExNFA;
|
||||||
@ -62,9 +62,11 @@ struct Grey {
|
|||||||
bool allowSmallLiteralSet;
|
bool allowSmallLiteralSet;
|
||||||
bool allowCastle;
|
bool allowCastle;
|
||||||
bool allowDecoratedLiteral;
|
bool allowDecoratedLiteral;
|
||||||
|
bool allowApproximateMatching;
|
||||||
|
|
||||||
bool allowNoodle;
|
bool allowNoodle;
|
||||||
bool fdrAllowTeddy;
|
bool fdrAllowTeddy;
|
||||||
|
bool fdrAllowFlood;
|
||||||
|
|
||||||
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
|
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
|
||||||
bool violetAvoidWeakInfixes;
|
bool violetAvoidWeakInfixes;
|
||||||
@ -107,6 +109,7 @@ struct Grey {
|
|||||||
u32 minRoseLiteralLength;
|
u32 minRoseLiteralLength;
|
||||||
u32 minRoseNetflowLiteralLength;
|
u32 minRoseNetflowLiteralLength;
|
||||||
u32 maxRoseNetflowEdges;
|
u32 maxRoseNetflowEdges;
|
||||||
|
u32 maxEditDistance;
|
||||||
|
|
||||||
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
|
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
|
||||||
|
|
||||||
@ -118,8 +121,6 @@ struct Grey {
|
|||||||
bool roseGraphReduction;
|
bool roseGraphReduction;
|
||||||
bool roseRoleAliasing;
|
bool roseRoleAliasing;
|
||||||
bool roseMasks;
|
bool roseMasks;
|
||||||
u32 roseMaxBadLeafLength;
|
|
||||||
bool roseConvertInfBadLeaves;
|
|
||||||
bool roseConvertFloodProneSuffixes;
|
bool roseConvertFloodProneSuffixes;
|
||||||
bool roseMergeRosesDuringAliasing;
|
bool roseMergeRosesDuringAliasing;
|
||||||
bool roseMultiTopRoses;
|
bool roseMultiTopRoses;
|
||||||
@ -130,7 +131,6 @@ struct Grey {
|
|||||||
* always */
|
* always */
|
||||||
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
|
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
|
||||||
bool roseTransformDelay;
|
bool roseTransformDelay;
|
||||||
u32 roseDesiredSplit;
|
|
||||||
|
|
||||||
bool earlyMcClellanPrefix;
|
bool earlyMcClellanPrefix;
|
||||||
bool earlyMcClellanInfix;
|
bool earlyMcClellanInfix;
|
||||||
@ -202,6 +202,9 @@ struct Grey {
|
|||||||
u32 limitDFASize; //!< max size of a DFA (in bytes)
|
u32 limitDFASize; //!< max size of a DFA (in bytes)
|
||||||
u32 limitNFASize; //!< max size of an NFA (in bytes)
|
u32 limitNFASize; //!< max size of an NFA (in bytes)
|
||||||
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
|
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
|
||||||
|
|
||||||
|
// Approximate matching limits.
|
||||||
|
u32 limitApproxMatchingVertices; //!< max number of vertices per graph
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef RELEASE_BUILD
|
#ifndef RELEASE_BUILD
|
||||||
|
52
src/hs.cpp
52
src/hs.cpp
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,10 +39,10 @@
|
|||||||
#include "compiler/error.h"
|
#include "compiler/error.h"
|
||||||
#include "nfagraph/ng.h"
|
#include "nfagraph/ng.h"
|
||||||
#include "nfagraph/ng_expr_info.h"
|
#include "nfagraph/ng_expr_info.h"
|
||||||
#include "nfagraph/ng_extparam.h"
|
|
||||||
#include "parser/parse_error.h"
|
|
||||||
#include "parser/Parser.h"
|
#include "parser/Parser.h"
|
||||||
|
#include "parser/parse_error.h"
|
||||||
#include "parser/prefilter.h"
|
#include "parser/prefilter.h"
|
||||||
|
#include "parser/unsupported.h"
|
||||||
#include "util/compile_error.h"
|
#include "util/compile_error.h"
|
||||||
#include "util/cpuid_flags.h"
|
#include "util/cpuid_flags.h"
|
||||||
#include "util/depth.h"
|
#include "util/depth.h"
|
||||||
@ -119,8 +119,9 @@ bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
||||||
#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW
|
static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM;
|
||||||
#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2)
|
static constexpr u32 HS_CPU_FEATURES_ALL =
|
||||||
|
HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512;
|
||||||
|
|
||||||
if (!p) {
|
if (!p) {
|
||||||
return true;
|
return true;
|
||||||
@ -277,9 +278,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
|||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
|
hs_error_t HS_CDECL hs_compile(const char *expression, unsigned flags,
|
||||||
const hs_platform_info_t *platform, hs_database_t **db,
|
unsigned mode,
|
||||||
hs_compile_error_t **error) {
|
const hs_platform_info_t *platform,
|
||||||
|
hs_database_t **db, hs_compile_error_t **error) {
|
||||||
if (expression == nullptr) {
|
if (expression == nullptr) {
|
||||||
*db = nullptr;
|
*db = nullptr;
|
||||||
*error = generateCompileError("Invalid parameter: expression is NULL",
|
*error = generateCompileError("Invalid parameter: expression is NULL",
|
||||||
@ -295,18 +297,19 @@ hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_compile_multi(const char * const *expressions,
|
hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
|
||||||
const unsigned *flags, const unsigned *ids,
|
const unsigned *flags, const unsigned *ids,
|
||||||
unsigned elements, unsigned mode,
|
unsigned elements, unsigned mode,
|
||||||
const hs_platform_info_t *platform,
|
const hs_platform_info_t *platform,
|
||||||
hs_database_t **db, hs_compile_error_t **error) {
|
hs_database_t **db,
|
||||||
|
hs_compile_error_t **error) {
|
||||||
const hs_expr_ext * const *ext = nullptr; // unused for this call.
|
const hs_expr_ext * const *ext = nullptr; // unused for this call.
|
||||||
return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
|
return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
|
||||||
platform, db, error, Grey());
|
platform, db, error, Grey());
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_compile_ext_multi(const char * const *expressions,
|
hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
|
||||||
const unsigned *flags, const unsigned *ids,
|
const unsigned *flags, const unsigned *ids,
|
||||||
const hs_expr_ext * const *ext,
|
const hs_expr_ext * const *ext,
|
||||||
unsigned elements, unsigned mode,
|
unsigned elements, unsigned mode,
|
||||||
@ -368,19 +371,28 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
|||||||
assert(pe.component);
|
assert(pe.component);
|
||||||
|
|
||||||
// Apply prefiltering transformations if desired.
|
// Apply prefiltering transformations if desired.
|
||||||
if (pe.prefilter) {
|
if (pe.expr.prefilter) {
|
||||||
prefilterTree(pe.component, ParseMode(flags));
|
prefilterTree(pe.component, ParseMode(flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
|
// Expressions containing zero-width assertions and other extended pcre
|
||||||
|
// types aren't supported yet. This call will throw a ParseError
|
||||||
|
// exception if the component tree contains such a construct.
|
||||||
|
checkUnsupported(*pe.component);
|
||||||
|
|
||||||
|
pe.component->checkEmbeddedStartAnchor(true);
|
||||||
|
pe.component->checkEmbeddedEndAnchor(true);
|
||||||
|
|
||||||
|
auto built_expr = buildGraph(rm, cc, pe);
|
||||||
|
unique_ptr<NGHolder> &g = built_expr.g;
|
||||||
|
ExpressionInfo &expr = built_expr.expr;
|
||||||
|
|
||||||
if (!g) {
|
if (!g) {
|
||||||
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
|
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
|
||||||
throw ParseError("Internal error.");
|
throw ParseError("Internal error.");
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtendedParams(rm, *g, cc);
|
fillExpressionInfo(rm, cc, *g, expr, &local_info);
|
||||||
fillExpressionInfo(rm, *g, &local_info);
|
|
||||||
}
|
}
|
||||||
catch (const CompileError &e) {
|
catch (const CompileError &e) {
|
||||||
// Compiler error occurred
|
// Compiler error occurred
|
||||||
@ -409,7 +421,8 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
hs_error_t HS_CDECL hs_expression_info(const char *expression,
|
||||||
|
unsigned int flags,
|
||||||
hs_expr_info_t **info,
|
hs_expr_info_t **info,
|
||||||
hs_compile_error_t **error) {
|
hs_compile_error_t **error) {
|
||||||
return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
|
return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK,
|
||||||
@ -417,7 +430,8 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
hs_error_t HS_CDECL hs_expression_ext_info(const char *expression,
|
||||||
|
unsigned int flags,
|
||||||
const hs_expr_ext_t *ext,
|
const hs_expr_ext_t *ext,
|
||||||
hs_expr_info_t **info,
|
hs_expr_info_t **info,
|
||||||
hs_compile_error_t **error) {
|
hs_compile_error_t **error) {
|
||||||
@ -426,7 +440,7 @@ hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
|
hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) {
|
||||||
if (!platform) {
|
if (!platform) {
|
||||||
return HS_INVALID;
|
return HS_INVALID;
|
||||||
}
|
}
|
||||||
@ -440,7 +454,7 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern "C" HS_PUBLIC_API
|
extern "C" HS_PUBLIC_API
|
||||||
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
|
hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) {
|
||||||
#if defined(FAT_RUNTIME)
|
#if defined(FAT_RUNTIME)
|
||||||
if (!check_ssse3()) {
|
if (!check_ssse3()) {
|
||||||
return HS_ARCH_ERROR;
|
return HS_ARCH_ERROR;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -29,6 +29,11 @@
|
|||||||
#ifndef HS_COMMON_H_
|
#ifndef HS_COMMON_H_
|
||||||
#define HS_COMMON_H_
|
#define HS_COMMON_H_
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#define HS_CDECL __cdecl
|
||||||
|
#else
|
||||||
|
#define HS_CDECL
|
||||||
|
#endif
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -76,7 +81,7 @@ typedef int hs_error_t;
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_free_database(hs_database_t *db);
|
hs_error_t HS_CDECL hs_free_database(hs_database_t *db);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serialize a pattern database to a stream of bytes.
|
* Serialize a pattern database to a stream of bytes.
|
||||||
@ -100,7 +105,7 @@ hs_error_t hs_free_database(hs_database_t *db);
|
|||||||
* @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
|
* @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
|
||||||
* allocated, other values may be returned if errors are detected.
|
* allocated, other values may be returned if errors are detected.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||||
size_t *length);
|
size_t *length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -129,7 +134,8 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_deserialize_database(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
hs_database_t **db);
|
hs_database_t **db);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -160,7 +166,8 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
hs_database_t *db);
|
hs_database_t *db);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -177,7 +184,8 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
|
hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database,
|
||||||
|
size_t *stream_size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides the size of the given database in bytes.
|
* Provides the size of the given database in bytes.
|
||||||
@ -192,7 +200,7 @@ hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_database_size(const hs_database_t *database,
|
hs_error_t HS_CDECL hs_database_size(const hs_database_t *database,
|
||||||
size_t *database_size);
|
size_t *database_size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -219,7 +227,8 @@ hs_error_t hs_database_size(const hs_database_t *database,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes,
|
||||||
|
const size_t length,
|
||||||
size_t *deserialized_size);
|
size_t *deserialized_size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -237,7 +246,8 @@ hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_database_info(const hs_database_t *database, char **info);
|
hs_error_t HS_CDECL hs_database_info(const hs_database_t *database,
|
||||||
|
char **info);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility function providing information about a serialized database.
|
* Utility function providing information about a serialized database.
|
||||||
@ -258,8 +268,8 @@ hs_error_t hs_database_info(const hs_database_t *database, char **info);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes,
|
||||||
char **info);
|
size_t length, char **info);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The type of the callback function that will be used by Hyperscan to allocate
|
* The type of the callback function that will be used by Hyperscan to allocate
|
||||||
@ -275,7 +285,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
|||||||
* @return
|
* @return
|
||||||
* A pointer to the region of memory allocated, or NULL on error.
|
* A pointer to the region of memory allocated, or NULL on error.
|
||||||
*/
|
*/
|
||||||
typedef void *(*hs_alloc_t)(size_t size);
|
typedef void *(HS_CDECL *hs_alloc_t)(size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The type of the callback function that will be used by Hyperscan to free
|
* The type of the callback function that will be used by Hyperscan to free
|
||||||
@ -284,7 +294,7 @@ typedef void *(*hs_alloc_t)(size_t size);
|
|||||||
* @param ptr
|
* @param ptr
|
||||||
* The region of memory to be freed.
|
* The region of memory to be freed.
|
||||||
*/
|
*/
|
||||||
typedef void (*hs_free_t)(void *ptr);
|
typedef void (HS_CDECL *hs_free_t)(void *ptr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the allocate and free functions used by Hyperscan for allocating
|
* Set the allocate and free functions used by Hyperscan for allocating
|
||||||
@ -312,7 +322,8 @@ typedef void (*hs_free_t)(void *ptr);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func,
|
||||||
|
hs_free_t free_func);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||||
@ -344,7 +355,7 @@ hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
|
hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func,
|
||||||
hs_free_t free_func);
|
hs_free_t free_func);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -371,7 +382,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func,
|
||||||
|
hs_free_t free_func);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||||
@ -397,7 +409,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func,
|
||||||
|
hs_free_t free_func);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||||
@ -423,7 +436,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func,
|
||||||
|
hs_free_t free_func);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility function for identifying this release version.
|
* Utility function for identifying this release version.
|
||||||
@ -433,7 +447,7 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
|||||||
* date of the build. It is allocated statically, so it does not need to
|
* date of the build. It is allocated statically, so it does not need to
|
||||||
* be freed by the caller.
|
* be freed by the caller.
|
||||||
*/
|
*/
|
||||||
const char *hs_version(void);
|
const char * HS_CDECL hs_version(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility function to test the current system architecture.
|
* Utility function to test the current system architecture.
|
||||||
@ -450,7 +464,7 @@ const char *hs_version(void);
|
|||||||
* @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not
|
* @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not
|
||||||
* support Hyperscan.
|
* support Hyperscan.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_valid_platform(void);
|
hs_error_t HS_CDECL hs_valid_platform(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup HS_ERROR hs_error_t values
|
* @defgroup HS_ERROR hs_error_t values
|
||||||
|
105
src/hs_compile.h
105
src/hs_compile.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -169,13 +169,23 @@ typedef struct hs_platform_info {
|
|||||||
typedef struct hs_expr_info {
|
typedef struct hs_expr_info {
|
||||||
/**
|
/**
|
||||||
* The minimum length in bytes of a match for the pattern.
|
* The minimum length in bytes of a match for the pattern.
|
||||||
|
*
|
||||||
|
* Note: in some cases when using advanced features to suppress matches
|
||||||
|
* (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this
|
||||||
|
* may represent a conservative lower bound for the true minimum length of
|
||||||
|
* a match.
|
||||||
*/
|
*/
|
||||||
unsigned int min_width;
|
unsigned int min_width;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The maximum length in bytes of a match for the pattern. If the pattern
|
* The maximum length in bytes of a match for the pattern. If the pattern
|
||||||
* has an unbounded maximum width, this will be set to the maximum value of
|
* has an unbounded maximum length, this will be set to the maximum value
|
||||||
* an unsigned int (UINT_MAX).
|
* of an unsigned int (UINT_MAX).
|
||||||
|
*
|
||||||
|
* Note: in some cases when using advanced features to suppress matches
|
||||||
|
* (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this
|
||||||
|
* may represent a conservative upper bound for the true maximum length of
|
||||||
|
* a match.
|
||||||
*/
|
*/
|
||||||
unsigned int max_width;
|
unsigned int max_width;
|
||||||
|
|
||||||
@ -241,6 +251,13 @@ typedef struct hs_expr_ext {
|
|||||||
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
|
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
|
||||||
*/
|
*/
|
||||||
unsigned long long min_length;
|
unsigned long long min_length;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allow patterns to approximately match within this edit distance. To use
|
||||||
|
* this parameter, set the @ref HS_EXT_FLAG_EDIT_DISTANCE flag in the
|
||||||
|
* hs_expr_ext::flags field.
|
||||||
|
*/
|
||||||
|
unsigned edit_distance;
|
||||||
} hs_expr_ext_t;
|
} hs_expr_ext_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -261,6 +278,9 @@ typedef struct hs_expr_ext {
|
|||||||
/** Flag indicating that the hs_expr_ext::min_length field is used. */
|
/** Flag indicating that the hs_expr_ext::min_length field is used. */
|
||||||
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
|
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
|
||||||
|
|
||||||
|
/** Flag indicating that the hs_expr_ext::edit_distance field is used. */
|
||||||
|
#define HS_EXT_FLAG_EDIT_DISTANCE 8ULL
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -323,8 +343,9 @@ typedef struct hs_expr_ext {
|
|||||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||||
* parameter.
|
* parameter.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_compile(const char *expression, unsigned int flags,
|
hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags,
|
||||||
unsigned int mode, const hs_platform_info_t *platform,
|
unsigned int mode,
|
||||||
|
const hs_platform_info_t *platform,
|
||||||
hs_database_t **db, hs_compile_error_t **error);
|
hs_database_t **db, hs_compile_error_t **error);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -401,11 +422,13 @@ hs_error_t hs_compile(const char *expression, unsigned int flags,
|
|||||||
* parameter.
|
* parameter.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_compile_multi(const char *const *expressions,
|
hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
|
||||||
const unsigned int *flags, const unsigned int *ids,
|
const unsigned int *flags,
|
||||||
|
const unsigned int *ids,
|
||||||
unsigned int elements, unsigned int mode,
|
unsigned int elements, unsigned int mode,
|
||||||
const hs_platform_info_t *platform,
|
const hs_platform_info_t *platform,
|
||||||
hs_database_t **db, hs_compile_error_t **error);
|
hs_database_t **db,
|
||||||
|
hs_compile_error_t **error);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The multiple regular expression compiler with extended parameter support.
|
* The multiple regular expression compiler with extended parameter support.
|
||||||
@ -486,7 +509,7 @@ hs_error_t hs_compile_multi(const char *const *expressions,
|
|||||||
* parameter.
|
* parameter.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_compile_ext_multi(const char *const *expressions,
|
hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
|
||||||
const unsigned int *flags,
|
const unsigned int *flags,
|
||||||
const unsigned int *ids,
|
const unsigned int *ids,
|
||||||
const hs_expr_ext_t *const *ext,
|
const hs_expr_ext_t *const *ext,
|
||||||
@ -505,13 +528,24 @@ hs_error_t hs_compile_ext_multi(const char *const *expressions,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_free_compile_error(hs_compile_error_t *error);
|
hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility function providing information about a regular expression. The
|
* Utility function providing information about a regular expression. The
|
||||||
* information provided in @ref hs_expr_info_t includes the minimum and maximum
|
* information provided in @ref hs_expr_info_t includes the minimum and maximum
|
||||||
* width of a pattern match.
|
* width of a pattern match.
|
||||||
*
|
*
|
||||||
|
* Note: successful analysis of an expression with this function does not imply
|
||||||
|
* that compilation of the same expression (via @ref hs_compile(), @ref
|
||||||
|
* hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This
|
||||||
|
* function may return @ref HS_SUCCESS for regular expressions that Hyperscan
|
||||||
|
* cannot compile.
|
||||||
|
*
|
||||||
|
* Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref
|
||||||
|
* HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect
|
||||||
|
* the properties returned in the @ref hs_expr_info_t structure, they will not
|
||||||
|
* affect the outcome of this function.
|
||||||
|
*
|
||||||
* @param expression
|
* @param expression
|
||||||
* The NULL-terminated expression to parse. Note that this string must
|
* The NULL-terminated expression to parse. Note that this string must
|
||||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||||
@ -553,7 +587,8 @@ hs_error_t hs_free_compile_error(hs_compile_error_t *error);
|
|||||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||||
* parameter.
|
* parameter.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
hs_error_t HS_CDECL hs_expression_info(const char *expression,
|
||||||
|
unsigned int flags,
|
||||||
hs_expr_info_t **info,
|
hs_expr_info_t **info,
|
||||||
hs_compile_error_t **error);
|
hs_compile_error_t **error);
|
||||||
|
|
||||||
@ -562,6 +597,17 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
|||||||
* extended parameter support. The information provided in @ref hs_expr_info_t
|
* extended parameter support. The information provided in @ref hs_expr_info_t
|
||||||
* includes the minimum and maximum width of a pattern match.
|
* includes the minimum and maximum width of a pattern match.
|
||||||
*
|
*
|
||||||
|
* Note: successful analysis of an expression with this function does not imply
|
||||||
|
* that compilation of the same expression (via @ref hs_compile(), @ref
|
||||||
|
* hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This
|
||||||
|
* function may return @ref HS_SUCCESS for regular expressions that Hyperscan
|
||||||
|
* cannot compile.
|
||||||
|
*
|
||||||
|
* Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref
|
||||||
|
* HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect
|
||||||
|
* the properties returned in the @ref hs_expr_info_t structure, they will not
|
||||||
|
* affect the outcome of this function.
|
||||||
|
*
|
||||||
* @param expression
|
* @param expression
|
||||||
* The NULL-terminated expression to parse. Note that this string must
|
* The NULL-terminated expression to parse. Note that this string must
|
||||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||||
@ -608,7 +654,8 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
|||||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||||
* parameter.
|
* parameter.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
hs_error_t HS_CDECL hs_expression_ext_info(const char *expression,
|
||||||
|
unsigned int flags,
|
||||||
const hs_expr_ext_t *ext,
|
const hs_expr_ext_t *ext,
|
||||||
hs_expr_info_t **info,
|
hs_expr_info_t **info,
|
||||||
hs_compile_error_t **error);
|
hs_compile_error_t **error);
|
||||||
@ -623,7 +670,7 @@ hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_populate_platform(hs_platform_info_t *platform);
|
hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup HS_PATTERN_FLAG Pattern flags
|
* @defgroup HS_PATTERN_FLAG Pattern flags
|
||||||
@ -770,6 +817,14 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
*/
|
*/
|
||||||
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
|
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CPU features flag - Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX512)
|
||||||
|
*
|
||||||
|
* Setting this flag indicates that the target platform supports AVX512
|
||||||
|
* instructions, specifically AVX-512BW. Using AVX512 implies the use of AVX2.
|
||||||
|
*/
|
||||||
|
#define HS_CPU_FEATURES_AVX512 (1ULL << 3)
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -826,6 +881,30 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform);
|
|||||||
*/
|
*/
|
||||||
#define HS_TUNE_FAMILY_BDW 5
|
#define HS_TUNE_FAMILY_BDW 5
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tuning Parameter - Intel(R) microarchitecture code name Skylake
|
||||||
|
*
|
||||||
|
* This indicates that the compiled database should be tuned for the
|
||||||
|
* Skylake microarchitecture.
|
||||||
|
*/
|
||||||
|
#define HS_TUNE_FAMILY_SKL 6
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tuning Parameter - Intel(R) microarchitecture code name Skylake Server
|
||||||
|
*
|
||||||
|
* This indicates that the compiled database should be tuned for the
|
||||||
|
* Skylake Server microarchitecture.
|
||||||
|
*/
|
||||||
|
#define HS_TUNE_FAMILY_SKX 7
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tuning Parameter - Intel(R) microarchitecture code name Goldmont
|
||||||
|
*
|
||||||
|
* This indicates that the compiled database should be tuned for the
|
||||||
|
* Goldmont microarchitecture.
|
||||||
|
*/
|
||||||
|
#define HS_TUNE_FAMILY_GLM 8
|
||||||
|
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -145,7 +145,7 @@ typedef int (*match_event_handler)(unsigned int id,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
|
hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags,
|
||||||
hs_stream_t **stream);
|
hs_stream_t **stream);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -185,10 +185,10 @@ hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
|
|||||||
* match callback indicated that scanning should stop; other values on
|
* match callback indicated that scanning should stop; other values on
|
||||||
* error.
|
* error.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
|
hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
|
||||||
unsigned int length, unsigned int flags,
|
unsigned int length, unsigned int flags,
|
||||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
hs_scratch_t *scratch,
|
||||||
void *ctxt);
|
match_event_handler onEvent, void *ctxt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Close a stream.
|
* Close a stream.
|
||||||
@ -223,7 +223,7 @@ hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
|
|||||||
* @return
|
* @return
|
||||||
* Returns @ref HS_SUCCESS on success, other values on failure.
|
* Returns @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *ctxt);
|
match_event_handler onEvent, void *ctxt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -264,9 +264,9 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
|
hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags,
|
||||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
hs_scratch_t *scratch,
|
||||||
void *context);
|
match_event_handler onEvent, void *context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Duplicate the given stream. The new stream will have the same state as the
|
* Duplicate the given stream. The new stream will have the same state as the
|
||||||
@ -282,7 +282,8 @@ hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
|
hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
|
||||||
|
const hs_stream_t *from_id);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
|
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
|
||||||
@ -314,7 +315,7 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
|
hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
|
||||||
const hs_stream_t *from_id,
|
const hs_stream_t *from_id,
|
||||||
hs_scratch_t *scratch,
|
hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent,
|
match_event_handler onEvent,
|
||||||
@ -355,7 +356,7 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
|
|||||||
* match callback indicated that scanning should stop; other values on
|
* match callback indicated that scanning should stop; other values on
|
||||||
* error.
|
* error.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_scan(const hs_database_t *db, const char *data,
|
hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
|
||||||
unsigned int length, unsigned int flags,
|
unsigned int length, unsigned int flags,
|
||||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||||
void *context);
|
void *context);
|
||||||
@ -398,9 +399,11 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data,
|
|||||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
|
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
|
||||||
* callback indicated that scanning should stop; other values on error.
|
* callback indicated that scanning should stop; other values on error.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
|
hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
|
||||||
const unsigned int *length, unsigned int count,
|
const char *const *data,
|
||||||
unsigned int flags, hs_scratch_t *scratch,
|
const unsigned int *length,
|
||||||
|
unsigned int count, unsigned int flags,
|
||||||
|
hs_scratch_t *scratch,
|
||||||
match_event_handler onEvent, void *context);
|
match_event_handler onEvent, void *context);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -429,7 +432,8 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
|
|||||||
* allocation fails. Other errors may be returned if invalid parameters
|
* allocation fails. Other errors may be returned if invalid parameters
|
||||||
* are specified.
|
* are specified.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
|
hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
|
||||||
|
hs_scratch_t **scratch);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allocate a scratch space that is a clone of an existing scratch space.
|
* Allocate a scratch space that is a clone of an existing scratch space.
|
||||||
@ -449,7 +453,8 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
|
|||||||
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
|
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
|
||||||
* Other errors may be returned if invalid parameters are specified.
|
* Other errors may be returned if invalid parameters are specified.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
|
hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
|
||||||
|
hs_scratch_t **dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides the size of the given scratch space.
|
* Provides the size of the given scratch space.
|
||||||
@ -465,7 +470,8 @@ hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
|
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch,
|
||||||
|
size_t *scratch_size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
|
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
|
||||||
@ -480,7 +486,7 @@ hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
|
|||||||
* @return
|
* @return
|
||||||
* @ref HS_SUCCESS on success, other values on failure.
|
* @ref HS_SUCCESS on success, other values on failure.
|
||||||
*/
|
*/
|
||||||
hs_error_t hs_free_scratch(hs_scratch_t *scratch);
|
hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Callback 'from' return value, indicating that the start of this match was
|
* Callback 'from' return value, indicating that the start of this match was
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,7 +30,7 @@
|
|||||||
#include "util/cpuid_flags.h"
|
#include "util/cpuid_flags.h"
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
hs_error_t hs_valid_platform(void) {
|
hs_error_t HS_CDECL hs_valid_platform(void) {
|
||||||
/* Hyperscan requires SSSE3, anything else is a bonus */
|
/* Hyperscan requires SSSE3, anything else is a bonus */
|
||||||
if (check_ssse3()) {
|
if (check_ssse3()) {
|
||||||
return HS_SUCCESS;
|
return HS_SUCCESS;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -31,6 +31,6 @@
|
|||||||
#include "hs_version.h"
|
#include "hs_version.h"
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
const char *hs_version(void) {
|
const char * HS_CDECL hs_version(void) {
|
||||||
return HS_VERSION_STRING;
|
return HS_VERSION_STRING;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -172,6 +172,8 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
|
|||||||
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
||||||
size_t start, HWLMCallback cb, void *ctxt,
|
size_t start, HWLMCallback cb, void *ctxt,
|
||||||
hwlm_group_t groups) {
|
hwlm_group_t groups) {
|
||||||
|
assert(t);
|
||||||
|
|
||||||
DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
|
DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
|
||||||
if (!groups) {
|
if (!groups) {
|
||||||
DEBUG_PRINTF("groups all off\n");
|
DEBUG_PRINTF("groups all off\n");
|
||||||
@ -201,6 +203,9 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
|||||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
||||||
size_t len, size_t start, HWLMCallback cb,
|
size_t len, size_t start, HWLMCallback cb,
|
||||||
void *ctxt, hwlm_group_t groups) {
|
void *ctxt, hwlm_group_t groups) {
|
||||||
|
assert(t);
|
||||||
|
assert(scratch);
|
||||||
|
|
||||||
const u8 *hbuf = scratch->core_info.hbuf;
|
const u8 *hbuf = scratch->core_info.hbuf;
|
||||||
const size_t hlen = scratch->core_info.hlen;
|
const size_t hlen = scratch->core_info.hlen;
|
||||||
const u8 *buf = scratch->core_info.buf;
|
const u8 *buf = scratch->core_info.buf;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -29,31 +29,23 @@
|
|||||||
/** \file
|
/** \file
|
||||||
* \brief Hamster Wheel Literal Matcher: build code.
|
* \brief Hamster Wheel Literal Matcher: build code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "hwlm_build.h"
|
||||||
|
|
||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "hwlm.h"
|
#include "hwlm.h"
|
||||||
#include "hwlm_build.h"
|
|
||||||
#include "hwlm_internal.h"
|
#include "hwlm_internal.h"
|
||||||
|
#include "hwlm_literal.h"
|
||||||
#include "noodle_engine.h"
|
#include "noodle_engine.h"
|
||||||
#include "noodle_build.h"
|
#include "noodle_build.h"
|
||||||
#include "scratch.h"
|
#include "scratch.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "fdr/fdr_compile.h"
|
#include "fdr/fdr_compile.h"
|
||||||
#include "nfa/shufticompile.h"
|
|
||||||
#include "nfa/trufflecompile.h"
|
|
||||||
#include "util/alloc.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/charreach.h"
|
|
||||||
#include "util/compare.h"
|
|
||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
#include "util/compile_error.h"
|
#include "util/compile_error.h"
|
||||||
#include "util/dump_charclass.h"
|
|
||||||
#include "util/target_info.h"
|
|
||||||
#include "util/ue2string.h"
|
#include "util/ue2string.h"
|
||||||
#include "util/verify_types.h"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdio>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -61,431 +53,6 @@ using namespace std;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
|
||||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
|
||||||
|
|
||||||
static
|
|
||||||
size_t mask_overhang(const hwlmLiteral &lit) {
|
|
||||||
size_t msk_true_size = lit.msk.size();
|
|
||||||
assert(msk_true_size <= HWLM_MASKLEN);
|
|
||||||
assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
|
|
||||||
for (u8 c : lit.msk) {
|
|
||||||
if (!c) {
|
|
||||||
msk_true_size--;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lit.s.length() >= msk_true_size) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* only short literals should be able to have a mask which overhangs */
|
|
||||||
assert(lit.s.length() < MAX_ACCEL_OFFSET);
|
|
||||||
return msk_true_size - lit.s.length();
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|
||||||
const hwlmLiteral &first = *lits.front();
|
|
||||||
|
|
||||||
struct candidate {
|
|
||||||
candidate(void)
|
|
||||||
: c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
|
|
||||||
candidate(const hwlmLiteral &base, u32 offset)
|
|
||||||
: c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
|
|
||||||
b5insens(false), valid(true) {}
|
|
||||||
char c1;
|
|
||||||
char c2;
|
|
||||||
u32 max_offset;
|
|
||||||
bool b5insens;
|
|
||||||
bool valid;
|
|
||||||
|
|
||||||
bool operator>(const candidate &other) const {
|
|
||||||
if (!valid) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!other.valid) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (other.cdiffers() && !cdiffers()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!other.cdiffers() && cdiffers()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!other.b5insens && b5insens) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (other.b5insens && !b5insens) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (max_offset > other.max_offset) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool cdiffers(void) const {
|
|
||||||
if (!b5insens) {
|
|
||||||
return c1 != c2;
|
|
||||||
}
|
|
||||||
return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
candidate best;
|
|
||||||
|
|
||||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
|
|
||||||
candidate curr(first, i);
|
|
||||||
|
|
||||||
/* check to see if this pair appears in each string */
|
|
||||||
for (const auto &lit_ptr : lits) {
|
|
||||||
const hwlmLiteral &lit = *lit_ptr;
|
|
||||||
if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
|
|
||||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
|
||||||
}
|
|
||||||
|
|
||||||
bool found = false;
|
|
||||||
bool found_nc = false;
|
|
||||||
for (u32 j = 0;
|
|
||||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
|
|
||||||
found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
|
||||||
found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
|
||||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
|
||||||
|
|
||||||
if (curr.b5insens) {
|
|
||||||
found = found_nc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!curr.b5insens && !found && found_nc) {
|
|
||||||
curr.b5insens = true;
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found) {
|
|
||||||
goto next_candidate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check to find the max offset where this appears */
|
|
||||||
for (const auto &lit_ptr : lits) {
|
|
||||||
const hwlmLiteral &lit = *lit_ptr;
|
|
||||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
|
|
||||||
j++) {
|
|
||||||
bool found = false;
|
|
||||||
if (curr.b5insens) {
|
|
||||||
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
|
||||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
|
||||||
} else {
|
|
||||||
found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (found) {
|
|
||||||
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
|
||||||
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (curr > best) {
|
|
||||||
best = curr;
|
|
||||||
}
|
|
||||||
|
|
||||||
next_candidate:;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!best.valid) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
aux->dverm.offset = verify_u8(best.max_offset);
|
|
||||||
|
|
||||||
if (!best.b5insens) {
|
|
||||||
aux->dverm.accel_type = ACCEL_DVERM;
|
|
||||||
aux->dverm.c1 = best.c1;
|
|
||||||
aux->dverm.c2 = best.c2;
|
|
||||||
DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
|
|
||||||
aux->dverm.c1, aux->dverm.c2);
|
|
||||||
} else {
|
|
||||||
aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
|
|
||||||
aux->dverm.c1 = best.c1 & CASE_CLEAR;
|
|
||||||
aux->dverm.c2 = best.c2 & CASE_CLEAR;
|
|
||||||
DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
|
|
||||||
aux->dverm.c1, aux->dverm.c2);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|
||||||
const hwlmLiteral &first = *lits.front();
|
|
||||||
|
|
||||||
struct candidate {
|
|
||||||
candidate(void)
|
|
||||||
: c(0), max_offset(0), b5insens(false), valid(false) {}
|
|
||||||
candidate(const hwlmLiteral &base, u32 offset)
|
|
||||||
: c(base.s[offset]), max_offset(0),
|
|
||||||
b5insens(false), valid(true) {}
|
|
||||||
char c;
|
|
||||||
u32 max_offset;
|
|
||||||
bool b5insens;
|
|
||||||
bool valid;
|
|
||||||
|
|
||||||
bool operator>(const candidate &other) const {
|
|
||||||
if (!valid) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!other.valid) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!other.b5insens && b5insens) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (other.b5insens && !b5insens) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (max_offset > other.max_offset) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
candidate best;
|
|
||||||
|
|
||||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
|
|
||||||
candidate curr(first, i);
|
|
||||||
|
|
||||||
/* check to see if this pair appears in each string */
|
|
||||||
for (const auto &lit_ptr : lits) {
|
|
||||||
const hwlmLiteral &lit = *lit_ptr;
|
|
||||||
if (lit.nocase && ourisalpha(curr.c)) {
|
|
||||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
|
||||||
}
|
|
||||||
|
|
||||||
bool found = false;
|
|
||||||
bool found_nc = false;
|
|
||||||
for (u32 j = 0;
|
|
||||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
|
||||||
found |= curr.c == lit.s[j];
|
|
||||||
found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
|
||||||
|
|
||||||
if (curr.b5insens) {
|
|
||||||
found = found_nc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!curr.b5insens && !found && found_nc) {
|
|
||||||
curr.b5insens = true;
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!found) {
|
|
||||||
goto next_candidate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check to find the max offset where this appears */
|
|
||||||
for (const auto &lit_ptr : lits) {
|
|
||||||
const hwlmLiteral &lit = *lit_ptr;
|
|
||||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
|
||||||
bool found = false;
|
|
||||||
if (curr.b5insens) {
|
|
||||||
found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
|
||||||
} else {
|
|
||||||
found = curr.c == lit.s[j];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (found) {
|
|
||||||
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
|
||||||
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (curr > best) {
|
|
||||||
best = curr;
|
|
||||||
}
|
|
||||||
|
|
||||||
next_candidate:;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!best.valid) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!best.b5insens) {
|
|
||||||
aux->verm.accel_type = ACCEL_VERM;
|
|
||||||
aux->verm.c = best.c;
|
|
||||||
DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
|
|
||||||
} else {
|
|
||||||
aux->verm.accel_type = ACCEL_VERM_NOCASE;
|
|
||||||
aux->verm.c = best.c & CASE_CLEAR;
|
|
||||||
DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
|
|
||||||
}
|
|
||||||
aux->verm.offset = verify_u8(best.max_offset);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
|
|
||||||
vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
|
|
||||||
*min_len = MAX_ACCEL_OFFSET;
|
|
||||||
|
|
||||||
for (const auto &lit : lits) {
|
|
||||||
if (!(lit.groups & expected_groups)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t lit_len = lit.s.length();
|
|
||||||
if (lit_len < *min_len) {
|
|
||||||
*min_len = verify_u32(lit_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
filtered_lits->push_back(&lit);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
DEBUG_PRINTF("lit:");
|
|
||||||
for (u32 i = 0; i < lit.s.length(); i++) {
|
|
||||||
printf("%02hhx", lit.s[i]);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
|
|
||||||
u32 max_offset) {
|
|
||||||
for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
|
|
||||||
unsigned char c = lit.s[i];
|
|
||||||
if (lit.nocase) {
|
|
||||||
if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (cr.test(c)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
|
||||||
hwlm_group_t expected_groups, AccelAux *aux) {
|
|
||||||
DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
|
|
||||||
u32 min_len = MAX_ACCEL_OFFSET;
|
|
||||||
vector<const hwlmLiteral *> filtered_lits;
|
|
||||||
|
|
||||||
filterLits(lits, expected_groups, &filtered_lits, &min_len);
|
|
||||||
if (filtered_lits.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (findDVerm(filtered_lits, aux)
|
|
||||||
|| findSVerm(filtered_lits, aux)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* look for shufti/truffle */
|
|
||||||
|
|
||||||
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
|
||||||
for (const auto &lit : lits) {
|
|
||||||
if (!(lit.groups & expected_groups)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 overhang = mask_overhang(lit);
|
|
||||||
for (u32 i = 0; i < overhang; i++) {
|
|
||||||
/* this offset overhangs the start of the real literal; look at the
|
|
||||||
* msk/cmp */
|
|
||||||
for (u32 j = 0; j < N_CHARS; j++) {
|
|
||||||
if ((j & lit.msk[i]) == lit.cmp[i]) {
|
|
||||||
reach[i].set(j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
|
|
||||||
CharReach &reach_i = reach[i];
|
|
||||||
u32 i_effective = i - overhang;
|
|
||||||
|
|
||||||
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
|
|
||||||
: lit.s.back();
|
|
||||||
if (lit.nocase) {
|
|
||||||
reach_i.set(mytoupper(c));
|
|
||||||
reach_i.set(mytolower(c));
|
|
||||||
} else {
|
|
||||||
reach_i.set(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 min_count = ~0U;
|
|
||||||
u32 min_offset = ~0U;
|
|
||||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
|
|
||||||
size_t count = reach[i].count();
|
|
||||||
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
|
||||||
describeClass(reach[i]).c_str(), count);
|
|
||||||
if (count < min_count) {
|
|
||||||
min_count = (u32)count;
|
|
||||||
min_offset = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (min_count > MAX_SHUFTI_WIDTH) {
|
|
||||||
DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const CharReach &cr = reach[min_offset];
|
|
||||||
if (-1 !=
|
|
||||||
shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
|
|
||||||
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
|
|
||||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
|
||||||
aux->shufti.accel_type = ACCEL_SHUFTI;
|
|
||||||
aux->shufti.offset = verify_u8(min_offset);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
|
|
||||||
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
|
|
||||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
|
||||||
aux->truffle.accel_type = ACCEL_TRUFFLE;
|
|
||||||
aux->truffle.offset = verify_u8(min_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
|
|
||||||
hwlm_group_t expected_groups) {
|
|
||||||
findForwardAccelScheme(lits, expected_groups, &h->accel1);
|
|
||||||
findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
|
|
||||||
|
|
||||||
h->accel1_groups = expected_groups;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
|
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -512,7 +79,6 @@ bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
bool isNoodleable(const vector<hwlmLiteral> &lits,
|
bool isNoodleable(const vector<hwlmLiteral> &lits,
|
||||||
const hwlmStreamingControl *stream_control,
|
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
if (!cc.grey.allowNoodle) {
|
if (!cc.grey.allowNoodle) {
|
||||||
return false;
|
return false;
|
||||||
@ -523,19 +89,6 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stream_control) { // nullptr if in block mode
|
|
||||||
if (lits.front().s.length() > stream_control->history_max + 1) {
|
|
||||||
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
|
|
||||||
lits.front().s.length(),
|
|
||||||
stream_control->history_max);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
|
|
||||||
assert(0);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!lits.front().msk.empty()) {
|
if (!lits.front().msk.empty()) {
|
||||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||||
return false;
|
return false;
|
||||||
@ -544,23 +97,12 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
|
bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
|
||||||
hwlmStreamingControl *stream_control,
|
const CompileContext &cc,
|
||||||
bool make_small, const CompileContext &cc,
|
UNUSED hwlm_group_t expected_groups) {
|
||||||
hwlm_group_t expected_groups) {
|
|
||||||
assert(!lits.empty());
|
assert(!lits.empty());
|
||||||
dumpLits(lits);
|
dumpLits(lits);
|
||||||
|
|
||||||
if (stream_control) {
|
|
||||||
assert(stream_control->history_min <= stream_control->history_max);
|
|
||||||
|
|
||||||
// We should not have been passed any literals that are too long to
|
|
||||||
// match with a maximally-sized history buffer.
|
|
||||||
assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
|
|
||||||
return lit.s.length() <= stream_control->history_max + 1;
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that we haven't exceeded the maximum number of literals.
|
// Check that we haven't exceeded the maximum number of literals.
|
||||||
if (lits.size() > cc.grey.limitLiteralCount) {
|
if (lits.size() > cc.grey.limitLiteralCount) {
|
||||||
throw ResourceLimitError();
|
throw ResourceLimitError();
|
||||||
@ -595,29 +137,21 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
|
|||||||
|
|
||||||
assert(everyoneHasGroups(lits));
|
assert(everyoneHasGroups(lits));
|
||||||
|
|
||||||
if (isNoodleable(lits, stream_control, cc)) {
|
if (isNoodleable(lits, cc)) {
|
||||||
DEBUG_PRINTF("build noodle table\n");
|
DEBUG_PRINTF("build noodle table\n");
|
||||||
engType = HWLM_ENGINE_NOOD;
|
engType = HWLM_ENGINE_NOOD;
|
||||||
const hwlmLiteral &lit = lits.front();
|
const hwlmLiteral &lit = lits.front();
|
||||||
auto noodle = noodBuildTable(lit);
|
auto noodle = noodBuildTable(lit);
|
||||||
if (noodle) {
|
if (noodle) {
|
||||||
engSize = noodSize(noodle.get());
|
engSize = noodle.size();
|
||||||
}
|
|
||||||
if (stream_control) {
|
|
||||||
// For now, a single literal still goes to noodle and asks
|
|
||||||
// for a great big history
|
|
||||||
stream_control->literal_history_required = lit.s.length() - 1;
|
|
||||||
assert(stream_control->literal_history_required
|
|
||||||
<= stream_control->history_max);
|
|
||||||
}
|
}
|
||||||
eng = move(noodle);
|
eng = move(noodle);
|
||||||
} else {
|
} else {
|
||||||
DEBUG_PRINTF("building a new deal\n");
|
DEBUG_PRINTF("building a new deal\n");
|
||||||
engType = HWLM_ENGINE_FDR;
|
engType = HWLM_ENGINE_FDR;
|
||||||
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
|
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
|
||||||
stream_control);
|
|
||||||
if (fdr) {
|
if (fdr) {
|
||||||
engSize = fdrSize(fdr.get());
|
engSize = fdr.size();
|
||||||
}
|
}
|
||||||
eng = move(fdr);
|
eng = move(fdr);
|
||||||
}
|
}
|
||||||
@ -631,23 +165,12 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
|
|||||||
throw ResourceLimitError();
|
throw ResourceLimitError();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto h = aligned_zmalloc_unique<HWLM>(ROUNDUP_CL(sizeof(HWLM)) + engSize);
|
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
|
||||||
|
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
|
||||||
|
|
||||||
h->type = engType;
|
h->type = engType;
|
||||||
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
||||||
|
|
||||||
if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
|
|
||||||
buildForwardAccel(h.get(), lits, expected_groups);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (stream_control) {
|
|
||||||
DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
|
|
||||||
stream_control->literal_history_required,
|
|
||||||
stream_control->history_max);
|
|
||||||
assert(stream_control->literal_history_required
|
|
||||||
<= stream_control->history_max);
|
|
||||||
}
|
|
||||||
|
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,9 +34,8 @@
|
|||||||
#define HWLM_BUILD_H
|
#define HWLM_BUILD_H
|
||||||
|
|
||||||
#include "hwlm.h"
|
#include "hwlm.h"
|
||||||
#include "hwlm_literal.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -47,30 +46,12 @@ namespace ue2 {
|
|||||||
|
|
||||||
struct CompileContext;
|
struct CompileContext;
|
||||||
struct Grey;
|
struct Grey;
|
||||||
struct target_t;
|
struct hwlmLiteral;
|
||||||
|
|
||||||
/** \brief Structure gathering together the input/output parameters related to
|
|
||||||
* streaming mode operation. */
|
|
||||||
struct hwlmStreamingControl {
|
|
||||||
/** \brief IN parameter: Upper limit on the amount of history that can be
|
|
||||||
* requested. */
|
|
||||||
size_t history_max;
|
|
||||||
|
|
||||||
/** \brief IN parameter: History already known to be used before literal
|
|
||||||
* analysis. */
|
|
||||||
size_t history_min;
|
|
||||||
|
|
||||||
/** \brief OUT parameter: History required by the literal matcher to
|
|
||||||
* correctly match all literals. */
|
|
||||||
size_t literal_history_required;
|
|
||||||
};
|
|
||||||
|
|
||||||
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
|
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
|
||||||
* literals.
|
* literals.
|
||||||
*
|
*
|
||||||
* \param lits The group of literals.
|
* \param lits The group of literals.
|
||||||
* \param stream_control Streaming control parameters. If the matcher will
|
|
||||||
* operate in non-streaming (block) mode, this pointer should be NULL.
|
|
||||||
* \param make_small Optimise matcher for small size.
|
* \param make_small Optimise matcher for small size.
|
||||||
* \param cc Compile context.
|
* \param cc Compile context.
|
||||||
* \param expected_groups FIXME: document me!
|
* \param expected_groups FIXME: document me!
|
||||||
@ -79,10 +60,8 @@ struct hwlmStreamingControl {
|
|||||||
* may result in a nullptr return value, or a std::bad_alloc exception being
|
* may result in a nullptr return value, or a std::bad_alloc exception being
|
||||||
* thrown.
|
* thrown.
|
||||||
*/
|
*/
|
||||||
aligned_unique_ptr<HWLM>
|
bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits,
|
||||||
hwlmBuild(const std::vector<hwlmLiteral> &lits,
|
bool make_small, const CompileContext &cc,
|
||||||
hwlmStreamingControl *stream_control, bool make_small,
|
|
||||||
const CompileContext &cc,
|
|
||||||
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
|
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -37,12 +37,13 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <tuple>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
/** \brief Max length of the literal passed to HWLM. */
|
/** \brief Max length of the literal passed to HWLM. */
|
||||||
#define HWLM_LITERAL_MAX_LEN 255
|
#define HWLM_LITERAL_MAX_LEN 8
|
||||||
|
|
||||||
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
|
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
|
||||||
#define HWLM_MASKLEN 8
|
#define HWLM_MASKLEN 8
|
||||||
@ -111,6 +112,19 @@ struct hwlmLiteral {
|
|||||||
: hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
|
: hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) {
|
||||||
|
return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) <
|
||||||
|
std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) {
|
||||||
|
return a.id == b.id && a.s == b.s && a.nocase == b.nocase &&
|
||||||
|
a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk &&
|
||||||
|
a.cmp == b.cmp;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Consistency test; returns false if the given msk/cmp test can never match
|
* Consistency test; returns false if the given msk/cmp test can never match
|
||||||
* the literal string s.
|
* the literal string s.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -35,7 +35,6 @@
|
|||||||
|
|
||||||
#include "hwlm_literal.h"
|
#include "hwlm_literal.h"
|
||||||
#include "noodle_internal.h"
|
#include "noodle_internal.h"
|
||||||
#include "util/alloc.h"
|
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
@ -67,7 +66,7 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) {
|
|||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
|
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
|
||||||
if (!lit.msk.empty()) {
|
if (!lit.msk.empty()) {
|
||||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -75,7 +74,7 @@ aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
|
|||||||
|
|
||||||
const auto &s = lit.s;
|
const auto &s = lit.s;
|
||||||
size_t noodle_len = sizeof(noodTable) + s.length();
|
size_t noodle_len = sizeof(noodTable) + s.length();
|
||||||
auto n = aligned_zmalloc_unique<noodTable>(noodle_len);
|
auto n = make_zeroed_bytecode_ptr<noodTable>(noodle_len);
|
||||||
assert(n);
|
assert(n);
|
||||||
|
|
||||||
size_t key_offset = findNoodFragOffset(lit);
|
size_t key_offset = findNoodFragOffset(lit);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,11 +30,11 @@
|
|||||||
* \brief Noodle literal matcher: build code.
|
* \brief Noodle literal matcher: build code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef NOODLE_BUILD_H_048A1A6D585A9A
|
#ifndef NOODLE_BUILD_H
|
||||||
#define NOODLE_BUILD_H_048A1A6D585A9A
|
#define NOODLE_BUILD_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
|
|
||||||
struct noodTable;
|
struct noodTable;
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ namespace ue2 {
|
|||||||
struct hwlmLiteral;
|
struct hwlmLiteral;
|
||||||
|
|
||||||
/** \brief Construct a Noodle matcher for the given literal. */
|
/** \brief Construct a Noodle matcher for the given literal. */
|
||||||
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit);
|
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit);
|
||||||
|
|
||||||
size_t noodSize(const noodTable *n);
|
size_t noodSize(const noodTable *n);
|
||||||
|
|
||||||
@ -61,5 +61,5 @@ void noodPrintStats(const noodTable *n, FILE *f);
|
|||||||
|
|
||||||
#endif // DUMP_SUPPORT
|
#endif // DUMP_SUPPORT
|
||||||
|
|
||||||
#endif /* NOODLE_BUILD_H_048A1A6D585A9A */
|
#endif /* NOODLE_BUILD_H */
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,8 +33,11 @@
|
|||||||
#include "noodle_engine.h"
|
#include "noodle_engine.h"
|
||||||
#include "noodle_internal.h"
|
#include "noodle_internal.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
|
#include "util/intrinsics.h"
|
||||||
|
#include "util/join.h"
|
||||||
#include "util/masked_move.h"
|
#include "util/masked_move.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
@ -50,6 +53,24 @@ struct cb_info {
|
|||||||
size_t offsetAdj; //!< used in streaming mode
|
size_t offsetAdj; //!< used in streaming mode
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Per-architecture scan parameters, chosen at compile time:
 *
 *   CHUNKSIZE  number of bytes handled per vector step (64, 32 or 16)
 *   MASK_TYPE  vector type that holds the broadcast key/case masks
 *   Z_BITS     width suffix pasted onto findAndClearLSB_ by the *_ZSCAN
 *              macros in this file
 *   Z_TYPE     integer type those macros use for the bit position
 *
 * Note that the 16-byte fallback still uses a 32-bit match mask.
 */
#if defined(HAVE_AVX512)
|
||||||
|
#define CHUNKSIZE 64
|
||||||
|
#define MASK_TYPE m512
|
||||||
|
#define Z_BITS 64
|
||||||
|
#define Z_TYPE u64a
|
||||||
|
#elif defined(HAVE_AVX2)
|
||||||
|
#define CHUNKSIZE 32
|
||||||
|
#define MASK_TYPE m256
|
||||||
|
#define Z_BITS 32
|
||||||
|
#define Z_TYPE u32
|
||||||
|
#else
|
||||||
|
#define CHUNKSIZE 16
|
||||||
|
#define MASK_TYPE m128
|
||||||
|
#define Z_BITS 32
|
||||||
|
#define Z_TYPE u32
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define RETURN_IF_TERMINATED(x) \
|
#define RETURN_IF_TERMINATED(x) \
|
||||||
{ \
|
{ \
|
||||||
if ((x) == HWLM_TERMINATED) { \
|
if ((x) == HWLM_TERMINATED) { \
|
||||||
@ -60,8 +81,9 @@ struct cb_info {
|
|||||||
#define SINGLE_ZSCAN() \
|
#define SINGLE_ZSCAN() \
|
||||||
do { \
|
do { \
|
||||||
while (unlikely(z)) { \
|
while (unlikely(z)) { \
|
||||||
u32 pos = findAndClearLSB_32(&z); \
|
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
|
||||||
size_t matchPos = d - buf + pos; \
|
size_t matchPos = d - buf + pos; \
|
||||||
|
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||||
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
|
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
|
||||||
matchPos); \
|
matchPos); \
|
||||||
RETURN_IF_TERMINATED(rv); \
|
RETURN_IF_TERMINATED(rv); \
|
||||||
@ -71,8 +93,9 @@ struct cb_info {
|
|||||||
#define DOUBLE_ZSCAN() \
|
#define DOUBLE_ZSCAN() \
|
||||||
do { \
|
do { \
|
||||||
while (unlikely(z)) { \
|
while (unlikely(z)) { \
|
||||||
u32 pos = findAndClearLSB_32(&z); \
|
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
|
||||||
size_t matchPos = d - buf + pos - 1; \
|
size_t matchPos = d - buf + pos - 1; \
|
||||||
|
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||||
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
|
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
|
||||||
noCase, cbi, matchPos); \
|
noCase, cbi, matchPos); \
|
||||||
RETURN_IF_TERMINATED(rv); \
|
RETURN_IF_TERMINATED(rv); \
|
||||||
@ -109,7 +132,11 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
|||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(HAVE_AVX512)
|
||||||
|
#define CHUNKSIZE 64
|
||||||
|
#define MASK_TYPE m512
|
||||||
|
#include "noodle_engine_avx512.c"
|
||||||
|
#elif defined(HAVE_AVX2)
|
||||||
#define CHUNKSIZE 32
|
#define CHUNKSIZE 32
|
||||||
#define MASK_TYPE m256
|
#define MASK_TYPE m256
|
||||||
#include "noodle_engine_avx2.c"
|
#include "noodle_engine_avx2.c"
|
||||||
@ -122,12 +149,14 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
|||||||
static really_inline
|
static really_inline
|
||||||
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
||||||
bool noCase, const struct cb_info *cbi) {
|
bool noCase, const struct cb_info *cbi) {
|
||||||
hwlm_error_t rv;
|
|
||||||
size_t end = len;
|
|
||||||
|
|
||||||
const MASK_TYPE mask1 = getMask(key[0], noCase);
|
const MASK_TYPE mask1 = getMask(key[0], noCase);
|
||||||
const MASK_TYPE caseMask = getCaseMask();
|
const MASK_TYPE caseMask = getCaseMask();
|
||||||
|
|
||||||
|
#if !defined(HAVE_AVX512)
|
||||||
|
hwlm_error_t rv;
|
||||||
|
size_t end = len;
|
||||||
|
|
||||||
if (len < CHUNKSIZE) {
|
if (len < CHUNKSIZE) {
|
||||||
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
|
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
|
||||||
return rv;
|
return rv;
|
||||||
@ -172,13 +201,15 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
|||||||
cbi, s2End, end);
|
cbi, s2End, end);
|
||||||
|
|
||||||
return rv;
|
return rv;
|
||||||
|
#else // HAVE_AVX512
|
||||||
|
return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||||
size_t keyLen, size_t keyOffset, bool noCase,
|
size_t keyLen, size_t keyOffset, bool noCase,
|
||||||
const struct cb_info *cbi) {
|
const struct cb_info *cbi) {
|
||||||
hwlm_error_t rv;
|
|
||||||
// we stop scanning for the key-fragment when the rest of the key can't
|
// we stop scanning for the key-fragment when the rest of the key can't
|
||||||
// possibly fit in the remaining buffer
|
// possibly fit in the remaining buffer
|
||||||
size_t end = len - keyLen + keyOffset + 2;
|
size_t end = len - keyLen + keyOffset + 2;
|
||||||
@ -187,6 +218,9 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
|||||||
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
|
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
|
||||||
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
|
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
|
||||||
|
|
||||||
|
#if !defined(HAVE_AVX512)
|
||||||
|
hwlm_error_t rv;
|
||||||
|
|
||||||
if (end - keyOffset < CHUNKSIZE) {
|
if (end - keyOffset < CHUNKSIZE) {
|
||||||
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||||
mask1, mask2, cbi, keyOffset, end);
|
mask1, mask2, cbi, keyOffset, end);
|
||||||
@ -243,6 +277,10 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
|||||||
caseMask, mask1, mask2, cbi, off, end);
|
caseMask, mask1, mask2, cbi, off, end);
|
||||||
|
|
||||||
return rv;
|
return rv;
|
||||||
|
#else // AVX512
|
||||||
|
return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||||
|
mask1, mask2, cbi, keyOffset, end);
|
||||||
|
#endif // AVX512
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -117,9 +117,9 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
|||||||
if (l < 4) {
|
if (l < 4) {
|
||||||
u8 *vp = (u8*)&v;
|
u8 *vp = (u8*)&v;
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case 3: vp[2] = d[2];
|
case 3: vp[2] = d[2]; // fallthrough
|
||||||
case 2: vp[1] = d[1];
|
case 2: vp[1] = d[1]; // fallthrough
|
||||||
case 1: vp[0] = d[0];
|
case 1: vp[0] = d[0]; // fallthrough
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
v = masked_move256_len(d, l);
|
v = masked_move256_len(d, l);
|
||||||
@ -157,9 +157,9 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
|||||||
if (l < 4) {
|
if (l < 4) {
|
||||||
u8 *vp = (u8*)&v;
|
u8 *vp = (u8*)&v;
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case 3: vp[2] = d[2];
|
case 3: vp[2] = d[2]; // fallthrough
|
||||||
case 2: vp[1] = d[1];
|
case 2: vp[1] = d[1]; // fallthrough
|
||||||
case 1: vp[0] = d[0];
|
case 1: vp[0] = d[0]; // fallthrough
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
v = masked_move256_len(d, l);
|
v = masked_move256_len(d, l);
|
||||||
|
193
src/hwlm/noodle_engine_avx512.c
Normal file
193
src/hwlm/noodle_engine_avx512.c
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* noodle scan parts for AVX512 */
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 getMask(u8 c, bool noCase) {
|
||||||
|
u8 k = caseClear8(c, noCase);
|
||||||
|
return set64x8(k);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m512 getCaseMask(void) {
|
||||||
|
return set64x8(CASE_CLEAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The short scan routine. It is used both to scan data up to an
|
||||||
|
// alignment boundary if needed and to finish off data that the aligned scan
|
||||||
|
// function can't handle (due to small/unaligned chunk at end)
|
||||||
|
static really_inline
|
||||||
|
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||||
|
bool noCase, m512 caseMask, m512 mask1,
|
||||||
|
const struct cb_info *cbi, size_t start,
|
||||||
|
size_t end) {
|
||||||
|
const u8 *d = buf + start;
|
||||||
|
ptrdiff_t scan_len = end - start;
|
||||||
|
DEBUG_PRINTF("scan_len %zu\n", scan_len);
|
||||||
|
assert(scan_len <= 64);
|
||||||
|
if (!scan_len) {
|
||||||
|
return HWLM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
__mmask64 k = (~0ULL) >> (64 - scan_len);
|
||||||
|
DEBUG_PRINTF("load mask 0x%016llx\n", k);
|
||||||
|
|
||||||
|
m512 v = loadu_maskz_m512(k, d);
|
||||||
|
|
||||||
|
if (noCase) {
|
||||||
|
v = and512(v, caseMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// reuse the load mask to indicate valid bytes
|
||||||
|
u64a z = masked_eq512mask(k, mask1, v);
|
||||||
|
|
||||||
|
SINGLE_ZSCAN();
|
||||||
|
|
||||||
|
return HWLM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * AVX-512 single-byte scan over buf[0, len).
 *
 * If the buffer is no longer than 64 bytes the whole range is given to
 * scanSingleShort(). Otherwise the scan runs in three phases:
 *   1. scanSingleShort() covers the bytes up to d1, the pointer rounded up
 *      to a multiple of 64;
 *   2. the loop compares 64 bytes per iteration against mask1 (data is ANDed
 *      with caseMask first when noCase is set) and runs SINGLE_ZSCAN() on the
 *      resulting 64-bit match mask;
 *   3. scanSingleShort() finishes the remaining bytes (at most 64).
 *
 * SINGLE_ZSCAN() refers to the locals d, z, buf, len, key, noCase and cbi by
 * name and may return early from this function (presumably with
 * HWLM_TERMINATED -- confirm against RETURN_IF_TERMINATED).
 *
 * Returns HWLM_TERMINATED if the leading short scan reports it, otherwise
 * the result of the tail scan.
 */
static really_inline
|
||||||
|
hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key,
|
||||||
|
bool noCase, m512 caseMask, m512 mask1,
|
||||||
|
const struct cb_info *cbi) {
|
||||||
|
const u8 *d = buf;
|
||||||
|
const u8 *e = buf + len;
|
||||||
|
DEBUG_PRINTF("start %p end %p \n", d, e);
|
||||||
|
assert(d < e);
|
||||||
|
// 64 bytes or fewer: a single masked short scan covers everything
if (d + 64 >= e) {
|
||||||
|
goto tail;
|
||||||
|
}
|
||||||
|
|
||||||
|
// peel off first part to cacheline boundary
|
||||||
|
const u8 *d1 = ROUNDUP_PTR(d, 64);
|
||||||
|
// d == buf here, so d1 - d is the end offset of the peeled prefix
if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0,
|
||||||
|
d1 - d) == HWLM_TERMINATED) {
|
||||||
|
return HWLM_TERMINATED;
|
||||||
|
}
|
||||||
|
d = d1;
|
||||||
|
|
||||||
|
// strict '<' leaves the final 1..64 bytes for the tail scan
for (; d + 64 < e; d += 64) {
|
||||||
|
DEBUG_PRINTF("d %p e %p \n", d, e);
|
||||||
|
m512 v = noCase ? and512(load512(d), caseMask) : load512(d);
|
||||||
|
|
||||||
|
u64a z = eq512mask(mask1, v);
|
||||||
|
__builtin_prefetch(d + 128);
|
||||||
|
|
||||||
|
SINGLE_ZSCAN();
|
||||||
|
}
|
||||||
|
|
||||||
|
tail:
|
||||||
|
DEBUG_PRINTF("d %p e %p \n", d, e);
|
||||||
|
// finish off tail
|
||||||
|
|
||||||
|
return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf,
|
||||||
|
e - buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||||
|
size_t keyLen, size_t keyOffset, bool noCase,
|
||||||
|
m512 caseMask, m512 mask1, m512 mask2,
|
||||||
|
const struct cb_info *cbi, u64a *lastz0,
|
||||||
|
size_t start, size_t end) {
|
||||||
|
DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0);
|
||||||
|
const u8 *d = buf + start;
|
||||||
|
ptrdiff_t scan_len = end - start;
|
||||||
|
if (!scan_len) {
|
||||||
|
return HWLM_SUCCESS;
|
||||||
|
}
|
||||||
|
assert(scan_len <= 64);
|
||||||
|
__mmask64 k = (~0ULL) >> (64 - scan_len);
|
||||||
|
DEBUG_PRINTF("load mask 0x%016llx scan_len %zu\n", k, scan_len);
|
||||||
|
|
||||||
|
m512 v = loadu_maskz_m512(k, d);
|
||||||
|
if (noCase) {
|
||||||
|
v = and512(v, caseMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a z0 = masked_eq512mask(k, mask1, v);
|
||||||
|
u64a z1 = masked_eq512mask(k, mask2, v);
|
||||||
|
u64a z = (*lastz0 | (z0 << 1)) & z1;
|
||||||
|
DEBUG_PRINTF("z 0x%016llx\n", z);
|
||||||
|
|
||||||
|
DOUBLE_ZSCAN();
|
||||||
|
*lastz0 = z0 >> (scan_len - 1);
|
||||||
|
return HWLM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key,
|
||||||
|
size_t keyLen, size_t keyOffset, bool noCase,
|
||||||
|
m512 caseMask, m512 mask1, m512 mask2,
|
||||||
|
const struct cb_info *cbi, size_t start,
|
||||||
|
size_t end) {
|
||||||
|
const u8 *d = buf + start;
|
||||||
|
const u8 *e = buf + end;
|
||||||
|
u64a lastz0 = 0;
|
||||||
|
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
||||||
|
assert(d < e);
|
||||||
|
if (d + 64 >= e) {
|
||||||
|
goto tail;
|
||||||
|
}
|
||||||
|
|
||||||
|
// peel off first part to cacheline boundary
|
||||||
|
const u8 *d1 = ROUNDUP_PTR(d, 64);
|
||||||
|
if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||||
|
mask1, mask2, cbi, &lastz0, start,
|
||||||
|
d1 - buf) == HWLM_TERMINATED) {
|
||||||
|
return HWLM_TERMINATED;
|
||||||
|
}
|
||||||
|
d = d1;
|
||||||
|
|
||||||
|
for (; d + 64 < e; d += 64) {
|
||||||
|
DEBUG_PRINTF("d %p e %p 0x%016llx\n", d, e, lastz0);
|
||||||
|
m512 v = noCase ? and512(load512(d), caseMask) : load512(d);
|
||||||
|
|
||||||
|
/* we have to pull the masks out of the AVX registers because we can't
|
||||||
|
byte shift between the lanes */
|
||||||
|
u64a z0 = eq512mask(mask1, v);
|
||||||
|
u64a z1 = eq512mask(mask2, v);
|
||||||
|
u64a z = (lastz0 | (z0 << 1)) & z1;
|
||||||
|
lastz0 = z0 >> 63;
|
||||||
|
|
||||||
|
// On large packet buffers, this prefetch appears to get us about 2%.
|
||||||
|
__builtin_prefetch(d + 256);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("z 0x%016llx\n", z);
|
||||||
|
|
||||||
|
DOUBLE_ZSCAN();
|
||||||
|
}
|
||||||
|
|
||||||
|
tail:
|
||||||
|
DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf);
|
||||||
|
// finish off tail
|
||||||
|
|
||||||
|
return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||||
|
mask1, mask2, cbi, &lastz0, d - buf, end);
|
||||||
|
}
|
219
src/nfa/accel.c
219
src/nfa/accel.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,9 +30,6 @@
|
|||||||
#include "shufti.h"
|
#include "shufti.h"
|
||||||
#include "truffle.h"
|
#include "truffle.h"
|
||||||
#include "vermicelli.h"
|
#include "vermicelli.h"
|
||||||
#include "multishufti.h"
|
|
||||||
#include "multitruffle.h"
|
|
||||||
#include "multivermicelli.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
||||||
@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
|||||||
rv = c_end;
|
rv = c_end;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* multibyte matchers */
|
|
||||||
case ACCEL_MLVERM:
|
|
||||||
DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
DEBUG_PRINTF("accel msverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end,
|
|
||||||
accel->mdverm.len1, accel->mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end,
|
|
||||||
accel->mshufti.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
|
|
||||||
accel->mdshufti.len1, accel->mdshufti.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end,
|
|
||||||
accel->mdshufti.len1, accel->mdshufti.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2,
|
|
||||||
c, c_end, accel->mtruffle.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshift_truffleExec(accel->mdtruffle.mask1,
|
|
||||||
accel->mdtruffle.mask2, c, c_end,
|
|
||||||
accel->mdtruffle.len1,
|
|
||||||
accel->mdtruffle.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end);
|
|
||||||
if (c + 15 >= c_end) {
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1,
|
|
||||||
accel->mdtruffle.mask2, c, c_end,
|
|
||||||
accel->mdtruffle.len1,
|
|
||||||
accel->mdtruffle.len2);
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
assert(!"not here");
|
assert(!"not here");
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -61,36 +61,7 @@ enum AccelType {
|
|||||||
ACCEL_DSHUFTI,
|
ACCEL_DSHUFTI,
|
||||||
ACCEL_TRUFFLE,
|
ACCEL_TRUFFLE,
|
||||||
ACCEL_RED_TAPE,
|
ACCEL_RED_TAPE,
|
||||||
/* multibyte vermicellis */
|
|
||||||
ACCEL_MLVERM,
|
|
||||||
ACCEL_MLVERM_NOCASE,
|
|
||||||
ACCEL_MLGVERM,
|
|
||||||
ACCEL_MLGVERM_NOCASE,
|
|
||||||
ACCEL_MSVERM,
|
|
||||||
ACCEL_MSVERM_NOCASE,
|
|
||||||
ACCEL_MSGVERM,
|
|
||||||
ACCEL_MSGVERM_NOCASE,
|
|
||||||
ACCEL_MDSVERM,
|
|
||||||
ACCEL_MDSVERM_NOCASE,
|
|
||||||
ACCEL_MDSGVERM,
|
|
||||||
ACCEL_MDSGVERM_NOCASE,
|
|
||||||
/* multibyte shuftis */
|
|
||||||
ACCEL_MLSHUFTI,
|
|
||||||
ACCEL_MLGSHUFTI,
|
|
||||||
ACCEL_MSSHUFTI,
|
|
||||||
ACCEL_MSGSHUFTI,
|
|
||||||
ACCEL_MDSSHUFTI,
|
|
||||||
ACCEL_MDSGSHUFTI,
|
|
||||||
/* multibyte truffles */
|
|
||||||
ACCEL_MLTRUFFLE,
|
|
||||||
ACCEL_MLGTRUFFLE,
|
|
||||||
ACCEL_MSTRUFFLE,
|
|
||||||
ACCEL_MSGTRUFFLE,
|
|
||||||
ACCEL_MDSTRUFFLE,
|
|
||||||
ACCEL_MDSGTRUFFLE,
|
|
||||||
/* masked dverm */
|
|
||||||
ACCEL_DVERM_MASKED,
|
ACCEL_DVERM_MASKED,
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief Structure for accel framework. */
|
/** \brief Structure for accel framework. */
|
||||||
@ -140,42 +111,12 @@ union AccelAux {
|
|||||||
m128 lo2;
|
m128 lo2;
|
||||||
m128 hi2;
|
m128 hi2;
|
||||||
} dshufti;
|
} dshufti;
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 lo;
|
|
||||||
m128 hi;
|
|
||||||
u8 len;
|
|
||||||
} mshufti;
|
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 lo;
|
|
||||||
m128 hi;
|
|
||||||
u8 len1;
|
|
||||||
u8 len2;
|
|
||||||
} mdshufti;
|
|
||||||
struct {
|
struct {
|
||||||
u8 accel_type;
|
u8 accel_type;
|
||||||
u8 offset;
|
u8 offset;
|
||||||
m128 mask1;
|
m128 mask1;
|
||||||
m128 mask2;
|
m128 mask2;
|
||||||
} truffle;
|
} truffle;
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 mask1;
|
|
||||||
m128 mask2;
|
|
||||||
u8 len;
|
|
||||||
} mtruffle;
|
|
||||||
struct {
|
|
||||||
u8 accel_type;
|
|
||||||
u8 offset;
|
|
||||||
m128 mask1;
|
|
||||||
m128 mask2;
|
|
||||||
u8 len1;
|
|
||||||
u8 len2;
|
|
||||||
} mdtruffle;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,9 +33,11 @@
|
|||||||
#include "nfagraph/ng_limex_accel.h"
|
#include "nfagraph/ng_limex_accel.h"
|
||||||
#include "shufticompile.h"
|
#include "shufticompile.h"
|
||||||
#include "trufflecompile.h"
|
#include "trufflecompile.h"
|
||||||
|
#include "util/accel_scheme.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "util/dump_charclass.h"
|
#include "util/dump_charclass.h"
|
||||||
|
#include "util/small_vector.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
@ -49,16 +51,15 @@ namespace ue2 {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct path {
|
struct path {
|
||||||
vector<CharReach> reach;
|
small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach;
|
||||||
dstate_id_t dest = DEAD_STATE;
|
dstate_id_t dest = DEAD_STATE;
|
||||||
explicit path(dstate_id_t base) : dest(base) {
|
explicit path(dstate_id_t base) : dest(base) {}
|
||||||
}
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
static
|
template<typename Container>
|
||||||
void dump_paths(const vector<path> &paths) {
|
void dump_paths(const Container &paths) {
|
||||||
for (UNUSED const auto &p : paths) {
|
for (UNUSED const path &p : paths) {
|
||||||
DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
|
DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("%zu paths\n", paths.size());
|
DEBUG_PRINTF("%zu paths\n", paths.size());
|
||||||
@ -113,17 +114,17 @@ void extend(const raw_dfa &rdfa, const path &p,
|
|||||||
} else {
|
} else {
|
||||||
path pp = append(p, CharReach(), p.dest);
|
path pp = append(p, CharReach(), p.dest);
|
||||||
all[p.dest].push_back(pp);
|
all[p.dest].push_back(pp);
|
||||||
out.push_back(pp);
|
out.push_back(move(pp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!s.reports_eod.empty()) {
|
if (!s.reports_eod.empty()) {
|
||||||
path pp = append(p, CharReach(), p.dest);
|
path pp = append(p, CharReach(), p.dest);
|
||||||
all[p.dest].push_back(pp);
|
all[p.dest].push_back(pp);
|
||||||
out.push_back(pp);
|
out.push_back(move(pp));
|
||||||
}
|
}
|
||||||
|
|
||||||
map<u32, CharReach> dest;
|
flat_map<u32, CharReach> dest;
|
||||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||||
u32 succ = s.next[rdfa.alpha_remap[i]];
|
u32 succ = s.next[rdfa.alpha_remap[i]];
|
||||||
dest[succ].set(i);
|
dest[succ].set(i);
|
||||||
@ -140,7 +141,7 @@ void extend(const raw_dfa &rdfa, const path &p,
|
|||||||
DEBUG_PRINTF("----good: [%s] -> %u\n",
|
DEBUG_PRINTF("----good: [%s] -> %u\n",
|
||||||
describeClasses(pp.reach).c_str(), pp.dest);
|
describeClasses(pp.reach).c_str(), pp.dest);
|
||||||
all[e.first].push_back(pp);
|
all[e.first].push_back(pp);
|
||||||
out.push_back(pp);
|
out.push_back(move(pp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,8 +163,10 @@ vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
|
|||||||
dump_paths(paths);
|
dump_paths(paths);
|
||||||
|
|
||||||
vector<vector<CharReach>> rv;
|
vector<vector<CharReach>> rv;
|
||||||
|
rv.reserve(paths.size());
|
||||||
for (auto &p : paths) {
|
for (auto &p : paths) {
|
||||||
rv.push_back(move(p.reach));
|
rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()),
|
||||||
|
std::make_move_iterator(p.reach.end())));
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
@ -327,7 +330,7 @@ accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
|||||||
const dstate &raw = rdfa.states[this_idx];
|
const dstate &raw = rdfa.states[this_idx];
|
||||||
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
|
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
|
||||||
bool outs2_broken = false;
|
bool outs2_broken = false;
|
||||||
map<dstate_id_t, CharReach> succs;
|
flat_map<dstate_id_t, CharReach> succs;
|
||||||
|
|
||||||
for (u32 i = 0; i < rev_map.size(); i++) {
|
for (u32 i = 0; i < rev_map.size(); i++) {
|
||||||
if (raw.next[i] == this_idx) {
|
if (raw.next[i] == this_idx) {
|
||||||
@ -379,16 +382,18 @@ accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
|||||||
for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
|
for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
|
||||||
jj = cr_all_j.find_next(jj)) {
|
jj = cr_all_j.find_next(jj)) {
|
||||||
rv.double_byte.emplace((u8)ii, (u8)jj);
|
rv.double_byte.emplace((u8)ii, (u8)jj);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rv.double_byte.size() > 8) {
|
if (rv.double_byte.size() > 8) {
|
||||||
DEBUG_PRINTF("outs2 too big\n");
|
DEBUG_PRINTF("outs2 too big\n");
|
||||||
outs2_broken = true;
|
outs2_broken = true;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
assert(outs2_broken || rv.double_byte.size() <= 8);
|
||||||
if (outs2_broken) {
|
if (outs2_broken) {
|
||||||
rv.double_byte.clear();
|
rv.double_byte.clear();
|
||||||
}
|
}
|
||||||
@ -536,17 +541,17 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
|
|||||||
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
||||||
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
||||||
|
|
||||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
/* Find accel info for a single state. */
|
||||||
|
auto do_state = [&](size_t i) {
|
||||||
if (i == DEAD_STATE) {
|
if (i == DEAD_STATE) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note on report acceleration states: While we can't accelerate while
|
/* Note on report acceleration states: While we can't accelerate while
|
||||||
* we
|
* we are spamming out callbacks, the QR code paths don't raise reports
|
||||||
* are spamming out callbacks, the QR code paths don't raise reports
|
|
||||||
* during scanning so they can accelerate report states. */
|
* during scanning so they can accelerate report states. */
|
||||||
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t single_limit =
|
size_t single_limit =
|
||||||
@ -557,15 +562,28 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
|
|||||||
if (ei.cr.count() > single_limit) {
|
if (ei.cr.count() > single_limit) {
|
||||||
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
||||||
ei.cr.count());
|
ei.cr.count());
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
|
DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
|
||||||
|
|
||||||
rv[i] = ei;
|
rv[i] = ei;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (only_accel_init) {
|
||||||
|
DEBUG_PRINTF("only computing accel for init states\n");
|
||||||
|
do_state(rdfa.start_anchored);
|
||||||
|
if (rdfa.start_floating != rdfa.start_anchored) {
|
||||||
|
do_state(rdfa.start_floating);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("computing accel for all states\n");
|
||||||
|
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||||
|
do_state(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* provide accleration states to states in the region of sds */
|
/* provide acceleration states to states in the region of sds */
|
||||||
if (contains(rv, sds_proxy)) {
|
if (contains(rv, sds_proxy)) {
|
||||||
AccelScheme sds_ei = rv[sds_proxy];
|
AccelScheme sds_ei = rv[sds_proxy];
|
||||||
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -43,8 +43,8 @@ struct Grey;
|
|||||||
|
|
||||||
class accel_dfa_build_strat : public dfa_build_strat {
|
class accel_dfa_build_strat : public dfa_build_strat {
|
||||||
public:
|
public:
|
||||||
explicit accel_dfa_build_strat(const ReportManager &rm_in)
|
accel_dfa_build_strat(const ReportManager &rm_in, bool only_accel_init_in)
|
||||||
: dfa_build_strat(rm_in) {}
|
: dfa_build_strat(rm_in), only_accel_init(only_accel_init_in) {}
|
||||||
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
|
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
|
||||||
virtual size_t accelSize(void) const = 0;
|
virtual size_t accelSize(void) const = 0;
|
||||||
virtual u32 max_allowed_offset_accel() const = 0;
|
virtual u32 max_allowed_offset_accel() const = 0;
|
||||||
@ -53,6 +53,8 @@ public:
|
|||||||
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||||
void *accel_out);
|
void *accel_out);
|
||||||
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
|
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
|
||||||
|
private:
|
||||||
|
bool only_accel_init;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) {
|
|||||||
return "truffle";
|
return "truffle";
|
||||||
case ACCEL_RED_TAPE:
|
case ACCEL_RED_TAPE:
|
||||||
return "red tape";
|
return "red tape";
|
||||||
case ACCEL_MLVERM:
|
|
||||||
return "multibyte long vermicelli";
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
return "multibyte long vermicelli nocase";
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
return "multibyte long-grab vermicelli";
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
return "multibyte long-grab vermicelli nocase";
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
return "multibyte shift vermicelli";
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
return "multibyte shift vermicelli nocase";
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
return "multibyte shift-grab vermicelli";
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
return "multibyte shift-grab vermicelli nocase";
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
return "multibyte doubleshift vermicelli";
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
return "multibyte doubleshift vermicelli nocase";
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
return "multibyte doubleshift-grab vermicelli";
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
return "multibyte doubleshift-grab vermicelli nocase";
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
return "multibyte long shufti";
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
return "multibyte long-grab shufti";
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
return "multibyte shift shufti";
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
return "multibyte shift-grab shufti";
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
return "multibyte doubleshift shufti";
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
return "multibyte doubleshift-grab shufti";
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
return "multibyte long truffle";
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
return "multibyte long-grab truffle";
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
return "multibyte shift truffle";
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
return "multibyte shift-grab truffle";
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
return "multibyte doubleshift truffle";
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
return "multibyte doubleshift-grab truffle";
|
|
||||||
default:
|
default:
|
||||||
return "unknown!";
|
return "unknown!";
|
||||||
}
|
}
|
||||||
@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
|||||||
(const u8 *)&accel.truffle.mask2);
|
(const u8 *)&accel.truffle.mask2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ACCEL_MLVERM:
|
|
||||||
case ACCEL_MLVERM_NOCASE:
|
|
||||||
case ACCEL_MLGVERM:
|
|
||||||
case ACCEL_MLGVERM_NOCASE:
|
|
||||||
case ACCEL_MSVERM:
|
|
||||||
case ACCEL_MSVERM_NOCASE:
|
|
||||||
case ACCEL_MSGVERM:
|
|
||||||
case ACCEL_MSGVERM_NOCASE:
|
|
||||||
fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSVERM:
|
|
||||||
case ACCEL_MDSVERM_NOCASE:
|
|
||||||
case ACCEL_MDSGVERM:
|
|
||||||
case ACCEL_MDSGVERM_NOCASE:
|
|
||||||
fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1,
|
|
||||||
accel.mdverm.len2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLSHUFTI:
|
|
||||||
case ACCEL_MLGSHUFTI:
|
|
||||||
case ACCEL_MSSHUFTI:
|
|
||||||
case ACCEL_MSGSHUFTI:
|
|
||||||
fprintf(f, " len:%u\n", accel.mshufti.len);
|
|
||||||
dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
|
|
||||||
(const u8 *)&accel.mshufti.hi);
|
|
||||||
dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
|
|
||||||
(const u8 *)&accel.mshufti.hi);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSSHUFTI:
|
|
||||||
case ACCEL_MDSGSHUFTI:
|
|
||||||
fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
|
|
||||||
dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
|
|
||||||
(const u8 *)&accel.mdshufti.hi);
|
|
||||||
dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
|
|
||||||
(const u8 *)&accel.mdshufti.hi);
|
|
||||||
break;
|
|
||||||
case ACCEL_MLTRUFFLE:
|
|
||||||
case ACCEL_MLGTRUFFLE:
|
|
||||||
case ACCEL_MSTRUFFLE:
|
|
||||||
case ACCEL_MSGTRUFFLE:
|
|
||||||
fprintf(f, " len:%u\n", accel.mtruffle.len);
|
|
||||||
dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mtruffle.mask2);
|
|
||||||
dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case ACCEL_MDSTRUFFLE:
|
|
||||||
case ACCEL_MDSGTRUFFLE:
|
|
||||||
fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
|
|
||||||
dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mdtruffle.mask2);
|
|
||||||
dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
|
|
||||||
(const u8 *)&accel.mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
break;
|
break;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
|||||||
aux->accel_type = ACCEL_NONE;
|
aux->accel_type = ACCEL_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
|
|
||||||
if (info.ma_type == MultibyteAccelInfo::MAT_NONE) {
|
|
||||||
DEBUG_PRINTF("no multimatch for us :(");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 offset = info.multiaccel_offset;
|
|
||||||
const CharReach &stops = info.multiaccel_stops;
|
|
||||||
|
|
||||||
assert(aux->accel_type == ACCEL_NONE);
|
|
||||||
if (stops.all()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t outs = stops.count();
|
|
||||||
DEBUG_PRINTF("%zu outs\n", outs);
|
|
||||||
assert(outs && outs < 256);
|
|
||||||
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MLVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MLVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MLGVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MLGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MSVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MSVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MSGVERM;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first();
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MSGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mverm.len = info.ma_len1;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MDSVERM;
|
|
||||||
aux->mdverm.offset = offset;
|
|
||||||
aux->mdverm.c = stops.find_first();
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MDSVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
if (outs == 1) {
|
|
||||||
aux->accel_type = ACCEL_MDSGVERM;
|
|
||||||
aux->mdverm.offset = offset;
|
|
||||||
aux->mdverm.c = stops.find_first();
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (outs == 2 && stops.isCaselessChar()) {
|
|
||||||
aux->accel_type = ACCEL_MDSGVERM_NOCASE;
|
|
||||||
aux->mverm.offset = offset;
|
|
||||||
aux->mverm.c = stops.find_first() & CASE_CLEAR;
|
|
||||||
aux->mdverm.len1 = info.ma_len1;
|
|
||||||
aux->mdverm.len2 = info.ma_len2;
|
|
||||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
|
||||||
aux->verm.c);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
|
||||||
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MLSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MLGSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MSSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
|
|
||||||
(u8 *)&aux->mshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MSGSHUFTI;
|
|
||||||
aux->mshufti.offset = offset;
|
|
||||||
aux->mshufti.len = info.ma_len1;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
|
||||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MDSSHUFTI;
|
|
||||||
aux->mdshufti.offset = offset;
|
|
||||||
aux->mdshufti.len1 = info.ma_len1;
|
|
||||||
aux->mdshufti.len2 = info.ma_len2;
|
|
||||||
return;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
|
|
||||||
(u8 *)&aux->mdshufti.hi) == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
aux->accel_type = ACCEL_MDSGSHUFTI;
|
|
||||||
aux->mdshufti.offset = offset;
|
|
||||||
aux->mdshufti.len1 = info.ma_len1;
|
|
||||||
aux->mdshufti.len2 = info.ma_len2;
|
|
||||||
return;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("shufti build failed, falling through\n");
|
|
||||||
|
|
||||||
if (outs <= ACCEL_MAX_STOP_CHAR) {
|
|
||||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
|
||||||
switch (info.ma_type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
aux->accel_type = ACCEL_MLTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
aux->accel_type = ACCEL_MLGTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
aux->accel_type = ACCEL_MSTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
aux->accel_type = ACCEL_MSGTRUFFLE;
|
|
||||||
aux->mtruffle.offset = offset;
|
|
||||||
aux->mtruffle.len = info.ma_len1;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
aux->accel_type = ACCEL_MDSTRUFFLE;
|
|
||||||
aux->mdtruffle.offset = offset;
|
|
||||||
aux->mdtruffle.len1 = info.ma_len1;
|
|
||||||
aux->mdtruffle.len2 = info.ma_len2;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
aux->accel_type = ACCEL_MDSGTRUFFLE;
|
|
||||||
aux->mdtruffle.offset = offset;
|
|
||||||
aux->mdtruffle.len1 = info.ma_len1;
|
|
||||||
aux->mdtruffle.len2 = info.ma_len2;
|
|
||||||
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
|
|
||||||
(u8 *)&aux->mdtruffle.mask2);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
||||||
assert(aux->accel_type == ACCEL_NONE);
|
assert(aux->accel_type == ACCEL_NONE);
|
||||||
if (info.single_stops.none()) {
|
if (info.single_stops.none()) {
|
||||||
@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
|||||||
aux->accel_type = ACCEL_RED_TAPE;
|
aux->accel_type = ACCEL_RED_TAPE;
|
||||||
aux->generic.offset = info.single_offset;
|
aux->generic.offset = info.single_offset;
|
||||||
}
|
}
|
||||||
if (aux->accel_type == ACCEL_NONE) {
|
|
||||||
buildAccelMulti(info, aux);
|
|
||||||
}
|
|
||||||
if (aux->accel_type == ACCEL_NONE) {
|
if (aux->accel_type == ACCEL_NONE) {
|
||||||
buildAccelDouble(info, aux);
|
buildAccelDouble(info, aux);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -37,30 +37,9 @@ union AccelAux;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
struct MultibyteAccelInfo {
|
|
||||||
/* multibyte accel schemes, ordered by strength */
|
|
||||||
enum multiaccel_type {
|
|
||||||
MAT_SHIFT,
|
|
||||||
MAT_SHIFTGRAB,
|
|
||||||
MAT_DSHIFT,
|
|
||||||
MAT_DSHIFTGRAB,
|
|
||||||
MAT_LONG,
|
|
||||||
MAT_LONGGRAB,
|
|
||||||
MAT_MAX,
|
|
||||||
MAT_NONE = MAT_MAX
|
|
||||||
};
|
|
||||||
CharReach cr;
|
|
||||||
u32 offset = 0;
|
|
||||||
u32 len1 = 0;
|
|
||||||
u32 len2 = 0;
|
|
||||||
multiaccel_type type = MAT_NONE;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AccelInfo {
|
struct AccelInfo {
|
||||||
AccelInfo() : single_offset(0U), double_offset(0U),
|
AccelInfo() : single_offset(0U), double_offset(0U),
|
||||||
single_stops(CharReach::dot()),
|
single_stops(CharReach::dot()) {}
|
||||||
multiaccel_offset(0), ma_len1(0), ma_len2(0),
|
|
||||||
ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
|
||||||
u32 single_offset; /**< offset correction to apply to single schemes */
|
u32 single_offset; /**< offset correction to apply to single schemes */
|
||||||
u32 double_offset; /**< offset correction to apply to double schemes */
|
u32 double_offset; /**< offset correction to apply to double schemes */
|
||||||
CharReach double_stop1; /**< single-byte accel stop literals for double
|
CharReach double_stop1; /**< single-byte accel stop literals for double
|
||||||
@ -68,11 +47,6 @@ struct AccelInfo {
|
|||||||
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
|
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
|
||||||
* literals */
|
* literals */
|
||||||
CharReach single_stops; /**< escapes for single byte acceleration */
|
CharReach single_stops; /**< escapes for single byte acceleration */
|
||||||
u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */
|
|
||||||
CharReach multiaccel_stops; /**< escapes for multibyte acceleration */
|
|
||||||
u32 ma_len1; /**< multiaccel len1 */
|
|
||||||
u32 ma_len2; /**< multiaccel len2 */
|
|
||||||
MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
|
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,9 +26,11 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
|
* \file
|
||||||
* \brief Castle: multi-tenant repeat engine, compiler code.
|
* \brief Castle: multi-tenant repeat engine, compiler code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "castlecompile.h"
|
#include "castlecompile.h"
|
||||||
|
|
||||||
#include "castle_internal.h"
|
#include "castle_internal.h"
|
||||||
@ -439,7 +441,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
buildCastle(const CastleProto &proto,
|
buildCastle(const CastleProto &proto,
|
||||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||||
const CompileContext &cc, const ReportManager &rm) {
|
const CompileContext &cc, const ReportManager &rm) {
|
||||||
@ -501,7 +503,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
// possibly means that we've got a repeat that we can't trigger. We do
|
// possibly means that we've got a repeat that we can't trigger. We do
|
||||||
// need to cope with it though.
|
// need to cope with it though.
|
||||||
if (contains(triggers, top)) {
|
if (contains(triggers, top)) {
|
||||||
min_period = minPeriod(triggers.at(top), cr, &is_reset);
|
min_period = depth(minPeriod(triggers.at(top), cr, &is_reset));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_period > pr.bounds.max) {
|
if (min_period > pr.bounds.max) {
|
||||||
@ -560,7 +562,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
|
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
|
||||||
vector<mmbit_sparse_iter> stale_iter;
|
vector<mmbit_sparse_iter> stale_iter;
|
||||||
if (!may_stale.empty()) {
|
if (!may_stale.empty()) {
|
||||||
mmbBuildSparseIterator(stale_iter, may_stale, numRepeats);
|
stale_iter = mmbBuildSparseIterator(may_stale, numRepeats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -577,7 +579,7 @@ buildCastle(const CastleProto &proto,
|
|||||||
total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
|
total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
|
||||||
total_size += byte_length(stale_iter); // stale sparse iter
|
total_size += byte_length(stale_iter); // stale sparse iter
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
nfa->type = verify_u8(CASTLE_NFA);
|
nfa->type = verify_u8(CASTLE_NFA);
|
||||||
nfa->length = verify_u32(total_size);
|
nfa->length = verify_u32(total_size);
|
||||||
nfa->nPositions = verify_u32(subs.size());
|
nfa->nPositions = verify_u32(subs.size());
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,7 +26,8 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
|
* \file
|
||||||
* \brief Castle: multi-tenant repeat engine, compiler code.
|
* \brief Castle: multi-tenant repeat engine, compiler code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -36,7 +37,7 @@
|
|||||||
#include "nfa_kind.h"
|
#include "nfa_kind.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "nfagraph/ng_repeat.h"
|
#include "nfagraph/ng_repeat.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/depth.h"
|
#include "util/depth.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
@ -120,7 +121,7 @@ void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
|
|||||||
* NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
|
* NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
|
||||||
* first.
|
* first.
|
||||||
*/
|
*/
|
||||||
ue2::aligned_unique_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
buildCastle(const CastleProto &proto,
|
buildCastle(const CastleProto &proto,
|
||||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||||
const CompileContext &cc, const ReportManager &rm);
|
const CompileContext &cc, const ReportManager &rm);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,12 +26,14 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
* \brief Build code for DFA minimization
|
* \file
|
||||||
*/
|
* \brief Build code for DFA minimization.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* /Summary of the Hopcrofts algorithm/
|
* /Summary of the Hopcroft minimisation algorithm/
|
||||||
|
*
|
||||||
* partition := {F, Q \ F};
|
* partition := {F, Q \ F};
|
||||||
* work_queue := {F};
|
* work_queue := {F};
|
||||||
* while (work_queue is not empty) do
|
* while (work_queue is not empty) do
|
||||||
@ -57,22 +59,20 @@
|
|||||||
#include "dfa_min.h"
|
#include "dfa_min.h"
|
||||||
|
|
||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "nfa/rdfa.h"
|
#include "rdfa.h"
|
||||||
#include "nfagraph/ng_mcclellan.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/partitioned_set.h"
|
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
|
#include "util/noncopyable.h"
|
||||||
|
#include "util/partitioned_set.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <iterator>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <queue>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
#include <boost/core/noncopyable.hpp>
|
|
||||||
#include <boost/dynamic_bitset.hpp>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -81,30 +81,32 @@ namespace ue2 {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct hopcroft_state_info {
|
struct hopcroft_state_info {
|
||||||
vector<vector<dstate_id_t> > prev;
|
explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {}
|
||||||
|
|
||||||
|
/** \brief Mapping from symbol to a list of predecessors that transition to
|
||||||
|
* this state on that symbol. */
|
||||||
|
vector<vector<dstate_id_t>> prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DFA_components : boost::noncopyable {
|
struct HopcroftInfo : noncopyable {
|
||||||
dstate_id_t nstates;
|
size_t alpha_size; //!< Size of DFA alphabet.
|
||||||
size_t inp_size;
|
queue<size_t> work_queue; //!< Hopcroft work queue of partition indices.
|
||||||
set<size_t> work_queue;
|
partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states.
|
||||||
/*Partition contains reduced states*/
|
vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds)
|
||||||
partitioned_set<dstate_id_t> partition;
|
|
||||||
vector<hopcroft_state_info> states;
|
|
||||||
|
|
||||||
explicit DFA_components(const raw_dfa &rdfa);
|
explicit HopcroftInfo(const raw_dfa &rdfa);
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace
|
} // namespace
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create_map:
|
* \brief Create an initial partitioning and work_queue.
|
||||||
* Creates an initial partitioning and work_queue.
|
*
|
||||||
* Initial partition contains {accepting states..., Non-accepting states}
|
* Initial partition contains {accepting states..., Non-accepting states}
|
||||||
* Initial work_queue contains accepting state subsets
|
* Initial work_queue contains accepting state subsets
|
||||||
*
|
*
|
||||||
* The initial partitioning needs to distinguish between the different
|
* The initial partitioning needs to distinguish between the different
|
||||||
* reporting behaviours (unlike standard hopcroft) --> more than one subset
|
* reporting behaviours (unlike standard Hopcroft) --> more than one subset
|
||||||
* possible for the accepting states.
|
* possible for the accepting states.
|
||||||
*
|
*
|
||||||
* Look for accepting states in both reports and reports_eod.
|
* Look for accepting states in both reports and reports_eod.
|
||||||
@ -114,82 +116,43 @@ struct DFA_components : boost::noncopyable {
|
|||||||
* Non Accept states are added to partition[id+1].
|
* Non Accept states are added to partition[id+1].
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
vector<size_t> create_map(const raw_dfa &rdfa, set<size_t> &work_queue) {
|
vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) {
|
||||||
using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
|
using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
|
||||||
map<ReportKey, size_t> subset_map;
|
map<ReportKey, size_t> subset_map;
|
||||||
vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);
|
vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);
|
||||||
|
|
||||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||||
if (!rdfa.states[i].reports.empty() ||
|
const auto &ds = rdfa.states[i];
|
||||||
!rdfa.states[i].reports_eod.empty()) {
|
if (!ds.reports.empty() || !ds.reports_eod.empty()) {
|
||||||
ReportKey key(rdfa.states[i].reports, rdfa.states[i].reports_eod);
|
ReportKey key(ds.reports, ds.reports_eod);
|
||||||
if (contains(subset_map, key)) {
|
if (contains(subset_map, key)) {
|
||||||
state_to_subset[i] = subset_map[key];
|
state_to_subset[i] = subset_map[key];
|
||||||
} else {
|
} else {
|
||||||
size_t sub = subset_map.size();
|
size_t sub = subset_map.size();
|
||||||
subset_map[key] = sub;
|
subset_map.emplace(std::move(key), sub);
|
||||||
state_to_subset[i] = sub;
|
state_to_subset[i] = sub;
|
||||||
work_queue.insert(sub);
|
work_queue.push(sub);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* handle non accepts */
|
/* Give non-accept states their own subset. */
|
||||||
size_t non_accept_sub = subset_map.size();
|
size_t non_accept_sub = subset_map.size();
|
||||||
for (size_t i = 0; i < state_to_subset.size(); i++) {
|
replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET,
|
||||||
if (state_to_subset[i] == INVALID_SUBSET) {
|
non_accept_sub);
|
||||||
state_to_subset[i] = non_accept_sub;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return state_to_subset;
|
return state_to_subset;
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA_components::DFA_components(const raw_dfa &rdfa)
|
HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa)
|
||||||
: nstates(rdfa.states.size()),
|
: alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)),
|
||||||
inp_size(rdfa.states[nstates - 1].next.size()),
|
states(rdfa.states.size(), hopcroft_state_info(alpha_size)) {
|
||||||
partition(create_map(rdfa, work_queue)) {
|
/* Construct predecessor lists for each state, indexed by symbol. */
|
||||||
/* initializing states */
|
for (size_t i = 0; i < states.size(); i++) { // i is the previous state
|
||||||
for (size_t i = 0; i < nstates; i++) {
|
for (size_t sym = 0; sym < alpha_size; sym++) {
|
||||||
states.push_back(hopcroft_state_info());
|
dstate_id_t present_state = rdfa.states[i].next[sym];
|
||||||
states.back().prev.resize(inp_size);
|
states[present_state].prev[sym].push_back(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < nstates; i++) { // i is the previous state
|
|
||||||
for (size_t j = 0; j < inp_size; j++) {
|
|
||||||
/* Creating X_table */
|
|
||||||
dstate_id_t present_state = rdfa.states[i].next[j];
|
|
||||||
states[present_state].prev[j].push_back(i);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("rdfa.states[%zu].next[%zu] %hu \n", i, j,
|
|
||||||
rdfa.states[i].next[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* choose and remove a set A from work_queue.
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
void get_work_item(DFA_components &mdfa, ue2::flat_set<dstate_id_t> &A) {
|
|
||||||
A.clear();
|
|
||||||
assert(!mdfa.work_queue.empty());
|
|
||||||
set<size_t>::iterator pt = mdfa.work_queue.begin();
|
|
||||||
insert(&A, mdfa.partition[*pt]);
|
|
||||||
mdfa.work_queue.erase(pt);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* X is the set of states for which a transition on the input leads to a state
|
|
||||||
* in A.
|
|
||||||
*/
|
|
||||||
static
|
|
||||||
void create_X(const DFA_components &mdfa, const ue2::flat_set<dstate_id_t> &A,
|
|
||||||
size_t inp, ue2::flat_set<dstate_id_t> &X) {
|
|
||||||
X.clear();
|
|
||||||
|
|
||||||
for (dstate_id_t id : A) {
|
|
||||||
insert(&X, mdfa.states[id].prev[inp]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,14 +169,14 @@ void create_X(const DFA_components &mdfa, const ue2::flat_set<dstate_id_t> &A,
|
|||||||
* - replace S in work_queue by the smaller of the two sets.
|
* - replace S in work_queue by the smaller of the two sets.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void split_and_replace_set(const size_t part_index, DFA_components &mdfa,
|
void split_and_replace_set(const size_t part_index, HopcroftInfo &info,
|
||||||
const ue2::flat_set<dstate_id_t> &splitter) {
|
const flat_set<dstate_id_t> &splitter) {
|
||||||
/* singleton sets cannot be split */
|
/* singleton sets cannot be split */
|
||||||
if (mdfa.partition[part_index].size() == 1) {
|
if (info.partition[part_index].size() == 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t small_index = mdfa.partition.split(part_index, splitter);
|
size_t small_index = info.partition.split(part_index, splitter);
|
||||||
|
|
||||||
if (small_index == INVALID_SUBSET) {
|
if (small_index == INVALID_SUBSET) {
|
||||||
/* the set could not be split */
|
/* the set could not be split */
|
||||||
@ -223,54 +186,56 @@ void split_and_replace_set(const size_t part_index, DFA_components &mdfa,
|
|||||||
/* larger subset remains at the input subset index, if the input subset was
|
/* larger subset remains at the input subset index, if the input subset was
|
||||||
* already in the work queue then the larger subset will remain there. */
|
* already in the work queue then the larger subset will remain there. */
|
||||||
|
|
||||||
mdfa.work_queue.insert(small_index);
|
info.work_queue.push(small_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The complete Hopcrofts algorithm is implemented in this function.
|
* \brief Core of the Hopcroft minimisation algorithm.
|
||||||
* Choose and remove a set tray from work_queue
|
|
||||||
* For each input- X is created.
|
|
||||||
* For each subset in the partition, split_and_replace_sets are called with the
|
|
||||||
* split set.
|
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void dfa_min(DFA_components &mdfa) {
|
void dfa_min(HopcroftInfo &info) {
|
||||||
ue2::flat_set<dstate_id_t> A, X;
|
flat_set<dstate_id_t> curr, sym_preds;
|
||||||
vector<size_t> cand_subsets;
|
vector<size_t> cand_subsets;
|
||||||
|
|
||||||
while (!mdfa.work_queue.empty()) {
|
while (!info.work_queue.empty()) {
|
||||||
get_work_item(mdfa, A);
|
/* Choose and remove a set of states (curr, or A in the description
|
||||||
|
* above) from the work queue. Note that we copy the set because the
|
||||||
|
* partition may be split by the loop below. */
|
||||||
|
curr.clear();
|
||||||
|
insert(&curr, info.partition[info.work_queue.front()]);
|
||||||
|
info.work_queue.pop();
|
||||||
|
|
||||||
for (size_t inp = 0; inp < mdfa.inp_size; inp++) {
|
for (size_t sym = 0; sym < info.alpha_size; sym++) {
|
||||||
create_X(mdfa, A, inp, X);
|
/* Find the set of states sym_preds for which a transition on the
|
||||||
if (X.empty()) {
|
* given symbol leads to a state in curr. */
|
||||||
|
sym_preds.clear();
|
||||||
|
for (dstate_id_t s : curr) {
|
||||||
|
insert(&sym_preds, info.states[s].prev[sym]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sym_preds.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* we only need to consider subsets with at least one member in X for
|
/* we only need to consider subsets with at least one member in
|
||||||
* splitting */
|
* sym_preds for splitting */
|
||||||
cand_subsets.clear();
|
cand_subsets.clear();
|
||||||
mdfa.partition.find_overlapping(X, &cand_subsets);
|
info.partition.find_overlapping(sym_preds, &cand_subsets);
|
||||||
|
|
||||||
for (size_t sub : cand_subsets) {
|
for (size_t sub : cand_subsets) {
|
||||||
split_and_replace_set(sub, mdfa, X);
|
split_and_replace_set(sub, info, sym_preds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creating new dfa table
|
* \brief Build the new DFA state table.
|
||||||
* Map ordering contains key being an equivalence classes first state
|
|
||||||
* and the value being the equivalence class index.
|
|
||||||
* Eq_state[i] tells us new state id the equivalence class located at
|
|
||||||
* partition[i].
|
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void mapping_new_states(const DFA_components &mdfa,
|
void mapping_new_states(const HopcroftInfo &info,
|
||||||
vector<dstate_id_t> &old_to_new,
|
vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) {
|
||||||
raw_dfa &rdfa) {
|
const size_t num_partitions = info.partition.size();
|
||||||
const size_t num_partitions = mdfa.partition.size();
|
|
||||||
|
|
||||||
// Mapping from equiv class's first state to equiv class index.
|
// Mapping from equiv class's first state to equiv class index.
|
||||||
map<dstate_id_t, size_t> ordering;
|
map<dstate_id_t, size_t> ordering;
|
||||||
@ -279,7 +244,7 @@ void mapping_new_states(const DFA_components &mdfa,
|
|||||||
vector<dstate_id_t> eq_state(num_partitions);
|
vector<dstate_id_t> eq_state(num_partitions);
|
||||||
|
|
||||||
for (size_t i = 0; i < num_partitions; i++) {
|
for (size_t i = 0; i < num_partitions; i++) {
|
||||||
ordering[*mdfa.partition[i].begin()] = i;
|
ordering[*info.partition[i].begin()] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
dstate_id_t new_id = 0;
|
dstate_id_t new_id = 0;
|
||||||
@ -287,30 +252,28 @@ void mapping_new_states(const DFA_components &mdfa,
|
|||||||
eq_state[m.second] = new_id++;
|
eq_state[m.second] = new_id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t t = 0; t < mdfa.partition.size(); t++) {
|
for (size_t t = 0; t < info.partition.size(); t++) {
|
||||||
for (dstate_id_t id : mdfa.partition[t]) {
|
for (dstate_id_t id : info.partition[t]) {
|
||||||
old_to_new[id] = eq_state[t];
|
old_to_new[id] = eq_state[t];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<dstate> new_states;
|
vector<dstate> new_states;
|
||||||
new_states.reserve(num_partitions);
|
new_states.reserve(num_partitions);
|
||||||
for (size_t i = 0; i < mdfa.nstates; i++) {
|
|
||||||
if (contains(ordering, i)) {
|
for (const auto &m : ordering) {
|
||||||
new_states.push_back(rdfa.states[i]);
|
new_states.push_back(rdfa.states[m.first]);
|
||||||
}
|
}
|
||||||
}
|
rdfa.states = std::move(new_states);
|
||||||
rdfa.states.swap(new_states);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void renumber_new_states(const DFA_components &mdfa,
|
void renumber_new_states(const HopcroftInfo &info,
|
||||||
const vector<dstate_id_t> &old_to_new,
|
const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) {
|
||||||
raw_dfa &rdfa) {
|
for (size_t i = 0; i < info.partition.size(); i++) {
|
||||||
for (size_t i = 0; i < mdfa.partition.size(); i++) {
|
for (size_t sym = 0; sym < info.alpha_size; sym++) {
|
||||||
for (size_t j = 0; j < mdfa.inp_size; j++) {
|
dstate_id_t output = rdfa.states[i].next[sym];
|
||||||
dstate_id_t output = rdfa.states[i].next[j];
|
rdfa.states[i].next[sym] = old_to_new[output];
|
||||||
rdfa.states[i].next[j] = old_to_new[output];
|
|
||||||
}
|
}
|
||||||
dstate_id_t dad = rdfa.states[i].daddy;
|
dstate_id_t dad = rdfa.states[i].daddy;
|
||||||
rdfa.states[i].daddy = old_to_new[dad];
|
rdfa.states[i].daddy = old_to_new[dad];
|
||||||
@ -321,17 +284,16 @@ void renumber_new_states(const DFA_components &mdfa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void new_dfa(raw_dfa &rdfa, const DFA_components &mdfa) {
|
void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) {
|
||||||
if (mdfa.partition.size() != mdfa.nstates) {
|
if (info.partition.size() == info.states.size()) {
|
||||||
vector<dstate_id_t> old_to_new(mdfa.nstates);
|
return;
|
||||||
mapping_new_states(mdfa, old_to_new, rdfa);
|
|
||||||
renumber_new_states(mdfa, old_to_new, rdfa);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vector<dstate_id_t> old_to_new(info.states.size());
|
||||||
|
mapping_new_states(info, old_to_new, rdfa);
|
||||||
|
renumber_new_states(info, old_to_new, rdfa);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* MAIN FUNCTION
|
|
||||||
*/
|
|
||||||
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
||||||
if (!grey.minimizeDFA) {
|
if (!grey.minimizeDFA) {
|
||||||
return;
|
return;
|
||||||
@ -339,10 +301,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
|||||||
|
|
||||||
UNUSED const size_t states_before = rdfa.states.size();
|
UNUSED const size_t states_before = rdfa.states.size();
|
||||||
|
|
||||||
DFA_components mdfa(rdfa);
|
HopcroftInfo info(rdfa);
|
||||||
|
|
||||||
dfa_min(mdfa);
|
dfa_min(info);
|
||||||
new_dfa(rdfa, mdfa);
|
new_dfa(rdfa, info);
|
||||||
|
|
||||||
DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
|
DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
|
||||||
rdfa.states.size());
|
rdfa.states.size());
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,8 +26,9 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
* \brief Build code for McClellan DFA.
|
* \file
|
||||||
|
* \brief Build code for DFA minimization.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef DFA_MIN_H
|
#ifndef DFA_MIN_H
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -35,7 +35,6 @@
|
|||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "mcclellancompile.h"
|
#include "mcclellancompile.h"
|
||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "util/alloc.h"
|
|
||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "util/graph_range.h"
|
#include "util/graph_range.h"
|
||||||
@ -81,7 +80,7 @@ public:
|
|||||||
gough_build_strat(
|
gough_build_strat(
|
||||||
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
|
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
|
||||||
const map<dstate_id_t, gough_accel_state_info> &accel_info)
|
const map<dstate_id_t, gough_accel_state_info> &accel_info)
|
||||||
: mcclellan_build_strat(r, rm_in), rdfa(r), gg(g),
|
: mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g),
|
||||||
accel_gough_info(accel_info) {}
|
accel_gough_info(accel_info) {}
|
||||||
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
|
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
|
||||||
vector<u32> &reports_eod /* out */,
|
vector<u32> &reports_eod /* out */,
|
||||||
@ -1036,7 +1035,7 @@ void update_accel_prog_offset(const gough_build_strat &gbs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
|
assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
|
||||||
@ -1071,7 +1070,7 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
map<dstate_id_t, gough_accel_state_info> accel_allowed;
|
map<dstate_id_t, gough_accel_state_info> accel_allowed;
|
||||||
find_allowed_accel_states(*cfg, blocks, &accel_allowed);
|
find_allowed_accel_states(*cfg, blocks, &accel_allowed);
|
||||||
gough_build_strat gbs(raw, *cfg, rm, accel_allowed);
|
gough_build_strat gbs(raw, *cfg, rm, accel_allowed);
|
||||||
aligned_unique_ptr<NFA> basic_dfa = mcclellanCompile_i(raw, gbs, cc);
|
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
|
||||||
assert(basic_dfa);
|
assert(basic_dfa);
|
||||||
if (!basic_dfa) {
|
if (!basic_dfa) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -1117,7 +1116,7 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
|||||||
gi.stream_som_loc_width = somPrecision;
|
gi.stream_som_loc_width = somPrecision;
|
||||||
|
|
||||||
u32 gough_size = ROUNDUP_N(curr_offset, 16);
|
u32 gough_size = ROUNDUP_N(curr_offset, 16);
|
||||||
aligned_unique_ptr<NFA> gough_dfa = aligned_zmalloc_unique<NFA>(gough_size);
|
auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);
|
||||||
|
|
||||||
memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
|
memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
|
||||||
memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
|
memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,7 +32,7 @@
|
|||||||
#include "mcclellancompile.h"
|
#include "mcclellancompile.h"
|
||||||
#include "nfa_kind.h"
|
#include "nfa_kind.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
#include "util/order_check.h"
|
#include "util/order_check.h"
|
||||||
|
|
||||||
@ -88,10 +88,10 @@ struct raw_som_dfa : public raw_dfa {
|
|||||||
* som */
|
* som */
|
||||||
};
|
};
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
const ReportManager &rm);
|
const ReportManager &rm);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif
|
#endif // GOUGHCOMPILE_H
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,6 +33,7 @@
|
|||||||
#include "mcclellancompile.h"
|
#include "mcclellancompile.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
|
#include "util/noncopyable.h"
|
||||||
#include "util/order_check.h"
|
#include "util/order_check.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
@ -41,7 +42,6 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/core/noncopyable.hpp>
|
|
||||||
#include <boost/graph/adjacency_list.hpp>
|
#include <boost/graph/adjacency_list.hpp>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -103,7 +103,7 @@ struct GoughSSAVarWithInputs;
|
|||||||
struct GoughSSAVarMin;
|
struct GoughSSAVarMin;
|
||||||
struct GoughSSAVarJoin;
|
struct GoughSSAVarJoin;
|
||||||
|
|
||||||
struct GoughSSAVar : boost::noncopyable {
|
struct GoughSSAVar : noncopyable {
|
||||||
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
|
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
|
||||||
virtual ~GoughSSAVar();
|
virtual ~GoughSSAVar();
|
||||||
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
|
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,11 +39,9 @@
|
|||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "shufti.h"
|
#include "shufti.h"
|
||||||
#include "truffle.h"
|
#include "truffle.h"
|
||||||
#include "multishufti.h"
|
|
||||||
#include "multitruffle.h"
|
|
||||||
#include "multivermicelli.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "vermicelli.h"
|
#include "vermicelli.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
@ -118,7 +116,7 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
|
|||||||
DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n");
|
DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n");
|
||||||
m256 accelPerm = limex->accelPermute;
|
m256 accelPerm = limex->accelPermute;
|
||||||
m256 accelComp = limex->accelCompare;
|
m256 accelComp = limex->accelCompare;
|
||||||
#if !defined(__AVX2__)
|
#if !defined(HAVE_AVX2)
|
||||||
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
||||||
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
||||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||||
@ -153,18 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
|
|||||||
DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
|
DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
|
||||||
m512 accelPerm = limex->accelPermute;
|
m512 accelPerm = limex->accelPermute;
|
||||||
m512 accelComp = limex->accelCompare;
|
m512 accelComp = limex->accelCompare;
|
||||||
#if !defined(__AVX2__)
|
#if defined(HAVE_AVX512)
|
||||||
|
idx = packedExtract512(s, accelPerm, accelComp);
|
||||||
|
#elif defined(HAVE_AVX2)
|
||||||
|
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
||||||
|
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
||||||
|
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||||
|
idx = idx1 | idx2;
|
||||||
|
#else
|
||||||
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
||||||
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
||||||
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
||||||
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
||||||
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
||||||
idx = idx1 | idx2 | idx3 | idx4;
|
idx = idx1 | idx2 | idx3 | idx4;
|
||||||
#else
|
|
||||||
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
|
||||||
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
|
||||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
|
||||||
idx = idx1 | idx2;
|
|
||||||
#endif
|
#endif
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
}
|
}
|
||||||
|
@ -26,9 +26,11 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
|
* \file
|
||||||
* \brief Main NFA build code.
|
* \brief Main NFA build code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "limex_compile.h"
|
#include "limex_compile.h"
|
||||||
|
|
||||||
#include "accel.h"
|
#include "accel.h"
|
||||||
@ -47,6 +49,7 @@
|
|||||||
#include "repeatcompile.h"
|
#include "repeatcompile.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
@ -66,6 +69,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/graph/breadth_first_search.hpp>
|
#include <boost/graph/breadth_first_search.hpp>
|
||||||
|
#include <boost/graph/depth_first_search.hpp>
|
||||||
#include <boost/range/adaptor/map.hpp>
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -89,8 +93,6 @@ struct precalcAccel {
|
|||||||
CharReach double_cr;
|
CharReach double_cr;
|
||||||
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
|
flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
|
||||||
u32 double_offset;
|
u32 double_offset;
|
||||||
|
|
||||||
MultibyteAccelInfo ma_info;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct limex_accel_info {
|
struct limex_accel_info {
|
||||||
@ -354,16 +356,12 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct AccelBuild {
|
struct AccelBuild {
|
||||||
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
|
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {}
|
||||||
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
|
||||||
NFAVertex v;
|
NFAVertex v;
|
||||||
u32 state;
|
u32 state;
|
||||||
u32 offset; // offset correction to apply
|
u32 offset; // offset correction to apply
|
||||||
CharReach stop1; // single-byte accel stop literals
|
CharReach stop1; // single-byte accel stop literals
|
||||||
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
|
flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
|
||||||
u32 ma_len1; // multiaccel len1
|
|
||||||
u32 ma_len2; // multiaccel len2
|
|
||||||
MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -378,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
|
|||||||
build.stop1 = CharReach::dot();
|
build.stop1 = CharReach::dot();
|
||||||
} else {
|
} else {
|
||||||
const precalcAccel &precalc = bi.accel.precalc.at(ss);
|
const precalcAccel &precalc = bi.accel.precalc.at(ss);
|
||||||
unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2;
|
if (precalc.double_lits.empty()) {
|
||||||
if (ma_len >= MULTIACCEL_MIN_LEN) {
|
|
||||||
build.ma_len1 = precalc.ma_info.len1;
|
|
||||||
build.stop1 = precalc.ma_info.cr;
|
|
||||||
build.offset = precalc.ma_info.offset;
|
|
||||||
} else if (precalc.double_lits.empty()) {
|
|
||||||
build.stop1 = precalc.single_cr;
|
build.stop1 = precalc.single_cr;
|
||||||
build.offset = precalc.single_offset;
|
build.offset = precalc.single_offset;
|
||||||
} else {
|
} else {
|
||||||
@ -602,7 +595,6 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
limex_accel_info &accel = bi.accel;
|
limex_accel_info &accel = bi.accel;
|
||||||
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
|
unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map;
|
||||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
|
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic;
|
||||||
const CompileContext &cc = bi.cc;
|
|
||||||
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
|
const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids;
|
||||||
const u32 num_states = bi.num_states;
|
const u32 num_states = bi.num_states;
|
||||||
|
|
||||||
@ -659,27 +651,17 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
|
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
|
||||||
as.double_offset);
|
as.double_offset);
|
||||||
|
|
||||||
// try multibyte acceleration first
|
|
||||||
MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc);
|
|
||||||
|
|
||||||
precalcAccel &pa = accel.precalc[state_set];
|
precalcAccel &pa = accel.precalc[state_set];
|
||||||
useful |= state_set;
|
|
||||||
|
|
||||||
// if we successfully built a multibyte accel scheme, use that
|
|
||||||
if (mai.type != MultibyteAccelInfo::MAT_NONE) {
|
|
||||||
pa.ma_info = mai;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("multibyte acceleration!\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
pa.single_offset = as.offset;
|
pa.single_offset = as.offset;
|
||||||
pa.single_cr = as.cr;
|
pa.single_cr = as.cr;
|
||||||
|
|
||||||
if (as.double_byte.size() != 0) {
|
if (as.double_byte.size() != 0) {
|
||||||
pa.double_offset = as.double_offset;
|
pa.double_offset = as.double_offset;
|
||||||
pa.double_lits = as.double_byte;
|
pa.double_lits = as.double_byte;
|
||||||
pa.double_cr = as.double_cr;
|
pa.double_cr = as.double_cr;
|
||||||
};
|
}
|
||||||
|
|
||||||
|
useful |= state_set;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &m : accel_map) {
|
for (const auto &m : accel_map) {
|
||||||
@ -696,20 +678,9 @@ void fillAccelInfo(build_info &bi) {
|
|||||||
state_set.reset();
|
state_set.reset();
|
||||||
state_set.set(state_id);
|
state_set.set(state_id);
|
||||||
|
|
||||||
bool is_multi = false;
|
|
||||||
auto p_it = accel.precalc.find(state_set);
|
|
||||||
if (p_it != accel.precalc.end()) {
|
|
||||||
const precalcAccel &pa = p_it->second;
|
|
||||||
offset = max(pa.double_offset, pa.single_offset);
|
|
||||||
is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE;
|
|
||||||
assert(offset <= MAX_ACCEL_DEPTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
accel.accelerable.insert(v);
|
accel.accelerable.insert(v);
|
||||||
if (!is_multi) {
|
|
||||||
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
|
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The AccelAux structure has large alignment specified, and this makes some
|
/** The AccelAux structure has large alignment specified, and this makes some
|
||||||
@ -721,6 +692,7 @@ typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>>
|
|||||||
|
|
||||||
static
|
static
|
||||||
u32 getEffectiveAccelStates(const build_info &args,
|
u32 getEffectiveAccelStates(const build_info &args,
|
||||||
|
const unordered_map<NFAVertex, NFAVertex> &dom_map,
|
||||||
u32 active_accel_mask,
|
u32 active_accel_mask,
|
||||||
const vector<AccelBuild> &accelStates) {
|
const vector<AccelBuild> &accelStates) {
|
||||||
/* accelStates is indexed by the acceleration bit index and contains a
|
/* accelStates is indexed by the acceleration bit index and contains a
|
||||||
@ -756,7 +728,6 @@ u32 getEffectiveAccelStates(const build_info &args,
|
|||||||
* so we may still require earlier states to be accurately modelled.
|
* so we may still require earlier states to be accurately modelled.
|
||||||
*/
|
*/
|
||||||
const NGHolder &h = args.h;
|
const NGHolder &h = args.h;
|
||||||
auto dom_map = findDominators(h);
|
|
||||||
|
|
||||||
/* map from accel_id to mask of accel_ids that it is dominated by */
|
/* map from accel_id to mask of accel_ids that it is dominated by */
|
||||||
vector<u32> dominated_by(accelStates.size());
|
vector<u32> dominated_by(accelStates.size());
|
||||||
@ -773,8 +744,8 @@ u32 getEffectiveAccelStates(const build_info &args,
|
|||||||
u32 accel_id = findAndClearLSB_32(&local_accel_mask);
|
u32 accel_id = findAndClearLSB_32(&local_accel_mask);
|
||||||
assert(accel_id < accelStates.size());
|
assert(accel_id < accelStates.size());
|
||||||
NFAVertex v = accelStates[accel_id].v;
|
NFAVertex v = accelStates[accel_id].v;
|
||||||
while (dom_map[v]) {
|
while (contains(dom_map, v) && dom_map.at(v)) {
|
||||||
v = dom_map[v];
|
v = dom_map.at(v);
|
||||||
if (contains(accel_id_map, v)) {
|
if (contains(accel_id_map, v)) {
|
||||||
dominated_by[accel_id] |= 1U << accel_id_map[v];
|
dominated_by[accel_id] |= 1U << accel_id_map[v];
|
||||||
}
|
}
|
||||||
@ -887,6 +858,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto dom_map = findDominators(args.h);
|
||||||
|
|
||||||
// We have 2^n different accel entries, one for each possible
|
// We have 2^n different accel entries, one for each possible
|
||||||
// combination of accelerable states.
|
// combination of accelerable states.
|
||||||
assert(accelStates.size() < 32);
|
assert(accelStates.size() < 32);
|
||||||
@ -900,7 +873,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
|
|||||||
effective_accel_set.push_back(0); /* empty is effectively empty */
|
effective_accel_set.push_back(0); /* empty is effectively empty */
|
||||||
|
|
||||||
for (u32 i = 1; i < accelCount; i++) {
|
for (u32 i = 1; i < accelCount; i++) {
|
||||||
u32 effective_i = getEffectiveAccelStates(args, i, accelStates);
|
u32 effective_i = getEffectiveAccelStates(args, dom_map, i,
|
||||||
|
accelStates);
|
||||||
effective_accel_set.push_back(effective_i);
|
effective_accel_set.push_back(effective_i);
|
||||||
|
|
||||||
if (effective_i == IMPOSSIBLE_ACCEL_MASK) {
|
if (effective_i == IMPOSSIBLE_ACCEL_MASK) {
|
||||||
@ -947,18 +921,10 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
|
|||||||
|
|
||||||
if (contains(accel.precalc, effective_states)) {
|
if (contains(accel.precalc, effective_states)) {
|
||||||
const auto &precalc = accel.precalc.at(effective_states);
|
const auto &precalc = accel.precalc.at(effective_states);
|
||||||
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
|
|
||||||
ainfo.ma_len1 = precalc.ma_info.len1;
|
|
||||||
ainfo.ma_len2 = precalc.ma_info.len2;
|
|
||||||
ainfo.multiaccel_offset = precalc.ma_info.offset;
|
|
||||||
ainfo.multiaccel_stops = precalc.ma_info.cr;
|
|
||||||
ainfo.ma_type = precalc.ma_info.type;
|
|
||||||
} else {
|
|
||||||
ainfo.single_offset = precalc.single_offset;
|
ainfo.single_offset = precalc.single_offset;
|
||||||
ainfo.single_stops = precalc.single_cr;
|
ainfo.single_stops = precalc.single_cr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
buildAccelAux(ainfo, &aux);
|
buildAccelAux(ainfo, &aux);
|
||||||
|
|
||||||
@ -1637,6 +1603,84 @@ u32 findBestNumOfVarShifts(const build_info &args,
|
|||||||
return bestNumOfVarShifts;
|
return bestNumOfVarShifts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool cannotDie(const build_info &args, const set<NFAVertex> &tops) {
|
||||||
|
const auto &h = args.h;
|
||||||
|
|
||||||
|
// When this top is activated, all of the vertices in 'tops' are switched
|
||||||
|
// on. If any of those lead to a graph that cannot die, then this top
|
||||||
|
// cannot die.
|
||||||
|
|
||||||
|
// For each top, we use a depth-first search to traverse the graph from the
|
||||||
|
// top, looking for a cyclic path consisting of vertices of dot reach. If
|
||||||
|
// one exists, then the NFA cannot die after this top is triggered.
|
||||||
|
|
||||||
|
vector<boost::default_color_type> colours(num_vertices(h));
|
||||||
|
auto colour_map = boost::make_iterator_property_map(colours.begin(),
|
||||||
|
get(vertex_index, h));
|
||||||
|
|
||||||
|
struct CycleFound {};
|
||||||
|
struct CannotDieVisitor : public boost::default_dfs_visitor {
|
||||||
|
void back_edge(const NFAEdge &e, const NGHolder &g) const {
|
||||||
|
DEBUG_PRINTF("back-edge %zu,%zu\n", g[source(e, g)].index,
|
||||||
|
g[target(e, g)].index);
|
||||||
|
if (g[target(e, g)].char_reach.all()) {
|
||||||
|
assert(g[source(e, g)].char_reach.all());
|
||||||
|
throw CycleFound();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (const auto &top : tops) {
|
||||||
|
DEBUG_PRINTF("checking top vertex %zu\n", h[top].index);
|
||||||
|
|
||||||
|
// Constrain the search to the top vertices and any dot vertices it
|
||||||
|
// can reach.
|
||||||
|
auto term_func = [&](NFAVertex v, const NGHolder &g) {
|
||||||
|
if (v == top) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!g[v].char_reach.all()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (contains(args.br_cyclic, v) &&
|
||||||
|
args.br_cyclic.at(v).repeatMax != depth::infinity()) {
|
||||||
|
// Bounded repeat vertices without inf max can be turned
|
||||||
|
// off.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map,
|
||||||
|
term_func);
|
||||||
|
}
|
||||||
|
} catch (const CycleFound &) {
|
||||||
|
DEBUG_PRINTF("cycle found\n");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief True if this NFA cannot ever be in no states at all. */
|
||||||
|
static
|
||||||
|
bool cannotDie(const build_info &args) {
|
||||||
|
const auto &h = args.h;
|
||||||
|
const auto &state_ids = args.state_ids;
|
||||||
|
|
||||||
|
// If we have a startDs we're actually using, we can't die.
|
||||||
|
if (state_ids.at(h.startDs) != NO_STATE) {
|
||||||
|
DEBUG_PRINTF("is using startDs\n");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return all_of_in(args.tops | map_values, [&](const set<NFAVertex> &verts) {
|
||||||
|
return cannotDie(args, verts);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
template<NFAEngineType dtype>
|
template<NFAEngineType dtype>
|
||||||
struct Factory {
|
struct Factory {
|
||||||
// typedefs for readability, for types derived from traits
|
// typedefs for readability, for types derived from traits
|
||||||
@ -1700,7 +1744,7 @@ struct Factory {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void buildRepeats(const build_info &args,
|
void buildRepeats(const build_info &args,
|
||||||
vector<pair<aligned_unique_ptr<NFARepeatInfo>, size_t>> &out,
|
vector<bytecode_ptr<NFARepeatInfo>> &out,
|
||||||
u32 *scratchStateSize, u32 *streamState) {
|
u32 *scratchStateSize, u32 *streamState) {
|
||||||
out.reserve(args.repeats.size());
|
out.reserve(args.repeats.size());
|
||||||
|
|
||||||
@ -1712,7 +1756,7 @@ struct Factory {
|
|||||||
|
|
||||||
u32 tableOffset, tugMaskOffset;
|
u32 tableOffset, tugMaskOffset;
|
||||||
size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset);
|
size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset);
|
||||||
auto info = aligned_zmalloc_unique<NFARepeatInfo>(len);
|
auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len);
|
||||||
char *info_ptr = (char *)info.get();
|
char *info_ptr = (char *)info.get();
|
||||||
|
|
||||||
// Collect state space info.
|
// Collect state space info.
|
||||||
@ -1766,7 +1810,7 @@ struct Factory {
|
|||||||
*streamState += streamStateLen;
|
*streamState += streamStateLen;
|
||||||
*scratchStateSize += sizeof(RepeatControl);
|
*scratchStateSize += sizeof(RepeatControl);
|
||||||
|
|
||||||
out.emplace_back(move(info), len);
|
out.emplace_back(move(info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2074,8 +2118,7 @@ struct Factory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void writeRepeats(const vector<pair<aligned_unique_ptr<NFARepeatInfo>,
|
void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats,
|
||||||
size_t>> &repeats,
|
|
||||||
vector<u32> &repeatOffsets, implNFA_t *limex,
|
vector<u32> &repeatOffsets, implNFA_t *limex,
|
||||||
const u32 repeatOffsetsOffset, const u32 repeatOffset) {
|
const u32 repeatOffsetsOffset, const u32 repeatOffset) {
|
||||||
const u32 num_repeats = verify_u32(repeats.size());
|
const u32 num_repeats = verify_u32(repeats.size());
|
||||||
@ -2088,10 +2131,9 @@ struct Factory {
|
|||||||
|
|
||||||
for (u32 i = 0; i < num_repeats; i++) {
|
for (u32 i = 0; i < num_repeats; i++) {
|
||||||
repeatOffsets[i] = offset;
|
repeatOffsets[i] = offset;
|
||||||
assert(repeats[i].first);
|
assert(repeats[i]);
|
||||||
memcpy((char *)limex + offset, repeats[i].first.get(),
|
memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size());
|
||||||
repeats[i].second);
|
offset += repeats[i].size();
|
||||||
offset += repeats[i].second;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write repeat offset lookup table.
|
// Write repeat offset lookup table.
|
||||||
@ -2112,19 +2154,19 @@ struct Factory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<NFA> generateNfa(const build_info &args) {
|
bytecode_ptr<NFA> generateNfa(const build_info &args) {
|
||||||
if (args.num_states > NFATraits<dtype>::maxStates) {
|
if (args.num_states > NFATraits<dtype>::maxStates) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build bounded repeat structures.
|
// Build bounded repeat structures.
|
||||||
vector<pair<aligned_unique_ptr<NFARepeatInfo>, size_t>> repeats;
|
vector<bytecode_ptr<NFARepeatInfo>> repeats;
|
||||||
u32 repeats_full_state = 0;
|
u32 repeats_full_state = 0;
|
||||||
u32 repeats_stream_state = 0;
|
u32 repeats_stream_state = 0;
|
||||||
buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state);
|
buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state);
|
||||||
size_t repeatSize = 0;
|
size_t repeatSize = 0;
|
||||||
for (size_t i = 0; i < repeats.size(); i++) {
|
for (size_t i = 0; i < repeats.size(); i++) {
|
||||||
repeatSize += repeats[i].second;
|
repeatSize += repeats[i].size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// We track report lists that have already been written into the global
|
// We track report lists that have already been written into the global
|
||||||
@ -2214,7 +2256,7 @@ struct Factory {
|
|||||||
|
|
||||||
size_t nfaSize = sizeof(NFA) + offset;
|
size_t nfaSize = sizeof(NFA) + offset;
|
||||||
DEBUG_PRINTF("nfa size %zu\n", nfaSize);
|
DEBUG_PRINTF("nfa size %zu\n", nfaSize);
|
||||||
auto nfa = aligned_zmalloc_unique<NFA>(nfaSize);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize);
|
||||||
assert(nfa); // otherwise we would have thrown std::bad_alloc
|
assert(nfa); // otherwise we would have thrown std::bad_alloc
|
||||||
|
|
||||||
implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get());
|
implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get());
|
||||||
@ -2234,6 +2276,11 @@ struct Factory {
|
|||||||
limex->shiftCount = shiftCount;
|
limex->shiftCount = shiftCount;
|
||||||
writeShiftMasks(args, limex);
|
writeShiftMasks(args, limex);
|
||||||
|
|
||||||
|
if (cannotDie(args)) {
|
||||||
|
DEBUG_PRINTF("nfa cannot die\n");
|
||||||
|
setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE);
|
||||||
|
}
|
||||||
|
|
||||||
// Determine the state required for our state vector.
|
// Determine the state required for our state vector.
|
||||||
findStateSize(args, limex);
|
findStateSize(args, limex);
|
||||||
|
|
||||||
@ -2295,7 +2342,7 @@ struct Factory {
|
|||||||
|
|
||||||
template<NFAEngineType dtype>
|
template<NFAEngineType dtype>
|
||||||
struct generateNfa {
|
struct generateNfa {
|
||||||
static aligned_unique_ptr<NFA> call(const build_info &args) {
|
static bytecode_ptr<NFA> call(const build_info &args) {
|
||||||
return Factory<dtype>::generateNfa(args);
|
return Factory<dtype>::generateNfa(args);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -2392,16 +2439,14 @@ u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> generate(NGHolder &h,
|
bytecode_ptr<NFA> generate(NGHolder &h,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||||
const vector<BoundedRepeatData> &repeats,
|
const vector<BoundedRepeatData> &repeats,
|
||||||
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||||
const map<NFAVertex, NFAStateSet> &squashMap,
|
const map<NFAVertex, NFAStateSet> &squashMap,
|
||||||
const map<u32, set<NFAVertex>> &tops,
|
const map<u32, set<NFAVertex>> &tops,
|
||||||
const set<NFAVertex> &zombies,
|
const set<NFAVertex> &zombies, bool do_accel,
|
||||||
bool do_accel,
|
bool stateCompression, u32 hint,
|
||||||
bool stateCompression,
|
|
||||||
u32 hint,
|
|
||||||
const CompileContext &cc) {
|
const CompileContext &cc) {
|
||||||
const u32 num_states = max_state(states) + 1;
|
const u32 num_states = max_state(states) + 1;
|
||||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,7 +26,8 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/**
|
||||||
|
* \file
|
||||||
* \brief Main NFA build code.
|
* \brief Main NFA build code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -37,10 +38,10 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "nfagraph/ng_holder.h"
|
#include "nfagraph/ng_holder.h"
|
||||||
#include "nfagraph/ng_squash.h" // for NFAStateSet
|
#include "nfagraph/ng_squash.h" // for NFAStateSet
|
||||||
#include "util/alloc.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
@ -50,7 +51,8 @@ namespace ue2 {
|
|||||||
struct BoundedRepeatData;
|
struct BoundedRepeatData;
|
||||||
struct CompileContext;
|
struct CompileContext;
|
||||||
|
|
||||||
/** \brief Construct a LimEx NFA from an NGHolder.
|
/**
|
||||||
|
* \brief Construct a LimEx NFA from an NGHolder.
|
||||||
*
|
*
|
||||||
* \param g Input NFA graph. Must have state IDs assigned.
|
* \param g Input NFA graph. Must have state IDs assigned.
|
||||||
* \param repeats Bounded repeat information, if any.
|
* \param repeats Bounded repeat information, if any.
|
||||||
@ -66,7 +68,7 @@ struct CompileContext;
|
|||||||
* \return a built NFA, or nullptr if no NFA could be constructed for this
|
* \return a built NFA, or nullptr if no NFA could be constructed for this
|
||||||
* graph.
|
* graph.
|
||||||
*/
|
*/
|
||||||
aligned_unique_ptr<NFA> generate(NGHolder &g,
|
bytecode_ptr<NFA> generate(NGHolder &g,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||||
const std::vector<BoundedRepeatData> &repeats,
|
const std::vector<BoundedRepeatData> &repeats,
|
||||||
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -290,6 +290,20 @@ static
|
|||||||
void dumpLimexText(const limex_type *limex, FILE *f) {
|
void dumpLimexText(const limex_type *limex, FILE *f) {
|
||||||
u32 size = limex_traits<limex_type>::size;
|
u32 size = limex_traits<limex_type>::size;
|
||||||
|
|
||||||
|
fprintf(f, "%u-bit LimEx NFA (%u shifts, %u exceptions)\n", size,
|
||||||
|
limex->shiftCount, limex->exceptionCount);
|
||||||
|
fprintf(f, "flags: ");
|
||||||
|
if (limex->flags & LIMEX_FLAG_COMPRESS_STATE) {
|
||||||
|
fprintf(f, "COMPRESS_STATE ");
|
||||||
|
}
|
||||||
|
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
|
||||||
|
fprintf(f, "COMPRESS_MASKED ");
|
||||||
|
}
|
||||||
|
if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
|
||||||
|
fprintf(f, "CANNOT_DIE ");
|
||||||
|
}
|
||||||
|
fprintf(f, "\n\n");
|
||||||
|
|
||||||
dumpMask(f, "init", (const u8 *)&limex->init, size);
|
dumpMask(f, "init", (const u8 *)&limex->init, size);
|
||||||
dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size);
|
dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size);
|
||||||
dumpMask(f, "accept", (const u8 *)&limex->accept, size);
|
dumpMask(f, "accept", (const u8 *)&limex->accept, size);
|
||||||
|
@ -85,6 +85,7 @@
|
|||||||
|
|
||||||
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
||||||
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
||||||
|
#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */
|
||||||
|
|
||||||
enum LimExTrigger {
|
enum LimExTrigger {
|
||||||
LIMEX_TRIGGER_NONE = 0,
|
LIMEX_TRIGGER_NONE = 0,
|
||||||
|
@ -60,6 +60,7 @@
|
|||||||
#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
|
#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
|
||||||
#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
|
#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
|
||||||
#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
|
#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
|
||||||
|
#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel)
|
||||||
#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
|
#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
|
||||||
#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
|
#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
|
||||||
#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
|
#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
|
||||||
@ -172,24 +173,75 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
|
|||||||
switch (limex_m->shiftCount) { \
|
switch (limex_m->shiftCount) { \
|
||||||
case 8: \
|
case 8: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 7: \
|
case 7: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 6: \
|
case 6: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 5: \
|
case 5: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 4: \
|
case 4: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 3: \
|
case 3: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 2: \
|
case 2: \
|
||||||
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \
|
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \
|
||||||
|
/* fallthrough */ \
|
||||||
case 1: \
|
case 1: \
|
||||||
|
/* fallthrough */ \
|
||||||
case 0: \
|
case 0: \
|
||||||
; \
|
; \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief LimEx NFAS inner loop without accel.
|
||||||
|
*
|
||||||
|
* Note that the "all zeroes" early death check is only performed if can_die is
|
||||||
|
* true.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static really_inline
|
||||||
|
char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc,
|
||||||
|
size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx,
|
||||||
|
u64a offset, const char flags, u64a *final_loc,
|
||||||
|
const char first_match, const char can_die) {
|
||||||
|
const ENG_STATE_T *reach = get_reach_table(limex);
|
||||||
|
#if SIZE < 256
|
||||||
|
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
|
||||||
|
#endif
|
||||||
|
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||||
|
STATE_T s = *s_ptr;
|
||||||
|
|
||||||
|
size_t i = *loc;
|
||||||
|
for (; i != length; i++) {
|
||||||
|
DUMP_INPUT(i);
|
||||||
|
if (can_die && ISZERO_STATE(s)) {
|
||||||
|
DEBUG_PRINTF("no states are switched on, early exit\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATE_T succ;
|
||||||
|
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
||||||
|
|
||||||
|
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
|
||||||
|
&succ, final_loc, ctx, flags, 0, first_match)) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
u8 c = input[i];
|
||||||
|
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
||||||
|
}
|
||||||
|
|
||||||
|
*loc = i;
|
||||||
|
*s_ptr = s;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
||||||
@ -202,7 +254,8 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
= LOAD_FROM_ENG(&limex->accel_and_friends);
|
= LOAD_FROM_ENG(&limex->accel_and_friends);
|
||||||
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
|
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
|
||||||
#endif
|
#endif
|
||||||
const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset);
|
const u8 *accelTable =
|
||||||
|
(const u8 *)((const char *)limex + limex->accelTableOffset);
|
||||||
const union AccelAux *accelAux =
|
const union AccelAux *accelAux =
|
||||||
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
|
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
|
||||||
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||||
@ -221,24 +274,20 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
without_accel:
|
without_accel:
|
||||||
for (; i != min_accel_offset; i++) {
|
if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
|
||||||
DUMP_INPUT(i);
|
const char can_die = 0;
|
||||||
if (ISZERO_STATE(s)) {
|
if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset,
|
||||||
DEBUG_PRINTF("no states are switched on, early exit\n");
|
flags, final_loc, first_match,
|
||||||
ctx->s = s;
|
can_die) == MO_HALT_MATCHING) {
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
u8 c = input[i];
|
const char can_die = 1;
|
||||||
STATE_T succ;
|
if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset,
|
||||||
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
flags, final_loc, first_match,
|
||||||
|
can_die) == MO_HALT_MATCHING) {
|
||||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
|
|
||||||
&succ, final_loc, ctx, flags, 0, first_match)) {
|
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
with_accel:
|
with_accel:
|
||||||
@ -279,7 +328,6 @@ with_accel:
|
|||||||
goto without_accel;
|
goto without_accel;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 c = input[i];
|
|
||||||
STATE_T succ;
|
STATE_T succ;
|
||||||
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
||||||
|
|
||||||
@ -288,6 +336,7 @@ with_accel:
|
|||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u8 c = input[i];
|
||||||
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -333,14 +382,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
u64a *final_loc = NULL;
|
u64a *final_loc = NULL;
|
||||||
|
|
||||||
for (size_t i = length; i != 0; i--) {
|
for (size_t i = length; i != 0; i--) {
|
||||||
DUMP_INPUT(i-1);
|
DUMP_INPUT(i - 1);
|
||||||
if (ISZERO_STATE(s)) {
|
if (ISZERO_STATE(s)) {
|
||||||
DEBUG_PRINTF("no states are switched on, early exit\n");
|
DEBUG_PRINTF("no states are switched on, early exit\n");
|
||||||
ctx->s = s;
|
ctx->s = s;
|
||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 c = input[i-1];
|
|
||||||
STATE_T succ;
|
STATE_T succ;
|
||||||
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
|
||||||
|
|
||||||
@ -349,6 +397,7 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u8 c = input[i - 1];
|
||||||
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -999,6 +1048,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
|||||||
#undef RUN_ACCEL_FN
|
#undef RUN_ACCEL_FN
|
||||||
#undef RUN_EXCEPTIONS_FN
|
#undef RUN_EXCEPTIONS_FN
|
||||||
#undef REV_STREAM_FN
|
#undef REV_STREAM_FN
|
||||||
|
#undef LOOP_NOACCEL_FN
|
||||||
#undef STREAM_FN
|
#undef STREAM_FN
|
||||||
#undef STREAMCB_FN
|
#undef STREAMCB_FN
|
||||||
#undef STREAMFIRST_FN
|
#undef STREAMFIRST_FN
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,22 +38,23 @@
|
|||||||
#define LIMEX_SHUFFLE_H
|
#define LIMEX_SHUFFLE_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
||||||
m128 shuffled = pshufb(s, permute);
|
m128 shuffled = pshufb_m128(s, permute);
|
||||||
m128 compared = and128(shuffled, compare);
|
m128 compared = and128(shuffled, compare);
|
||||||
u16 rv = ~movemask128(eq128(compared, shuffled));
|
u16 rv = ~movemask128(eq128(compared, shuffled));
|
||||||
return (u32)rv;
|
return (u32)rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(HAVE_AVX2)
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
||||||
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
||||||
m256 shuffled = vpshufb(s, permute);
|
m256 shuffled = pshufb_m256(s, permute);
|
||||||
m256 compared = and256(shuffled, compare);
|
m256 compared = and256(shuffled, compare);
|
||||||
u32 rv = ~movemask256(eq256(compared, shuffled));
|
u32 rv = ~movemask256(eq256(compared, shuffled));
|
||||||
// stitch the lane-wise results back together
|
// stitch the lane-wise results back together
|
||||||
@ -61,4 +62,17 @@ u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
|||||||
}
|
}
|
||||||
#endif // AVX2
|
#endif // AVX2
|
||||||
|
|
||||||
|
#if defined(HAVE_AVX512)
|
||||||
|
static really_inline
|
||||||
|
u32 packedExtract512(m512 s, const m512 permute, const m512 compare) {
|
||||||
|
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
||||||
|
m512 shuffled = pshufb_m512(s, permute);
|
||||||
|
m512 compared = and512(shuffled, compare);
|
||||||
|
u64a rv = ~eq512mask(compared, shuffled);
|
||||||
|
// stitch the lane-wise results back together
|
||||||
|
rv = rv >> 32 | rv;
|
||||||
|
return (u32)(((rv >> 16) | rv) & 0xffffU);
|
||||||
|
}
|
||||||
|
#endif // AVX512
|
||||||
|
|
||||||
#endif // LIMEX_SHUFFLE_H
|
#endif // LIMEX_SHUFFLE_H
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -456,8 +456,7 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
|
bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
|
||||||
const CompileContext &cc,
|
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
DEBUG_PRINTF("building mcclellan 16\n");
|
DEBUG_PRINTF("building mcclellan 16\n");
|
||||||
|
|
||||||
@ -497,7 +496,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
|
|||||||
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
char *nfa_base = (char *)nfa.get();
|
char *nfa_base = (char *)nfa.get();
|
||||||
|
|
||||||
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
@ -685,8 +684,7 @@ void allocateFSN8(dfa_info &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
|
bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
|
||||||
const CompileContext &cc,
|
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
DEBUG_PRINTF("building mcclellan 8\n");
|
DEBUG_PRINTF("building mcclellan 8\n");
|
||||||
|
|
||||||
@ -717,12 +715,13 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
|
|||||||
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
char *nfa_base = (char *)nfa.get();
|
char *nfa_base = (char *)nfa.get();
|
||||||
|
|
||||||
mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
|
mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8);
|
allocateFSN8(info, accel_escape_info, &m->accel_limit_8,
|
||||||
|
&m->accept_limit_8);
|
||||||
populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
|
populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
|
||||||
accel_escape_info.size(), arb, single, nfa.get());
|
accel_escape_info.size(), arb, single, nfa.get());
|
||||||
|
|
||||||
@ -763,7 +762,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
|
|||||||
#define MAX_SHERMAN_LIST_LEN 8
|
#define MAX_SHERMAN_LIST_LEN 8
|
||||||
|
|
||||||
static
|
static
|
||||||
void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
|
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
|
||||||
dstate_id_t max) {
|
dstate_id_t max) {
|
||||||
if (candidate < max) {
|
if (candidate < max) {
|
||||||
dest.insert(candidate);
|
dest.insert(candidate);
|
||||||
@ -771,19 +770,41 @@ void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void addSuccessors(set<dstate_id_t> &dest, const dstate &source,
|
void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
|
||||||
u16 alphasize, dstate_id_t curr_id) {
|
u16 alphasize, dstate_id_t curr_id) {
|
||||||
for (symbol_t s = 0; s < alphasize; s++) {
|
for (symbol_t s = 0; s < alphasize; s++) {
|
||||||
addIfEarlier(dest, source.next[s], curr_id);
|
addIfEarlier(dest, source.next[s], curr_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* \brief Returns a set of states to search for a better daddy. */
|
||||||
|
static
|
||||||
|
flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
|
||||||
|
dstate_id_t curr_id) {
|
||||||
|
flat_set<dstate_id_t> hinted;
|
||||||
|
|
||||||
|
addIfEarlier(hinted, 0, curr_id);
|
||||||
|
addIfEarlier(hinted, info.raw.start_anchored, curr_id);
|
||||||
|
addIfEarlier(hinted, info.raw.start_floating, curr_id);
|
||||||
|
|
||||||
|
// Add existing daddy and his successors, then search back one generation.
|
||||||
|
const u16 alphasize = info.impl_alpha_size;
|
||||||
|
dstate_id_t daddy = info.states[curr_id].daddy;
|
||||||
|
for (u32 level = 0; daddy && level < 2; level++) {
|
||||||
|
addIfEarlier(hinted, daddy, curr_id);
|
||||||
|
addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
|
||||||
|
daddy = info.states[daddy].daddy;
|
||||||
|
}
|
||||||
|
|
||||||
|
return hinted;
|
||||||
|
}
|
||||||
|
|
||||||
#define MAX_SHERMAN_SELF_LOOP 20
|
#define MAX_SHERMAN_SELF_LOOP 20
|
||||||
|
|
||||||
static
|
static
|
||||||
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
||||||
bool using8bit, bool any_cyclic_near_anchored_state,
|
bool any_cyclic_near_anchored_state,
|
||||||
const Grey &grey) {
|
bool trust_daddy_states, const Grey &grey) {
|
||||||
if (!grey.allowShermanStates) {
|
if (!grey.allowShermanStates) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -818,21 +839,21 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
|||||||
dstate_id_t best_daddy = 0;
|
dstate_id_t best_daddy = 0;
|
||||||
dstate &currState = info.states[curr_id];
|
dstate &currState = info.states[curr_id];
|
||||||
|
|
||||||
set<dstate_id_t> hinted; /* set of states to search for a better daddy */
|
flat_set<dstate_id_t> hinted;
|
||||||
addIfEarlier(hinted, 0, curr_id);
|
if (trust_daddy_states) {
|
||||||
addIfEarlier(hinted, info.raw.start_anchored, curr_id);
|
// Use the daddy already set for this state so long as it isn't already
|
||||||
addIfEarlier(hinted, info.raw.start_floating, curr_id);
|
// a Sherman state.
|
||||||
|
if (!info.is_sherman(currState.daddy)) {
|
||||||
dstate_id_t mydaddy = currState.daddy;
|
hinted.insert(currState.daddy);
|
||||||
if (mydaddy) {
|
} else {
|
||||||
addIfEarlier(hinted, mydaddy, curr_id);
|
// Fall back to granddaddy, which has already been processed (due
|
||||||
addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id);
|
// to BFS ordering) and cannot be a Sherman state.
|
||||||
dstate_id_t mygranddaddy = info.states[mydaddy].daddy;
|
dstate_id_t granddaddy = info.states[currState.daddy].daddy;
|
||||||
if (mygranddaddy) {
|
assert(!info.is_sherman(granddaddy));
|
||||||
addIfEarlier(hinted, mygranddaddy, curr_id);
|
hinted.insert(granddaddy);
|
||||||
addSuccessors(hinted, info.states[mygranddaddy], alphasize,
|
|
||||||
curr_id);
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
hinted = find_daddy_candidates(info, curr_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const dstate_id_t &donor : hinted) {
|
for (const dstate_id_t &donor : hinted) {
|
||||||
@ -939,8 +960,9 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
|
bool trust_daddy_states,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
u16 total_daddy = 0;
|
u16 total_daddy = 0;
|
||||||
dfa_info info(strat);
|
dfa_info info(strat);
|
||||||
@ -957,7 +979,7 @@ aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &
|
|||||||
|
|
||||||
for (u32 i = 0; i < info.size(); i++) {
|
for (u32 i = 0; i < info.size(); i++) {
|
||||||
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
|
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
|
||||||
cc.grey);
|
trust_daddy_states, cc.grey);
|
||||||
total_daddy += info.extra[i].daddytaken;
|
total_daddy += info.extra[i].daddytaken;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -965,7 +987,7 @@ aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &
|
|||||||
info.size() * info.impl_alpha_size, info.size(),
|
info.size() * info.impl_alpha_size, info.size(),
|
||||||
info.impl_alpha_size);
|
info.impl_alpha_size);
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
if (!using8bit) {
|
if (!using8bit) {
|
||||||
nfa = mcclellanCompile16(info, cc, accel_states);
|
nfa = mcclellanCompile16(info, cc, accel_states);
|
||||||
} else {
|
} else {
|
||||||
@ -980,11 +1002,13 @@ aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &
|
|||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm,
|
const ReportManager &rm,
|
||||||
|
bool only_accel_init,
|
||||||
|
bool trust_daddy_states,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
mcclellan_build_strat mbs(raw, rm);
|
mcclellan_build_strat mbs(raw, rm, only_accel_init);
|
||||||
return mcclellanCompile_i(raw, mbs, cc, accel_states);
|
return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t mcclellan_build_strat::accelSize(void) const {
|
size_t mcclellan_build_strat::accelSize(void) const {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,7 +32,7 @@
|
|||||||
#include "accel_dfa_build_strat.h"
|
#include "accel_dfa_build_strat.h"
|
||||||
#include "rdfa.h"
|
#include "rdfa.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/ue2_containers.h"
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -48,8 +48,9 @@ struct CompileContext;
|
|||||||
|
|
||||||
class mcclellan_build_strat : public accel_dfa_build_strat {
|
class mcclellan_build_strat : public accel_dfa_build_strat {
|
||||||
public:
|
public:
|
||||||
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in,
|
||||||
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
bool only_accel_init_in)
|
||||||
|
: accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {}
|
||||||
raw_dfa &get_raw() const override { return rdfa; }
|
raw_dfa &get_raw() const override { return rdfa; }
|
||||||
std::unique_ptr<raw_report_info> gatherReports(
|
std::unique_ptr<raw_report_info> gatherReports(
|
||||||
std::vector<u32> &reports /* out */,
|
std::vector<u32> &reports /* out */,
|
||||||
@ -65,17 +66,30 @@ private:
|
|||||||
raw_dfa &rdfa;
|
raw_dfa &rdfa;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* accel_states: (optional) on success, is filled with the set of accelerable
|
/**
|
||||||
* states */
|
* \brief Construct an implementation DFA.
|
||||||
ue2::aligned_unique_ptr<NFA>
|
*
|
||||||
|
* \param raw the raw dfa to construct from
|
||||||
|
* \param cc compile context
|
||||||
|
* \param rm report manager
|
||||||
|
* \param only_accel_init if true, only the init states will be examined for
|
||||||
|
* acceleration opportunities
|
||||||
|
* \param trust_daddy_states if true, trust the daddy state set in the raw dfa
|
||||||
|
* rather than conducting a search for a better daddy (for Sherman
|
||||||
|
* states)
|
||||||
|
* \param accel_states (optional) on success, is filled with the set of
|
||||||
|
* accelerable states
|
||||||
|
*/
|
||||||
|
bytecode_ptr<NFA>
|
||||||
mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm,
|
const ReportManager &rm, bool only_accel_init,
|
||||||
|
bool trust_daddy_states = false,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
/* used internally by mcclellan/haig/gough compile process */
|
/* used internally by mcclellan/haig/gough compile process */
|
||||||
ue2::aligned_unique_ptr<NFA>
|
bytecode_ptr<NFA>
|
||||||
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc, bool trust_daddy_states = false,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -89,4 +103,4 @@ bool has_accel_mcclellan(const NFA *nfa);
|
|||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif
|
#endif // MCCLELLANCOMPILE_H
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -43,6 +43,12 @@ namespace ue2 {
|
|||||||
|
|
||||||
#define INIT_STATE 1
|
#define INIT_STATE 1
|
||||||
|
|
||||||
|
static
|
||||||
|
bool state_has_reports(const raw_dfa &raw, dstate_id_t s) {
|
||||||
|
const auto &ds = raw.states[s];
|
||||||
|
return !ds.reports.empty() || !ds.reports_eod.empty();
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 count_dots(const raw_dfa &raw) {
|
u32 count_dots(const raw_dfa &raw) {
|
||||||
assert(raw.start_anchored == INIT_STATE);
|
assert(raw.start_anchored == INIT_STATE);
|
||||||
@ -60,8 +66,7 @@ u32 count_dots(const raw_dfa &raw) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!raw.states[raw.states[i].next[0]].reports.empty()
|
if (state_has_reports(raw, raw.states[i].next[0])) {
|
||||||
|| !raw.states[raw.states[i].next[0]].reports_eod.empty()) {
|
|
||||||
goto validate;
|
goto validate;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,74 +167,8 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
|
|||||||
return last_d;
|
return last_d;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) {
|
||||||
void find_in_edges(const raw_dfa &raw, vector<vector<dstate_id_t> > *in_edges) {
|
DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset);
|
||||||
in_edges->clear();
|
|
||||||
in_edges->resize(raw.states.size());
|
|
||||||
ue2::unordered_set<dstate_id_t> seen;
|
|
||||||
|
|
||||||
for (u32 s = 1; s < raw.states.size(); s++) {
|
|
||||||
seen.clear();
|
|
||||||
for (u32 j = 0; j < raw.alpha_size; j++) {
|
|
||||||
dstate_id_t t = raw.states[s].next[j];
|
|
||||||
if (contains(seen, t)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
seen.insert(t);
|
|
||||||
(*in_edges)[t].push_back(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void calc_min_dist_to_accept(const raw_dfa &raw,
|
|
||||||
const vector<vector<dstate_id_t> > &in_edges,
|
|
||||||
vector<u32> *accept_dist) {
|
|
||||||
vector<u32> &dist = *accept_dist;
|
|
||||||
dist.clear();
|
|
||||||
dist.resize(raw.states.size(), ~0U);
|
|
||||||
|
|
||||||
/* for reporting states to start from */
|
|
||||||
deque<dstate_id_t> to_visit;
|
|
||||||
for (u32 s = 0; s < raw.states.size(); s++) {
|
|
||||||
if (!raw.states[s].reports.empty()
|
|
||||||
|| !raw.states[s].reports_eod.empty()) {
|
|
||||||
to_visit.push_back(s);
|
|
||||||
dist[s] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* bfs */
|
|
||||||
UNUSED u32 last_d = 0;
|
|
||||||
while (!to_visit.empty()) {
|
|
||||||
dstate_id_t s = to_visit.front();
|
|
||||||
to_visit.pop_front();
|
|
||||||
assert(s != DEAD_STATE);
|
|
||||||
|
|
||||||
u32 d = dist[s];
|
|
||||||
assert(d >= last_d);
|
|
||||||
assert(d != ~0U);
|
|
||||||
|
|
||||||
for (vector<dstate_id_t>::const_iterator it = in_edges[s].begin();
|
|
||||||
it != in_edges[s].end(); ++it) {
|
|
||||||
dstate_id_t t = *it;
|
|
||||||
if (t == DEAD_STATE) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (dist[t] == ~0U) {
|
|
||||||
to_visit.push_back(t);
|
|
||||||
dist[t] = d + 1;
|
|
||||||
} else {
|
|
||||||
assert(dist[t] <= d + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
last_d = d;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool prune_overlong(raw_dfa &raw, u32 max_offset) {
|
|
||||||
DEBUG_PRINTF("pruning to at most %u\n", max_offset);
|
|
||||||
vector<u32> bob_dist;
|
vector<u32> bob_dist;
|
||||||
u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist);
|
u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist);
|
||||||
|
|
||||||
@ -237,53 +176,18 @@ bool prune_overlong(raw_dfa &raw, u32 max_offset) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<vector<dstate_id_t> > in_edges;
|
bool changed = false;
|
||||||
find_in_edges(raw, &in_edges);
|
|
||||||
|
|
||||||
vector<u32> accept_dist;
|
|
||||||
calc_min_dist_to_accept(raw, in_edges, &accept_dist);
|
|
||||||
|
|
||||||
in_edges.clear();
|
|
||||||
|
|
||||||
/* look over the states and filter out any which cannot reach a report
|
|
||||||
* states before max_offset */
|
|
||||||
vector<dstate_id_t> new_ids(raw.states.size());
|
|
||||||
vector<dstate> new_states;
|
|
||||||
u32 count = 1;
|
|
||||||
new_states.push_back(raw.states[DEAD_STATE]);
|
|
||||||
|
|
||||||
for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) {
|
for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) {
|
||||||
if (bob_dist[s] + accept_dist[s] > max_offset) {
|
if (bob_dist[s] > max_offset && state_has_reports(raw, s)) {
|
||||||
DEBUG_PRINTF("pruned %u: bob %u, report %u\n", s, bob_dist[s],
|
DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]);
|
||||||
accept_dist[s]);
|
auto &ds = raw.states[s];
|
||||||
new_ids[s] = DEAD_STATE;
|
ds.reports.clear();
|
||||||
} else {
|
ds.reports_eod.clear();
|
||||||
new_ids[s] = count++;
|
changed = true;
|
||||||
new_states.push_back(raw.states[s]);
|
|
||||||
assert(new_states.size() == count);
|
|
||||||
assert(new_ids[s] <= s);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* swap states */
|
return changed;
|
||||||
DEBUG_PRINTF("pruned %zu -> %u\n", raw.states.size(), count);
|
|
||||||
raw.states.swap(new_states);
|
|
||||||
new_states.clear();
|
|
||||||
|
|
||||||
/* update edges and daddys to refer to the new ids */
|
|
||||||
for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) {
|
|
||||||
for (u32 j = 0; j < raw.alpha_size; j++) {
|
|
||||||
dstate_id_t old_t = raw.states[s].next[j];
|
|
||||||
raw.states[s].next[j] = new_ids[old_t];
|
|
||||||
}
|
|
||||||
raw.states[s].daddy = new_ids[raw.states[s].daddy];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update specials */
|
|
||||||
raw.start_floating = new_ids[raw.start_floating];
|
|
||||||
raw.start_anchored = new_ids[raw.start_anchored];
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
set<ReportID> all_reports(const raw_dfa &rdfa) {
|
set<ReportID> all_reports(const raw_dfa &rdfa) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,10 +39,12 @@ namespace ue2 {
|
|||||||
u32 remove_leading_dots(raw_dfa &raw);
|
u32 remove_leading_dots(raw_dfa &raw);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prunes any states which cannot be reached within max_offset from start of
|
* \brief Clear reports on any states that are deeper than \a max_offset from
|
||||||
* stream. Returns false if no changes are made to the rdfa
|
* start of stream.
|
||||||
|
*
|
||||||
|
* Returns false if no changes are made to the DFA.
|
||||||
*/
|
*/
|
||||||
bool prune_overlong(raw_dfa &raw, u32 max_offset);
|
bool clear_deeper_reports(raw_dfa &raw, u32 max_offset);
|
||||||
|
|
||||||
std::set<ReportID> all_reports(const raw_dfa &rdfa);
|
std::set<ReportID> all_reports(const raw_dfa &rdfa);
|
||||||
bool has_eod_accepts(const raw_dfa &rdfa);
|
bool has_eod_accepts(const raw_dfa &rdfa);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,6 +33,7 @@
|
|||||||
#include "nfa_api.h"
|
#include "nfa_api.h"
|
||||||
#include "nfa_api_queue.h"
|
#include "nfa_api_queue.h"
|
||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
|
#include "util/arch.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
@ -168,7 +169,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
* extract a single copy of the state from the u32 for checking. */
|
* extract a single copy of the state from the u32 for checking. */
|
||||||
u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
|
u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;
|
||||||
|
|
||||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
|
||||||
u32 sheng_limit_x4 = sheng_limit * 0x01010101;
|
u32 sheng_limit_x4 = sheng_limit * 0x01010101;
|
||||||
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
|
m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
|
||||||
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
|
m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
|
||||||
@ -178,9 +179,9 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
|
|
||||||
#define SHENG_SINGLE_ITER do { \
|
#define SHENG_SINGLE_ITER do { \
|
||||||
m128 shuffle_mask = masks[*(c++)]; \
|
m128 shuffle_mask = masks[*(c++)]; \
|
||||||
s = pshufb(shuffle_mask, s); \
|
s = pshufb_m128(shuffle_mask, s); \
|
||||||
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
|
u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
|
||||||
DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr); \
|
DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \
|
||||||
if (s_gpr_x4 >= sheng_stop_limit_x4) { \
|
if (s_gpr_x4 >= sheng_stop_limit_x4) { \
|
||||||
s_gpr = s_gpr_x4; \
|
s_gpr = s_gpr_x4; \
|
||||||
goto exit; \
|
goto exit; \
|
||||||
@ -189,7 +190,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
|
|
||||||
u8 s_gpr;
|
u8 s_gpr;
|
||||||
while (c < c_end) {
|
while (c < c_end) {
|
||||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
|
||||||
/* This version uses pext for efficently bitbashing out scaled
|
/* This version uses pext for efficently bitbashing out scaled
|
||||||
* versions of the bytes to process from a u64a */
|
* versions of the bytes to process from a u64a */
|
||||||
|
|
||||||
@ -197,7 +198,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */
|
u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */
|
||||||
data_bytes &= ~0xffULL; /* clear low bits for scale space */
|
data_bytes &= ~0xffULL; /* clear low bits for scale space */
|
||||||
m128 shuffle_mask0 = load128((const char *)masks + cc0);
|
m128 shuffle_mask0 = load128((const char *)masks + cc0);
|
||||||
s = pshufb(shuffle_mask0, s);
|
s = pshufb_m128(shuffle_mask0, s);
|
||||||
m128 s_max = s;
|
m128 s_max = s;
|
||||||
m128 s_max0 = s_max;
|
m128 s_max0 = s_max;
|
||||||
DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
|
DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s));
|
||||||
@ -207,7 +208,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \
|
u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \
|
||||||
assert(cc##iter == (u64a)c[iter] << 4); \
|
assert(cc##iter == (u64a)c[iter] << 4); \
|
||||||
m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \
|
m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \
|
||||||
s = pshufb(shuffle_mask##iter, s); \
|
s = pshufb_m128(shuffle_mask##iter, s); \
|
||||||
if (do_accel && iter == 7) { \
|
if (do_accel && iter == 7) { \
|
||||||
/* in the final iteration we also have to check against accel */ \
|
/* in the final iteration we also have to check against accel */ \
|
||||||
m128 s_temp = sadd_u8_m128(s, accel_delta); \
|
m128 s_temp = sadd_u8_m128(s, accel_delta); \
|
||||||
@ -287,19 +288,19 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
|
|||||||
assert(soft_c_end - c < SHENG_CHUNK);
|
assert(soft_c_end - c < SHENG_CHUNK);
|
||||||
switch (soft_c_end - c) {
|
switch (soft_c_end - c) {
|
||||||
case 7:
|
case 7:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 6:
|
case 6:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 5:
|
case 5:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 4:
|
case 4:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 3:
|
case 3:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 2:
|
case 2:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
case 1:
|
case 1:
|
||||||
SHENG_SINGLE_ITER;
|
SHENG_SINGLE_ITER; // fallthrough
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -617,7 +617,7 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
|
|||||||
#define MAX_SHERMAN_LIST_LEN 8
|
#define MAX_SHERMAN_LIST_LEN 8
|
||||||
|
|
||||||
static
|
static
|
||||||
void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
|
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
|
||||||
dstate_id_t max) {
|
dstate_id_t max) {
|
||||||
if (candidate < max) {
|
if (candidate < max) {
|
||||||
dest.insert(candidate);
|
dest.insert(candidate);
|
||||||
@ -625,13 +625,35 @@ void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void addSuccessors(set<dstate_id_t> &dest, const dstate &source,
|
void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
|
||||||
u16 alphasize, dstate_id_t curr_id) {
|
u16 alphasize, dstate_id_t curr_id) {
|
||||||
for (symbol_t s = 0; s < alphasize; s++) {
|
for (symbol_t s = 0; s < alphasize; s++) {
|
||||||
addIfEarlier(dest, source.next[s], curr_id);
|
addIfEarlier(dest, source.next[s], curr_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* \brief Returns a set of states to search for a better daddy. */
|
||||||
|
static
|
||||||
|
flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
|
||||||
|
dstate_id_t curr_id) {
|
||||||
|
flat_set<dstate_id_t> hinted;
|
||||||
|
|
||||||
|
addIfEarlier(hinted, 0, curr_id);
|
||||||
|
addIfEarlier(hinted, info.raw.start_anchored, curr_id);
|
||||||
|
addIfEarlier(hinted, info.raw.start_floating, curr_id);
|
||||||
|
|
||||||
|
// Add existing daddy and his successors, then search back one generation.
|
||||||
|
const u16 alphasize = info.impl_alpha_size;
|
||||||
|
dstate_id_t daddy = info.states[curr_id].daddy;
|
||||||
|
for (u32 level = 0; daddy && level < 2; level++) {
|
||||||
|
addIfEarlier(hinted, daddy, curr_id);
|
||||||
|
addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
|
||||||
|
daddy = info.states[daddy].daddy;
|
||||||
|
}
|
||||||
|
|
||||||
|
return hinted;
|
||||||
|
}
|
||||||
|
|
||||||
#define MAX_SHERMAN_SELF_LOOP 20
|
#define MAX_SHERMAN_SELF_LOOP 20
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -671,22 +693,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
|||||||
dstate_id_t best_daddy = 0;
|
dstate_id_t best_daddy = 0;
|
||||||
dstate &currState = info.states[curr_id];
|
dstate &currState = info.states[curr_id];
|
||||||
|
|
||||||
set<dstate_id_t> hinted; /* set of states to search for a better daddy */
|
flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);
|
||||||
addIfEarlier(hinted, 0, curr_id);
|
|
||||||
addIfEarlier(hinted, info.raw.start_anchored, curr_id);
|
|
||||||
addIfEarlier(hinted, info.raw.start_floating, curr_id);
|
|
||||||
|
|
||||||
dstate_id_t mydaddy = currState.daddy;
|
|
||||||
if (mydaddy) {
|
|
||||||
addIfEarlier(hinted, mydaddy, curr_id);
|
|
||||||
addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id);
|
|
||||||
dstate_id_t mygranddaddy = info.states[mydaddy].daddy;
|
|
||||||
if (mygranddaddy) {
|
|
||||||
addIfEarlier(hinted, mygranddaddy, curr_id);
|
|
||||||
addSuccessors(hinted, info.states[mygranddaddy], alphasize,
|
|
||||||
curr_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const dstate_id_t &donor : hinted) {
|
for (const dstate_id_t &donor : hinted) {
|
||||||
assert(donor < curr_id);
|
assert(donor < curr_id);
|
||||||
@ -821,7 +828,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
||||||
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
const Grey &grey) {
|
const Grey &grey) {
|
||||||
DEBUG_PRINTF("building mcsheng 16\n");
|
DEBUG_PRINTF("building mcsheng 16\n");
|
||||||
@ -872,7 +879,7 @@ aligned_unique_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
|
||||||
@ -967,7 +974,7 @@ void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
aligned_unique_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
||||||
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
const map<dstate_id_t, AccelScheme> &accel_escape_info) {
|
||||||
DEBUG_PRINTF("building mcsheng 8\n");
|
DEBUG_PRINTF("building mcsheng 8\n");
|
||||||
|
|
||||||
@ -998,7 +1005,7 @@ aligned_unique_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
|
||||||
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
|
||||||
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());
|
||||||
|
|
||||||
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
|
||||||
@ -1019,13 +1026,13 @@ aligned_unique_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
|
|||||||
return nfa;
|
return nfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
if (!cc.grey.allowMcSheng) {
|
if (!cc.grey.allowMcSheng) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
mcclellan_build_strat mbs(raw, rm);
|
mcclellan_build_strat mbs(raw, rm, false);
|
||||||
dfa_info info(mbs);
|
dfa_info info(mbs);
|
||||||
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
|
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
|
||||||
|
|
||||||
@ -1044,7 +1051,7 @@ aligned_unique_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa;
|
bytecode_ptr<NFA> nfa;
|
||||||
if (!using8bit) {
|
if (!using8bit) {
|
||||||
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2016-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -29,13 +29,8 @@
|
|||||||
#ifndef MCSHENGCOMPILE_H
|
#ifndef MCSHENGCOMPILE_H
|
||||||
#define MCSHENGCOMPILE_H
|
#define MCSHENGCOMPILE_H
|
||||||
|
|
||||||
#include "accel_dfa_build_strat.h"
|
|
||||||
#include "rdfa.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/ue2_containers.h"
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
@ -43,9 +38,9 @@ namespace ue2 {
|
|||||||
|
|
||||||
class ReportManager;
|
class ReportManager;
|
||||||
struct CompileContext;
|
struct CompileContext;
|
||||||
|
struct raw_dfa;
|
||||||
|
|
||||||
ue2::aligned_unique_ptr<NFA>
|
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
||||||
mcshengCompile(raw_dfa &raw, const CompileContext &cc,
|
|
||||||
const ReportManager &rm);
|
const ReportManager &rm);
|
||||||
|
|
||||||
bool has_accel_mcsheng(const NFA *nfa);
|
bool has_accel_mcsheng(const NFA *nfa);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -309,7 +309,7 @@ const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
|
|||||||
return counters.front();
|
return counters.front();
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
|
bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
|
||||||
const vector<raw_puff> &triggered_puffs,
|
const vector<raw_puff> &triggered_puffs,
|
||||||
const ReportManager &rm) {
|
const ReportManager &rm) {
|
||||||
assert(!puffs_in.empty() || !triggered_puffs.empty());
|
assert(!puffs_in.empty() || !triggered_puffs.empty());
|
||||||
@ -343,7 +343,7 @@ aligned_unique_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
|
|||||||
|
|
||||||
DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
|
DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len);
|
auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
|
||||||
|
|
||||||
mpv_puffette *pa_base = (mpv_puffette *)
|
mpv_puffette *pa_base = (mpv_puffette *)
|
||||||
((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
|
((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,7 +30,7 @@
|
|||||||
#define MPV_COMPILE_H
|
#define MPV_COMPILE_H
|
||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/alloc.h"
|
#include "util/bytecode_ptr.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
@ -61,7 +61,7 @@ struct raw_puff {
|
|||||||
* puffs in the triggered_puffs vector are enabled when an TOP_N event is
|
* puffs in the triggered_puffs vector are enabled when an TOP_N event is
|
||||||
* delivered corresponding to their index in the vector
|
* delivered corresponding to their index in the vector
|
||||||
*/
|
*/
|
||||||
aligned_unique_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs,
|
bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs,
|
||||||
const std::vector<raw_puff> &triggered_puffs,
|
const std::vector<raw_puff> &triggered_puffs,
|
||||||
const ReportManager &rm);
|
const ReportManager &rm);
|
||||||
|
|
||||||
|
@ -1,265 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_COMMON_H_
|
|
||||||
#define MULTIACCEL_COMMON_H_
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "ue2common.h"
|
|
||||||
#include "util/join.h"
|
|
||||||
#include "util/bitutils.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When doing shifting, remember that the total number of shifts should be n-1
|
|
||||||
*/
|
|
||||||
#define VARISHIFT(src, dst, len) \
|
|
||||||
do { \
|
|
||||||
(dst) &= (src) >> (len); \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT1(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 1; \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT2(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 2;\
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT4(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 4; \
|
|
||||||
} while (0)
|
|
||||||
#define STATIC_SHIFT8(x) \
|
|
||||||
do { \
|
|
||||||
(x) &= (x) >> 8; \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT1(x) \
|
|
||||||
do {} while (0)
|
|
||||||
#define SHIFT2(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT3(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT4(x) \
|
|
||||||
do { \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT5(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT6(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT7(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT8(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT4(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT9(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT10(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT11(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT12(x); \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x);\
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT13(x); \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT14(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT15(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT16(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT8(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT17(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT18(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT19(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT20(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT21(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT22(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT23(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT24(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT16(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT25(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT26(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT27(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT28(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT29(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT30(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT31(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT1(x); \
|
|
||||||
STATIC_SHIFT2(x); \
|
|
||||||
STATIC_SHIFT4(x); \
|
|
||||||
} while (0)
|
|
||||||
#define SHIFT32(x) \
|
|
||||||
do { \
|
|
||||||
SHIFT24(x); \
|
|
||||||
STATIC_SHIFT8(x); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this function is used by 32-bit multiaccel matchers. 32-bit matchers accept
|
|
||||||
* a 32-bit integer as a buffer, where low 16 bits is movemask result and
|
|
||||||
* high 16 bits are "don't care" values. this function is not expected to return
|
|
||||||
* a result higher than 16.
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *match32(const u8 *buf, const u32 z) {
|
|
||||||
if (unlikely(z != 0)) {
|
|
||||||
u32 pos = ctz32(z);
|
|
||||||
assert(pos < 16);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this function is used by 64-bit multiaccel matchers. 64-bit matchers accept
|
|
||||||
* a 64-bit integer as a buffer, where low 32 bits is movemask result and
|
|
||||||
* high 32 bits are "don't care" values. this function is not expected to return
|
|
||||||
* a result higher than 32.
|
|
||||||
*/
|
|
||||||
static really_inline
|
|
||||||
const u8 *match64(const u8 *buf, const u64a z) {
|
|
||||||
if (unlikely(z != 0)) {
|
|
||||||
u32 pos = ctz64(z);
|
|
||||||
assert(pos < 32);
|
|
||||||
return buf + pos;
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_COMMON_H_ */
|
|
@ -1,439 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "multiaccel_compilehelper.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace ue2;
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
static const char* state_to_str[] = {
|
|
||||||
"FIRST_RUN",
|
|
||||||
"SECOND_RUN",
|
|
||||||
"WAITING_FOR_GRAB",
|
|
||||||
"FIRST_TAIL",
|
|
||||||
"SECOND_TAIL",
|
|
||||||
"STOPPED",
|
|
||||||
"INVALID"
|
|
||||||
};
|
|
||||||
static const char* type_to_str[] = {
|
|
||||||
"SHIFT",
|
|
||||||
"SHIFTGRAB",
|
|
||||||
"DOUBLESHIFT",
|
|
||||||
"DOUBLESHIFTGRAB",
|
|
||||||
"LONG",
|
|
||||||
"LONGGRAB",
|
|
||||||
"NONE"
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
void dumpMultiaccelState(const accel_data &d) {
|
|
||||||
DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n",
|
|
||||||
type_to_str[(unsigned) d.type],
|
|
||||||
state_to_str[(unsigned) d.state],
|
|
||||||
d.len1, d.tlen1, d.len2, d.tlen2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* stop all the matching. this may render most schemes invalid. */
|
|
||||||
static
|
|
||||||
void stop(accel_data &d) {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_STOPPED:
|
|
||||||
case STATE_INVALID:
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
/*
|
|
||||||
* Shift matchers are special case, because they have "tails".
|
|
||||||
* When shift matcher reaches a mid/endpoint, tail mode is
|
|
||||||
* activated, which looks for more matches to extend the match.
|
|
||||||
*
|
|
||||||
* For example, consider pattern /a{5}ba{3}/. Under normal circumstances,
|
|
||||||
* long-grab matcher will be picked for this pattern (matching a run of a's,
|
|
||||||
* followed by a not-a), because doubleshift matcher would be confused by
|
|
||||||
* consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts
|
|
||||||
* by 1) and throw out the rest of the pattern.
|
|
||||||
*
|
|
||||||
* With tails, we defer ending the run until we actually run out of
|
|
||||||
* matching characters, so the above pattern will now be parsed by
|
|
||||||
* doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4).
|
|
||||||
*
|
|
||||||
* So if we are stopping shift matchers, we should check if we aren't in
|
|
||||||
* the process of matching first tail or second run. If we are, we can't
|
|
||||||
* finish the second run as we are stopping, but we can try and split
|
|
||||||
* the first tail instead to obtain a valid second run.
|
|
||||||
*/
|
|
||||||
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) {
|
|
||||||
// can't split an empty void...
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
d.len2 = 0;
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
case STATE_WAITING_FOR_GRAB:
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
if (d.type == MultibyteAccelInfo::MAT_LONG) {
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void validate(accel_data &d, unsigned max_len) {
|
|
||||||
// try and fit in all our tails
|
|
||||||
if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) {
|
|
||||||
// case 1: everything fits in
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.len2 += d.tlen2;
|
|
||||||
d.tlen1 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
} else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) {
|
|
||||||
// case 2: everything but the second tail fits in
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.tlen1 = 0;
|
|
||||||
// try going for a partial tail
|
|
||||||
if (d.tlen2 != 0) {
|
|
||||||
int new_tlen2 = max_len - 1 - d.len1 - d.len2;
|
|
||||||
if (new_tlen2 > 0) {
|
|
||||||
d.len2 += new_tlen2;
|
|
||||||
}
|
|
||||||
d.tlen2 = 0;
|
|
||||||
}
|
|
||||||
} else if (d.len1 + d.tlen1 < max_len) {
|
|
||||||
// case 3: first run and its tail fits in
|
|
||||||
if (d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) {
|
|
||||||
// split the tail into a second run
|
|
||||||
d.len2 = d.tlen1;
|
|
||||||
} else {
|
|
||||||
d.len1 += d.tlen1;
|
|
||||||
d.len2 = 0;
|
|
||||||
}
|
|
||||||
d.tlen1 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
} else if (d.len1 < max_len) {
|
|
||||||
// case 4: nothing but the first run fits in
|
|
||||||
// try going for a partial tail
|
|
||||||
if (d.tlen1 != 0) {
|
|
||||||
int new_tlen1 = max_len - 1 - d.len1;
|
|
||||||
if (new_tlen1 > 0) {
|
|
||||||
d.len1 += new_tlen1;
|
|
||||||
}
|
|
||||||
d.tlen1 = 0;
|
|
||||||
}
|
|
||||||
d.len2 = 0;
|
|
||||||
d.tlen2 = 0;
|
|
||||||
}
|
|
||||||
// if we removed our second run, doubleshift matchers are no longer valid
|
|
||||||
if ((d.type == MultibyteAccelInfo::MAT_DSHIFT ||
|
|
||||||
d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) {
|
|
||||||
// long matchers can just stop whenever they want to
|
|
||||||
d.len1 = max_len - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now, general sanity checks
|
|
||||||
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
|
|
||||||
switch (d.type) {
|
|
||||||
case MultibyteAccelInfo::MAT_LONG:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For long matcher, we want lots of consecutive same-or-subset
|
|
||||||
* char-reaches
|
|
||||||
*/
|
|
||||||
if ((ref_cr & cur_cr) == cur_cr) {
|
|
||||||
d.len1++;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_LONGGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For long-grab matcher, we want lots of consecutive same-or-subset
|
|
||||||
* char-reaches with a negative match in the end.
|
|
||||||
*/
|
|
||||||
if ((ref_cr & cur_cr) == cur_cr) {
|
|
||||||
d.len1++;
|
|
||||||
} else if (!(ref_cr & cur_cr).any()) {
|
|
||||||
/* we grabbed, stop immediately */
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
} else {
|
|
||||||
/* our run-n-grab was interrupted; mark as invalid */
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFTGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For shift-grab matcher, we want two matches separated by anything;
|
|
||||||
* however the second vertex *must* be a negative (non-overlapping) match.
|
|
||||||
*
|
|
||||||
* Shiftgrab matcher is identical to shift except for presence of grab.
|
|
||||||
*/
|
|
||||||
if (d.state == STATE_WAITING_FOR_GRAB) {
|
|
||||||
if ((ref_cr & cur_cr).any()) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_FIRST_RUN;
|
|
||||||
d.len1++;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* no break, falling through */
|
|
||||||
case MultibyteAccelInfo::MAT_SHIFT:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For shift-matcher, we want two matches separated by anything.
|
|
||||||
*/
|
|
||||||
if (ref_cr == cur_cr) {
|
|
||||||
// keep matching tail
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.state = STATE_FIRST_TAIL;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
d.tlen1++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
// simply advance
|
|
||||||
d.len1++;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
// we found a non-matching char after tail, so stop
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For double shift-grab matcher, we want two matches separated by
|
|
||||||
* either negative matches or dots; however the second vertex *must*
|
|
||||||
* be a negative match.
|
|
||||||
*
|
|
||||||
* Doubleshiftgrab matcher is identical to doubleshift except for
|
|
||||||
* presence of grab.
|
|
||||||
*/
|
|
||||||
if (d.state == STATE_WAITING_FOR_GRAB) {
|
|
||||||
if ((ref_cr & cur_cr).any()) {
|
|
||||||
d.state = STATE_INVALID;
|
|
||||||
} else {
|
|
||||||
d.state = STATE_FIRST_RUN;
|
|
||||||
d.len1++;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* no break, falling through */
|
|
||||||
case MultibyteAccelInfo::MAT_DSHIFT:
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* For double shift matcher, we want three matches, each separated
|
|
||||||
* by a lot of anything.
|
|
||||||
*
|
|
||||||
* Doubleshift matcher is complicated by presence of tails.
|
|
||||||
*/
|
|
||||||
if (ref_cr == cur_cr) {
|
|
||||||
// decide if we are activating second shift or matching tails
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.state = STATE_FIRST_TAIL;
|
|
||||||
d.len2 = 1; // we're now ready for our second run
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
d.tlen1++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
d.state = STATE_SECOND_TAIL;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
d.tlen2++;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (d.state) {
|
|
||||||
case STATE_FIRST_RUN:
|
|
||||||
d.len1++;
|
|
||||||
break;
|
|
||||||
case STATE_FIRST_TAIL:
|
|
||||||
// start second run
|
|
||||||
d.state = STATE_SECOND_RUN;
|
|
||||||
d.len2++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_RUN:
|
|
||||||
d.len2++;
|
|
||||||
break;
|
|
||||||
case STATE_SECOND_TAIL:
|
|
||||||
// stop
|
|
||||||
d.state = STATE_STOPPED;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// shouldn't happen
|
|
||||||
assert(0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Set up one accel_data record per multiaccel scheme type (MAT_MAX records in
 * total), each starting with a first run of length one.
 *
 * ref_cr      reference char-reach that later vertices are compared against
 * off         offset stored for the resulting accel info
 * max_length  upper bound later passed to validate()
 */
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
                                                 u32 off, unsigned max_length)
    : cr(ref_cr), offset(off), max_len(max_length) {
    const int scheme_count = static_cast<int>(MultibyteAccelInfo::MAT_MAX);
    accels.resize(scheme_count);

    // every scheme starts out as a live candidate
    int idx = 0;
    for (accel_data &ad : accels) {
        ad.type = static_cast<MultibyteAccelInfo::multiaccel_type>(idx++);
        ad.len1 = 1;

        /* the grab variants of the shift matchers expect their grab first */
        const bool grab_first =
            ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB ||
            ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB;
        ad.state = grab_first ? STATE_WAITING_FOR_GRAB : STATE_FIRST_RUN;
    }
}
|
|
||||||
|
|
||||||
bool MultiaccelCompileHelper::canAdvance() {
|
|
||||||
for (const accel_data &ad : accels) {
|
|
||||||
if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MultiaccelCompileHelper::advance(const CharReach &cur_cr) {
|
|
||||||
for (accel_data &ad : accels) {
|
|
||||||
if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
match(ad, cr, cur_cr);
|
|
||||||
#ifdef DEBUG
|
|
||||||
dumpMultiaccelState(ad);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Finish compilation: stop and validate every scheme, then pick the valid one
 * with the greatest len1 + len2 (on a tie the scheme visited later wins).
 * Returns a default-constructed MultibyteAccelInfo if no scheme was picked.
 */
MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() {
    // NOTE(review): the final check relies on a default-constructed
    // accel_data reporting STATE_INVALID - confirm against the struct.
    accel_data winner;
    int winner_len = 0;

    DEBUG_PRINTF("Stopping multiaccel compile\n");

    for (accel_data &candidate : accels) {
        // finalise this scheme before judging it
        stop(candidate);
        validate(candidate, max_len);

#ifdef DEBUG
        dumpMultiaccelState(candidate);
#endif

        if (candidate.state != STATE_INVALID) {
            DEBUG_PRINTF("Marking as viable\n");

            // TODO: relative strengths of accel schemes? maybe e.g. a shorter
            // long match would in some cases be preferable to a longer
            // double shift match (for example, depending on length)?
            const int combined = candidate.len1 + candidate.len2;
            if (combined >= winner_len) {
                DEBUG_PRINTF("Marking as best\n");
                winner_len = combined;
                winner = candidate;
            }
        }
    }

    // nothing usable was found
    if (winner.state == STATE_INVALID) {
        return MultibyteAccelInfo();
    }

#ifdef DEBUG
    DEBUG_PRINTF("Picked best multiaccel state:\n");
    dumpMultiaccelState(winner);
#endif

    MultibyteAccelInfo info;
    info.cr = cr;
    info.offset = offset;
    info.len1 = winner.len1;
    info.len2 = winner.len2;
    info.type = winner.type;
    return info;
}
|
|
@ -1,149 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_DOUBLESHIFT_H_
|
|
||||||
#define MULTIACCEL_DOUBLESHIFT_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t tmp = z; \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
VARISHIFT(tmp, tmp, len2); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DOUBLESHIFT_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH(n, u32, 32)
|
|
||||||
#define DOUBLESHIFT_MATCH_64_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH(n, u64a, 64)
|
|
||||||
#define DOUBLESHIFT_MATCH_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
DOUBLESHIFT_MATCH_DEF(1)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(2)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(3)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(4)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(5)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(6)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(7)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(8)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(9)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(10)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(11)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(12)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(13)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(14)
|
|
||||||
DOUBLESHIFT_MATCH_DEF(15)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(16)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(17)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(18)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(19)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(20)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(21)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(22)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(23)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(24)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(25)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(26)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(27)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(28)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(29)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(30)
|
|
||||||
DOUBLESHIFT_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftMatch_32_1,
|
|
||||||
&doubleshiftMatch_32_2,
|
|
||||||
&doubleshiftMatch_32_3,
|
|
||||||
&doubleshiftMatch_32_4,
|
|
||||||
&doubleshiftMatch_32_5,
|
|
||||||
&doubleshiftMatch_32_6,
|
|
||||||
&doubleshiftMatch_32_7,
|
|
||||||
&doubleshiftMatch_32_8,
|
|
||||||
&doubleshiftMatch_32_9,
|
|
||||||
&doubleshiftMatch_32_10,
|
|
||||||
&doubleshiftMatch_32_11,
|
|
||||||
&doubleshiftMatch_32_12,
|
|
||||||
&doubleshiftMatch_32_13,
|
|
||||||
&doubleshiftMatch_32_14,
|
|
||||||
&doubleshiftMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftMatch_64_1,
|
|
||||||
&doubleshiftMatch_64_2,
|
|
||||||
&doubleshiftMatch_64_3,
|
|
||||||
&doubleshiftMatch_64_4,
|
|
||||||
&doubleshiftMatch_64_5,
|
|
||||||
&doubleshiftMatch_64_6,
|
|
||||||
&doubleshiftMatch_64_7,
|
|
||||||
&doubleshiftMatch_64_8,
|
|
||||||
&doubleshiftMatch_64_9,
|
|
||||||
&doubleshiftMatch_64_10,
|
|
||||||
&doubleshiftMatch_64_11,
|
|
||||||
&doubleshiftMatch_64_12,
|
|
||||||
&doubleshiftMatch_64_13,
|
|
||||||
&doubleshiftMatch_64_14,
|
|
||||||
&doubleshiftMatch_64_15,
|
|
||||||
&doubleshiftMatch_64_16,
|
|
||||||
&doubleshiftMatch_64_17,
|
|
||||||
&doubleshiftMatch_64_18,
|
|
||||||
&doubleshiftMatch_64_19,
|
|
||||||
&doubleshiftMatch_64_20,
|
|
||||||
&doubleshiftMatch_64_21,
|
|
||||||
&doubleshiftMatch_64_22,
|
|
||||||
&doubleshiftMatch_64_23,
|
|
||||||
&doubleshiftMatch_64_24,
|
|
||||||
&doubleshiftMatch_64_25,
|
|
||||||
&doubleshiftMatch_64_26,
|
|
||||||
&doubleshiftMatch_64_27,
|
|
||||||
&doubleshiftMatch_64_28,
|
|
||||||
&doubleshiftMatch_64_29,
|
|
||||||
&doubleshiftMatch_64_30,
|
|
||||||
&doubleshiftMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_DOUBLESHIFT_H_ */
|
|
@ -1,152 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_
|
|
||||||
#define MULTIACCEL_DOUBLESHIFTGRAB_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t neg = ~z; \
|
|
||||||
match_t tmp = z; \
|
|
||||||
z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \
|
|
||||||
tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \
|
|
||||||
neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
|
||||||
VARISHIFT(z, z, len); \
|
|
||||||
VARISHIFT(tmp, tmp, len2); \
|
|
||||||
VARISHIFT(neg, z, 1); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH(n, u32, 32)
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH(n, u64a, 64)
|
|
||||||
#define DOUBLESHIFTGRAB_MATCH_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH_32_DEF(n) \
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(1)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(2)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(3)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(4)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(5)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(6)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(7)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(8)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(9)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(10)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(11)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(12)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(13)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(14)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_DEF(15)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(16)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(17)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(18)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(19)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(20)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(21)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(22)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(23)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(24)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(25)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(26)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(27)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(28)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(29)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(30)
|
|
||||||
DOUBLESHIFTGRAB_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftgrabMatch_32_1,
|
|
||||||
&doubleshiftgrabMatch_32_2,
|
|
||||||
&doubleshiftgrabMatch_32_3,
|
|
||||||
&doubleshiftgrabMatch_32_4,
|
|
||||||
&doubleshiftgrabMatch_32_5,
|
|
||||||
&doubleshiftgrabMatch_32_6,
|
|
||||||
&doubleshiftgrabMatch_32_7,
|
|
||||||
&doubleshiftgrabMatch_32_8,
|
|
||||||
&doubleshiftgrabMatch_32_9,
|
|
||||||
&doubleshiftgrabMatch_32_10,
|
|
||||||
&doubleshiftgrabMatch_32_11,
|
|
||||||
&doubleshiftgrabMatch_32_12,
|
|
||||||
&doubleshiftgrabMatch_32_13,
|
|
||||||
&doubleshiftgrabMatch_32_14,
|
|
||||||
&doubleshiftgrabMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) =
|
|
||||||
{
|
|
||||||
// skip the first
|
|
||||||
0,
|
|
||||||
&doubleshiftgrabMatch_64_1,
|
|
||||||
&doubleshiftgrabMatch_64_2,
|
|
||||||
&doubleshiftgrabMatch_64_3,
|
|
||||||
&doubleshiftgrabMatch_64_4,
|
|
||||||
&doubleshiftgrabMatch_64_5,
|
|
||||||
&doubleshiftgrabMatch_64_6,
|
|
||||||
&doubleshiftgrabMatch_64_7,
|
|
||||||
&doubleshiftgrabMatch_64_8,
|
|
||||||
&doubleshiftgrabMatch_64_9,
|
|
||||||
&doubleshiftgrabMatch_64_10,
|
|
||||||
&doubleshiftgrabMatch_64_11,
|
|
||||||
&doubleshiftgrabMatch_64_12,
|
|
||||||
&doubleshiftgrabMatch_64_13,
|
|
||||||
&doubleshiftgrabMatch_64_14,
|
|
||||||
&doubleshiftgrabMatch_64_15,
|
|
||||||
&doubleshiftgrabMatch_64_16,
|
|
||||||
&doubleshiftgrabMatch_64_17,
|
|
||||||
&doubleshiftgrabMatch_64_18,
|
|
||||||
&doubleshiftgrabMatch_64_19,
|
|
||||||
&doubleshiftgrabMatch_64_20,
|
|
||||||
&doubleshiftgrabMatch_64_21,
|
|
||||||
&doubleshiftgrabMatch_64_22,
|
|
||||||
&doubleshiftgrabMatch_64_23,
|
|
||||||
&doubleshiftgrabMatch_64_24,
|
|
||||||
&doubleshiftgrabMatch_64_25,
|
|
||||||
&doubleshiftgrabMatch_64_26,
|
|
||||||
&doubleshiftgrabMatch_64_27,
|
|
||||||
&doubleshiftgrabMatch_64_28,
|
|
||||||
&doubleshiftgrabMatch_64_29,
|
|
||||||
&doubleshiftgrabMatch_64_30,
|
|
||||||
&doubleshiftgrabMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */
|
|
@ -1,145 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_LONG_H_
|
|
||||||
#define MULTIACCEL_LONG_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define LONG_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
|
|
||||||
JOIN(SHIFT, len)(z); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define LONG_MATCH_32_DEF(n) \
|
|
||||||
LONG_MATCH(n, u32, 32)
|
|
||||||
#define LONG_MATCH_64_DEF(n) \
|
|
||||||
LONG_MATCH(n, u64a, 64)
|
|
||||||
#define LONG_MATCH_DEF(n) \
|
|
||||||
LONG_MATCH_32_DEF(n) \
|
|
||||||
LONG_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
LONG_MATCH_DEF(1)
|
|
||||||
LONG_MATCH_DEF(2)
|
|
||||||
LONG_MATCH_DEF(3)
|
|
||||||
LONG_MATCH_DEF(4)
|
|
||||||
LONG_MATCH_DEF(5)
|
|
||||||
LONG_MATCH_DEF(6)
|
|
||||||
LONG_MATCH_DEF(7)
|
|
||||||
LONG_MATCH_DEF(8)
|
|
||||||
LONG_MATCH_DEF(9)
|
|
||||||
LONG_MATCH_DEF(10)
|
|
||||||
LONG_MATCH_DEF(11)
|
|
||||||
LONG_MATCH_DEF(12)
|
|
||||||
LONG_MATCH_DEF(13)
|
|
||||||
LONG_MATCH_DEF(14)
|
|
||||||
LONG_MATCH_DEF(15)
|
|
||||||
LONG_MATCH_64_DEF(16)
|
|
||||||
LONG_MATCH_64_DEF(17)
|
|
||||||
LONG_MATCH_64_DEF(18)
|
|
||||||
LONG_MATCH_64_DEF(19)
|
|
||||||
LONG_MATCH_64_DEF(20)
|
|
||||||
LONG_MATCH_64_DEF(21)
|
|
||||||
LONG_MATCH_64_DEF(22)
|
|
||||||
LONG_MATCH_64_DEF(23)
|
|
||||||
LONG_MATCH_64_DEF(24)
|
|
||||||
LONG_MATCH_64_DEF(25)
|
|
||||||
LONG_MATCH_64_DEF(26)
|
|
||||||
LONG_MATCH_64_DEF(27)
|
|
||||||
LONG_MATCH_64_DEF(28)
|
|
||||||
LONG_MATCH_64_DEF(29)
|
|
||||||
LONG_MATCH_64_DEF(30)
|
|
||||||
LONG_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longMatch_32_1,
|
|
||||||
&longMatch_32_2,
|
|
||||||
&longMatch_32_3,
|
|
||||||
&longMatch_32_4,
|
|
||||||
&longMatch_32_5,
|
|
||||||
&longMatch_32_6,
|
|
||||||
&longMatch_32_7,
|
|
||||||
&longMatch_32_8,
|
|
||||||
&longMatch_32_9,
|
|
||||||
&longMatch_32_10,
|
|
||||||
&longMatch_32_11,
|
|
||||||
&longMatch_32_12,
|
|
||||||
&longMatch_32_13,
|
|
||||||
&longMatch_32_14,
|
|
||||||
&longMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longMatch_64_1,
|
|
||||||
&longMatch_64_2,
|
|
||||||
&longMatch_64_3,
|
|
||||||
&longMatch_64_4,
|
|
||||||
&longMatch_64_5,
|
|
||||||
&longMatch_64_6,
|
|
||||||
&longMatch_64_7,
|
|
||||||
&longMatch_64_8,
|
|
||||||
&longMatch_64_9,
|
|
||||||
&longMatch_64_10,
|
|
||||||
&longMatch_64_11,
|
|
||||||
&longMatch_64_12,
|
|
||||||
&longMatch_64_13,
|
|
||||||
&longMatch_64_14,
|
|
||||||
&longMatch_64_15,
|
|
||||||
&longMatch_64_16,
|
|
||||||
&longMatch_64_17,
|
|
||||||
&longMatch_64_18,
|
|
||||||
&longMatch_64_19,
|
|
||||||
&longMatch_64_20,
|
|
||||||
&longMatch_64_21,
|
|
||||||
&longMatch_64_22,
|
|
||||||
&longMatch_64_23,
|
|
||||||
&longMatch_64_24,
|
|
||||||
&longMatch_64_25,
|
|
||||||
&longMatch_64_26,
|
|
||||||
&longMatch_64_27,
|
|
||||||
&longMatch_64_28,
|
|
||||||
&longMatch_64_29,
|
|
||||||
&longMatch_64_30,
|
|
||||||
&longMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_LONG_H_ */
|
|
@ -1,148 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2015, Intel Corporation
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice,
|
|
||||||
* this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
* may be used to endorse or promote products derived from this software
|
|
||||||
* without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MULTIACCEL_LONGGRAB_H_
|
|
||||||
#define MULTIACCEL_LONGGRAB_H_
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
|
||||||
|
|
||||||
#define LONGGRAB_MATCH(len, match_t, match_sz) \
|
|
||||||
static really_inline \
|
|
||||||
const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \
|
|
||||||
if (unlikely(z)) { \
|
|
||||||
match_t tmp = ~z; \
|
|
||||||
tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \
|
|
||||||
z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \
|
|
||||||
JOIN(SHIFT, len)(z); \
|
|
||||||
VARISHIFT(tmp, z, len); \
|
|
||||||
return JOIN(match, match_sz)(buf, z); \
|
|
||||||
} \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Instantiate the longgrab matcher for length n over a 32-bit (u32) mask. */
#define LONGGRAB_MATCH_32_DEF(n) LONGGRAB_MATCH(n, u32, 32)

/* Instantiate the longgrab matcher for length n over a 64-bit (u64a) mask. */
#define LONGGRAB_MATCH_64_DEF(n) LONGGRAB_MATCH(n, u64a, 64)

/* Instantiate both the 32-bit and the 64-bit matcher for length n. */
#define LONGGRAB_MATCH_DEF(n) LONGGRAB_MATCH_32_DEF(n) LONGGRAB_MATCH_64_DEF(n)
|
|
||||||
|
|
||||||
/* Lengths 1 through 15: generated for both the 32-bit and 64-bit masks. */
LONGGRAB_MATCH_DEF(1)
LONGGRAB_MATCH_DEF(2)
LONGGRAB_MATCH_DEF(3)
LONGGRAB_MATCH_DEF(4)
LONGGRAB_MATCH_DEF(5)
LONGGRAB_MATCH_DEF(6)
LONGGRAB_MATCH_DEF(7)
LONGGRAB_MATCH_DEF(8)
LONGGRAB_MATCH_DEF(9)
LONGGRAB_MATCH_DEF(10)
LONGGRAB_MATCH_DEF(11)
LONGGRAB_MATCH_DEF(12)
LONGGRAB_MATCH_DEF(13)
LONGGRAB_MATCH_DEF(14)
LONGGRAB_MATCH_DEF(15)

/* Lengths 16 through 31: generated for the 64-bit mask only. */
LONGGRAB_MATCH_64_DEF(16)
LONGGRAB_MATCH_64_DEF(17)
LONGGRAB_MATCH_64_DEF(18)
LONGGRAB_MATCH_64_DEF(19)
LONGGRAB_MATCH_64_DEF(20)
LONGGRAB_MATCH_64_DEF(21)
LONGGRAB_MATCH_64_DEF(22)
LONGGRAB_MATCH_64_DEF(23)
LONGGRAB_MATCH_64_DEF(24)
LONGGRAB_MATCH_64_DEF(25)
LONGGRAB_MATCH_64_DEF(26)
LONGGRAB_MATCH_64_DEF(27)
LONGGRAB_MATCH_64_DEF(28)
LONGGRAB_MATCH_64_DEF(29)
LONGGRAB_MATCH_64_DEF(30)
LONGGRAB_MATCH_64_DEF(31)
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longgrabMatch_32_1,
|
|
||||||
&longgrabMatch_32_2,
|
|
||||||
&longgrabMatch_32_3,
|
|
||||||
&longgrabMatch_32_4,
|
|
||||||
&longgrabMatch_32_5,
|
|
||||||
&longgrabMatch_32_6,
|
|
||||||
&longgrabMatch_32_7,
|
|
||||||
&longgrabMatch_32_8,
|
|
||||||
&longgrabMatch_32_9,
|
|
||||||
&longgrabMatch_32_10,
|
|
||||||
&longgrabMatch_32_11,
|
|
||||||
&longgrabMatch_32_12,
|
|
||||||
&longgrabMatch_32_13,
|
|
||||||
&longgrabMatch_32_14,
|
|
||||||
&longgrabMatch_32_15,
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) =
|
|
||||||
{
|
|
||||||
// skip the first three
|
|
||||||
0,
|
|
||||||
&longgrabMatch_64_1,
|
|
||||||
&longgrabMatch_64_2,
|
|
||||||
&longgrabMatch_64_3,
|
|
||||||
&longgrabMatch_64_4,
|
|
||||||
&longgrabMatch_64_5,
|
|
||||||
&longgrabMatch_64_6,
|
|
||||||
&longgrabMatch_64_7,
|
|
||||||
&longgrabMatch_64_8,
|
|
||||||
&longgrabMatch_64_9,
|
|
||||||
&longgrabMatch_64_10,
|
|
||||||
&longgrabMatch_64_11,
|
|
||||||
&longgrabMatch_64_12,
|
|
||||||
&longgrabMatch_64_13,
|
|
||||||
&longgrabMatch_64_14,
|
|
||||||
&longgrabMatch_64_15,
|
|
||||||
&longgrabMatch_64_16,
|
|
||||||
&longgrabMatch_64_17,
|
|
||||||
&longgrabMatch_64_18,
|
|
||||||
&longgrabMatch_64_19,
|
|
||||||
&longgrabMatch_64_20,
|
|
||||||
&longgrabMatch_64_21,
|
|
||||||
&longgrabMatch_64_22,
|
|
||||||
&longgrabMatch_64_23,
|
|
||||||
&longgrabMatch_64_24,
|
|
||||||
&longgrabMatch_64_25,
|
|
||||||
&longgrabMatch_64_26,
|
|
||||||
&longgrabMatch_64_27,
|
|
||||||
&longgrabMatch_64_28,
|
|
||||||
&longgrabMatch_64_29,
|
|
||||||
&longgrabMatch_64_30,
|
|
||||||
&longgrabMatch_64_31,
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* MULTIACCEL_LONGGRAB_H_ */
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user