mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop into master
This commit is contained in:
commit
bf99ad00eb
30
CHANGELOG.md
30
CHANGELOG.md
@ -2,6 +2,36 @@
|
|||||||
|
|
||||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||||
|
|
||||||
|
## [4.3.0] 2016-08-24
|
||||||
|
- Introduce a new analysis pass ("Violet") used for decomposition of patterns
|
||||||
|
into literals and smaller engines.
|
||||||
|
- Introduce a new container engine ("Tamarama") for infix and suffix engines
|
||||||
|
that can be proven to run exclusively of one another. This reduces stream
|
||||||
|
state for pattern sets with many such engines.
|
||||||
|
- Introduce a new shuffle-based DFA engine ("Sheng"). This improves scanning
|
||||||
|
performance for pattern sets where small engines are generated.
|
||||||
|
- Improve the analysis used to extract extra mask information from short
|
||||||
|
literals.
|
||||||
|
- Reduced compile time spent in equivalence class analysis.
|
||||||
|
- Build: frame pointers are now only omitted for 32-bit release builds.
|
||||||
|
- Build: Workaround for C++ issues reported on FreeBSD/libc++ platforms.
|
||||||
|
(github issue #27)
|
||||||
|
- Simplify the LimEx NFA with a unified "variable shift" model, which reduces
|
||||||
|
the number of different NFA code paths to one per model size.
|
||||||
|
- Allow some anchored prefixes that may squash the literal to which they are
|
||||||
|
attached to run eagerly. This improves scanning performance for some
|
||||||
|
patterns.
|
||||||
|
- Simplify and improve EOD ("end of data") matching, using the interpreter for
|
||||||
|
all operations.
|
||||||
|
- Elide unnecessary instructions in the Rose interpreter at compile time.
|
||||||
|
- Reduce the number of inlined instantiations of the Rose interpreter in order
|
||||||
|
to reduce instruction cache pressure.
|
||||||
|
- Small improvements to literal matcher acceleration.
|
||||||
|
- Parser: ignore `\E` metacharacters that are not preceded by `\Q`. This
|
||||||
|
conforms to PCRE's behaviour, rather than returning a compile error.
|
||||||
|
- Check for misaligned memory when allocating an error structure in Hyperscan's
|
||||||
|
compile path and return an appropriate error if detected.
|
||||||
|
|
||||||
## [4.2.0] 2016-05-31
|
## [4.2.0] 2016-05-31
|
||||||
- Introduce an interpreter for many complex actions to replace the use of
|
- Introduce an interpreter for many complex actions to replace the use of
|
||||||
internal reports within the core of Hyperscan (the "Rose" engine). This
|
internal reports within the core of Hyperscan (the "Rose" engine). This
|
||||||
|
153
CMakeLists.txt
153
CMakeLists.txt
@ -1,12 +1,18 @@
|
|||||||
cmake_minimum_required (VERSION 2.8.11)
|
cmake_minimum_required (VERSION 2.8.11)
|
||||||
|
|
||||||
|
# don't use the built-in default configs
|
||||||
|
set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE)
|
||||||
|
|
||||||
project (Hyperscan C CXX)
|
project (Hyperscan C CXX)
|
||||||
|
|
||||||
set (HS_MAJOR_VERSION 4)
|
set (HS_MAJOR_VERSION 4)
|
||||||
set (HS_MINOR_VERSION 2)
|
set (HS_MINOR_VERSION 3)
|
||||||
set (HS_PATCH_VERSION 0)
|
set (HS_PATCH_VERSION 0)
|
||||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||||
|
|
||||||
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
# since we are doing this manually, we only have three types
|
||||||
|
set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo"
|
||||||
|
CACHE STRING "" FORCE)
|
||||||
|
|
||||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||||
include(CheckCCompilerFlag)
|
include(CheckCCompilerFlag)
|
||||||
@ -24,7 +30,7 @@ find_package(PkgConfig QUIET)
|
|||||||
|
|
||||||
if (NOT CMAKE_BUILD_TYPE)
|
if (NOT CMAKE_BUILD_TYPE)
|
||||||
message(STATUS "Default build type 'Release with debug info'")
|
message(STATUS "Default build type 'Release with debug info'")
|
||||||
set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
|
set(CMAKE_BUILD_TYPE RELWITHDEBINFO CACHE STRING "" FORCE )
|
||||||
else()
|
else()
|
||||||
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
|
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
|
||||||
message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
|
message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
|
||||||
@ -90,6 +96,18 @@ else()
|
|||||||
message(FATAL_ERROR "No python interpreter found")
|
message(FATAL_ERROR "No python interpreter found")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# allow for reproducible builds - python for portability
|
||||||
|
if (DEFINED ENV{SOURCE_DATE_EPOCH})
|
||||||
|
execute_process(
|
||||||
|
COMMAND "${PYTHON}" "${CMAKE_MODULE_PATH}/formatdate.py" "$ENV{SOURCE_DATE_EPOCH}"
|
||||||
|
OUTPUT_VARIABLE BUILD_DATE
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
|
else ()
|
||||||
|
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
||||||
|
endif ()
|
||||||
|
message(STATUS "Build date: ${BUILD_DATE}")
|
||||||
|
|
||||||
|
|
||||||
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
|
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
|
||||||
message(FATAL_ERROR "Ragel state machine compiler not found")
|
message(FATAL_ERROR "Ragel state machine compiler not found")
|
||||||
endif()
|
endif()
|
||||||
@ -121,13 +139,7 @@ endif()
|
|||||||
|
|
||||||
CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)
|
CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)
|
||||||
|
|
||||||
option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE)
|
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
|
||||||
|
|
||||||
if (DISABLE_ASSERTS)
|
|
||||||
if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
|
|
||||||
add_definitions(-DNDEBUG)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
||||||
|
|
||||||
@ -139,18 +151,26 @@ if(MSVC OR MSVC_IDE)
|
|||||||
if (MSVC_VERSION LESS 1700)
|
if (MSVC_VERSION LESS 1700)
|
||||||
message(FATAL_ERROR "The project requires C++11 features.")
|
message(FATAL_ERROR "The project requires C++11 features.")
|
||||||
else()
|
else()
|
||||||
|
# set base flags
|
||||||
|
set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3")
|
||||||
|
set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
|
||||||
|
set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
|
||||||
|
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
|
||||||
|
|
||||||
|
set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc")
|
||||||
|
set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
|
||||||
|
|
||||||
if (WINDOWS_ICC)
|
if (WINDOWS_ICC)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||||
else()
|
else()
|
||||||
#TODO: don't hardcode arch
|
#TODO: don't hardcode arch
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||||
endif()
|
endif()
|
||||||
string(REGEX REPLACE "/RTC1" ""
|
|
||||||
CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" )
|
|
||||||
string(REGEX REPLACE "/RTC1" ""
|
|
||||||
CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" )
|
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -172,16 +192,34 @@ else()
|
|||||||
unset(_GXX_OUTPUT)
|
unset(_GXX_OUTPUT)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# set compiler flags - more are tested and added later
|
if(OPTIMISE)
|
||||||
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual")
|
set(OPT_C_FLAG "-O3")
|
||||||
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")
|
set(OPT_CXX_FLAG "-O2")
|
||||||
if (NOT RELEASE_BUILD)
|
else()
|
||||||
# -Werror is most useful during development, don't potentially break
|
set(OPT_C_FLAG "-O0")
|
||||||
# release builds
|
set(OPT_CXX_FLAG "-O0")
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
|
endif(OPTIMISE)
|
||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
|
|
||||||
|
# set up base flags for build types
|
||||||
|
set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror")
|
||||||
|
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}")
|
||||||
|
set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}")
|
||||||
|
|
||||||
|
set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}")
|
||||||
|
set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}")
|
||||||
|
|
||||||
|
if (DISABLE_ASSERTS)
|
||||||
|
# usually true for release builds, false for debug
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
# set compiler flags - more are tested and added later
|
||||||
|
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
|
||||||
|
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
|
||||||
|
|
||||||
if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
|
if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
|
||||||
message(STATUS "Building for current host CPU")
|
message(STATUS "Building for current host CPU")
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
|
||||||
@ -199,15 +237,7 @@ else()
|
|||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(OPTIMISE)
|
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
|
||||||
set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
|
|
||||||
set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
|
|
||||||
else()
|
|
||||||
set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
|
|
||||||
set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
|
|
||||||
endif(OPTIMISE)
|
|
||||||
|
|
||||||
if(NOT RELEASE_BUILD)
|
|
||||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
|
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
|
||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
|
||||||
endif()
|
endif()
|
||||||
@ -297,6 +327,11 @@ if (CXX_UNUSED_CONST_VAR)
|
|||||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# gcc 6 complains about type attributes that get ignored, like alignment
|
||||||
|
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
|
||||||
|
if (CXX_IGNORED_ATTR)
|
||||||
|
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
|
||||||
|
endif()
|
||||||
|
|
||||||
# note this for later
|
# note this for later
|
||||||
# g++ doesn't have this flag but clang does
|
# g++ doesn't have this flag but clang does
|
||||||
@ -438,15 +473,14 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/limex_simd128.c
|
src/nfa/limex_simd128.c
|
||||||
src/nfa/limex_simd256.c
|
src/nfa/limex_simd256.c
|
||||||
src/nfa/limex_simd384.c
|
src/nfa/limex_simd384.c
|
||||||
src/nfa/limex_simd512a.c
|
src/nfa/limex_simd512.c
|
||||||
src/nfa/limex_simd512b.c
|
|
||||||
src/nfa/limex_simd512c.c
|
|
||||||
src/nfa/limex.h
|
src/nfa/limex.h
|
||||||
src/nfa/limex_common_impl.h
|
src/nfa/limex_common_impl.h
|
||||||
src/nfa/limex_context.h
|
src/nfa/limex_context.h
|
||||||
src/nfa/limex_internal.h
|
src/nfa/limex_internal.h
|
||||||
src/nfa/limex_runtime.h
|
src/nfa/limex_runtime.h
|
||||||
src/nfa/limex_runtime_impl.h
|
src/nfa/limex_runtime_impl.h
|
||||||
|
src/nfa/limex_shuffle.h
|
||||||
src/nfa/limex_state_impl.h
|
src/nfa/limex_state_impl.h
|
||||||
src/nfa/mpv.h
|
src/nfa/mpv.h
|
||||||
src/nfa/mpv.c
|
src/nfa/mpv.c
|
||||||
@ -477,9 +511,18 @@ set (hs_exec_SRCS
|
|||||||
src/nfa/repeat.c
|
src/nfa/repeat.c
|
||||||
src/nfa/repeat.h
|
src/nfa/repeat.h
|
||||||
src/nfa/repeat_internal.h
|
src/nfa/repeat_internal.h
|
||||||
|
src/nfa/sheng.c
|
||||||
|
src/nfa/sheng.h
|
||||||
|
src/nfa/sheng_defs.h
|
||||||
|
src/nfa/sheng_impl.h
|
||||||
|
src/nfa/sheng_impl4.h
|
||||||
|
src/nfa/sheng_internal.h
|
||||||
src/nfa/shufti_common.h
|
src/nfa/shufti_common.h
|
||||||
src/nfa/shufti.c
|
src/nfa/shufti.c
|
||||||
src/nfa/shufti.h
|
src/nfa/shufti.h
|
||||||
|
src/nfa/tamarama.c
|
||||||
|
src/nfa/tamarama.h
|
||||||
|
src/nfa/tamarama_internal.h
|
||||||
src/nfa/truffle_common.h
|
src/nfa/truffle_common.h
|
||||||
src/nfa/truffle.c
|
src/nfa/truffle.c
|
||||||
src/nfa/truffle.h
|
src/nfa/truffle.h
|
||||||
@ -495,7 +538,6 @@ set (hs_exec_SRCS
|
|||||||
src/rose/block.c
|
src/rose/block.c
|
||||||
src/rose/catchup.h
|
src/rose/catchup.h
|
||||||
src/rose/catchup.c
|
src/rose/catchup.c
|
||||||
src/rose/eod.c
|
|
||||||
src/rose/infix.h
|
src/rose/infix.h
|
||||||
src/rose/init.h
|
src/rose/init.h
|
||||||
src/rose/init.c
|
src/rose/init.c
|
||||||
@ -503,6 +545,7 @@ set (hs_exec_SRCS
|
|||||||
src/rose/match.h
|
src/rose/match.h
|
||||||
src/rose/match.c
|
src/rose/match.c
|
||||||
src/rose/miracle.h
|
src/rose/miracle.h
|
||||||
|
src/rose/program_runtime.c
|
||||||
src/rose/program_runtime.h
|
src/rose/program_runtime.h
|
||||||
src/rose/runtime.h
|
src/rose/runtime.h
|
||||||
src/rose/rose.h
|
src/rose/rose.h
|
||||||
@ -510,6 +553,7 @@ set (hs_exec_SRCS
|
|||||||
src/rose/rose_program.h
|
src/rose/rose_program.h
|
||||||
src/rose/rose_types.h
|
src/rose/rose_types.h
|
||||||
src/rose/rose_common.h
|
src/rose/rose_common.h
|
||||||
|
src/rose/validate_mask.h
|
||||||
src/util/bitutils.h
|
src/util/bitutils.h
|
||||||
src/util/exhaust.h
|
src/util/exhaust.h
|
||||||
src/util/fatbit.h
|
src/util/fatbit.h
|
||||||
@ -524,11 +568,8 @@ set (hs_exec_SRCS
|
|||||||
src/util/pqueue.h
|
src/util/pqueue.h
|
||||||
src/util/scatter.h
|
src/util/scatter.h
|
||||||
src/util/scatter_runtime.h
|
src/util/scatter_runtime.h
|
||||||
src/util/shuffle.h
|
|
||||||
src/util/shuffle_ssse3.h
|
|
||||||
src/util/simd_utils.h
|
src/util/simd_utils.h
|
||||||
src/util/simd_utils_ssse3.h
|
src/util/simd_utils.c
|
||||||
src/util/simd_utils_ssse3.c
|
|
||||||
src/util/state_compress.h
|
src/util/state_compress.h
|
||||||
src/util/state_compress.c
|
src/util/state_compress.c
|
||||||
src/util/unaligned.h
|
src/util/unaligned.h
|
||||||
@ -597,11 +638,15 @@ SET (hs_SRCS
|
|||||||
src/hwlm/noodle_build.h
|
src/hwlm/noodle_build.h
|
||||||
src/hwlm/noodle_internal.h
|
src/hwlm/noodle_internal.h
|
||||||
src/nfa/accel.h
|
src/nfa/accel.h
|
||||||
|
src/nfa/accel_dfa_build_strat.cpp
|
||||||
|
src/nfa/accel_dfa_build_strat.h
|
||||||
src/nfa/accelcompile.cpp
|
src/nfa/accelcompile.cpp
|
||||||
src/nfa/accelcompile.h
|
src/nfa/accelcompile.h
|
||||||
src/nfa/callback.h
|
src/nfa/callback.h
|
||||||
src/nfa/castlecompile.cpp
|
src/nfa/castlecompile.cpp
|
||||||
src/nfa/castlecompile.h
|
src/nfa/castlecompile.h
|
||||||
|
src/nfa/dfa_build_strat.cpp
|
||||||
|
src/nfa/dfa_build_strat.h
|
||||||
src/nfa/dfa_min.cpp
|
src/nfa/dfa_min.cpp
|
||||||
src/nfa/dfa_min.h
|
src/nfa/dfa_min.h
|
||||||
src/nfa/goughcompile.cpp
|
src/nfa/goughcompile.cpp
|
||||||
@ -613,8 +658,6 @@ SET (hs_SRCS
|
|||||||
src/nfa/mcclellan_internal.h
|
src/nfa/mcclellan_internal.h
|
||||||
src/nfa/mcclellancompile.cpp
|
src/nfa/mcclellancompile.cpp
|
||||||
src/nfa/mcclellancompile.h
|
src/nfa/mcclellancompile.h
|
||||||
src/nfa/mcclellancompile_accel.cpp
|
|
||||||
src/nfa/mcclellancompile_accel.h
|
|
||||||
src/nfa/mcclellancompile_util.cpp
|
src/nfa/mcclellancompile_util.cpp
|
||||||
src/nfa/mcclellancompile_util.h
|
src/nfa/mcclellancompile_util.h
|
||||||
src/nfa/limex_compile.cpp
|
src/nfa/limex_compile.cpp
|
||||||
@ -639,8 +682,13 @@ SET (hs_SRCS
|
|||||||
src/nfa/repeat_internal.h
|
src/nfa/repeat_internal.h
|
||||||
src/nfa/repeatcompile.cpp
|
src/nfa/repeatcompile.cpp
|
||||||
src/nfa/repeatcompile.h
|
src/nfa/repeatcompile.h
|
||||||
|
src/nfa/sheng_internal.h
|
||||||
|
src/nfa/shengcompile.cpp
|
||||||
|
src/nfa/shengcompile.h
|
||||||
src/nfa/shufticompile.cpp
|
src/nfa/shufticompile.cpp
|
||||||
src/nfa/shufticompile.h
|
src/nfa/shufticompile.h
|
||||||
|
src/nfa/tamaramacompile.cpp
|
||||||
|
src/nfa/tamaramacompile.h
|
||||||
src/nfa/trufflecompile.cpp
|
src/nfa/trufflecompile.cpp
|
||||||
src/nfa/trufflecompile.h
|
src/nfa/trufflecompile.h
|
||||||
src/nfagraph/ng.cpp
|
src/nfagraph/ng.cpp
|
||||||
@ -746,6 +794,8 @@ SET (hs_SRCS
|
|||||||
src/nfagraph/ng_util.h
|
src/nfagraph/ng_util.h
|
||||||
src/nfagraph/ng_vacuous.cpp
|
src/nfagraph/ng_vacuous.cpp
|
||||||
src/nfagraph/ng_vacuous.h
|
src/nfagraph/ng_vacuous.h
|
||||||
|
src/nfagraph/ng_violet.cpp
|
||||||
|
src/nfagraph/ng_violet.h
|
||||||
src/nfagraph/ng_width.cpp
|
src/nfagraph/ng_width.cpp
|
||||||
src/nfagraph/ng_width.h
|
src/nfagraph/ng_width.h
|
||||||
src/parser/AsciiComponentClass.cpp
|
src/parser/AsciiComponentClass.cpp
|
||||||
@ -825,6 +875,10 @@ SET (hs_SRCS
|
|||||||
src/rose/rose_build_compile.cpp
|
src/rose/rose_build_compile.cpp
|
||||||
src/rose/rose_build_convert.cpp
|
src/rose/rose_build_convert.cpp
|
||||||
src/rose/rose_build_convert.h
|
src/rose/rose_build_convert.h
|
||||||
|
src/rose/rose_build_exclusive.cpp
|
||||||
|
src/rose/rose_build_exclusive.h
|
||||||
|
src/rose/rose_build_groups.cpp
|
||||||
|
src/rose/rose_build_groups.h
|
||||||
src/rose/rose_build_impl.h
|
src/rose/rose_build_impl.h
|
||||||
src/rose/rose_build_infix.cpp
|
src/rose/rose_build_infix.cpp
|
||||||
src/rose/rose_build_infix.h
|
src/rose/rose_build_infix.h
|
||||||
@ -853,6 +907,8 @@ SET (hs_SRCS
|
|||||||
src/util/charreach.cpp
|
src/util/charreach.cpp
|
||||||
src/util/charreach.h
|
src/util/charreach.h
|
||||||
src/util/charreach_util.h
|
src/util/charreach_util.h
|
||||||
|
src/util/clique.cpp
|
||||||
|
src/util/clique.h
|
||||||
src/util/compare.h
|
src/util/compare.h
|
||||||
src/util/compile_context.cpp
|
src/util/compile_context.cpp
|
||||||
src/util/compile_context.h
|
src/util/compile_context.h
|
||||||
@ -878,7 +934,6 @@ SET (hs_SRCS
|
|||||||
src/util/report_manager.cpp
|
src/util/report_manager.cpp
|
||||||
src/util/report_manager.h
|
src/util/report_manager.h
|
||||||
src/util/simd_utils.h
|
src/util/simd_utils.h
|
||||||
src/util/simd_utils_ssse3.h
|
|
||||||
src/util/target_info.cpp
|
src/util/target_info.cpp
|
||||||
src/util/target_info.h
|
src/util/target_info.h
|
||||||
src/util/ue2_containers.h
|
src/util/ue2_containers.h
|
||||||
@ -916,6 +971,10 @@ set(hs_dump_SRCS
|
|||||||
src/nfa/nfa_dump_dispatch.cpp
|
src/nfa/nfa_dump_dispatch.cpp
|
||||||
src/nfa/nfa_dump_internal.cpp
|
src/nfa/nfa_dump_internal.cpp
|
||||||
src/nfa/nfa_dump_internal.h
|
src/nfa/nfa_dump_internal.h
|
||||||
|
src/nfa/shengdump.cpp
|
||||||
|
src/nfa/shengdump.h
|
||||||
|
src/nfa/tamarama_dump.cpp
|
||||||
|
src/nfa/tamarama_dump.h
|
||||||
src/parser/dump.cpp
|
src/parser/dump.cpp
|
||||||
src/parser/dump.h
|
src/parser/dump.h
|
||||||
src/parser/position_dump.h
|
src/parser/position_dump.h
|
||||||
@ -941,7 +1000,7 @@ endif()
|
|||||||
# choose which ones to build
|
# choose which ones to build
|
||||||
|
|
||||||
set (LIB_VERSION ${HS_VERSION})
|
set (LIB_VERSION ${HS_VERSION})
|
||||||
set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})
|
set (LIB_SOVERSION ${HS_MAJOR_VERSION})
|
||||||
|
|
||||||
add_library(hs_exec OBJECT ${hs_exec_SRCS})
|
add_library(hs_exec OBJECT ${hs_exec_SRCS})
|
||||||
|
|
||||||
|
18
cmake/formatdate.py
Executable file
18
cmake/formatdate.py
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
def usage():
|
||||||
|
print("Usage:", os.path.basename(sys.argv[0]), "<seconds from epoch>")
|
||||||
|
|
||||||
|
if len(sys.argv) != 2:
|
||||||
|
usage()
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
ts = sys.argv[1]
|
||||||
|
|
||||||
|
build_date = datetime.datetime.utcfromtimestamp(int(ts))
|
||||||
|
|
||||||
|
print(build_date.strftime("%Y-%m-%d"))
|
@ -77,7 +77,7 @@ static int eventHandler(unsigned int id, unsigned long long from,
|
|||||||
* length with its length. Returns NULL on failure.
|
* length with its length. Returns NULL on failure.
|
||||||
*/
|
*/
|
||||||
static char *readInputData(const char *inputFN, unsigned int *length) {
|
static char *readInputData(const char *inputFN, unsigned int *length) {
|
||||||
FILE *f = fopen(inputFN, "r");
|
FILE *f = fopen(inputFN, "rb");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
|
fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -52,7 +52,6 @@
|
|||||||
#include "parser/shortcut_literal.h"
|
#include "parser/shortcut_literal.h"
|
||||||
#include "parser/unsupported.h"
|
#include "parser/unsupported.h"
|
||||||
#include "parser/utf8_validate.h"
|
#include "parser/utf8_validate.h"
|
||||||
#include "smallwrite/smallwrite_build.h"
|
|
||||||
#include "rose/rose_build.h"
|
#include "rose/rose_build.h"
|
||||||
#include "rose/rose_build_dump.h"
|
#include "rose/rose_build_dump.h"
|
||||||
#include "som/slot_manager_dump.h"
|
#include "som/slot_manager_dump.h"
|
||||||
@ -304,15 +303,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* avoid building a smwr if just a pure floating case. */
|
|
||||||
if (!roseIsPureLiteral(rose.get())) {
|
|
||||||
u32 qual = roseQuality(rose.get());
|
|
||||||
auto smwr = ng.smwr->build(qual);
|
|
||||||
if (smwr) {
|
|
||||||
rose = roseAddSmallWrite(rose.get(), smwr.get());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
|
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
|
||||||
dumpReportManager(ng.rm, ng.cc.grey);
|
dumpReportManager(ng.rm, ng.cc.grey);
|
||||||
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -42,6 +42,7 @@ using std::string;
|
|||||||
|
|
||||||
static const char failureNoMemory[] = "Unable to allocate memory.";
|
static const char failureNoMemory[] = "Unable to allocate memory.";
|
||||||
static const char failureInternal[] = "Internal error.";
|
static const char failureInternal[] = "Internal error.";
|
||||||
|
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
|
||||||
|
|
||||||
extern const hs_compile_error_t hs_enomem = {
|
extern const hs_compile_error_t hs_enomem = {
|
||||||
const_cast<char *>(failureNoMemory), 0
|
const_cast<char *>(failureNoMemory), 0
|
||||||
@ -49,6 +50,9 @@ extern const hs_compile_error_t hs_enomem = {
|
|||||||
extern const hs_compile_error_t hs_einternal = {
|
extern const hs_compile_error_t hs_einternal = {
|
||||||
const_cast<char *>(failureInternal), 0
|
const_cast<char *>(failureInternal), 0
|
||||||
};
|
};
|
||||||
|
extern const hs_compile_error_t hs_badalloc = {
|
||||||
|
const_cast<char *>(failureBadAlloc), 0
|
||||||
|
};
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
@ -56,8 +60,18 @@ hs_compile_error_t *generateCompileError(const string &err, int expression) {
|
|||||||
hs_compile_error_t *ret =
|
hs_compile_error_t *ret =
|
||||||
(struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
|
(struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
hs_error_t e = hs_check_alloc(ret);
|
||||||
|
if (e != HS_SUCCESS) {
|
||||||
|
hs_misc_free(ret);
|
||||||
|
return const_cast<hs_compile_error_t *>(&hs_badalloc);
|
||||||
|
}
|
||||||
char *msg = (char *)hs_misc_alloc(err.size() + 1);
|
char *msg = (char *)hs_misc_alloc(err.size() + 1);
|
||||||
if (msg) {
|
if (msg) {
|
||||||
|
e = hs_check_alloc(msg);
|
||||||
|
if (e != HS_SUCCESS) {
|
||||||
|
hs_misc_free(msg);
|
||||||
|
return const_cast<hs_compile_error_t *>(&hs_badalloc);
|
||||||
|
}
|
||||||
memcpy(msg, err.c_str(), err.size() + 1);
|
memcpy(msg, err.c_str(), err.size() + 1);
|
||||||
ret->message = msg;
|
ret->message = msg;
|
||||||
} else {
|
} else {
|
||||||
@ -83,7 +97,8 @@ void freeCompileError(hs_compile_error_t *error) {
|
|||||||
if (!error) {
|
if (!error) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (error == &hs_enomem || error == &hs_einternal) {
|
if (error == &hs_enomem || error == &hs_einternal ||
|
||||||
|
error == &hs_badalloc) {
|
||||||
// These are not allocated.
|
// These are not allocated.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -458,33 +458,16 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
|||||||
}
|
}
|
||||||
*info = NULL;
|
*info = NULL;
|
||||||
|
|
||||||
if (!bytes || length < sizeof(struct hs_database)) {
|
// Decode and check the header
|
||||||
return HS_INVALID;
|
hs_database_t header;
|
||||||
|
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||||
|
if (ret != HS_SUCCESS) {
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 *buf = (const u32 *)bytes;
|
u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode));
|
||||||
|
|
||||||
u32 magic = unaligned_load_u32(buf++);
|
return print_database_string(info, header.version, header.platform, mode);
|
||||||
if (magic != HS_DB_MAGIC) {
|
|
||||||
return HS_INVALID;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 version = unaligned_load_u32(buf++);
|
|
||||||
|
|
||||||
buf++; /* length */
|
|
||||||
|
|
||||||
platform_t plat;
|
|
||||||
plat = unaligned_load_u64a(buf);
|
|
||||||
buf += 2;
|
|
||||||
|
|
||||||
buf++; /* crc */
|
|
||||||
buf++; /* reserved 0 */
|
|
||||||
buf++; /* reserved 1 */
|
|
||||||
|
|
||||||
const char *t_raw = (const char *)buf;
|
|
||||||
u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));
|
|
||||||
|
|
||||||
return print_database_string(info, version, plat, mode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HS_PUBLIC_API
|
HS_PUBLIC_API
|
||||||
|
102
src/fdr/fdr.c
102
src/fdr/fdr.c
@ -36,7 +36,6 @@
|
|||||||
#include "teddy.h"
|
#include "teddy.h"
|
||||||
#include "teddy_internal.h"
|
#include "teddy_internal.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
/** \brief number of bytes processed in each iteration */
|
/** \brief number of bytes processed in each iteration */
|
||||||
#define ITER_BYTES 16
|
#define ITER_BYTES 16
|
||||||
@ -132,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
|
|||||||
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
|
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
|
||||||
tmp &= fdr->domainMask;
|
tmp &= fdr->domainMask;
|
||||||
s = *((const m128 *)ft + tmp);
|
s = *((const m128 *)ft + tmp);
|
||||||
s = shiftRight8Bits(s);
|
s = rshiftbyte_m128(s, 1);
|
||||||
} else {
|
} else {
|
||||||
s = fdr->start;
|
s = fdr->start;
|
||||||
}
|
}
|
||||||
@ -186,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
m128 st14 = *(const m128 *)(ft + v14*8);
|
m128 st14 = *(const m128 *)(ft + v14*8);
|
||||||
m128 st15 = *(const m128 *)(ft + v15*8);
|
m128 st15 = *(const m128 *)(ft + v15*8);
|
||||||
|
|
||||||
st1 = byteShiftLeft128(st1, 1);
|
st1 = lshiftbyte_m128(st1, 1);
|
||||||
st2 = byteShiftLeft128(st2, 2);
|
st2 = lshiftbyte_m128(st2, 2);
|
||||||
st3 = byteShiftLeft128(st3, 3);
|
st3 = lshiftbyte_m128(st3, 3);
|
||||||
st4 = byteShiftLeft128(st4, 4);
|
st4 = lshiftbyte_m128(st4, 4);
|
||||||
st5 = byteShiftLeft128(st5, 5);
|
st5 = lshiftbyte_m128(st5, 5);
|
||||||
st6 = byteShiftLeft128(st6, 6);
|
st6 = lshiftbyte_m128(st6, 6);
|
||||||
st7 = byteShiftLeft128(st7, 7);
|
st7 = lshiftbyte_m128(st7, 7);
|
||||||
st9 = byteShiftLeft128(st9, 1);
|
st9 = lshiftbyte_m128(st9, 1);
|
||||||
st10 = byteShiftLeft128(st10, 2);
|
st10 = lshiftbyte_m128(st10, 2);
|
||||||
st11 = byteShiftLeft128(st11, 3);
|
st11 = lshiftbyte_m128(st11, 3);
|
||||||
st12 = byteShiftLeft128(st12, 4);
|
st12 = lshiftbyte_m128(st12, 4);
|
||||||
st13 = byteShiftLeft128(st13, 5);
|
st13 = lshiftbyte_m128(st13, 5);
|
||||||
st14 = byteShiftLeft128(st14, 6);
|
st14 = lshiftbyte_m128(st14, 6);
|
||||||
st15 = byteShiftLeft128(st15, 7);
|
st15 = lshiftbyte_m128(st15, 7);
|
||||||
|
|
||||||
*s = or128(*s, st0);
|
*s = or128(*s, st0);
|
||||||
*s = or128(*s, st1);
|
*s = or128(*s, st1);
|
||||||
@ -210,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
*s = or128(*s, st6);
|
*s = or128(*s, st6);
|
||||||
*s = or128(*s, st7);
|
*s = or128(*s, st7);
|
||||||
*conf0 = movq(*s);
|
*conf0 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf0 ^= ~0ULL;
|
*conf0 ^= ~0ULL;
|
||||||
|
|
||||||
*s = or128(*s, st8);
|
*s = or128(*s, st8);
|
||||||
@ -222,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
*s = or128(*s, st14);
|
*s = or128(*s, st14);
|
||||||
*s = or128(*s, st15);
|
*s = or128(*s, st15);
|
||||||
*conf8 = movq(*s);
|
*conf8 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf8 ^= ~0ULL;
|
*conf8 ^= ~0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
m128 st12 = *(const m128 *)(ft + v12*8);
|
m128 st12 = *(const m128 *)(ft + v12*8);
|
||||||
m128 st14 = *(const m128 *)(ft + v14*8);
|
m128 st14 = *(const m128 *)(ft + v14*8);
|
||||||
|
|
||||||
st2 = byteShiftLeft128(st2, 2);
|
st2 = lshiftbyte_m128(st2, 2);
|
||||||
st4 = byteShiftLeft128(st4, 4);
|
st4 = lshiftbyte_m128(st4, 4);
|
||||||
st6 = byteShiftLeft128(st6, 6);
|
st6 = lshiftbyte_m128(st6, 6);
|
||||||
st10 = byteShiftLeft128(st10, 2);
|
st10 = lshiftbyte_m128(st10, 2);
|
||||||
st12 = byteShiftLeft128(st12, 4);
|
st12 = lshiftbyte_m128(st12, 4);
|
||||||
st14 = byteShiftLeft128(st14, 6);
|
st14 = lshiftbyte_m128(st14, 6);
|
||||||
|
|
||||||
*s = or128(*s, st0);
|
*s = or128(*s, st0);
|
||||||
*s = or128(*s, st2);
|
*s = or128(*s, st2);
|
||||||
*s = or128(*s, st4);
|
*s = or128(*s, st4);
|
||||||
*s = or128(*s, st6);
|
*s = or128(*s, st6);
|
||||||
*conf0 = movq(*s);
|
*conf0 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf0 ^= ~0ULL;
|
*conf0 ^= ~0ULL;
|
||||||
|
|
||||||
*s = or128(*s, st8);
|
*s = or128(*s, st8);
|
||||||
@ -273,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
*s = or128(*s, st12);
|
*s = or128(*s, st12);
|
||||||
*s = or128(*s, st14);
|
*s = or128(*s, st14);
|
||||||
*conf8 = movq(*s);
|
*conf8 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf8 ^= ~0ULL;
|
*conf8 ^= ~0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,27 +295,26 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
|
|||||||
m128 st8 = *(const m128 *)(ft + v8*8);
|
m128 st8 = *(const m128 *)(ft + v8*8);
|
||||||
m128 st12 = *(const m128 *)(ft + v12*8);
|
m128 st12 = *(const m128 *)(ft + v12*8);
|
||||||
|
|
||||||
st4 = byteShiftLeft128(st4, 4);
|
st4 = lshiftbyte_m128(st4, 4);
|
||||||
st12 = byteShiftLeft128(st12, 4);
|
st12 = lshiftbyte_m128(st12, 4);
|
||||||
|
|
||||||
*s = or128(*s, st0);
|
*s = or128(*s, st0);
|
||||||
*s = or128(*s, st4);
|
*s = or128(*s, st4);
|
||||||
*conf0 = movq(*s);
|
*conf0 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf0 ^= ~0ULL;
|
*conf0 ^= ~0ULL;
|
||||||
|
|
||||||
*s = or128(*s, st8);
|
*s = or128(*s, st8);
|
||||||
*s = or128(*s, st12);
|
*s = or128(*s, st12);
|
||||||
*conf8 = movq(*s);
|
*conf8 = movq(*s);
|
||||||
*s = byteShiftRight128(*s, 8);
|
*s = rshiftbyte_m128(*s, 8);
|
||||||
*conf8 ^= ~0ULL;
|
*conf8 ^= ~0ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
|
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
||||||
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
const u32 *confBase, const struct FDR_Runtime_Args *a,
|
||||||
const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id,
|
const u8 *ptr, u32 *last_match_id, struct zone *z) {
|
||||||
struct zone *z) {
|
|
||||||
const u8 bucket = 8;
|
const u8 bucket = 8;
|
||||||
const u8 pullback = 1;
|
const u8 pullback = 1;
|
||||||
|
|
||||||
@ -352,13 +350,13 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
*last_match_id = id;
|
*last_match_id = id;
|
||||||
*controlVal = a->cb(ptr_main + byte - a->buf,
|
*control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
|
||||||
ptr_main + byte - a->buf, id, a->ctxt);
|
id, a->ctxt);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
|
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
|
||||||
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback,
|
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
|
||||||
control, last_match_id, confVal);
|
last_match_id, confVal);
|
||||||
} while (unlikely(!!*conf));
|
} while (unlikely(!!*conf));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -681,9 +679,9 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
|
|||||||
itPtr += ITER_BYTES) { \
|
itPtr += ITER_BYTES) { \
|
||||||
if (unlikely(itPtr > tryFloodDetect)) { \
|
if (unlikely(itPtr > tryFloodDetect)) { \
|
||||||
tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
|
tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
|
||||||
&floodBackoff, &controlVal, \
|
&floodBackoff, &control, \
|
||||||
ITER_BYTES); \
|
ITER_BYTES); \
|
||||||
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
|
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
|
||||||
return HWLM_TERMINATED; \
|
return HWLM_TERMINATED; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -692,11 +690,11 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
|
|||||||
u64a conf8; \
|
u64a conf8; \
|
||||||
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \
|
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \
|
||||||
ft, &conf0, &conf8, &s); \
|
ft, &conf0, &conf8, &s); \
|
||||||
do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr, \
|
do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \
|
||||||
control, &last_match_id, zz); \
|
&last_match_id, zz); \
|
||||||
do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr, \
|
do_confirm_fdr(&conf8, 8, &control, confBase, a, itPtr, \
|
||||||
control, &last_match_id, zz); \
|
&last_match_id, zz); \
|
||||||
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
|
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
|
||||||
return HWLM_TERMINATED; \
|
return HWLM_TERMINATED; \
|
||||||
} \
|
} \
|
||||||
} /* end for loop */ \
|
} /* end for loop */ \
|
||||||
@ -704,9 +702,8 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
|
|||||||
|
|
||||||
static never_inline
|
static never_inline
|
||||||
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
hwlm_group_t control) {
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
u32 last_match_id = INVALID_MATCH_ID;
|
u32 last_match_id = INVALID_MATCH_ID;
|
||||||
u64a domain_mask_adjusted = fdr->domainMask << 1;
|
u64a domain_mask_adjusted = fdr->domainMask << 1;
|
||||||
@ -771,7 +768,10 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
|||||||
#define ONLY_AVX2(func) NULL
|
#define ONLY_AVX2(func) NULL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
|
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
|
||||||
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
static const FDRFUNCTYPE funcs[] = {
|
static const FDRFUNCTYPE funcs[] = {
|
||||||
fdr_engine_exec,
|
fdr_engine_exec,
|
||||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
|
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
|
||||||
@ -814,7 +814,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
|||||||
start,
|
start,
|
||||||
cb,
|
cb,
|
||||||
ctxt,
|
ctxt,
|
||||||
&groups,
|
|
||||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
@ -822,7 +821,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
|||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
assert(funcs[fdr->engineID]);
|
assert(funcs[fdr->engineID]);
|
||||||
return funcs[fdr->engineID](fdr, &a);
|
return funcs[fdr->engineID](fdr, &a, groups);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -840,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
|||||||
start,
|
start,
|
||||||
cb,
|
cb,
|
||||||
ctxt,
|
ctxt,
|
||||||
&groups,
|
|
||||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||||
/* we are guaranteed to always have 16 initialised bytes at the end of
|
/* we are guaranteed to always have 16 initialised bytes at the end of
|
||||||
* the history buffer (they may be garbage). */
|
* the history buffer (they may be garbage). */
|
||||||
@ -853,7 +851,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
|||||||
ret = HWLM_SUCCESS;
|
ret = HWLM_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
assert(funcs[fdr->engineID]);
|
assert(funcs[fdr->engineID]);
|
||||||
ret = funcs[fdr->engineID](fdr, &a);
|
ret = funcs[fdr->engineID](fdr, &a, groups);
|
||||||
}
|
}
|
||||||
|
|
||||||
fdrPackState(fdr, &a, stream_state);
|
fdrPackState(fdr, &a, stream_state);
|
||||||
|
@ -81,7 +81,7 @@ private:
|
|||||||
void dumpMasks(const u8 *defaultMask);
|
void dumpMasks(const u8 *defaultMask);
|
||||||
#endif
|
#endif
|
||||||
void setupTab();
|
void setupTab();
|
||||||
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
|
aligned_unique_ptr<FDR> setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||||
void createInitialState(FDR *fdr);
|
void createInitialState(FDR *fdr);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -90,7 +90,7 @@ public:
|
|||||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
||||||
make_small(make_small_in) {}
|
make_small(make_small_in) {}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||||
};
|
};
|
||||||
|
|
||||||
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
|
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
|
||||||
@ -124,10 +124,8 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
|||||||
// Find the minimum length for the literals in this bucket.
|
// Find the minimum length for the literals in this bucket.
|
||||||
const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
|
const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
|
||||||
u32 min_len = ~0U;
|
u32 min_len = ~0U;
|
||||||
for (vector<LiteralIndex>::const_iterator it = bucket_lits.begin(),
|
for (const LiteralIndex &lit_idx : bucket_lits) {
|
||||||
ite = bucket_lits.end();
|
min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
|
||||||
it != ite; ++it) {
|
|
||||||
min_len = min(min_len, verify_u32(lits[*it].s.length()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
|
DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
|
||||||
@ -141,13 +139,12 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
aligned_unique_ptr<FDR>
|
||||||
|
FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||||
size_t tabSize = eng.getTabSizeBytes();
|
size_t tabSize = eng.getTabSizeBytes();
|
||||||
|
|
||||||
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
|
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||||
|
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||||
pair<u8 *, size_t> confirmTmp =
|
|
||||||
setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
|
||||||
|
|
||||||
assert(ISALIGNED_16(tabSize));
|
assert(ISALIGNED_16(tabSize));
|
||||||
assert(ISALIGNED_16(confirmTmp.second));
|
assert(ISALIGNED_16(confirmTmp.second));
|
||||||
@ -175,14 +172,12 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
|||||||
copy(tab.begin(), tab.end(), ptr);
|
copy(tab.begin(), tab.end(), ptr);
|
||||||
ptr += tabSize;
|
ptr += tabSize;
|
||||||
|
|
||||||
memcpy(ptr, confirmTmp.first, confirmTmp.second);
|
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
||||||
ptr += confirmTmp.second;
|
ptr += confirmTmp.second;
|
||||||
aligned_free(confirmTmp.first);
|
|
||||||
|
|
||||||
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
||||||
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
|
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
||||||
ptr += floodControlTmp.second;
|
ptr += floodControlTmp.second;
|
||||||
aligned_free(floodControlTmp.first);
|
|
||||||
|
|
||||||
/* we are allowing domains 9 to 15 only */
|
/* we are allowing domains 9 to 15 only */
|
||||||
assert(eng.bits > 8 && eng.bits < 16);
|
assert(eng.bits > 8 && eng.bits < 16);
|
||||||
@ -193,8 +188,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
|||||||
|
|
||||||
if (link.first) {
|
if (link.first) {
|
||||||
fdr->link = verify_u32(ptr - fdr_base);
|
fdr->link = verify_u32(ptr - fdr_base);
|
||||||
memcpy(ptr, link.first, link.second);
|
memcpy(ptr, link.first.get(), link.second);
|
||||||
aligned_free(link.first);
|
|
||||||
} else {
|
} else {
|
||||||
fdr->link = 0;
|
fdr->link = 0;
|
||||||
}
|
}
|
||||||
@ -217,13 +211,11 @@ struct LitOrder {
|
|||||||
if (len1 != len2) {
|
if (len1 != len2) {
|
||||||
return len1 < len2;
|
return len1 < len2;
|
||||||
} else {
|
} else {
|
||||||
string::const_reverse_iterator it1, it2;
|
auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
||||||
tie(it1, it2) =
|
if (p.first == i1s.rend()) {
|
||||||
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
|
||||||
if (it1 == i1s.rend()) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return *it1 < *it2;
|
return *p.first < *p.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -266,9 +258,8 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
||||||
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
#ifdef DEBUG_ASSIGNMENT
|
||||||
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
|
for (const auto &m : lenCounts) {
|
||||||
i != e; ++i) {
|
printf("l<%u>:%u ", m.first, m.second);
|
||||||
printf("l<%d>:%d ", i->first, i->second);
|
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
@ -324,12 +315,12 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
for (u32 k = j; k < nChunks; ++k) {
|
for (u32 k = j; k < nChunks; ++k) {
|
||||||
cnt += count[k];
|
cnt += count[k];
|
||||||
}
|
}
|
||||||
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
|
t[j][0] = {getScoreUtil(length[j], cnt), 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = 1; i < nb; i++) {
|
for (u32 i = 1; i < nb; i++) {
|
||||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
||||||
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
|
SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
|
||||||
u32 cnt = count[j];
|
u32 cnt = count[j];
|
||||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
||||||
SCORE score = getScoreUtil(length[j], cnt);
|
SCORE score = getScoreUtil(length[j], cnt);
|
||||||
@ -338,12 +329,12 @@ void FDRCompiler::assignStringsToBuckets() {
|
|||||||
}
|
}
|
||||||
score += t[k][i-1].first;
|
score += t[k][i-1].first;
|
||||||
if (score < best.first) {
|
if (score < best.first) {
|
||||||
best = make_pair(score, k);
|
best = {score, k};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
t[j][i] = best;
|
t[j][i] = best;
|
||||||
}
|
}
|
||||||
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
|
t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_ASSIGNMENT
|
#ifdef DEBUG_ASSIGNMENT
|
||||||
@ -405,8 +396,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
|||||||
distance = 4;
|
distance = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
|
for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
|
||||||
i != e; ++i) {
|
|
||||||
if (e - i > 5) {
|
if (e - i > 5) {
|
||||||
__builtin_prefetch(&lits[*(i + 5)]);
|
__builtin_prefetch(&lits[*(i + 5)]);
|
||||||
}
|
}
|
||||||
@ -460,31 +450,25 @@ void FDRCompiler::setupTab() {
|
|||||||
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
|
|
||||||
|
|
||||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||||
const vector<LiteralIndex> &vl = bucketToLits[b];
|
const vector<LiteralIndex> &vl = bucketToLits[b];
|
||||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||||
u32 bit = eng.getSchemeBit(b, pos);
|
u32 bit = eng.getSchemeBit(b, pos);
|
||||||
M2SET m2;
|
map<u32, ue2::unordered_set<u32>> m2;
|
||||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||||
if (done) {
|
if (done) {
|
||||||
clearbit(&defaultMask[0], bit);
|
clearbit(&defaultMask[0], bit);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
|
for (const auto &elem : m2) {
|
||||||
++i) {
|
u32 dc = elem.first;
|
||||||
u32 dc = i->first;
|
const ue2::unordered_set<u32> &mskSet = elem.second;
|
||||||
const ue2::unordered_set<u32> &mskSet = i->second;
|
|
||||||
u32 v = ~dc;
|
u32 v = ~dc;
|
||||||
do {
|
do {
|
||||||
u32 b2 = v & dc;
|
u32 b2 = v & dc;
|
||||||
for (ue2::unordered_set<u32>::const_iterator
|
for (const u32 &mskVal : mskSet) {
|
||||||
i2 = mskSet.begin(),
|
u32 val = (mskVal & ~dc) | b2;
|
||||||
e2 = mskSet.end();
|
|
||||||
i2 != e2; ++i2) {
|
|
||||||
u32 val = (*i2 & ~dc) | b2;
|
|
||||||
clearbit(tabIndexToMask(val), bit);
|
clearbit(tabIndexToMask(val), bit);
|
||||||
}
|
}
|
||||||
v = (v + (dc & -dc)) | ~dc;
|
v = (v + (dc & -dc)) | ~dc;
|
||||||
@ -502,7 +486,8 @@ void FDRCompiler::setupTab() {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
|
aligned_unique_ptr<FDR>
|
||||||
|
FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||||
assignStringsToBuckets();
|
assignStringsToBuckets();
|
||||||
setupTab();
|
setupTab();
|
||||||
return setupFDR(link);
|
return setupFDR(link);
|
||||||
@ -515,16 +500,15 @@ aligned_unique_ptr<FDR>
|
|||||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
||||||
const target_t &target, const Grey &grey, u32 hint,
|
const target_t &target, const Grey &grey, u32 hint,
|
||||||
hwlmStreamingControl *stream_control) {
|
hwlmStreamingControl *stream_control) {
|
||||||
pair<u8 *, size_t> link(nullptr, 0);
|
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
|
||||||
if (stream_control) {
|
if (stream_control) {
|
||||||
link = fdrBuildTableStreaming(lits, stream_control);
|
link = fdrBuildTableStreaming(lits, *stream_control);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||||
|
|
||||||
if (grey.fdrAllowTeddy) {
|
if (grey.fdrAllowTeddy) {
|
||||||
aligned_unique_ptr<FDR> fdr
|
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||||
= teddyBuildTableHinted(lits, make_small, hint, target, link);
|
|
||||||
if (fdr) {
|
if (fdr) {
|
||||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||||
return fdr;
|
return fdr;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -31,6 +31,7 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "hwlm/hwlm_literal.h"
|
#include "hwlm/hwlm_literal.h"
|
||||||
|
#include "util/alloc.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -44,7 +45,6 @@ namespace ue2 {
|
|||||||
// a pile of decorative typedefs
|
// a pile of decorative typedefs
|
||||||
// good for documentation purposes more than anything else
|
// good for documentation purposes more than anything else
|
||||||
typedef u32 LiteralIndex;
|
typedef u32 LiteralIndex;
|
||||||
typedef u32 ConfirmIndex;
|
|
||||||
typedef u32 SuffixPositionInString; // zero is last byte, counting back
|
typedef u32 SuffixPositionInString; // zero is last byte, counting back
|
||||||
// into the string
|
// into the string
|
||||||
typedef u32 BucketIndex;
|
typedef u32 BucketIndex;
|
||||||
@ -56,25 +56,22 @@ class EngineDescription;
|
|||||||
class FDREngineDescription;
|
class FDREngineDescription;
|
||||||
struct hwlmStreamingControl;
|
struct hwlmStreamingControl;
|
||||||
|
|
||||||
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
|
||||||
bool make_small);
|
|
||||||
|
|
||||||
std::pair<u8 *, size_t> setupFullMultiConfs(
|
|
||||||
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
||||||
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
|
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
|
||||||
bool make_small);
|
bool make_small);
|
||||||
|
|
||||||
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
||||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||||
// state; if we don't have a flood that's long enough we won't be in the
|
// state; if we don't have a flood that's long enough we won't be in the
|
||||||
// right state yet to allow blindly advancing
|
// right state yet to allow blindly advancing
|
||||||
std::pair<u8 *, size_t>
|
std::pair<aligned_unique_ptr<u8>, size_t>
|
||||||
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
||||||
const EngineDescription &eng);
|
const EngineDescription &eng);
|
||||||
|
|
||||||
std::pair<u8 *, size_t>
|
std::pair<aligned_unique_ptr<u8>, size_t>
|
||||||
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
||||||
hwlmStreamingControl *stream_control);
|
hwlmStreamingControl &stream_control);
|
||||||
|
|
||||||
static constexpr u32 HINT_INVALID = 0xffffffff;
|
static constexpr u32 HINT_INVALID = 0xffffffff;
|
||||||
|
|
||||||
|
@ -45,9 +45,10 @@ using namespace std;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
typedef u8 ConfSplitType;
|
using ConfSplitType = u8;
|
||||||
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
|
using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
|
||||||
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
|
using BC2CONF = map<BucketSplitPair,
|
||||||
|
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
|
||||||
|
|
||||||
// return the number of bytes beyond a length threshold in all strings in lits
|
// return the number of bytes beyond a length threshold in all strings in lits
|
||||||
static
|
static
|
||||||
@ -149,9 +150,9 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
|||||||
|
|
||||||
//#define FDR_CONFIRM_DUMP 1
|
//#define FDR_CONFIRM_DUMP 1
|
||||||
|
|
||||||
static
|
static pair<aligned_unique_ptr<FDRConfirm>, size_t>
|
||||||
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
||||||
bool applyOneCharOpt, bool make_small, bool make_confirm) {
|
bool make_small, bool make_confirm) {
|
||||||
vector<LitInfo> tmpLitInfo(lits.size());
|
vector<LitInfo> tmpLitInfo(lits.size());
|
||||||
CONF_TYPE andmsk;
|
CONF_TYPE andmsk;
|
||||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||||
@ -220,55 +221,61 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
#ifdef FDR_CONFIRM_DUMP
|
#ifdef FDR_CONFIRM_DUMP
|
||||||
// print out the literals reversed - makes it easier to line up analyses
|
// print out the literals reversed - makes it easier to line up analyses
|
||||||
// that are end-offset based
|
// that are end-offset based
|
||||||
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
|
for (const auto &m : res2lits) {
|
||||||
e = res2lits.end(); i != e; ++i) {
|
const u32 &hash = m.first;
|
||||||
u32 hash = i->first;
|
const vector<LiteralIndex> &vlidx = m.second;
|
||||||
vector<LiteralIndex> & vlidx = i->second;
|
if (vlidx.size() <= 1) {
|
||||||
if (vlidx.size() > 1) {
|
continue;
|
||||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
|
||||||
u32 min_len = lits[vlidx.front()].s.size();
|
|
||||||
vector<set<u8> > vsl; // contains the set of chars at each location
|
|
||||||
// reversed from the end
|
|
||||||
vsl.resize(1024);
|
|
||||||
u32 total_string_size = 0;
|
|
||||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
|
||||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
|
||||||
LiteralIndex litIdx = *i2;
|
|
||||||
total_string_size += lits[litIdx].s.size();
|
|
||||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
|
||||||
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
|
|
||||||
}
|
}
|
||||||
min_len = MIN(min_len, lits[litIdx].s.size());
|
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||||
|
size_t min_len = lits[vlidx.front()].s.size();
|
||||||
|
|
||||||
|
vector<set<u8>> vsl; // contains the set of chars at each location
|
||||||
|
// reversed from the end
|
||||||
|
|
||||||
|
for (const auto &litIdx : vlidx) {
|
||||||
|
const auto &lit = lits[litIdx];
|
||||||
|
if (lit.s.size() > vsl.size()) {
|
||||||
|
vsl.resize(lit.s.size());
|
||||||
|
}
|
||||||
|
for (size_t j = lit.s.size(); j != 0; j--) {
|
||||||
|
vsl[lit.s.size() - j].insert(lit.s[j - 1]);
|
||||||
|
}
|
||||||
|
min_len = min(min_len, lit.s.size());
|
||||||
}
|
}
|
||||||
printf("common ");
|
printf("common ");
|
||||||
for (u32 j = 0; j < min_len; j++) {
|
for (size_t j = 0; j < min_len; j++) {
|
||||||
if (vsl[j].size() == 1) {
|
if (vsl[j].size() == 1) {
|
||||||
printf("%02x", (u32)*vsl[j].begin());
|
printf("%02x", *vsl[j].begin());
|
||||||
} else {
|
} else {
|
||||||
printf("__");
|
printf("__");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
for (const auto &litIdx : vlidx) {
|
||||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
const auto &lit = lits[litIdx];
|
||||||
LiteralIndex litIdx = *i2;
|
printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
|
||||||
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
|
for (size_t j = lit.s.size(); j != 0; j--) {
|
||||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
size_t dist_from_end = lit.s.size() - j;
|
||||||
u32 dist_from_end = lits[litIdx].s.size() - j;
|
|
||||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||||
printf("__");
|
printf("__");
|
||||||
} else {
|
} else {
|
||||||
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
|
printf("%02x", lit.s[j - 1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
u32 total_compares = 0;
|
size_t total_compares = 0;
|
||||||
for (u32 j = 0; j < 1024; j++) { // naughty
|
for (const auto &v : vsl) {
|
||||||
total_compares += vsl[j].size();
|
total_compares += v.size();
|
||||||
}
|
}
|
||||||
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
|
size_t total_string_size = 0;
|
||||||
|
for (const auto &litIdx : vlidx) {
|
||||||
|
const auto &lit = lits[litIdx];
|
||||||
|
total_string_size += lit.s.size();
|
||||||
}
|
}
|
||||||
|
printf("Total compare load: %zu Total string size: %zu\n\n",
|
||||||
|
total_compares, total_string_size);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -281,7 +288,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||||
|
|
||||||
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
|
auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
|
||||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
fdrc->andmsk = andmsk;
|
fdrc->andmsk = andmsk;
|
||||||
@ -295,7 +302,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
fdrc->groups = gm;
|
fdrc->groups = gm;
|
||||||
|
|
||||||
// After the FDRConfirm, we have the lit index array.
|
// After the FDRConfirm, we have the lit index array.
|
||||||
u8 *fdrc_base = (u8 *)fdrc;
|
u8 *fdrc_base = (u8 *)fdrc.get();
|
||||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
u32 *bitsToLitIndex = (u32 *)ptr;
|
||||||
@ -307,14 +314,12 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
|
|
||||||
// Walk the map by hash value assigning indexes and laying out the
|
// Walk the map by hash value assigning indexes and laying out the
|
||||||
// elements (and their associated string confirm material) in memory.
|
// elements (and their associated string confirm material) in memory.
|
||||||
for (std::map<u32, vector<LiteralIndex> >::const_iterator
|
for (const auto &m : res2lits) {
|
||||||
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
|
const u32 hash = m.first;
|
||||||
const u32 hash = i->first;
|
const vector<LiteralIndex> &vlidx = m.second;
|
||||||
const vector<LiteralIndex> &vlidx = i->second;
|
bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
|
||||||
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
|
for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
|
||||||
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
|
LiteralIndex litIdx = *i;
|
||||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
|
||||||
LiteralIndex litIdx = *i2;
|
|
||||||
|
|
||||||
// Write LitInfo header.
|
// Write LitInfo header.
|
||||||
u8 *oldPtr = ptr;
|
u8 *oldPtr = ptr;
|
||||||
@ -333,7 +338,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||||
if (i2 + 1 == e2) {
|
if (next(i) == e) {
|
||||||
finalLI.next = 0x0;
|
finalLI.next = 0x0;
|
||||||
} else {
|
} else {
|
||||||
// our next field represents an adjustment on top of
|
// our next field represents an adjustment on top of
|
||||||
@ -348,14 +353,13 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
|||||||
assert((size_t)(ptr - fdrc_base) <= size);
|
assert((size_t)(ptr - fdrc_base) <= size);
|
||||||
}
|
}
|
||||||
|
|
||||||
*fdrc_p = fdrc;
|
|
||||||
|
|
||||||
// Return actual used size, not worst-case size. Must be rounded up to
|
// Return actual used size, not worst-case size. Must be rounded up to
|
||||||
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
||||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||||
alignof(FDRConfirm));
|
alignof(FDRConfirm));
|
||||||
assert(actual_size <= size);
|
assert(actual_size <= size);
|
||||||
return actual_size;
|
|
||||||
|
return {move(fdrc), actual_size};
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -377,12 +381,9 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
|||||||
u32 totalConfirmSize = 0;
|
u32 totalConfirmSize = 0;
|
||||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||||
if (!bucketToLits[b].empty()) {
|
if (!bucketToLits[b].empty()) {
|
||||||
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
|
vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
|
||||||
for (vector<LiteralIndex>::const_iterator
|
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
||||||
i = bucketToLits[b].begin(),
|
hwlmLiteral lit = lits[lit_idx]; // copy
|
||||||
e = bucketToLits[b].end();
|
|
||||||
i != e; ++i) {
|
|
||||||
hwlmLiteral lit = lits[*i]; // copy
|
|
||||||
// c is last char of this literal
|
// c is last char of this literal
|
||||||
u8 c = *(lit.s.rbegin());
|
u8 c = *(lit.s.rbegin());
|
||||||
|
|
||||||
@ -424,25 +425,26 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
||||||
if (!vl[c].empty()) {
|
if (vl[c].empty()) {
|
||||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
continue;
|
||||||
FDRConfirm *fdrc;
|
|
||||||
size_t size = getFDRConfirm(vl[c], &fdrc,
|
|
||||||
eng.typicallyHoldsOneCharLits(),
|
|
||||||
make_small, makeConfirm);
|
|
||||||
BucketSplitPair p = make_pair(b, c);
|
|
||||||
bc2Conf[p] = make_pair(fdrc, size);
|
|
||||||
totalConfirmSize += size;
|
|
||||||
}
|
}
|
||||||
|
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||||
|
auto key = make_pair(b, c);
|
||||||
|
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
|
||||||
|
make_small, makeConfirm);
|
||||||
|
totalConfirmSize += fc.second;
|
||||||
|
assert(bc2Conf.find(key) == end(bc2Conf));
|
||||||
|
bc2Conf.emplace(key, move(fc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return totalConfirmSize;
|
return totalConfirmSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
pair<aligned_unique_ptr<u8>, size_t>
|
||||||
|
setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
||||||
const EngineDescription &eng,
|
const EngineDescription &eng,
|
||||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||||
bool make_small) {
|
bool make_small) {
|
||||||
BC2CONF bc2Conf;
|
BC2CONF bc2Conf;
|
||||||
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
|
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
|
||||||
@ -453,26 +455,24 @@ pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
|||||||
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
|
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
|
||||||
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
||||||
|
|
||||||
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
|
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *confBase = (u32 *)buf;
|
u32 *confBase = (u32 *)buf.get();
|
||||||
u8 *ptr = buf + totalConfSwitchSize;
|
u8 *ptr = buf.get() + totalConfSwitchSize;
|
||||||
|
|
||||||
for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e;
|
for (const auto &m : bc2Conf) {
|
||||||
++i) {
|
const BucketIndex &b = m.first.first;
|
||||||
const pair<FDRConfirm *, size_t> &p = i->second;
|
const u8 &c = m.first.second;
|
||||||
|
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
|
||||||
// confirm offset is relative to the base of this structure, now
|
// confirm offset is relative to the base of this structure, now
|
||||||
u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
|
u32 confirm_offset = verify_u32(ptr - buf.get());
|
||||||
memcpy(ptr, p.first, p.second);
|
memcpy(ptr, p.first.get(), p.second);
|
||||||
ptr += p.second;
|
ptr += p.second;
|
||||||
aligned_free(p.first);
|
|
||||||
BucketIndex b = i->first.first;
|
|
||||||
u8 c = i->first.second;
|
|
||||||
u32 idx = c * nBuckets + b;
|
u32 idx = c * nBuckets + b;
|
||||||
confBase[idx] = confirm_offset;
|
confBase[idx] = confirm_offset;
|
||||||
}
|
}
|
||||||
return make_pair(buf, totalSize);
|
return {move(buf), totalSize};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -105,7 +105,6 @@ struct FDR_Runtime_Args {
|
|||||||
size_t start_offset;
|
size_t start_offset;
|
||||||
HWLMCallback cb;
|
HWLMCallback cb;
|
||||||
void *ctxt;
|
void *ctxt;
|
||||||
hwlm_group_t *groups;
|
|
||||||
const u8 *firstFloodDetect;
|
const u8 *firstFloodDetect;
|
||||||
const u64a histBytes;
|
const u64a histBytes;
|
||||||
};
|
};
|
||||||
|
@ -94,14 +94,13 @@ static
|
|||||||
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||||
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
||||||
long_lits.reserve(lits.size());
|
long_lits.reserve(lits.size());
|
||||||
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
|
for (const auto &lit : lits) {
|
||||||
it != lits.end(); ++it) {
|
if (lit.s.length() > max_len) {
|
||||||
if (it->s.length() > max_len) {
|
hwlmLiteral tmp = lit; // copy
|
||||||
hwlmLiteral tmp = *it; // copy
|
tmp.s.pop_back();
|
||||||
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
|
|
||||||
tmp.id = 0; // recalc later
|
tmp.id = 0; // recalc later
|
||||||
tmp.groups = 0; // filled in later by hash bucket(s)
|
tmp.groups = 0; // filled in later by hash bucket(s)
|
||||||
long_lits.push_back(tmp);
|
long_lits.push_back(move(tmp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -112,15 +111,12 @@ bool setupLongLits(const vector<hwlmLiteral> &lits,
|
|||||||
// sort long_literals by caseful/caseless and in lexicographical order,
|
// sort long_literals by caseful/caseless and in lexicographical order,
|
||||||
// remove duplicates
|
// remove duplicates
|
||||||
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
||||||
vector<hwlmLiteral>::iterator new_end =
|
auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||||
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
|
||||||
long_lits.erase(new_end, long_lits.end());
|
long_lits.erase(new_end, long_lits.end());
|
||||||
|
|
||||||
// fill in ids; not currently used
|
// fill in ids; not currently used
|
||||||
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
|
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||||
e = long_lits.end();
|
i->id = distance(long_lits.begin(), i);
|
||||||
i != e; ++i) {
|
|
||||||
i->id = i - long_lits.begin();
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -143,23 +139,19 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
hashedPositions[m] = 0;
|
hashedPositions[m] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||||
e = long_lits.end();
|
|
||||||
i != e; ++i) {
|
|
||||||
if (i->nocase) {
|
if (i->nocase) {
|
||||||
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
|
boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
for (const auto &lit : long_lits) {
|
||||||
e = long_lits.end();
|
Modes m = lit.nocase ? CASELESS : CASEFUL;
|
||||||
i != e; ++i) {
|
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
|
||||||
MODES m = i->nocase ? CASELESS : CASEFUL;
|
|
||||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
|
||||||
hashedPositions[m]++;
|
hashedPositions[m]++;
|
||||||
}
|
}
|
||||||
positions[m] += i->s.size();
|
positions[m] += lit.s.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
||||||
@ -170,7 +162,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
|
|
||||||
#ifdef DEBUG_COMPILE
|
#ifdef DEBUG_COMPILE
|
||||||
printf("analyzeLits:\n");
|
printf("analyzeLits:\n");
|
||||||
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
|
for (Modes m = CASEFUL; m < MAX_MODES; m++) {
|
||||||
printf("mode %s boundary %d positions %d hashedPositions %d "
|
printf("mode %s boundary %d positions %d hashedPositions %d "
|
||||||
"hashEntries %d\n",
|
"hashEntries %d\n",
|
||||||
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
||||||
@ -181,7 +173,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
|
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
|
||||||
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,24 +195,21 @@ struct OffsetIDFromEndOrder {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||||
FDRSHashEntry *tab, size_t numEntries, MODES m,
|
FDRSHashEntry *tab, size_t numEntries, Modes mode,
|
||||||
map<u32, u32> &litToOffsetVal) {
|
map<u32, u32> &litToOffsetVal) {
|
||||||
const u32 nbits = lg2(numEntries);
|
const u32 nbits = lg2(numEntries);
|
||||||
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
||||||
map<u32, u64a> bucketToBitfield;
|
map<u32, u64a> bucketToBitfield;
|
||||||
|
|
||||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
for (const auto &lit : long_lits) {
|
||||||
e = long_lits.end();
|
if ((mode == CASELESS) != lit.nocase) {
|
||||||
i != e; ++i) {
|
|
||||||
const hwlmLiteral &l = *i;
|
|
||||||
if ((m == CASELESS) != i->nocase) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
|
||||||
u32 h = hashLit(l, j, max_len, m);
|
u32 h = hashLit(lit, j, max_len, mode);
|
||||||
u32 h_ent = h & ((1U << nbits) - 1);
|
u32 h_ent = h & ((1U << nbits) - 1);
|
||||||
u32 h_low = (h >> nbits) & 63;
|
u32 h_low = (h >> nbits) & 63;
|
||||||
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
|
bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
|
||||||
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -231,11 +220,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
|
|
||||||
// sweep out bitfield entries and save the results swapped accordingly
|
// sweep out bitfield entries and save the results swapped accordingly
|
||||||
// also, anything with bitfield entries is put in filledBuckets
|
// also, anything with bitfield entries is put in filledBuckets
|
||||||
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
|
for (const auto &m : bucketToBitfield) {
|
||||||
e = bucketToBitfield.end();
|
const u32 &bucket = m.first;
|
||||||
i != e; ++i) {
|
const u64a &contents = m.second;
|
||||||
u32 bucket = i->first;
|
|
||||||
u64a contents = i->second;
|
|
||||||
tab[bucket].bitfield = contents;
|
tab[bucket].bitfield = contents;
|
||||||
filledBuckets.set(bucket);
|
filledBuckets.set(bucket);
|
||||||
}
|
}
|
||||||
@ -243,12 +230,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
// store out all our chains based on free values in our hash table.
|
// store out all our chains based on free values in our hash table.
|
||||||
// find nearest free locations that are empty (there will always be more
|
// find nearest free locations that are empty (there will always be more
|
||||||
// entries than strings, at present)
|
// entries than strings, at present)
|
||||||
for (map<u32, deque<pair<u32, u32> > >::iterator
|
for (auto &m : bucketToLitOffPairs) {
|
||||||
i = bucketToLitOffPairs.begin(),
|
u32 bucket = m.first;
|
||||||
e = bucketToLitOffPairs.end();
|
deque<pair<u32, u32>> &d = m.second;
|
||||||
i != e; ++i) {
|
|
||||||
u32 bucket = i->first;
|
|
||||||
deque<pair<u32, u32> > &d = i->second;
|
|
||||||
|
|
||||||
// sort d by distance of the residual string (len minus our depth into
|
// sort d by distance of the residual string (len minus our depth into
|
||||||
// the string). We need to put the 'furthest back' string first...
|
// the string). We need to put the 'furthest back' string first...
|
||||||
@ -299,31 +283,30 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
|||||||
static
|
static
|
||||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
||||||
size_t rv = 0;
|
size_t rv = 0;
|
||||||
vector<hwlmLiteral>::const_iterator it, ite;
|
for (const auto &lit : lits) {
|
||||||
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
|
rv = max(rv, lit.msk.size());
|
||||||
rv = max(rv, it->msk.size());
|
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<u8 *, size_t>
|
pair<aligned_unique_ptr<u8>, size_t>
|
||||||
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||||
hwlmStreamingControl *stream_control) {
|
hwlmStreamingControl &stream_control) {
|
||||||
// refuse to compile if we are forced to have smaller than minimum
|
// refuse to compile if we are forced to have smaller than minimum
|
||||||
// history required for long-literal support, full stop
|
// history required for long-literal support, full stop
|
||||||
// otherwise, choose the maximum of the preferred history quantity
|
// otherwise, choose the maximum of the preferred history quantity
|
||||||
// (currently a fairly extravagant 32) or the already used history
|
// (currently a fairly extravagant 32) or the already used history
|
||||||
// quantity - subject to the limitation of stream_control->history_max
|
// quantity - subject to the limitation of stream_control.history_max
|
||||||
|
|
||||||
const size_t MIN_HISTORY_REQUIRED = 32;
|
const size_t MIN_HISTORY_REQUIRED = 32;
|
||||||
|
|
||||||
if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
|
if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
|
||||||
throw std::logic_error("Cannot set history to minimum history required");
|
throw std::logic_error("Cannot set history to minimum history required");
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t max_len =
|
size_t max_len =
|
||||||
MIN(stream_control->history_max,
|
MIN(stream_control.history_max,
|
||||||
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
|
MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
|
||||||
assert(max_len >= MIN_HISTORY_REQUIRED);
|
assert(max_len >= MIN_HISTORY_REQUIRED);
|
||||||
size_t max_mask_len = maxMaskLen(lits);
|
size_t max_mask_len = maxMaskLen(lits);
|
||||||
|
|
||||||
@ -334,10 +317,10 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
|||||||
|
|
||||||
// we want enough history to manage the longest literal and the longest
|
// we want enough history to manage the longest literal and the longest
|
||||||
// mask.
|
// mask.
|
||||||
stream_control->literal_history_required =
|
stream_control.literal_history_required =
|
||||||
max(maxLen(lits), max_mask_len) - 1;
|
max(maxLen(lits), max_mask_len) - 1;
|
||||||
stream_control->literal_stream_state_required = 0;
|
stream_control.literal_stream_state_required = 0;
|
||||||
return make_pair(nullptr, size_t{0});
|
return {nullptr, size_t{0}};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that we have enough room for the longest mask.
|
// Ensure that we have enough room for the longest mask.
|
||||||
@ -381,11 +364,11 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
|||||||
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
|
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
|
||||||
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
|
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
|
||||||
|
|
||||||
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
|
auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
|
||||||
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
|
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
// then fill it in
|
// then fill it in
|
||||||
u8 * ptr = secondaryTable;
|
u8 * ptr = secondaryTable.get();
|
||||||
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
|
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
|
||||||
// fill in header
|
// fill in header
|
||||||
header->pseudoEngineID = (u32)0xffffffff;
|
header->pseudoEngineID = (u32)0xffffffff;
|
||||||
@ -407,11 +390,9 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
|||||||
ptr += litTabSize;
|
ptr += litTabSize;
|
||||||
|
|
||||||
map<u32, u32> litToOffsetVal;
|
map<u32, u32> litToOffsetVal;
|
||||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||||
e = long_lits.end();
|
|
||||||
i != e; ++i) {
|
|
||||||
u32 entry = verify_u32(i - long_lits.begin());
|
u32 entry = verify_u32(i - long_lits.begin());
|
||||||
u32 offset = verify_u32(ptr - secondaryTable);
|
u32 offset = verify_u32(ptr - secondaryTable.get());
|
||||||
|
|
||||||
// point the table entry to the string location
|
// point the table entry to the string location
|
||||||
litTabPtr[entry].offset = offset;
|
litTabPtr[entry].offset = offset;
|
||||||
@ -425,20 +406,20 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fill in final lit table entry with current ptr (serves as end value)
|
// fill in final lit table entry with current ptr (serves as end value)
|
||||||
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);
|
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
|
||||||
|
|
||||||
// fill hash tables
|
// fill hash tables
|
||||||
ptr = secondaryTable + htOffset[CASEFUL];
|
ptr = secondaryTable.get() + htOffset[CASEFUL];
|
||||||
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
||||||
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
|
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
|
||||||
(MODES)m, litToOffsetVal);
|
(Modes)m, litToOffsetVal);
|
||||||
ptr += htSize[m];
|
ptr += htSize[m];
|
||||||
}
|
}
|
||||||
|
|
||||||
// tell the world what we did
|
// tell the world what we did
|
||||||
stream_control->literal_history_required = max_len;
|
stream_control.literal_history_required = max_len;
|
||||||
stream_control->literal_stream_state_required = tot_state_bytes;
|
stream_control.literal_stream_state_required = tot_state_bytes;
|
||||||
return make_pair(secondaryTable, tabSize);
|
return {move(secondaryTable), tabSize};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -41,11 +41,11 @@
|
|||||||
// hash table (caseful) (FDRSHashEntry)
|
// hash table (caseful) (FDRSHashEntry)
|
||||||
// hash table (caseless) (FDRSHashEntry)
|
// hash table (caseless) (FDRSHashEntry)
|
||||||
|
|
||||||
typedef enum {
|
enum Modes {
|
||||||
CASEFUL = 0,
|
CASEFUL = 0,
|
||||||
CASELESS = 1,
|
CASELESS = 1,
|
||||||
MAX_MODES = 2
|
MAX_MODES = 2
|
||||||
} MODES;
|
};
|
||||||
|
|
||||||
// We have one of these structures hanging off the 'link' of our secondary
|
// We have one of these structures hanging off the 'link' of our secondary
|
||||||
// FDR table that handles streaming strings
|
// FDR table that handles streaming strings
|
||||||
@ -91,12 +91,12 @@ struct FDRSHashEntry {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
|
||||||
return m == CASEFUL ? 0 : h->boundary[m-1];
|
return m == CASEFUL ? 0 : h->boundary[m-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
|
||||||
return h->boundary[m];
|
return h->boundary[m];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,17 +107,17 @@ const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
|
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
|
||||||
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
|
||||||
return v - getBaseOffsetOfLits(h, m) + 1;
|
return v - getBaseOffsetOfLits(h, m) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
|
||||||
return v + getBaseOffsetOfLits(h, m) - 1;
|
return v + getBaseOffsetOfLits(h, m) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,7 +127,7 @@ u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
|
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
|
||||||
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
|
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
|
||||||
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
|
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
|
||||||
assert(len >= 32);
|
assert(len >= 32);
|
||||||
|
@ -143,7 +143,7 @@ u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
|
|||||||
// binary search for the literal index that contains the current state
|
// binary search for the literal index that contains the current state
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
|
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
|
||||||
u32 stateValue, MODES m) {
|
u32 stateValue, enum Modes m) {
|
||||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||||
u32 lo = get_start_lit_idx(streamingTable, m);
|
u32 lo = get_start_lit_idx(streamingTable, m);
|
||||||
u32 hi = get_end_lit_idx(streamingTable, m);
|
u32 hi = get_end_lit_idx(streamingTable, m);
|
||||||
@ -175,7 +175,7 @@ void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
|
|||||||
const struct FDRSTableHeader *streamingTable,
|
const struct FDRSTableHeader *streamingTable,
|
||||||
const struct FDRSLiteral * litTab,
|
const struct FDRSLiteral * litTab,
|
||||||
const u32 *state_table,
|
const u32 *state_table,
|
||||||
const MODES m) {
|
const enum Modes m) {
|
||||||
if (!state_table[m]) {
|
if (!state_table[m]) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -213,8 +213,9 @@ void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
|
u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
|
||||||
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
|
const struct FDR_Runtime_Args *a, u32 hashState,
|
||||||
|
enum Modes m) {
|
||||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||||
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
||||||
size_t found_offset = litTab[idx].offset;
|
size_t found_offset = litTab[idx].offset;
|
||||||
@ -279,7 +280,7 @@ void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||||
u32 h, const MODES m) {
|
u32 h, const enum Modes m) {
|
||||||
u32 nbits = streamingTable->hashNBits[m];
|
u32 nbits = streamingTable->hashNBits[m];
|
||||||
if (!nbits) {
|
if (!nbits) {
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -303,7 +304,7 @@ const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
|||||||
static really_inline
|
static really_inline
|
||||||
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
||||||
const struct FDRSTableHeader *streamingTable,
|
const struct FDRSTableHeader *streamingTable,
|
||||||
const struct FDRSHashEntry *ent, const MODES m) {
|
const struct FDRSHashEntry *ent, const enum Modes m) {
|
||||||
assert(ent);
|
assert(ent);
|
||||||
assert(streamingTable->hashNBits[m]);
|
assert(streamingTable->hashNBits[m]);
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ static
|
|||||||
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
|
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
|
||||||
FDRFlood &fl = tmpFlood[c];
|
FDRFlood &fl = tmpFlood[c];
|
||||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||||
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
|
DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -90,7 +90,8 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
pair<aligned_unique_ptr<u8>, size_t>
|
||||||
|
setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||||
const EngineDescription &eng) {
|
const EngineDescription &eng) {
|
||||||
vector<FDRFlood> tmpFlood(N_CHARS);
|
vector<FDRFlood> tmpFlood(N_CHARS);
|
||||||
u32 default_suffix = eng.getDefaultFloodSuffixLength();
|
u32 default_suffix = eng.getDefaultFloodSuffixLength();
|
||||||
@ -124,7 +125,8 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
for (u32 i = 0; i < iEnd; i++) {
|
for (u32 i = 0; i < iEnd; i++) {
|
||||||
if (i < litSize) {
|
if (i < litSize) {
|
||||||
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
|
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
|
||||||
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
|
DEBUG_PRINTF("non-flood char in literal[%u]: "
|
||||||
|
"0x%02x != 0x%02x\n",
|
||||||
i, c, lit.s[litSize - i - 1]);
|
i, c, lit.s[litSize - i - 1]);
|
||||||
upSuffix = MIN(upSuffix, i);
|
upSuffix = MIN(upSuffix, i);
|
||||||
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
|
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
|
||||||
@ -195,11 +197,12 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
|
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
|
||||||
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
|
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
|
||||||
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
|
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
|
||||||
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
|
|
||||||
|
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
||||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||||
|
|
||||||
u32 *floodHeader = (u32 *)buf;
|
u32 *floodHeader = (u32 *)buf.get();
|
||||||
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);
|
FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
|
||||||
|
|
||||||
u32 currentFloodIndex = 0;
|
u32 currentFloodIndex = 0;
|
||||||
for (const auto &m : flood2chars) {
|
for (const auto &m : flood2chars) {
|
||||||
@ -215,7 +218,7 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
|||||||
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
|
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
|
||||||
floodHeaderSize, floodStructSize, totalSize);
|
floodHeaderSize, floodStructSize, totalSize);
|
||||||
|
|
||||||
return make_pair((u8 *)buf, totalSize);
|
return {move(buf), totalSize};
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
209
src/fdr/teddy.c
209
src/fdr/teddy.c
@ -36,7 +36,6 @@
|
|||||||
#include "teddy_internal.h"
|
#include "teddy_internal.h"
|
||||||
#include "teddy_runtime_common.h"
|
#include "teddy_runtime_common.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
@ -80,15 +79,15 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
|||||||
do { \
|
do { \
|
||||||
if (unlikely(isnonzero128(var))) { \
|
if (unlikely(isnonzero128(var))) { \
|
||||||
u64a lo = movq(var); \
|
u64a lo = movq(var); \
|
||||||
u64a hi = movq(byteShiftRight128(var, 8)); \
|
u64a hi = movq(rshiftbyte_m128(var, 8)); \
|
||||||
if (unlikely(lo)) { \
|
if (unlikely(lo)) { \
|
||||||
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
|
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(hi)) { \
|
if (unlikely(hi)) { \
|
||||||
conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
|
conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -98,27 +97,27 @@ do { \
|
|||||||
do { \
|
do { \
|
||||||
if (unlikely(isnonzero128(var))) { \
|
if (unlikely(isnonzero128(var))) { \
|
||||||
u32 part1 = movd(var); \
|
u32 part1 = movd(var); \
|
||||||
u32 part2 = movd(byteShiftRight128(var, 4)); \
|
u32 part2 = movd(rshiftbyte_m128(var, 4)); \
|
||||||
u32 part3 = movd(byteShiftRight128(var, 8)); \
|
u32 part3 = movd(rshiftbyte_m128(var, 8)); \
|
||||||
u32 part4 = movd(byteShiftRight128(var, 12)); \
|
u32 part4 = movd(rshiftbyte_m128(var, 12)); \
|
||||||
if (unlikely(part1)) { \
|
if (unlikely(part1)) { \
|
||||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part2)) { \
|
if (unlikely(part2)) { \
|
||||||
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part3)) { \
|
if (unlikely(part3)) { \
|
||||||
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part4)) { \
|
if (unlikely(part4)) { \
|
||||||
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -126,36 +125,34 @@ do { \
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
|
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
|
||||||
m128 mask = set16x8(0xf);
|
m128 mask = set16x8(0xf);
|
||||||
m128 lo = and128(val, mask);
|
m128 lo = and128(val, mask);
|
||||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
return and128(and128(pshufb(maskBase[0*2], lo),
|
return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
|
||||||
pshufb(maskBase[0*2+1], hi)), p_mask);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
|
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
|
||||||
m128 val) {
|
|
||||||
m128 mask = set16x8(0xf);
|
m128 mask = set16x8(0xf);
|
||||||
m128 lo = and128(val, mask);
|
m128 lo = and128(val, mask);
|
||||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
|
m128 r = prep_conf_teddy_m1(maskBase, val);
|
||||||
|
|
||||||
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
||||||
pshufb(maskBase[1*2+1], hi));
|
pshufb(maskBase[1*2+1], hi));
|
||||||
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
||||||
*old_1 = res_1;
|
*old_1 = res_1;
|
||||||
return and128(and128(r, p_mask), res_shifted_1);
|
return and128(r, res_shifted_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||||
m128 p_mask, m128 val) {
|
m128 val) {
|
||||||
m128 mask = set16x8(0xf);
|
m128 mask = set16x8(0xf);
|
||||||
m128 lo = and128(val, mask);
|
m128 lo = and128(val, mask);
|
||||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
|
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
|
||||||
|
|
||||||
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
||||||
pshufb(maskBase[2*2+1], hi));
|
pshufb(maskBase[2*2+1], hi));
|
||||||
@ -166,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||||
m128 *old_3, m128 p_mask, m128 val) {
|
m128 *old_3, m128 val) {
|
||||||
m128 mask = set16x8(0xf);
|
m128 mask = set16x8(0xf);
|
||||||
m128 lo = and128(val, mask);
|
m128 lo = and128(val, mask);
|
||||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||||
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
|
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
|
||||||
|
|
||||||
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
|
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
|
||||||
pshufb(maskBase[3*2+1], hi));
|
pshufb(maskBase[3*2+1], hi));
|
||||||
@ -180,11 +177,10 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
|||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -203,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -217,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
|||||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,19 +224,19 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -258,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -272,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
|||||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -282,19 +280,19 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -314,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||||
load128(ptr));
|
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -329,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
|||||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||||
load128(ptr));
|
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||||
load128(ptr + 16));
|
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,19 +337,19 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -373,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||||
load128(ptr));
|
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -388,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
|||||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||||
load128(ptr));
|
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||||
load128(ptr + 16));
|
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -400,19 +394,19 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -434,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr));
|
load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -450,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr));
|
load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr + 16));
|
load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -461,20 +456,19 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||||
p_mask, val_0);
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -496,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr));
|
load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -512,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr));
|
load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones128(), load128(ptr + 16));
|
load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -523,20 +518,19 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
|||||||
m128 p_mask;
|
m128 p_mask;
|
||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||||
p_mask, val_0);
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -559,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr));
|
&res_old_3, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -575,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr));
|
&res_old_3, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr + 16));
|
&res_old_3, load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -587,19 +582,19 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -622,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr));
|
&res_old_3, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -638,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr));
|
&res_old_3, load128(ptr));
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones128(), load128(ptr + 16));
|
&res_old_3, load128(ptr + 16));
|
||||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,9 +646,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
|||||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and128(r_0, p_mask);
|
||||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -33,64 +33,85 @@
|
|||||||
#ifndef TEDDY_H_
|
#ifndef TEDDY_H_
|
||||||
#define TEDDY_H_
|
#define TEDDY_H_
|
||||||
|
|
||||||
|
#include "hwlm/hwlm.h" // for hwlm_group_t
|
||||||
|
|
||||||
struct FDR; // forward declaration from fdr_internal.h
|
struct FDR; // forward declaration from fdr_internal.h
|
||||||
struct FDR_Runtime_Args;
|
struct FDR_Runtime_Args;
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a);
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
hwlm_error_t
|
||||||
const struct FDR_Runtime_Args *a);
|
fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||||
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control);
|
||||||
|
|
||||||
#endif /* __AVX2__ */
|
#endif /* __AVX2__ */
|
||||||
|
|
||||||
|
@ -36,7 +36,6 @@
|
|||||||
#include "teddy_internal.h"
|
#include "teddy_internal.h"
|
||||||
#include "teddy_runtime_common.h"
|
#include "teddy_runtime_common.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
@ -122,22 +121,22 @@ do { \
|
|||||||
u64a part4 = extract64from256(r, 1); \
|
u64a part4 = extract64from256(r, 1); \
|
||||||
if (unlikely(part1)) { \
|
if (unlikely(part1)) { \
|
||||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part2)) { \
|
if (unlikely(part2)) { \
|
||||||
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part3)) { \
|
if (unlikely(part3)) { \
|
||||||
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part4)) { \
|
if (unlikely(part4)) { \
|
||||||
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -159,41 +158,41 @@ do { \
|
|||||||
u32 part8 = extract32from256(r, 3); \
|
u32 part8 = extract32from256(r, 3); \
|
||||||
if (unlikely(part1)) { \
|
if (unlikely(part1)) { \
|
||||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part2)) { \
|
if (unlikely(part2)) { \
|
||||||
conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
|
conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part3)) { \
|
if (unlikely(part3)) { \
|
||||||
conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
|
conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part4)) { \
|
if (unlikely(part4)) { \
|
||||||
conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
|
conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part5)) { \
|
if (unlikely(part5)) { \
|
||||||
conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
|
conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part6)) { \
|
if (unlikely(part6)) { \
|
||||||
conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \
|
conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part7)) { \
|
if (unlikely(part7)) { \
|
||||||
conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \
|
conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
if (unlikely(part8)) { \
|
if (unlikely(part8)) { \
|
||||||
conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \
|
conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \
|
||||||
control, &last_match); \
|
&control, &last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -205,11 +204,11 @@ do { \
|
|||||||
if (unlikely(isnonzero256(var))) { \
|
if (unlikely(isnonzero256(var))) { \
|
||||||
u32 arrCnt = 0; \
|
u32 arrCnt = 0; \
|
||||||
m128 lo = cast256to128(var); \
|
m128 lo = cast256to128(var); \
|
||||||
m128 hi = cast256to128(swap128in256(var)); \
|
m128 hi = movdq_hi(var); \
|
||||||
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
|
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
|
||||||
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
|
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
|
||||||
for (u32 i = 0; i < arrCnt; i++) { \
|
for (u32 i = 0; i < arrCnt; i++) { \
|
||||||
conf_fn(bitArr[i], confBase, reason, a, ptr, control, \
|
conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \
|
||||||
&last_match); \
|
&last_match); \
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
@ -372,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
|||||||
64 * (offset);
|
64 * (offset);
|
||||||
*arrCnt += 1;
|
*arrCnt += 1;
|
||||||
}
|
}
|
||||||
u64a part_1 = movq(byteShiftRight128(var, 8));
|
u64a part_1 = movq(rshiftbyte_m128(var, 8));
|
||||||
while (unlikely(part_1)) {
|
while (unlikely(part_1)) {
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
||||||
64 * (offset + 1);
|
64 * (offset + 1);
|
||||||
@ -385,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
|||||||
32 * (offset * 2);
|
32 * (offset * 2);
|
||||||
*arrCnt += 1;
|
*arrCnt += 1;
|
||||||
}
|
}
|
||||||
u32 part_1 = movd(byteShiftRight128(var, 4));
|
u32 part_1 = movd(rshiftbyte_m128(var, 4));
|
||||||
while (unlikely(part_1)) {
|
while (unlikely(part_1)) {
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
||||||
32 * (offset * 2 + 1);
|
32 * (offset * 2 + 1);
|
||||||
*arrCnt += 1;
|
*arrCnt += 1;
|
||||||
}
|
}
|
||||||
u32 part_2 = movd(byteShiftRight128(var, 8));
|
u32 part_2 = movd(rshiftbyte_m128(var, 8));
|
||||||
while (unlikely(part_2)) {
|
while (unlikely(part_2)) {
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
|
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
|
||||||
32 * (offset * 2 + 2);
|
32 * (offset * 2 + 2);
|
||||||
*arrCnt += 1;
|
*arrCnt += 1;
|
||||||
}
|
}
|
||||||
u32 part_3 = movd(byteShiftRight128(var, 12));
|
u32 part_3 = movd(rshiftbyte_m128(var, 12));
|
||||||
while (unlikely(part_3)) {
|
while (unlikely(part_3)) {
|
||||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
|
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
|
||||||
32 * (offset * 2 + 3);
|
32 * (offset * 2 + 3);
|
||||||
@ -408,36 +407,35 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) {
|
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
|
||||||
m256 mask = set32x8(0xf);
|
m256 mask = set32x8(0xf);
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
return and256(and256(vpshufb(maskBase[0*2], lo),
|
return and256(vpshufb(maskBase[0*2], lo),
|
||||||
vpshufb(maskBase[0*2+1], hi)), p_mask);
|
vpshufb(maskBase[0*2+1], hi));
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask,
|
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) {
|
||||||
m256 val) {
|
|
||||||
m256 mask = set32x8(0xf);
|
m256 mask = set32x8(0xf);
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val);
|
m256 r = prep_conf_fat_teddy_m1(maskBase, val);
|
||||||
|
|
||||||
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
|
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
|
||||||
vpshufb(maskBase[1*2+1], hi));
|
vpshufb(maskBase[1*2+1], hi));
|
||||||
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
|
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
|
||||||
*old_1 = res_1;
|
*old_1 = res_1;
|
||||||
return and256(and256(r, p_mask), res_shifted_1);
|
return and256(r, res_shifted_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||||
m256 p_mask, m256 val) {
|
m256 val) {
|
||||||
m256 mask = set32x8(0xf);
|
m256 mask = set32x8(0xf);
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val);
|
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
|
||||||
|
|
||||||
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
|
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
|
||||||
vpshufb(maskBase[2*2+1], hi));
|
vpshufb(maskBase[2*2+1], hi));
|
||||||
@ -448,11 +446,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||||
m256 *old_3, m256 p_mask, m256 val) {
|
m256 *old_3, m256 val) {
|
||||||
m256 mask = set32x8(0xf);
|
m256 mask = set32x8(0xf);
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val);
|
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
|
||||||
|
|
||||||
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
|
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
|
||||||
vpshufb(maskBase[3*2+1], hi));
|
vpshufb(maskBase[3*2+1], hi));
|
||||||
@ -462,12 +460,10 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi,
|
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) {
|
||||||
m256 p_mask) {
|
|
||||||
m256 lo = and256(val, mask);
|
m256 lo = and256(val, mask);
|
||||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||||
m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
|
return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
|
||||||
return and256(res, p_mask);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
@ -482,11 +478,10 @@ const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -505,13 +500,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -519,10 +515,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
|||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
|
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
|
||||||
load2x128(ptr + 16));
|
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -530,19 +525,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -561,13 +556,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -575,10 +571,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
|||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
|
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
|
||||||
load2x128(ptr + 16));
|
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -586,19 +581,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 1);
|
a->buf_history, a->len_history, 1);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -618,14 +613,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -633,10 +628,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
|||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
|
||||||
load2x128(ptr + 16));
|
load2x128(ptr + 16));
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
@ -645,19 +639,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -677,14 +671,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -692,10 +686,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
|||||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
|
||||||
load2x128(ptr + 16));
|
load2x128(ptr + 16));
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
@ -704,19 +697,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 2);
|
a->buf_history, a->len_history, 2);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -738,14 +731,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr));
|
load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -754,10 +748,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr));
|
load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr + 16));
|
load2x128(ptr + 16));
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -766,19 +760,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -800,14 +794,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr));
|
load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -816,10 +811,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr));
|
load2x128(ptr));
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
ones256(), load2x128(ptr + 16));
|
load2x128(ptr + 16));
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -828,19 +823,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 3);
|
a->buf_history, a->len_history, 3);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||||
p_mask, val_0);
|
val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -863,15 +858,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -880,12 +875,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr + 16));
|
||||||
load2x128(ptr + 16));
|
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -894,19 +887,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -929,15 +922,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 16 < buf_end) {
|
if (ptr + 16 < buf_end) {
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
@ -946,12 +939,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
|||||||
__builtin_prefetch(ptr + (iterBytes*4));
|
__builtin_prefetch(ptr + (iterBytes*4));
|
||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr));
|
||||||
load2x128(ptr));
|
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, ones256(),
|
&res_old_3, load2x128(ptr + 16));
|
||||||
load2x128(ptr + 16));
|
|
||||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -960,19 +951,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
|||||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||||
a->buf_history, a->len_history, 4);
|
a->buf_history, a->len_history, 4);
|
||||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||||
&res_old_3, p_mask, val_0);
|
&res_old_3, val_0);
|
||||||
|
r_0 = and256(r_0, p_mask);
|
||||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -996,16 +987,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||||
buf_end, a->buf_history, a->len_history);
|
buf_end, a->buf_history, a->len_history);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
p_mask);
|
res_0 = and256(res_0, p_mask);
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||||
ptr += 32;
|
ptr += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 32 < buf_end) {
|
if (ptr + 32 < buf_end) {
|
||||||
m256 val_0 = load256(ptr + 0);
|
m256 val_0 = load256(ptr + 0);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||||
ptr += 32;
|
ptr += 32;
|
||||||
}
|
}
|
||||||
@ -1015,13 +1005,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
|||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
m256 val_0 = load256(ptr + 0);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
||||||
|
|
||||||
m256 val_1 = load256(ptr + 32);
|
m256 val_1 = load256(ptr + 32);
|
||||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
|
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1029,20 +1017,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||||
buf_end, a->buf_history, a->len_history);
|
buf_end, a->buf_history, a->len_history);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
p_mask);
|
res_0 = and256(res_0, p_mask);
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||||
const struct FDR_Runtime_Args *a) {
|
const struct FDR_Runtime_Args *a,
|
||||||
|
hwlm_group_t control) {
|
||||||
const u8 *buf_end = a->buf + a->len;
|
const u8 *buf_end = a->buf + a->len;
|
||||||
const u8 *ptr = a->buf + a->start_offset;
|
const u8 *ptr = a->buf + a->start_offset;
|
||||||
hwlmcb_rv_t controlVal = *a->groups;
|
|
||||||
hwlmcb_rv_t *control = &controlVal;
|
|
||||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||||
u32 last_match = (u32)-1;
|
u32 last_match = (u32)-1;
|
||||||
@ -1066,16 +1053,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||||
buf_end, a->buf_history, a->len_history);
|
buf_end, a->buf_history, a->len_history);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
p_mask);
|
res_0 = and256(res_0, p_mask);
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||||
ptr += 32;
|
ptr += 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr + 32 < buf_end) {
|
if (ptr + 32 < buf_end) {
|
||||||
m256 val_0 = load256(ptr + 0);
|
m256 val_0 = load256(ptr + 0);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||||
ptr += 32;
|
ptr += 32;
|
||||||
}
|
}
|
||||||
@ -1085,13 +1071,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
|||||||
CHECK_FLOOD;
|
CHECK_FLOOD;
|
||||||
|
|
||||||
m256 val_0 = load256(ptr + 0);
|
m256 val_0 = load256(ptr + 0);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
||||||
|
|
||||||
m256 val_1 = load256(ptr + 32);
|
m256 val_1 = load256(ptr + 32);
|
||||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
|
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
||||||
ones256());
|
|
||||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1099,11 +1083,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
|||||||
m256 p_mask;
|
m256 p_mask;
|
||||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||||
buf_end, a->buf_history, a->len_history);
|
buf_end, a->buf_history, a->len_history);
|
||||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||||
p_mask);
|
res_0 = and256(res_0, p_mask);
|
||||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||||
}
|
}
|
||||||
*a->groups = controlVal;
|
|
||||||
return HWLM_SUCCESS;
|
return HWLM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,12 +74,11 @@ public:
|
|||||||
const TeddyEngineDescription &eng_in, bool make_small_in)
|
const TeddyEngineDescription &eng_in, bool make_small_in)
|
||||||
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||||
};
|
};
|
||||||
|
|
||||||
class TeddySet {
|
class TeddySet {
|
||||||
const vector<hwlmLiteral> &lits;
|
|
||||||
u32 len;
|
u32 len;
|
||||||
// nibbleSets is a series of bitfields over 16 predicates
|
// nibbleSets is a series of bitfields over 16 predicates
|
||||||
// that represent the whether shufti nibble set
|
// that represent the whether shufti nibble set
|
||||||
@ -89,8 +88,7 @@ class TeddySet {
|
|||||||
vector<u16> nibbleSets;
|
vector<u16> nibbleSets;
|
||||||
set<u32> litIds;
|
set<u32> litIds;
|
||||||
public:
|
public:
|
||||||
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
|
explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
|
||||||
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
|
|
||||||
const set<u32> & getLits() const { return litIds; }
|
const set<u32> & getLits() const { return litIds; }
|
||||||
size_t litCount() const { return litIds.size(); }
|
size_t litCount() const { return litIds.size(); }
|
||||||
|
|
||||||
@ -106,8 +104,8 @@ public:
|
|||||||
}
|
}
|
||||||
printf("\nnlits: %zu\nLit ids: ", litCount());
|
printf("\nnlits: %zu\nLit ids: ", litCount());
|
||||||
printf("Prob: %llu\n", probability());
|
printf("Prob: %llu\n", probability());
|
||||||
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
|
for (const auto &id : litIds) {
|
||||||
printf("%u ", *i);
|
printf("%u ", id);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
||||||
@ -118,15 +116,15 @@ public:
|
|||||||
return nibbleSets == ts.nibbleSets;
|
return nibbleSets == ts.nibbleSets;
|
||||||
}
|
}
|
||||||
|
|
||||||
void addLiteral(u32 lit_id) {
|
void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
|
||||||
const string &s = lits[lit_id].s;
|
const string &s = lit.s;
|
||||||
for (u32 i = 0; i < len; i++) {
|
for (u32 i = 0; i < len; i++) {
|
||||||
if (i < s.size()) {
|
if (i < s.size()) {
|
||||||
u8 c = s[s.size() - i - 1];
|
u8 c = s[s.size() - i - 1];
|
||||||
u8 c_hi = (c >> 4) & 0xf;
|
u8 c_hi = (c >> 4) & 0xf;
|
||||||
u8 c_lo = c & 0xf;
|
u8 c_lo = c & 0xf;
|
||||||
nibbleSets[i*2] = 1 << c_lo;
|
nibbleSets[i*2] = 1 << c_lo;
|
||||||
if (lits[lit_id].nocase && ourisalpha(c)) {
|
if (lit.nocase && ourisalpha(c)) {
|
||||||
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
||||||
} else {
|
} else {
|
||||||
nibbleSets[i*2+1] = 1 << c_hi;
|
nibbleSets[i*2+1] = 1 << c_hi;
|
||||||
@ -185,28 +183,26 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
|||||||
set<TeddySet> sts;
|
set<TeddySet> sts;
|
||||||
|
|
||||||
for (u32 i = 0; i < lits.size(); i++) {
|
for (u32 i = 0; i < lits.size(); i++) {
|
||||||
TeddySet ts(lits, eng.numMasks);
|
TeddySet ts(eng.numMasks);
|
||||||
ts.addLiteral(i);
|
ts.addLiteral(i, lits[i]);
|
||||||
sts.insert(ts);
|
sts.insert(ts);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
#ifdef TEDDY_DEBUG
|
#ifdef TEDDY_DEBUG
|
||||||
printf("Size %zu\n", sts.size());
|
printf("Size %zu\n", sts.size());
|
||||||
for (set<TeddySet>::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
|
for (const TeddySet &ts : sts) {
|
||||||
printf("\n"); i1->dump();
|
printf("\n"); ts.dump();
|
||||||
}
|
}
|
||||||
printf("\n===============================================\n");
|
printf("\n===============================================\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
set<TeddySet>::iterator m1 = sts.end(), m2 = sts.end();
|
auto m1 = sts.end(), m2 = sts.end();
|
||||||
u64a best = 0xffffffffffffffffULL;
|
u64a best = 0xffffffffffffffffULL;
|
||||||
|
|
||||||
for (set<TeddySet>::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
|
for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
|
||||||
set<TeddySet>::iterator i2 = i1;
|
|
||||||
++i2;
|
|
||||||
const TeddySet &s1 = *i1;
|
const TeddySet &s1 = *i1;
|
||||||
for (set<TeddySet>::iterator e2 = sts.end(); i2 != e2; ++i2) {
|
for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
|
||||||
const TeddySet &s2 = *i2;
|
const TeddySet &s2 = *i2;
|
||||||
|
|
||||||
// be more conservative if we don't absolutely need to
|
// be more conservative if we don't absolutely need to
|
||||||
@ -216,7 +212,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
TeddySet tmpSet(lits, eng.numMasks);
|
TeddySet tmpSet(eng.numMasks);
|
||||||
tmpSet.merge(s1);
|
tmpSet.merge(s1);
|
||||||
tmpSet.merge(s2);
|
tmpSet.merge(s2);
|
||||||
u64a newScore = tmpSet.heuristic();
|
u64a newScore = tmpSet.heuristic();
|
||||||
@ -246,7 +242,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// do the merge
|
// do the merge
|
||||||
TeddySet nts(lits, eng.numMasks);
|
TeddySet nts(eng.numMasks);
|
||||||
nts.merge(*m1);
|
nts.merge(*m1);
|
||||||
nts.merge(*m2);
|
nts.merge(*m2);
|
||||||
#ifdef TEDDY_DEBUG
|
#ifdef TEDDY_DEBUG
|
||||||
@ -263,25 +259,23 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
|||||||
sts.erase(m2);
|
sts.erase(m2);
|
||||||
sts.insert(nts);
|
sts.insert(nts);
|
||||||
}
|
}
|
||||||
u32 cnt = 0;
|
|
||||||
|
|
||||||
if (sts.size() > eng.getNumBuckets()) {
|
if (sts.size() > eng.getNumBuckets()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (set<TeddySet>::const_iterator i = sts.begin(), e = sts.end(); i != e;
|
u32 bucket_id = 0;
|
||||||
++i) {
|
for (const TeddySet &ts : sts) {
|
||||||
for (set<u32>::const_iterator i2 = i->getLits().begin(),
|
const auto &ts_lits = ts.getLits();
|
||||||
e2 = i->getLits().end();
|
auto &bucket_lits = bucketToLits[bucket_id];
|
||||||
i2 != e2; ++i2) {
|
bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
|
||||||
bucketToLits[cnt].push_back(*i2);
|
bucket_id++;
|
||||||
}
|
|
||||||
cnt++;
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
aligned_unique_ptr<FDR>
|
||||||
|
TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||||
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -314,9 +308,8 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
|||||||
|
|
||||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||||
|
|
||||||
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
|
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||||
pair<u8 *, size_t> confirmTmp
|
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||||
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
|
||||||
|
|
||||||
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
||||||
maskLen +
|
maskLen +
|
||||||
@ -334,38 +327,29 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
|||||||
teddy->maxStringLen = verify_u32(maxLen(lits));
|
teddy->maxStringLen = verify_u32(maxLen(lits));
|
||||||
|
|
||||||
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
||||||
memcpy(ptr, confirmTmp.first, confirmTmp.second);
|
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
||||||
ptr += confirmTmp.second;
|
ptr += confirmTmp.second;
|
||||||
aligned_free(confirmTmp.first);
|
|
||||||
|
|
||||||
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
||||||
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
|
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
||||||
ptr += floodControlTmp.second;
|
ptr += floodControlTmp.second;
|
||||||
aligned_free(floodControlTmp.first);
|
|
||||||
|
|
||||||
if (link.first) {
|
if (link.first) {
|
||||||
teddy->link = verify_u32(ptr - teddy_base);
|
teddy->link = verify_u32(ptr - teddy_base);
|
||||||
memcpy(ptr, link.first, link.second);
|
memcpy(ptr, link.first.get(), link.second);
|
||||||
aligned_free(link.first);
|
|
||||||
} else {
|
} else {
|
||||||
teddy->link = 0;
|
teddy->link = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
||||||
|
|
||||||
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator
|
for (const auto &b2l : bucketToLits) {
|
||||||
i = bucketToLits.begin(),
|
const u32 &bucket_id = b2l.first;
|
||||||
e = bucketToLits.end();
|
const vector<LiteralIndex> &ids = b2l.second;
|
||||||
i != e; ++i) {
|
|
||||||
const u32 bucket_id = i->first;
|
|
||||||
const vector<LiteralIndex> &ids = i->second;
|
|
||||||
const u8 bmsk = 1U << (bucket_id % 8);
|
const u8 bmsk = 1U << (bucket_id % 8);
|
||||||
|
|
||||||
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(),
|
for (const LiteralIndex &lit_id : ids) {
|
||||||
e2 = ids.end();
|
const hwlmLiteral &l = lits[lit_id];
|
||||||
i2 != e2; ++i2) {
|
|
||||||
LiteralIndex lit_id = *i2;
|
|
||||||
const hwlmLiteral & l = lits[lit_id];
|
|
||||||
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
|
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
|
||||||
const u32 sz = verify_u32(l.s.size());
|
const u32 sz = verify_u32(l.s.size());
|
||||||
|
|
||||||
@ -439,10 +423,10 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
|
aligned_unique_ptr<FDR>
|
||||||
bool make_small, u32 hint,
|
teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
|
||||||
const target_t &target,
|
u32 hint, const target_t &target,
|
||||||
pair<u8 *, size_t> link) {
|
pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||||
unique_ptr<TeddyEngineDescription> des;
|
unique_ptr<TeddyEngineDescription> des;
|
||||||
if (hint == HINT_INVALID) {
|
if (hint == HINT_INVALID) {
|
||||||
des = chooseTeddyEngine(target, lits);
|
des = chooseTeddyEngine(target, lits);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -49,7 +49,7 @@ struct hwlmLiteral;
|
|||||||
ue2::aligned_unique_ptr<FDR>
|
ue2::aligned_unique_ptr<FDR>
|
||||||
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||||
u32 hint, const target_t &target,
|
u32 hint, const target_t &target,
|
||||||
std::pair<u8 *, size_t> link);
|
std::pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -51,8 +51,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
|
|||||||
|
|
||||||
#define CHECK_HWLM_TERMINATE_MATCHING \
|
#define CHECK_HWLM_TERMINATE_MATCHING \
|
||||||
do { \
|
do { \
|
||||||
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
|
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
|
||||||
*a->groups = controlVal; \
|
|
||||||
return HWLM_TERMINATED; \
|
return HWLM_TERMINATED; \
|
||||||
} \
|
} \
|
||||||
} while (0);
|
} while (0);
|
||||||
@ -61,8 +60,7 @@ do { \
|
|||||||
do { \
|
do { \
|
||||||
if (unlikely(ptr > tryFloodDetect)) { \
|
if (unlikely(ptr > tryFloodDetect)) { \
|
||||||
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
|
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
|
||||||
&floodBackoff, &controlVal, \
|
&floodBackoff, &control, iterBytes); \
|
||||||
iterBytes); \
|
|
||||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||||
} \
|
} \
|
||||||
} while (0);
|
} while (0);
|
||||||
|
42
src/grey.cpp
42
src/grey.cpp
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,7 +34,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#define DEFAULT_MAX_HISTORY 60
|
#define DEFAULT_MAX_HISTORY 110
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -50,8 +50,11 @@ Grey::Grey(void) :
|
|||||||
allowLitHaig(true),
|
allowLitHaig(true),
|
||||||
allowLbr(true),
|
allowLbr(true),
|
||||||
allowMcClellan(true),
|
allowMcClellan(true),
|
||||||
|
allowSheng(true),
|
||||||
allowPuff(true),
|
allowPuff(true),
|
||||||
|
allowLiteral(true),
|
||||||
allowRose(true),
|
allowRose(true),
|
||||||
|
allowViolet(true),
|
||||||
allowExtendedNFA(true), /* bounded repeats of course */
|
allowExtendedNFA(true), /* bounded repeats of course */
|
||||||
allowLimExNFA(true),
|
allowLimExNFA(true),
|
||||||
allowAnchoredAcyclic(true),
|
allowAnchoredAcyclic(true),
|
||||||
@ -60,6 +63,13 @@ Grey::Grey(void) :
|
|||||||
allowDecoratedLiteral(true),
|
allowDecoratedLiteral(true),
|
||||||
allowNoodle(true),
|
allowNoodle(true),
|
||||||
fdrAllowTeddy(true),
|
fdrAllowTeddy(true),
|
||||||
|
violetAvoidSuffixes(true),
|
||||||
|
violetAvoidWeakInfixes(true),
|
||||||
|
violetDoubleCut(true),
|
||||||
|
violetExtractStrongLiterals(true),
|
||||||
|
violetLiteralChains(true),
|
||||||
|
violetDoubleCutLiteralLen(3),
|
||||||
|
violetEarlyCleanLiteralLen(6),
|
||||||
puffImproveHead(true),
|
puffImproveHead(true),
|
||||||
castleExclusive(true),
|
castleExclusive(true),
|
||||||
mergeSEP(true), /* short exhaustible passthroughs */
|
mergeSEP(true), /* short exhaustible passthroughs */
|
||||||
@ -81,7 +91,6 @@ Grey::Grey(void) :
|
|||||||
allowZombies(true),
|
allowZombies(true),
|
||||||
floodAsPuffette(false),
|
floodAsPuffette(false),
|
||||||
nfaForceSize(0),
|
nfaForceSize(0),
|
||||||
nfaForceShifts(0),
|
|
||||||
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
|
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
|
||||||
minHistoryAvailable(0), /* debugging only */
|
minHistoryAvailable(0), /* debugging only */
|
||||||
maxAnchoredRegion(63), /* for rose's atable to run over */
|
maxAnchoredRegion(63), /* for rose's atable to run over */
|
||||||
@ -119,6 +128,7 @@ Grey::Grey(void) :
|
|||||||
equivalenceEnable(true),
|
equivalenceEnable(true),
|
||||||
|
|
||||||
allowSmallWrite(true), // McClellan dfas for small patterns
|
allowSmallWrite(true), // McClellan dfas for small patterns
|
||||||
|
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
|
||||||
|
|
||||||
smallWriteLargestBuffer(70), // largest buffer that can be
|
smallWriteLargestBuffer(70), // largest buffer that can be
|
||||||
// considered a small write
|
// considered a small write
|
||||||
@ -126,6 +136,10 @@ Grey::Grey(void) :
|
|||||||
// are given to rose &co
|
// are given to rose &co
|
||||||
smallWriteLargestBufferBad(35),
|
smallWriteLargestBufferBad(35),
|
||||||
limitSmallWriteOutfixSize(1048576), // 1 MB
|
limitSmallWriteOutfixSize(1048576), // 1 MB
|
||||||
|
smallWriteMaxPatterns(10000),
|
||||||
|
smallWriteMaxLiterals(10000),
|
||||||
|
allowTamarama(true), // Tamarama engine
|
||||||
|
tamaChunkSize(100),
|
||||||
dumpFlags(0),
|
dumpFlags(0),
|
||||||
limitPatternCount(8000000), // 8M patterns
|
limitPatternCount(8000000), // 8M patterns
|
||||||
limitPatternLength(16000), // 16K bytes
|
limitPatternLength(16000), // 16K bytes
|
||||||
@ -202,8 +216,11 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(allowLitHaig);
|
G_UPDATE(allowLitHaig);
|
||||||
G_UPDATE(allowLbr);
|
G_UPDATE(allowLbr);
|
||||||
G_UPDATE(allowMcClellan);
|
G_UPDATE(allowMcClellan);
|
||||||
|
G_UPDATE(allowSheng);
|
||||||
G_UPDATE(allowPuff);
|
G_UPDATE(allowPuff);
|
||||||
|
G_UPDATE(allowLiteral);
|
||||||
G_UPDATE(allowRose);
|
G_UPDATE(allowRose);
|
||||||
|
G_UPDATE(allowViolet);
|
||||||
G_UPDATE(allowExtendedNFA);
|
G_UPDATE(allowExtendedNFA);
|
||||||
G_UPDATE(allowLimExNFA);
|
G_UPDATE(allowLimExNFA);
|
||||||
G_UPDATE(allowAnchoredAcyclic);
|
G_UPDATE(allowAnchoredAcyclic);
|
||||||
@ -212,6 +229,13 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(allowDecoratedLiteral);
|
G_UPDATE(allowDecoratedLiteral);
|
||||||
G_UPDATE(allowNoodle);
|
G_UPDATE(allowNoodle);
|
||||||
G_UPDATE(fdrAllowTeddy);
|
G_UPDATE(fdrAllowTeddy);
|
||||||
|
G_UPDATE(violetAvoidSuffixes);
|
||||||
|
G_UPDATE(violetAvoidWeakInfixes);
|
||||||
|
G_UPDATE(violetDoubleCut);
|
||||||
|
G_UPDATE(violetExtractStrongLiterals);
|
||||||
|
G_UPDATE(violetLiteralChains);
|
||||||
|
G_UPDATE(violetDoubleCutLiteralLen);
|
||||||
|
G_UPDATE(violetEarlyCleanLiteralLen);
|
||||||
G_UPDATE(puffImproveHead);
|
G_UPDATE(puffImproveHead);
|
||||||
G_UPDATE(castleExclusive);
|
G_UPDATE(castleExclusive);
|
||||||
G_UPDATE(mergeSEP);
|
G_UPDATE(mergeSEP);
|
||||||
@ -232,7 +256,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(allowZombies);
|
G_UPDATE(allowZombies);
|
||||||
G_UPDATE(floodAsPuffette);
|
G_UPDATE(floodAsPuffette);
|
||||||
G_UPDATE(nfaForceSize);
|
G_UPDATE(nfaForceSize);
|
||||||
G_UPDATE(nfaForceShifts);
|
|
||||||
G_UPDATE(highlanderSquash);
|
G_UPDATE(highlanderSquash);
|
||||||
G_UPDATE(maxHistoryAvailable);
|
G_UPDATE(maxHistoryAvailable);
|
||||||
G_UPDATE(minHistoryAvailable);
|
G_UPDATE(minHistoryAvailable);
|
||||||
@ -270,9 +293,14 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(miracleHistoryBonus);
|
G_UPDATE(miracleHistoryBonus);
|
||||||
G_UPDATE(equivalenceEnable);
|
G_UPDATE(equivalenceEnable);
|
||||||
G_UPDATE(allowSmallWrite);
|
G_UPDATE(allowSmallWrite);
|
||||||
|
G_UPDATE(allowSmallWriteSheng);
|
||||||
G_UPDATE(smallWriteLargestBuffer);
|
G_UPDATE(smallWriteLargestBuffer);
|
||||||
G_UPDATE(smallWriteLargestBufferBad);
|
G_UPDATE(smallWriteLargestBufferBad);
|
||||||
G_UPDATE(limitSmallWriteOutfixSize);
|
G_UPDATE(limitSmallWriteOutfixSize);
|
||||||
|
G_UPDATE(smallWriteMaxPatterns);
|
||||||
|
G_UPDATE(smallWriteMaxLiterals);
|
||||||
|
G_UPDATE(allowTamarama);
|
||||||
|
G_UPDATE(tamaChunkSize);
|
||||||
G_UPDATE(limitPatternCount);
|
G_UPDATE(limitPatternCount);
|
||||||
G_UPDATE(limitPatternLength);
|
G_UPDATE(limitPatternLength);
|
||||||
G_UPDATE(limitGraphVertices);
|
G_UPDATE(limitGraphVertices);
|
||||||
@ -309,7 +337,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowLitHaig = false;
|
g->allowLitHaig = false;
|
||||||
g->allowMcClellan = false;
|
g->allowMcClellan = false;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
g->allowRose = false;
|
||||||
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
done = true;
|
done = true;
|
||||||
@ -325,7 +355,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowLitHaig = false;
|
g->allowLitHaig = false;
|
||||||
g->allowMcClellan = true;
|
g->allowMcClellan = true;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
g->allowRose = false;
|
||||||
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
done = true;
|
done = true;
|
||||||
@ -341,7 +373,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
g->allowLitHaig = false;
|
g->allowLitHaig = false;
|
||||||
g->allowMcClellan = true;
|
g->allowMcClellan = true;
|
||||||
g->allowPuff = false;
|
g->allowPuff = false;
|
||||||
|
g->allowLiteral = false;
|
||||||
g->allowRose = false;
|
g->allowRose = false;
|
||||||
|
g->allowViolet = false;
|
||||||
g->allowSmallLiteralSet = false;
|
g->allowSmallLiteralSet = false;
|
||||||
g->roseMasks = false;
|
g->roseMasks = false;
|
||||||
done = true;
|
done = true;
|
||||||
|
21
src/grey.h
21
src/grey.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -50,8 +50,11 @@ struct Grey {
|
|||||||
bool allowLitHaig;
|
bool allowLitHaig;
|
||||||
bool allowLbr;
|
bool allowLbr;
|
||||||
bool allowMcClellan;
|
bool allowMcClellan;
|
||||||
|
bool allowSheng;
|
||||||
bool allowPuff;
|
bool allowPuff;
|
||||||
|
bool allowLiteral;
|
||||||
bool allowRose;
|
bool allowRose;
|
||||||
|
bool allowViolet;
|
||||||
bool allowExtendedNFA;
|
bool allowExtendedNFA;
|
||||||
bool allowLimExNFA;
|
bool allowLimExNFA;
|
||||||
bool allowAnchoredAcyclic;
|
bool allowAnchoredAcyclic;
|
||||||
@ -62,6 +65,14 @@ struct Grey {
|
|||||||
bool allowNoodle;
|
bool allowNoodle;
|
||||||
bool fdrAllowTeddy;
|
bool fdrAllowTeddy;
|
||||||
|
|
||||||
|
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
|
||||||
|
bool violetAvoidWeakInfixes;
|
||||||
|
bool violetDoubleCut;
|
||||||
|
bool violetExtractStrongLiterals;
|
||||||
|
bool violetLiteralChains;
|
||||||
|
u32 violetDoubleCutLiteralLen;
|
||||||
|
u32 violetEarlyCleanLiteralLen;
|
||||||
|
|
||||||
bool puffImproveHead;
|
bool puffImproveHead;
|
||||||
bool castleExclusive; // enable castle mutual exclusion analysis
|
bool castleExclusive; // enable castle mutual exclusion analysis
|
||||||
|
|
||||||
@ -88,7 +99,6 @@ struct Grey {
|
|||||||
bool floodAsPuffette;
|
bool floodAsPuffette;
|
||||||
|
|
||||||
u32 nfaForceSize;
|
u32 nfaForceSize;
|
||||||
u32 nfaForceShifts;
|
|
||||||
|
|
||||||
u32 maxHistoryAvailable;
|
u32 maxHistoryAvailable;
|
||||||
u32 minHistoryAvailable;
|
u32 minHistoryAvailable;
|
||||||
@ -140,9 +150,16 @@ struct Grey {
|
|||||||
|
|
||||||
// SmallWrite engine
|
// SmallWrite engine
|
||||||
bool allowSmallWrite;
|
bool allowSmallWrite;
|
||||||
|
bool allowSmallWriteSheng;
|
||||||
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
||||||
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
||||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||||
|
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
|
||||||
|
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
|
||||||
|
|
||||||
|
// Tamarama engine
|
||||||
|
bool allowTamarama;
|
||||||
|
u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama
|
||||||
|
|
||||||
enum DumpFlags {
|
enum DumpFlags {
|
||||||
DUMP_NONE = 0,
|
DUMP_NONE = 0,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -219,7 +219,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
|||||||
: get_current_target();
|
: get_current_target();
|
||||||
|
|
||||||
CompileContext cc(isStreaming, isVectored, target_info, g);
|
CompileContext cc(isStreaming, isVectored, target_info, g);
|
||||||
NG ng(cc, somPrecision);
|
NG ng(cc, elements, somPrecision);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (unsigned int i = 0; i < elements; i++) {
|
for (unsigned int i = 0; i < elements; i++) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -98,6 +98,12 @@ extern "C"
|
|||||||
* The library was unable to allocate temporary storage used during
|
* The library was unable to allocate temporary storage used during
|
||||||
* compilation time.
|
* compilation time.
|
||||||
*
|
*
|
||||||
|
* - *Allocator returned misaligned memory*
|
||||||
|
*
|
||||||
|
* The memory allocator (either malloc() or the allocator set with @ref
|
||||||
|
* hs_set_allocator()) did not correctly return memory suitably aligned
|
||||||
|
* for the largest representable data type on this platform.
|
||||||
|
*
|
||||||
* - *Internal error*
|
* - *Internal error*
|
||||||
*
|
*
|
||||||
* An unexpected error occurred: if this error is reported, please contact
|
* An unexpected error occurred: if this error is reported, please contact
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -37,6 +37,7 @@
|
|||||||
#include "fdr/fdr.h"
|
#include "fdr/fdr.h"
|
||||||
#include "nfa/accel.h"
|
#include "nfa/accel.h"
|
||||||
#include "nfa/shufti.h"
|
#include "nfa/shufti.h"
|
||||||
|
#include "nfa/truffle.h"
|
||||||
#include "nfa/vermicelli.h"
|
#include "nfa/vermicelli.h"
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
@ -64,8 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
|
|||||||
case ACCEL_SHUFTI:
|
case ACCEL_SHUFTI:
|
||||||
DEBUG_PRINTF("single shufti\n");
|
DEBUG_PRINTF("single shufti\n");
|
||||||
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
|
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
|
||||||
|
case ACCEL_TRUFFLE:
|
||||||
|
DEBUG_PRINTF("truffle\n");
|
||||||
|
return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
|
||||||
default:
|
default:
|
||||||
/* no acceleration, fall through and return current ptr */
|
/* no acceleration, fall through and return current ptr */
|
||||||
|
DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
|
||||||
|
assert(aux->accel_type == ACCEL_NONE);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,9 +35,11 @@
|
|||||||
#include "hwlm_internal.h"
|
#include "hwlm_internal.h"
|
||||||
#include "noodle_engine.h"
|
#include "noodle_engine.h"
|
||||||
#include "noodle_build.h"
|
#include "noodle_build.h"
|
||||||
|
#include "scratch.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "fdr/fdr_compile.h"
|
#include "fdr/fdr_compile.h"
|
||||||
#include "nfa/shufticompile.h"
|
#include "nfa/shufticompile.h"
|
||||||
|
#include "nfa/trufflecompile.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
@ -62,6 +64,28 @@ namespace ue2 {
|
|||||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
||||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
||||||
|
|
||||||
|
static
|
||||||
|
size_t mask_overhang(const hwlmLiteral &lit) {
|
||||||
|
size_t msk_true_size = lit.msk.size();
|
||||||
|
assert(msk_true_size <= HWLM_MASKLEN);
|
||||||
|
assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
|
||||||
|
for (u8 c : lit.msk) {
|
||||||
|
if (!c) {
|
||||||
|
msk_true_size--;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lit.s.length() >= msk_true_size) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* only short literals should be able to have a mask which overhangs */
|
||||||
|
assert(lit.s.length() < MAX_ACCEL_OFFSET);
|
||||||
|
return msk_true_size - lit.s.length();
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||||
const hwlmLiteral &first = *lits.front();
|
const hwlmLiteral &first = *lits.front();
|
||||||
@ -167,7 +191,8 @@ bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (found) {
|
if (found) {
|
||||||
curr.max_offset = MAX(curr.max_offset, j);
|
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||||
|
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -288,8 +313,8 @@ bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (found) {
|
if (found) {
|
||||||
curr.max_offset = MAX(curr.max_offset, j);
|
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||||
break;
|
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -346,6 +371,25 @@ void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
|
||||||
|
u32 max_offset) {
|
||||||
|
for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
|
||||||
|
unsigned char c = lit.s[i];
|
||||||
|
if (lit.nocase) {
|
||||||
|
if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (cr.test(c)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||||
hwlm_group_t expected_groups, AccelAux *aux) {
|
hwlm_group_t expected_groups, AccelAux *aux) {
|
||||||
@ -363,29 +407,45 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* look for shufti/truffle */
|
||||||
|
|
||||||
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
||||||
for (const auto &lit : lits) {
|
for (const auto &lit : lits) {
|
||||||
if (!(lit.groups & expected_groups)) {
|
if (!(lit.groups & expected_groups)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
|
u32 overhang = mask_overhang(lit);
|
||||||
unsigned char c = lit.s[i];
|
for (u32 i = 0; i < overhang; i++) {
|
||||||
|
/* this offset overhangs the start of the real literal; look at the
|
||||||
|
* msk/cmp */
|
||||||
|
for (u32 j = 0; j < N_CHARS; j++) {
|
||||||
|
if ((j & lit.msk[i]) == lit.cmp[i]) {
|
||||||
|
reach[i].set(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
|
||||||
|
CharReach &reach_i = reach[i];
|
||||||
|
u32 i_effective = i - overhang;
|
||||||
|
|
||||||
|
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
|
||||||
|
: lit.s.back();
|
||||||
if (lit.nocase) {
|
if (lit.nocase) {
|
||||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
|
reach_i.set(mytoupper(c));
|
||||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
|
reach_i.set(mytolower(c));
|
||||||
reach[i].set(mytoupper(c));
|
|
||||||
reach[i].set(mytolower(c));
|
|
||||||
} else {
|
} else {
|
||||||
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
|
reach_i.set(c);
|
||||||
reach[i].set(c);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 min_count = ~0U;
|
u32 min_count = ~0U;
|
||||||
u32 min_offset = ~0U;
|
u32 min_offset = ~0U;
|
||||||
for (u32 i = 0; i < min_len; i++) {
|
for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
|
||||||
size_t count = reach[i].count();
|
size_t count = reach[i].count();
|
||||||
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
||||||
describeClass(reach[i]).c_str(), count);
|
describeClass(reach[i]).c_str(), count);
|
||||||
@ -394,10 +454,9 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
|||||||
min_offset = i;
|
min_offset = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(min_offset <= min_len);
|
|
||||||
|
|
||||||
if (min_count > MAX_SHUFTI_WIDTH) {
|
if (min_count > MAX_SHUFTI_WIDTH) {
|
||||||
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
|
DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -410,7 +469,11 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("fail\n");
|
truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
|
||||||
|
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
|
||||||
|
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||||
|
aux->truffle.accel_type = ACCEL_TRUFFLE;
|
||||||
|
aux->truffle.offset = verify_u8(min_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -466,6 +529,10 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
|
|||||||
stream_control->history_max);
|
stream_control->history_max);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
|
||||||
|
assert(0);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!lits.front().msk.empty()) {
|
if (!lits.front().msk.empty()) {
|
||||||
|
@ -37,7 +37,6 @@
|
|||||||
#include "util/compare.h"
|
#include "util/compare.h"
|
||||||
#include "util/masked_move.h"
|
#include "util/masked_move.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
@ -115,7 +115,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
|||||||
v = and128(v, caseMask);
|
v = and128(v, caseMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
|
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
|
||||||
|
eq128(mask2, v)));
|
||||||
|
|
||||||
// mask out where we can't match
|
// mask out where we can't match
|
||||||
u32 mask = (0xFFFF >> (16 - l));
|
u32 mask = (0xFFFF >> (16 - l));
|
||||||
@ -142,7 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
|||||||
v = and128(v, caseMask);
|
v = and128(v, caseMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
|
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
|
||||||
|
eq128(mask2, v)));
|
||||||
|
|
||||||
// mask out where we can't match
|
// mask out where we can't match
|
||||||
u32 buf_off = start - offset;
|
u32 buf_off = start - offset;
|
||||||
|
405
src/nfa/mcclellancompile_accel.cpp → src/nfa/accel_dfa_build_strat.cpp
Normal file → Executable file
405
src/nfa/mcclellancompile_accel.cpp → src/nfa/accel_dfa_build_strat.cpp
Normal file → Executable file
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2016, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,18 +26,20 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "mcclellancompile_accel.h"
|
#include "accel_dfa_build_strat.h"
|
||||||
|
|
||||||
#include "mcclellancompile_util.h"
|
|
||||||
|
|
||||||
|
#include "accel.h"
|
||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "nfagraph/ng_limex_accel.h"
|
#include "nfagraph/ng_limex_accel.h"
|
||||||
|
#include "shufticompile.h"
|
||||||
|
#include "trufflecompile.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "util/dump_charclass.h"
|
#include "util/dump_charclass.h"
|
||||||
|
#include "util/verify_types.h"
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#define PATHS_LIMIT 500
|
#define PATHS_LIMIT 500
|
||||||
|
|
||||||
@ -46,14 +48,13 @@ using namespace std;
|
|||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct path {
|
struct path {
|
||||||
vector<CharReach> reach;
|
vector<CharReach> reach;
|
||||||
dstate_id_t dest = DEAD_STATE;
|
dstate_id_t dest = DEAD_STATE;
|
||||||
explicit path(dstate_id_t base) : dest(base) {}
|
explicit path(dstate_id_t base) : dest(base) {
|
||||||
|
}
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static UNUSED
|
static UNUSED
|
||||||
string describeClasses(const vector<CharReach> &v) {
|
string describeClasses(const vector<CharReach> &v) {
|
||||||
@ -85,8 +86,8 @@ bool is_useful_path(const vector<path> &good, const path &p) {
|
|||||||
goto next;
|
goto next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("better: [%s] -> %u\n",
|
DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
|
||||||
describeClasses(g.reach).c_str(), g.dest);
|
g.dest);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
next:;
|
next:;
|
||||||
@ -106,8 +107,7 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void extend(const raw_dfa &rdfa, const path &p,
|
void extend(const raw_dfa &rdfa, const path &p,
|
||||||
map<u32, vector<path> > &all,
|
map<u32, vector<path>> &all, vector<path> &out) {
|
||||||
vector<path> &out) {
|
|
||||||
dstate s = rdfa.states[p.dest];
|
dstate s = rdfa.states[p.dest];
|
||||||
|
|
||||||
if (!p.reach.empty() && p.reach.back().none()) {
|
if (!p.reach.empty() && p.reach.back().none()) {
|
||||||
@ -154,10 +154,10 @@ void extend(const raw_dfa &rdfa, const path &p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
|
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
|
||||||
u32 len) {
|
dstate_id_t base, u32 len) {
|
||||||
vector<path> paths{ path(base) };
|
vector<path> paths{path(base)};
|
||||||
map<u32, vector<path> > all;
|
map<u32, vector<path>> all;
|
||||||
all[base].push_back(path(base));
|
all[base].push_back(path(base));
|
||||||
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
|
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
|
||||||
vector<path> next_gen;
|
vector<path> next_gen;
|
||||||
@ -170,7 +170,7 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
|
|||||||
|
|
||||||
dump_paths(paths);
|
dump_paths(paths);
|
||||||
|
|
||||||
vector<vector<CharReach> > rv;
|
vector<vector<CharReach>> rv;
|
||||||
for (auto &p : paths) {
|
for (auto &p : paths) {
|
||||||
rv.push_back(move(p.reach));
|
rv.push_back(move(p.reach));
|
||||||
}
|
}
|
||||||
@ -181,13 +181,55 @@ static
|
|||||||
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
||||||
u32 max_allowed_accel_offset) {
|
u32 max_allowed_accel_offset) {
|
||||||
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
||||||
vector<vector<CharReach> > paths = generate_paths(rdfa, base,
|
vector<vector<CharReach>> paths =
|
||||||
max_allowed_accel_offset + 1);
|
generate_paths(rdfa, base, max_allowed_accel_offset + 1);
|
||||||
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
|
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
|
||||||
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
|
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
|
||||||
return as;
|
return as;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static UNUSED
|
||||||
|
bool better(const AccelScheme &a, const AccelScheme &b) {
|
||||||
|
if (!a.double_byte.empty() && b.double_byte.empty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!b.double_byte.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return a.cr.count() < b.cr.count();
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||||
|
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||||
|
|
||||||
|
for (u32 i = 0; i < N_CHARS; i++) {
|
||||||
|
rv.at(rdfa.alpha_remap[i]).set(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool double_byte_ok(const AccelScheme &info) {
|
||||||
|
return !info.double_byte.empty() &&
|
||||||
|
info.double_cr.count() < info.double_byte.size() &&
|
||||||
|
info.double_cr.count() <= 2 && !info.double_byte.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
|
||||||
|
u16 top_remap = raw.alpha_remap[TOP];
|
||||||
|
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||||
|
if (i != top_remap && raw.states[s].next[i] == s) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||||
const CharReach &escape) {
|
const CharReach &escape) {
|
||||||
@ -201,6 +243,55 @@ vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
|||||||
return vector<u16>(rv.begin(), rv.end());
|
return vector<u16>(rv.begin(), rv.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
|
||||||
|
if (raw.start_floating != DEAD_STATE) {
|
||||||
|
DEBUG_PRINTF("has floating start\n");
|
||||||
|
return raw.start_floating;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("looking for SDS proxy\n");
|
||||||
|
|
||||||
|
dstate_id_t s = raw.start_anchored;
|
||||||
|
|
||||||
|
if (has_self_loop(s, raw)) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 top_remap = raw.alpha_remap[TOP];
|
||||||
|
|
||||||
|
ue2::unordered_set<dstate_id_t> seen;
|
||||||
|
while (true) {
|
||||||
|
seen.insert(s);
|
||||||
|
DEBUG_PRINTF("basis %hu\n", s);
|
||||||
|
|
||||||
|
/* check if we are connected to a state with a self loop */
|
||||||
|
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||||
|
dstate_id_t t = raw.states[s].next[i];
|
||||||
|
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find a neighbour to use as a basis for looking for the sds proxy */
|
||||||
|
dstate_id_t t = DEAD_STATE;
|
||||||
|
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||||
|
dstate_id_t tt = raw.states[s].next[i];
|
||||||
|
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
|
||||||
|
t = tt;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t == DEAD_STATE) {
|
||||||
|
/* we were unable to find a state to use as a SDS proxy */
|
||||||
|
return DEAD_STATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
s = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
||||||
const AccelScheme &ei) {
|
const AccelScheme &ei) {
|
||||||
@ -236,98 +327,10 @@ set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
|||||||
return region;
|
return region;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
AccelScheme
|
||||||
bool better(const AccelScheme &a, const AccelScheme &b) {
|
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
||||||
if (!a.double_byte.empty() && b.double_byte.empty()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!b.double_byte.empty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return a.cr.count() < b.cr.count();
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
|
||||||
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
|
||||||
|
|
||||||
for (u32 i = 0; i < N_CHARS; i++) {
|
|
||||||
rv.at(rdfa.alpha_remap[i]).set(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
|
|
||||||
const dfa_build_strat &strat,
|
|
||||||
const Grey &grey) {
|
|
||||||
map<dstate_id_t, AccelScheme> rv;
|
|
||||||
if (!grey.accelerateDFA) {
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
|
||||||
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
|
||||||
if (i == DEAD_STATE) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Note on report acceleration states: While we can't accelerate while we
|
|
||||||
* are spamming out callbacks, the QR code paths don't raise reports
|
|
||||||
* during scanning so they can accelerate report states. */
|
|
||||||
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
|
|
||||||
: ACCEL_DFA_MAX_STOP_CHAR;
|
|
||||||
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
|
|
||||||
|
|
||||||
AccelScheme ei = strat.find_escape_strings(i);
|
|
||||||
if (ei.cr.count() > single_limit) {
|
|
||||||
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
|
||||||
ei.cr.count());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("state %zu should be accelerable %zu\n",
|
|
||||||
i, ei.cr.count());
|
|
||||||
|
|
||||||
rv[i] = ei;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* provide accleration states to states in the region of sds */
|
|
||||||
if (contains(rv, sds_proxy)) {
|
|
||||||
AccelScheme sds_ei = rv[sds_proxy];
|
|
||||||
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
|
||||||
* may differ from double byte */
|
|
||||||
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
|
|
||||||
sds_ei.cr.count());
|
|
||||||
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
|
|
||||||
for (auto s : sds_region) {
|
|
||||||
if (!contains(rv, s) || better(sds_ei, rv[s])) {
|
|
||||||
rv[s] = sds_ei;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
bool double_byte_ok(const AccelScheme &info) {
|
|
||||||
return !info.double_byte.empty()
|
|
||||||
&& info.double_cr.count() < info.double_byte.size()
|
|
||||||
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
|
|
||||||
u32 max_allowed_accel_offset) {
|
|
||||||
AccelScheme rv;
|
AccelScheme rv;
|
||||||
|
const raw_dfa &rdfa = get_raw();
|
||||||
rv.cr.clear();
|
rv.cr.clear();
|
||||||
rv.offset = 0;
|
rv.offset = 0;
|
||||||
const dstate &raw = rdfa.states[this_idx];
|
const dstate &raw = rdfa.states[this_idx];
|
||||||
@ -402,14 +405,12 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
|
|||||||
|
|
||||||
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
|
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
|
||||||
DEBUG_PRINTF("broken %d\n", outs2_broken);
|
DEBUG_PRINTF("broken %d\n", outs2_broken);
|
||||||
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
|
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
|
||||||
&& this_idx == rdfa.start_floating
|
this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
|
||||||
&& this_idx != DEAD_STATE) {
|
|
||||||
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
|
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
|
||||||
auto offset = look_for_offset_accel(rdfa, this_idx,
|
auto offset =
|
||||||
max_allowed_accel_offset);
|
look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
|
||||||
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(),
|
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
|
||||||
rv.cr.count());
|
|
||||||
if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
|
if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
|
||||||
DEBUG_PRINTF("using offset accel\n");
|
DEBUG_PRINTF("using offset accel\n");
|
||||||
rv = offset;
|
rv = offset;
|
||||||
@ -419,4 +420,172 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||||
|
const AccelScheme &info,
|
||||||
|
void *accel_out) {
|
||||||
|
AccelAux *accel = (AccelAux *)accel_out;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
||||||
|
info.double_offset);
|
||||||
|
accel->generic.offset = verify_u8(info.offset);
|
||||||
|
|
||||||
|
if (double_byte_ok(info) && info.double_cr.none() &&
|
||||||
|
info.double_byte.size() == 1) {
|
||||||
|
accel->accel_type = ACCEL_DVERM;
|
||||||
|
accel->dverm.c1 = info.double_byte.begin()->first;
|
||||||
|
accel->dverm.c2 = info.double_byte.begin()->second;
|
||||||
|
accel->dverm.offset = verify_u8(info.double_offset);
|
||||||
|
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (double_byte_ok(info) && info.double_cr.none() &&
|
||||||
|
(info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
|
||||||
|
bool ok = true;
|
||||||
|
|
||||||
|
assert(!info.double_byte.empty());
|
||||||
|
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
|
||||||
|
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
|
||||||
|
|
||||||
|
for (const pair<u8, u8> &p : info.double_byte) {
|
||||||
|
if ((p.first & CASE_CLEAR) != firstC ||
|
||||||
|
(p.second & CASE_CLEAR) != secondC) {
|
||||||
|
ok = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ok) {
|
||||||
|
accel->accel_type = ACCEL_DVERM_NOCASE;
|
||||||
|
accel->dverm.c1 = firstC;
|
||||||
|
accel->dverm.c2 = secondC;
|
||||||
|
accel->dverm.offset = verify_u8(info.double_offset);
|
||||||
|
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
u8 m1;
|
||||||
|
u8 m2;
|
||||||
|
if (buildDvermMask(info.double_byte, &m1, &m2)) {
|
||||||
|
accel->accel_type = ACCEL_DVERM_MASKED;
|
||||||
|
accel->dverm.offset = verify_u8(info.double_offset);
|
||||||
|
accel->dverm.c1 = info.double_byte.begin()->first & m1;
|
||||||
|
accel->dverm.c2 = info.double_byte.begin()->second & m2;
|
||||||
|
accel->dverm.m1 = m1;
|
||||||
|
accel->dverm.m2 = m2;
|
||||||
|
DEBUG_PRINTF(
|
||||||
|
"building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
|
||||||
|
accel->dverm.c1, accel->dverm.c2);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (double_byte_ok(info) &&
|
||||||
|
shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
|
||||||
|
&accel->dshufti.lo1, &accel->dshufti.hi1,
|
||||||
|
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
|
||||||
|
accel->accel_type = ACCEL_DSHUFTI;
|
||||||
|
accel->dshufti.offset = verify_u8(info.double_offset);
|
||||||
|
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.cr.none()) {
|
||||||
|
accel->accel_type = ACCEL_RED_TAPE;
|
||||||
|
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
|
||||||
|
" from which there is no escape\n",
|
||||||
|
this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.cr.count() == 1) {
|
||||||
|
accel->accel_type = ACCEL_VERM;
|
||||||
|
accel->verm.c = info.cr.find_first();
|
||||||
|
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
|
||||||
|
accel->accel_type = ACCEL_VERM_NOCASE;
|
||||||
|
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
|
||||||
|
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.cr.count() > max_floating_stop_char()) {
|
||||||
|
accel->accel_type = ACCEL_NONE;
|
||||||
|
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
accel->accel_type = ACCEL_SHUFTI;
|
||||||
|
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
|
||||||
|
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(!info.cr.none());
|
||||||
|
accel->accel_type = ACCEL_TRUFFLE;
|
||||||
|
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
|
||||||
|
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
map<dstate_id_t, AccelScheme>
|
||||||
|
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
|
||||||
|
map<dstate_id_t, AccelScheme> rv;
|
||||||
|
raw_dfa &rdfa = get_raw();
|
||||||
|
if (!grey.accelerateDFA) {
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
||||||
|
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||||
|
if (i == DEAD_STATE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Note on report acceleration states: While we can't accelerate while
|
||||||
|
* we
|
||||||
|
* are spamming out callbacks, the QR code paths don't raise reports
|
||||||
|
* during scanning so they can accelerate report states. */
|
||||||
|
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t single_limit =
|
||||||
|
i == sds_proxy ? max_floating_stop_char() : max_stop_char();
|
||||||
|
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
|
||||||
|
|
||||||
|
AccelScheme ei = find_escape_strings(i);
|
||||||
|
if (ei.cr.count() > single_limit) {
|
||||||
|
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
||||||
|
ei.cr.count());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
|
||||||
|
|
||||||
|
rv[i] = ei;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* provide accleration states to states in the region of sds */
|
||||||
|
if (contains(rv, sds_proxy)) {
|
||||||
|
AccelScheme sds_ei = rv[sds_proxy];
|
||||||
|
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
||||||
|
* may differ from double byte */
|
||||||
|
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
|
||||||
|
sds_ei.cr.count());
|
||||||
|
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
|
||||||
|
for (auto s : sds_region) {
|
||||||
|
if (!contains(rv, s) || better(sds_ei, rv[s])) {
|
||||||
|
rv[s] = sds_ei;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
};
|
60
src/nfa/accel_dfa_build_strat.h
Executable file
60
src/nfa/accel_dfa_build_strat.h
Executable file
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ACCEL_DFA_BUILD_STRAT_H
|
||||||
|
#define ACCEL_DFA_BUILD_STRAT_H
|
||||||
|
|
||||||
|
#include "rdfa.h"
|
||||||
|
#include "dfa_build_strat.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
#include "util/accel_scheme.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
class ReportManager;
|
||||||
|
struct Grey;
|
||||||
|
|
||||||
|
class accel_dfa_build_strat : public dfa_build_strat {
|
||||||
|
public:
|
||||||
|
explicit accel_dfa_build_strat(const ReportManager &rm_in)
|
||||||
|
: dfa_build_strat(rm_in) {}
|
||||||
|
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
|
||||||
|
virtual size_t accelSize(void) const = 0;
|
||||||
|
virtual u32 max_allowed_offset_accel() const = 0;
|
||||||
|
virtual u32 max_stop_char() const = 0;
|
||||||
|
virtual u32 max_floating_stop_char() const = 0;
|
||||||
|
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||||
|
void *accel_out);
|
||||||
|
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ue2
|
||||||
|
|
||||||
|
#endif // ACCEL_DFA_BUILD_STRAT_H
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -37,30 +37,26 @@
|
|||||||
|
|
||||||
/** \brief The type for an NFA callback.
|
/** \brief The type for an NFA callback.
|
||||||
*
|
*
|
||||||
* This is a function that takes as arguments the current offset where the
|
* This is a function that takes as arguments the current start and end offsets
|
||||||
* match occurs, the id of the match and the context pointer that was passed
|
* where the match occurs, the id of the match and the context pointer that was
|
||||||
* into the NFA API function that executed the NFA.
|
* passed into the NFA API function that executed the NFA.
|
||||||
*
|
*
|
||||||
* The offset where the match occurs will be the offset after the character
|
* The start offset is the "start of match" (SOM) offset for the match. It is
|
||||||
* that caused the match. Thus, if we have a buffer containing 'abc', then a
|
* only provided by engines that natively support SOM tracking (e.g. Gough).
|
||||||
* pattern that matches an empty string will have an offset of 0, a pattern
|
*
|
||||||
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc'
|
* The end offset will be the offset after the character that caused the match.
|
||||||
* will have an offset of 3, which will be a value that is 'beyond' the size of
|
* Thus, if we have a buffer containing 'abc', then a pattern that matches an
|
||||||
* the buffer. That is, if we have n characters in the buffer, there are n+1
|
* empty string will have an offset of 0, a pattern that matches 'a' will have
|
||||||
* different potential offsets for matches.
|
* an offset of 1, and a pattern that matches 'abc' will have an offset of 3,
|
||||||
|
* which will be a value that is 'beyond' the size of the buffer. That is, if
|
||||||
|
* we have n characters in the buffer, there are n+1 different potential
|
||||||
|
* offsets for matches.
|
||||||
*
|
*
|
||||||
* This function should return an int - currently the possible return values
|
* This function should return an int - currently the possible return values
|
||||||
* are 0, which means 'stop running the engine' or non-zero, which means
|
* are 0, which means 'stop running the engine' or non-zero, which means
|
||||||
* 'continue matching'.
|
* 'continue matching'.
|
||||||
*/
|
*/
|
||||||
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context);
|
typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);
|
||||||
|
|
||||||
/** \brief The type for an NFA callback which also tracks start of match.
|
|
||||||
*
|
|
||||||
* see \ref NfaCallback
|
|
||||||
*/
|
|
||||||
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
|
|
||||||
void *context);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* standard \ref NfaCallback return value indicating that engine execution
|
* standard \ref NfaCallback return value indicating that engine execution
|
||||||
|
@ -98,7 +98,7 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
|
|||||||
if (match == REPEAT_MATCH) {
|
if (match == REPEAT_MATCH) {
|
||||||
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
|
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
|
||||||
subIdx, sub->report);
|
subIdx, sub->report);
|
||||||
if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) {
|
if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state,
|
|||||||
i = mmbit_iterate(matching, c->numRepeats, i)) {
|
i = mmbit_iterate(matching, c->numRepeats, i)) {
|
||||||
const struct SubCastle *sub = getSubCastle(c, i);
|
const struct SubCastle *sub = getSubCastle(c, i);
|
||||||
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
|
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
|
||||||
if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
|
if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
|
||||||
DEBUG_PRINTF("caller told us to halt\n");
|
DEBUG_PRINTF("caller told us to halt\n");
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
|||||||
return castleInAccept(c, q, report, q_cur_offset(q));
|
return castleInAccept(c, q, report, q_cur_offset(q));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
assert(n && q);
|
||||||
|
assert(n->type == CASTLE_NFA_0);
|
||||||
|
DEBUG_PRINTF("entry\n");
|
||||||
|
|
||||||
|
const struct Castle *c = getImplNfa(n);
|
||||||
|
const u64a offset = q_cur_offset(q);
|
||||||
|
DEBUG_PRINTF("offset=%llu\n", offset);
|
||||||
|
|
||||||
|
if (c->exclusive) {
|
||||||
|
u8 *active = (u8 *)q->streamState;
|
||||||
|
u8 *groups = active + c->groupIterOffset;
|
||||||
|
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||||
|
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||||
|
u8 *cur = active + i * c->activeIdxSize;
|
||||||
|
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||||
|
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||||
|
const struct SubCastle *sub = getSubCastle(c, activeIdx);
|
||||||
|
if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c->exclusive != PURE_EXCLUSIVE) {
|
||||||
|
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
|
||||||
|
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
|
||||||
|
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
|
||||||
|
DEBUG_PRINTF("subcastle %u\n", i);
|
||||||
|
const struct SubCastle *sub = getSubCastle(c, i);
|
||||||
|
if (subCastleInAccept(c, q, sub->report, offset, i)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
|
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
|
||||||
assert(n && q);
|
assert(n && q);
|
||||||
assert(n->type == CASTLE_NFA_0);
|
assert(n->type == CASTLE_NFA_0);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -48,7 +48,8 @@
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) {
|
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No GraphViz output for Castles.
|
// No GraphViz output for Castles.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,12 +32,14 @@
|
|||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
|
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
|
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
40
src/nfa/dfa_build_strat.cpp
Executable file
40
src/nfa/dfa_build_strat.cpp
Executable file
@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "dfa_build_strat.h"
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
|
||||||
|
raw_report_info::~raw_report_info() {}
|
||||||
|
|
||||||
|
dfa_build_strat::~dfa_build_strat() {}
|
||||||
|
|
||||||
|
raw_dfa::~raw_dfa() {}
|
||||||
|
|
||||||
|
} // namespace ue2
|
68
src/nfa/dfa_build_strat.h
Normal file
68
src/nfa/dfa_build_strat.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DFA_BUILD_STRAT_H
|
||||||
|
#define DFA_BUILD_STRAT_H
|
||||||
|
|
||||||
|
#include "rdfa.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
struct NFA;
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
class ReportManager;
|
||||||
|
|
||||||
|
struct raw_report_info {
|
||||||
|
virtual ~raw_report_info();
|
||||||
|
virtual u32 getReportListSize() const = 0; /* in bytes */
|
||||||
|
virtual size_t size() const = 0; /* number of lists */
|
||||||
|
virtual void fillReportLists(NFA *n, size_t base_offset,
|
||||||
|
std::vector<u32> &ro /* out */) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class dfa_build_strat {
|
||||||
|
public:
|
||||||
|
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
|
||||||
|
virtual ~dfa_build_strat();
|
||||||
|
virtual raw_dfa &get_raw() const = 0;
|
||||||
|
virtual std::unique_ptr<raw_report_info> gatherReports(
|
||||||
|
std::vector<u32> &reports /* out */,
|
||||||
|
std::vector<u32> &reports_eod /* out */,
|
||||||
|
u8 *isSingleReport /* out */,
|
||||||
|
ReportID *arbReport /* out */) const = 0;
|
||||||
|
protected:
|
||||||
|
const ReportManager &rm;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ue2
|
||||||
|
|
||||||
|
#endif // DFA_BUILD_STRAT_H
|
@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char doReports(SomNfaCallback cb, void *ctxt, const struct mcclellan *m,
|
char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
||||||
const struct gough_som_info *som, u16 s, u64a loc,
|
const struct gough_som_info *som, u16 s, u64a loc,
|
||||||
char eod, u16 * const cached_accept_state,
|
char eod, u16 * const cached_accept_state,
|
||||||
u32 * const cached_accept_id, u32 * const cached_accept_som) {
|
u32 * const cached_accept_id, u32 * const cached_accept_som) {
|
||||||
@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
|
|||||||
static really_inline
|
static really_inline
|
||||||
char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
|
char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
|
||||||
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||||
SomNfaCallback cb, void *ctxt, const u8 **c_final,
|
NfaCallback cb, void *ctxt, const u8 **c_final,
|
||||||
enum MatchMode mode) {
|
enum MatchMode mode) {
|
||||||
assert(ISALIGNED_N(state, 2));
|
assert(ISALIGNED_N(state, 2));
|
||||||
|
|
||||||
@ -461,7 +461,7 @@ with_accel:
|
|||||||
static really_inline
|
static really_inline
|
||||||
char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
|
char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
|
||||||
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||||
SomNfaCallback cb, void *ctxt, const u8 **c_final,
|
NfaCallback cb, void *ctxt, const u8 **c_final,
|
||||||
enum MatchMode mode) {
|
enum MatchMode mode) {
|
||||||
u8 s = *state;
|
u8 s = *state;
|
||||||
const u8 *c = buf, *c_end = buf + len;
|
const u8 *c = buf, *c_end = buf + len;
|
||||||
@ -595,7 +595,7 @@ with_accel:
|
|||||||
static never_inline
|
static never_inline
|
||||||
char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
||||||
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||||
SomNfaCallback cb, void *ctxt, const u8 **final_point,
|
NfaCallback cb, void *ctxt, const u8 **final_point,
|
||||||
enum MatchMode mode) {
|
enum MatchMode mode) {
|
||||||
return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
||||||
mode);
|
mode);
|
||||||
@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
|||||||
static never_inline
|
static never_inline
|
||||||
char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
||||||
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||||
SomNfaCallback cb, void *ctxt, const u8 **final_point,
|
NfaCallback cb, void *ctxt, const u8 **final_point,
|
||||||
enum MatchMode mode) {
|
enum MatchMode mode) {
|
||||||
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
||||||
mode);
|
mode);
|
||||||
@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) {
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
const u8 *hend, SomNfaCallback cb, void *context,
|
const u8 *hend, NfaCallback cb, void *context,
|
||||||
struct mq *q, s64a end, enum MatchMode mode) {
|
struct mq *q, s64a end, enum MatchMode mode) {
|
||||||
DEBUG_PRINTF("enter\n");
|
DEBUG_PRINTF("enter\n");
|
||||||
struct gough_som_info *som = getSomInfo(q->state);
|
struct gough_som_info *som = getSomInfo(q->state);
|
||||||
@ -755,7 +755,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||||
const u8 *hend, SomNfaCallback cb, void *context,
|
const u8 *hend, NfaCallback cb, void *context,
|
||||||
struct mq *q, s64a end, enum MatchMode mode) {
|
struct mq *q, s64a end, enum MatchMode mode) {
|
||||||
struct gough_som_info *som = getSomInfo(q->state);
|
struct gough_som_info *som = getSomInfo(q->state);
|
||||||
assert(n->type == GOUGH_NFA_16);
|
assert(n->type == GOUGH_NFA_16);
|
||||||
@ -887,7 +887,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
|||||||
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
|
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_8);
|
assert(n->type == GOUGH_NFA_8);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -899,7 +899,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
|
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_16);
|
assert(n->type == GOUGH_NFA_16);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -911,7 +911,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_8);
|
assert(n->type == GOUGH_NFA_8);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -923,7 +923,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_16);
|
assert(n->type == GOUGH_NFA_16);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -935,7 +935,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_8);
|
assert(n->type == GOUGH_NFA_8);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -952,7 +952,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
|||||||
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == GOUGH_NFA_16);
|
assert(n->type == GOUGH_NFA_16);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
@ -994,7 +994,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
|
|||||||
|
|
||||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
void *ctxt = q->context;
|
||||||
u8 s = *(u8 *)q->state;
|
u8 s = *(u8 *)q->state;
|
||||||
u64a offset = q_cur_offset(q);
|
u64a offset = q_cur_offset(q);
|
||||||
@ -1016,7 +1016,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
|
|
||||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||||
SomNfaCallback cb = q->som_cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
void *ctxt = q->context;
|
||||||
u16 s = *(u16 *)q->state;
|
u16 s = *(u16 *)q->state;
|
||||||
const struct mstate_aux *aux = get_aux(m, s);
|
const struct mstate_aux *aux = get_aux(m, s);
|
||||||
@ -1048,10 +1048,18 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
|
|||||||
return nfaExecMcClellan16_inAccept(n, report, q);
|
return nfaExecMcClellan16_inAccept(n, report, q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
return nfaExecMcClellan8_inAnyAccept(n, q);
|
||||||
|
}
|
||||||
|
|
||||||
|
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
return nfaExecMcClellan16_inAnyAccept(n, q);
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
char goughCheckEOD(const struct NFA *nfa, u16 s,
|
char goughCheckEOD(const struct NFA *nfa, u16 s,
|
||||||
const struct gough_som_info *som,
|
const struct gough_som_info *som,
|
||||||
u64a offset, SomNfaCallback cb, void *ctxt) {
|
u64a offset, NfaCallback cb, void *ctxt) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
||||||
const struct mstate_aux *aux = get_aux(m, s);
|
const struct mstate_aux *aux = get_aux(m, s);
|
||||||
|
|
||||||
@ -1063,20 +1071,18 @@ char goughCheckEOD(const struct NFA *nfa, u16 s,
|
|||||||
|
|
||||||
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
UNUSED const char *streamState, u64a offset,
|
UNUSED const char *streamState, u64a offset,
|
||||||
UNUSED NfaCallback callback,
|
NfaCallback callback, void *context) {
|
||||||
SomNfaCallback som_callback, void *context) {
|
|
||||||
const struct gough_som_info *som = getSomInfoConst(state);
|
const struct gough_som_info *som = getSomInfoConst(state);
|
||||||
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback,
|
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback,
|
||||||
context);
|
context);
|
||||||
}
|
}
|
||||||
|
|
||||||
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
UNUSED const char *streamState, u64a offset,
|
UNUSED const char *streamState, u64a offset,
|
||||||
UNUSED NfaCallback callback,
|
NfaCallback callback, void *context) {
|
||||||
SomNfaCallback som_callback, void *context) {
|
|
||||||
assert(ISALIGNED_N(state, 8));
|
assert(ISALIGNED_N(state, 8));
|
||||||
const struct gough_som_info *som = getSomInfoConst(state);
|
const struct gough_som_info *som = getSomInfoConst(state);
|
||||||
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback,
|
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback,
|
||||||
context);
|
context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,13 +39,13 @@ struct mq;
|
|||||||
|
|
||||||
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context);
|
||||||
void *context);
|
|
||||||
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||||
|
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
|
|||||||
|
|
||||||
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context);
|
||||||
void *context);
|
|
||||||
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||||
|
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
|
@ -79,9 +79,9 @@ namespace {
|
|||||||
class gough_build_strat : public mcclellan_build_strat {
|
class gough_build_strat : public mcclellan_build_strat {
|
||||||
public:
|
public:
|
||||||
gough_build_strat(
|
gough_build_strat(
|
||||||
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm,
|
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
|
||||||
const map<dstate_id_t, gough_accel_state_info> &accel_info)
|
const map<dstate_id_t, gough_accel_state_info> &accel_info)
|
||||||
: mcclellan_build_strat(r, rm), rdfa(r), gg(g),
|
: mcclellan_build_strat(r, rm_in), rdfa(r), gg(g),
|
||||||
accel_gough_info(accel_info) {}
|
accel_gough_info(accel_info) {}
|
||||||
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
|
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
|
||||||
vector<u32> &reports_eod /* out */,
|
vector<u32> &reports_eod /* out */,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
|
|||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
|
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
|
||||||
|
UNUSED const string &base) {
|
||||||
assert(nfa->type == GOUGH_NFA_8);
|
assert(nfa->type == GOUGH_NFA_8);
|
||||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||||
|
|
||||||
@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
|
|||||||
dumpTextReverse(nfa, f);
|
dumpTextReverse(nfa, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
|
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
|
||||||
|
UNUSED const string &base) {
|
||||||
assert(nfa->type == GOUGH_NFA_16);
|
assert(nfa->type == GOUGH_NFA_16);
|
||||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -33,12 +33,16 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file);
|
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
|
||||||
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file);
|
const std::string &base);
|
||||||
|
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
|
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
|
||||||
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);
|
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -293,7 +293,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("firing match at %llu\n", i);
|
DEBUG_PRINTF("firing match at %llu\n", i);
|
||||||
if (cb(i, l->report, ctx) == MO_HALT_MATCHING) {
|
if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
|
|||||||
char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
|
char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||||
|
char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
|
|||||||
char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
|
|||||||
const struct lbr_common *l = getImplNfa(nfa);
|
const struct lbr_common *l = getImplNfa(nfa);
|
||||||
u64a offset = q_cur_offset(q);
|
u64a offset = q_cur_offset(q);
|
||||||
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
|
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
|
||||||
q->cb(offset, l->report, q->context);
|
q->cb(0, offset, l->report, q->context);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
|
|||||||
return lbrInAccept(l, lstate, q->streamState, offset, report);
|
return lbrInAccept(l, lstate, q->streamState, offset, report);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
||||||
|
assert(nfa && q);
|
||||||
|
assert(isLbrType(nfa->type));
|
||||||
|
DEBUG_PRINTF("entry\n");
|
||||||
|
|
||||||
|
const struct lbr_common *l = getImplNfa(nfa);
|
||||||
|
return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
|
||||||
|
}
|
||||||
|
|
||||||
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
|
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
|
||||||
struct mq *q) {
|
struct mq *q) {
|
||||||
assert(nfa && q);
|
assert(nfa && q);
|
||||||
@ -206,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
|
|||||||
|
|
||||||
if (q->report_current) {
|
if (q->report_current) {
|
||||||
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
|
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
|
||||||
int rv = q->cb(q_cur_offset(q), l->report, q->context);
|
int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
|
||||||
q->report_current = 0;
|
q->report_current = 0;
|
||||||
if (rv == MO_HALT_MATCHING) {
|
if (rv == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -49,23 +49,28 @@
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No impl
|
// No impl
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No impl
|
// No impl
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No impl
|
// No impl
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No impl
|
// No impl
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
// No impl
|
// No impl
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,16 +32,22 @@
|
|||||||
#ifdef DUMP_SUPPORT
|
#ifdef DUMP_SUPPORT
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file);
|
const std::string &base);
|
||||||
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file);
|
const std::string &base);
|
||||||
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
|
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
|
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#define LIMEX_H
|
#define LIMEX_H
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
#include <string>
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
#endif
|
#endif
|
||||||
@ -40,7 +41,8 @@ extern "C"
|
|||||||
#define GENERATE_NFA_DUMP_DECL(gf_name) \
|
#define GENERATE_NFA_DUMP_DECL(gf_name) \
|
||||||
} /* extern "C" */ \
|
} /* extern "C" */ \
|
||||||
namespace ue2 { \
|
namespace ue2 { \
|
||||||
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \
|
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
|
||||||
|
const std::string &base); \
|
||||||
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
|
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
|
||||||
} /* namespace ue2 */ \
|
} /* namespace ue2 */ \
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -52,14 +54,14 @@ extern "C"
|
|||||||
#define GENERATE_NFA_DECL(gf_name) \
|
#define GENERATE_NFA_DECL(gf_name) \
|
||||||
char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
|
char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
|
||||||
const char *streamState, u64a offset, \
|
const char *streamState, u64a offset, \
|
||||||
NfaCallback callback, SomNfaCallback som_cb, \
|
NfaCallback callback, void *context); \
|
||||||
void *context); \
|
|
||||||
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
||||||
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
||||||
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
||||||
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
|
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
|
||||||
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
||||||
struct mq *q); \
|
struct mq *q); \
|
||||||
|
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
|
||||||
char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
|
char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
|
||||||
char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
|
char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
|
||||||
void *state, u8 key); \
|
void *state, u8 key); \
|
||||||
@ -74,41 +76,11 @@ extern "C"
|
|||||||
struct mq *q, s64a loc); \
|
struct mq *q, s64a loc); \
|
||||||
GENERATE_NFA_DUMP_DECL(gf_name)
|
GENERATE_NFA_DUMP_DECL(gf_name)
|
||||||
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_1)
|
GENERATE_NFA_DECL(nfaExecLimEx32)
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_2)
|
GENERATE_NFA_DECL(nfaExecLimEx128)
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_3)
|
GENERATE_NFA_DECL(nfaExecLimEx256)
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_4)
|
GENERATE_NFA_DECL(nfaExecLimEx384)
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_5)
|
GENERATE_NFA_DECL(nfaExecLimEx512)
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_6)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx32_7)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_1)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_2)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_3)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_4)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_5)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_6)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx128_7)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_1)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_2)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_3)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_4)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_5)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_6)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx256_7)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_1)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_2)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_3)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_4)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_5)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_6)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx384_7)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_1)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_2)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_3)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_4)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_5)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_6)
|
|
||||||
GENERATE_NFA_DECL(nfaExecLimEx512_7)
|
|
||||||
|
|
||||||
#undef GENERATE_NFA_DECL
|
#undef GENERATE_NFA_DECL
|
||||||
#undef GENERATE_NFA_DUMP_DECL
|
#undef GENERATE_NFA_DUMP_DECL
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include "accel.h"
|
#include "accel.h"
|
||||||
#include "limex_internal.h"
|
#include "limex_internal.h"
|
||||||
#include "limex_limits.h"
|
#include "limex_limits.h"
|
||||||
|
#include "limex_shuffle.h"
|
||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "shufti.h"
|
#include "shufti.h"
|
||||||
#include "truffle.h"
|
#include "truffle.h"
|
||||||
@ -44,10 +45,7 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "vermicelli.h"
|
#include "vermicelli.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/shuffle.h"
|
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
#include "util/shuffle_ssse3.h"
|
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
|
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
|
||||||
@ -80,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
|
|||||||
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
|
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
|
||||||
const union AccelAux *aux, const u8 *input, size_t i,
|
const union AccelAux *aux, const u8 *input, size_t i,
|
||||||
size_t end) {
|
size_t end) {
|
||||||
u32 idx = shuffleDynamic32(s, accel);
|
u32 idx = packedExtract32(s, accel);
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,7 +90,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
|
|||||||
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
|
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
|
||||||
m128 accelPerm = limex->accelPermute;
|
m128 accelPerm = limex->accelPermute;
|
||||||
m128 accelComp = limex->accelCompare;
|
m128 accelComp = limex->accelCompare;
|
||||||
idx = shufflePshufb128(s, accelPerm, accelComp);
|
idx = packedExtract128(s, accelPerm, accelComp);
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,17 +103,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
|
|||||||
m256 accelPerm = limex->accelPermute;
|
m256 accelPerm = limex->accelPermute;
|
||||||
m256 accelComp = limex->accelCompare;
|
m256 accelComp = limex->accelCompare;
|
||||||
#if !defined(__AVX2__)
|
#if !defined(__AVX2__)
|
||||||
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
|
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
||||||
u32 idx2 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
|
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
||||||
#else
|
|
||||||
// TODO: learn you some avx2 shuffles for great good
|
|
||||||
u32 idx1 = shufflePshufb128(movdq_lo(s), movdq_lo(accelPerm),
|
|
||||||
movdq_lo(accelComp));
|
|
||||||
u32 idx2 = shufflePshufb128(movdq_hi(s), movdq_hi(accelPerm),
|
|
||||||
movdq_hi(accelComp));
|
|
||||||
#endif
|
|
||||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||||
idx = idx1 | idx2;
|
idx = idx1 | idx2;
|
||||||
|
#else
|
||||||
|
idx = packedExtract256(s, accelPerm, accelComp);
|
||||||
|
#endif
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,9 +121,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
|
|||||||
DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
|
DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
|
||||||
m384 accelPerm = limex->accelPermute;
|
m384 accelPerm = limex->accelPermute;
|
||||||
m384 accelComp = limex->accelCompare;
|
m384 accelComp = limex->accelCompare;
|
||||||
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
|
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
||||||
u32 idx2 = shufflePshufb128(s.mid, accelPerm.mid, accelComp.mid);
|
u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid);
|
||||||
u32 idx3 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
|
u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
||||||
assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
|
assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
|
||||||
idx = idx1 | idx2 | idx3;
|
idx = idx1 | idx2 | idx3;
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
@ -144,21 +138,17 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
|
|||||||
m512 accelPerm = limex->accelPermute;
|
m512 accelPerm = limex->accelPermute;
|
||||||
m512 accelComp = limex->accelCompare;
|
m512 accelComp = limex->accelCompare;
|
||||||
#if !defined(__AVX2__)
|
#if !defined(__AVX2__)
|
||||||
u32 idx1 = shufflePshufb128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
||||||
u32 idx2 = shufflePshufb128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
||||||
u32 idx3 = shufflePshufb128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
||||||
u32 idx4 = shufflePshufb128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
||||||
#else
|
|
||||||
u32 idx1 = shufflePshufb128(movdq_lo(s.lo), movdq_lo(accelPerm.lo),
|
|
||||||
movdq_lo(accelComp.lo));
|
|
||||||
u32 idx2 = shufflePshufb128(movdq_hi(s.lo), movdq_hi(accelPerm.lo),
|
|
||||||
movdq_hi(accelComp.lo));
|
|
||||||
u32 idx3 = shufflePshufb128(movdq_lo(s.hi), movdq_lo(accelPerm.hi),
|
|
||||||
movdq_lo(accelComp.hi));
|
|
||||||
u32 idx4 = shufflePshufb128(movdq_hi(s.hi), movdq_hi(accelPerm.hi),
|
|
||||||
movdq_hi(accelComp.hi));
|
|
||||||
#endif
|
|
||||||
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
||||||
idx = idx1 | idx2 | idx3 | idx4;
|
idx = idx1 | idx2 | idx3 | idx4;
|
||||||
|
#else
|
||||||
|
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
||||||
|
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
||||||
|
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||||
|
idx = idx1 | idx2;
|
||||||
|
#endif
|
||||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -40,6 +40,7 @@
|
|||||||
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
|
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
|
||||||
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
|
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
|
||||||
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
|
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
|
||||||
|
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
|
||||||
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
|
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
|
||||||
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
|
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
|
||||||
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
|
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
|
||||||
@ -118,7 +119,7 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
|
|||||||
if (TESTBIT_STATE(s, a->state)) {
|
if (TESTBIT_STATE(s, a->state)) {
|
||||||
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
||||||
a->state, a->externalId, offset);
|
a->state, a->externalId, offset);
|
||||||
int rv = callback(offset, a->externalId, context);
|
int rv = callback(0, offset, a->externalId, context);
|
||||||
if (unlikely(rv == MO_HALT_MATCHING)) {
|
if (unlikely(rv == MO_HALT_MATCHING)) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -149,7 +150,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
|
|||||||
if (TESTBIT_STATE(s, a->state)) {
|
if (TESTBIT_STATE(s, a->state)) {
|
||||||
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
||||||
a->state, a->externalId, offset);
|
a->state, a->externalId, offset);
|
||||||
int rv = callback(offset, a->externalId, context);
|
int rv = callback(0, offset, a->externalId, context);
|
||||||
if (unlikely(rv == MO_HALT_MATCHING)) {
|
if (unlikely(rv == MO_HALT_MATCHING)) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||||
|
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||||
|
u64a offset) {
|
||||||
|
assert(limex);
|
||||||
|
|
||||||
|
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
|
||||||
|
STATE_T accstate = AND_STATE(state, acceptMask);
|
||||||
|
|
||||||
|
// Are we in an accept state?
|
||||||
|
if (ISZERO_STATE(accstate)) {
|
||||||
|
DEBUG_PRINTF("no accept states are on\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
|
||||||
|
|
||||||
|
return ISNONZERO_STATE(accstate);
|
||||||
|
}
|
||||||
|
|
||||||
#undef TESTEOD_FN
|
#undef TESTEOD_FN
|
||||||
#undef TESTEOD_REV_FN
|
#undef TESTEOD_REV_FN
|
||||||
#undef REPORTCURRENT_FN
|
#undef REPORTCURRENT_FN
|
||||||
#undef EXPIRE_ESTATE_FN
|
#undef EXPIRE_ESTATE_FN
|
||||||
#undef LIMEX_INACCEPT_FN
|
#undef LIMEX_INACCEPT_FN
|
||||||
|
#undef LIMEX_INANYACCEPT_FN
|
||||||
#undef INITIAL_FN
|
#undef INITIAL_FN
|
||||||
#undef TOP_FN
|
#undef TOP_FN
|
||||||
#undef TOPN_FN
|
#undef TOPN_FN
|
||||||
|
@ -167,12 +167,10 @@ struct build_info {
|
|||||||
limex_accel_info accel;
|
limex_accel_info accel;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define LAST_LIMEX_NFA LIMEX_NFA_512
|
||||||
|
|
||||||
// Constants for scoring mechanism
|
// Constants for scoring mechanism
|
||||||
|
const int SHIFT_COST = 10; // limex: cost per shift mask
|
||||||
#define LAST_LIMEX_NFA LIMEX_NFA_512_7
|
|
||||||
|
|
||||||
const int LIMEX_INITIAL_SCORE = 2000;
|
|
||||||
const int SHIFT_COST = 20; // limex: cost per shift mask
|
|
||||||
const int EXCEPTION_COST = 4; // limex: per exception
|
const int EXCEPTION_COST = 4; // limex: per exception
|
||||||
|
|
||||||
template<NFAEngineType t> struct NFATraits { };
|
template<NFAEngineType t> struct NFATraits { };
|
||||||
@ -261,6 +259,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class Mask>
|
||||||
|
bool isMaskZero(Mask &m) {
|
||||||
|
u8 *m8 = (u8 *)&m;
|
||||||
|
for (u32 i = 0; i < sizeof(m); i++) {
|
||||||
|
if (m8[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Sets an entire byte in a mask to the given value
|
// Sets an entire byte in a mask to the given value
|
||||||
template<class Mask>
|
template<class Mask>
|
||||||
void maskSetByte(Mask &m, const unsigned int idx, const char val) {
|
void maskSetByte(Mask &m, const unsigned int idx, const char val) {
|
||||||
@ -336,7 +345,7 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct AccelBuild {
|
struct AccelBuild {
|
||||||
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
|
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
|
||||||
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
||||||
NFAVertex v;
|
NFAVertex v;
|
||||||
u32 state;
|
u32 state;
|
||||||
@ -999,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
|
|||||||
// Suffixes and outfixes can mask out leaf states, which should all be
|
// Suffixes and outfixes can mask out leaf states, which should all be
|
||||||
// accepts. Right now we can only do this when there is nothing in initDs,
|
// accepts. Right now we can only do this when there is nothing in initDs,
|
||||||
// as we switch that on unconditionally in the expand call.
|
// as we switch that on unconditionally in the expand call.
|
||||||
if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
|
if (!inspects_states_for_accepts(h)
|
||||||
|
&& !hasInitDsStates(h, args.state_ids)) {
|
||||||
NFAStateSet nonleaf(args.num_states);
|
NFAStateSet nonleaf(args.num_states);
|
||||||
for (const auto &e : edges_range(h)) {
|
for (const auto &e : edges_range(h)) {
|
||||||
u32 from = args.state_ids.at(source(e, h));
|
u32 from = args.state_ids.at(source(e, h));
|
||||||
@ -1162,12 +1172,13 @@ u32 getReportListIndex(const flat_set<ReportID> &reports,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void buildExceptionMap(const build_info &args,
|
u32 buildExceptionMap(const build_info &args,
|
||||||
const ue2::unordered_set<NFAEdge> &exceptional,
|
const ue2::unordered_set<NFAEdge> &exceptional,
|
||||||
map<ExceptionProto, vector<u32> > &exceptionMap,
|
map<ExceptionProto, vector<u32> > &exceptionMap,
|
||||||
vector<ReportID> &exceptionReports) {
|
vector<ReportID> &exceptionReports) {
|
||||||
const NGHolder &h = args.h;
|
const NGHolder &h = args.h;
|
||||||
const u32 num_states = args.num_states;
|
const u32 num_states = args.num_states;
|
||||||
|
u32 exceptionCount = 0;
|
||||||
|
|
||||||
ue2::unordered_map<NFAVertex, u32> pos_trigger;
|
ue2::unordered_map<NFAVertex, u32> pos_trigger;
|
||||||
ue2::unordered_map<NFAVertex, u32> tug_trigger;
|
ue2::unordered_map<NFAVertex, u32> tug_trigger;
|
||||||
@ -1297,10 +1308,13 @@ void buildExceptionMap(const build_info &args,
|
|||||||
assert(e.succ_states.size() == num_states);
|
assert(e.succ_states.size() == num_states);
|
||||||
assert(e.squash_states.size() == num_states);
|
assert(e.squash_states.size() == num_states);
|
||||||
exceptionMap[e].push_back(i);
|
exceptionMap[e].push_back(i);
|
||||||
|
exceptionCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size());
|
DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount,
|
||||||
|
exceptionMap.size());
|
||||||
|
return exceptionCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -1315,6 +1329,92 @@ u32 depth_to_u32(const depth &d) {
|
|||||||
return d_val;
|
return d_val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
|
||||||
|
const build_info &args, u32 maxShift) {
|
||||||
|
NFAVertex from = source(e, h);
|
||||||
|
NFAVertex to = target(e, h);
|
||||||
|
u32 f = args.state_ids.at(from);
|
||||||
|
u32 t = args.state_ids.at(to);
|
||||||
|
if (!isLimitedTransition(f, t, maxShift)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// All transitions out of a tug trigger are exceptional.
|
||||||
|
if (contains(args.tugs, from)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
||||||
|
const NGHolder &h = args.h;
|
||||||
|
u32 shiftMask = 0;
|
||||||
|
for (const auto &e : edges_range(h)) {
|
||||||
|
u32 from = args.state_ids.at(source(e, h));
|
||||||
|
u32 to = args.state_ids.at(target(e, h));
|
||||||
|
if (from == NO_STATE || to == NO_STATE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
|
||||||
|
shiftMask |= (1UL << (to - from));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 maxVarShift = 0;
|
||||||
|
for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) {
|
||||||
|
maxVarShift = findAndClearLSB_32(&shiftMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
return maxVarShift;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
int getLimexScore(const build_info &args, u32 nShifts) {
|
||||||
|
const NGHolder &h = args.h;
|
||||||
|
u32 maxVarShift = nShifts;
|
||||||
|
int score = 0;
|
||||||
|
|
||||||
|
score += SHIFT_COST * nShifts;
|
||||||
|
maxVarShift = findMaxVarShift(args, nShifts);
|
||||||
|
|
||||||
|
NFAStateSet exceptionalStates(args.num_states);
|
||||||
|
for (const auto &e : edges_range(h)) {
|
||||||
|
u32 from = args.state_ids.at(source(e, h));
|
||||||
|
u32 to = args.state_ids.at(target(e, h));
|
||||||
|
if (from == NO_STATE || to == NO_STATE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (isExceptionalTransition(h, e, args, maxVarShift)) {
|
||||||
|
exceptionalStates.set(from);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
score += EXCEPTION_COST * exceptionalStates.count();
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function finds the best shift scheme with highest score
|
||||||
|
// Returns number of shifts and score calculated for appropriate scheme
|
||||||
|
// Returns zero if no appropriate scheme was found
|
||||||
|
static
|
||||||
|
u32 findBestNumOfVarShifts(const build_info &args,
|
||||||
|
int *bestScoreRet = nullptr) {
|
||||||
|
u32 bestNumOfVarShifts = 0;
|
||||||
|
int bestScore = INT_MAX;
|
||||||
|
for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) {
|
||||||
|
int score = getLimexScore(args, shiftCount);
|
||||||
|
if (score < bestScore) {
|
||||||
|
bestScore = score;
|
||||||
|
bestNumOfVarShifts = shiftCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bestScoreRet != nullptr) {
|
||||||
|
*bestScoreRet = bestScore;
|
||||||
|
}
|
||||||
|
return bestNumOfVarShifts;
|
||||||
|
}
|
||||||
|
|
||||||
template<NFAEngineType dtype>
|
template<NFAEngineType dtype>
|
||||||
struct Factory {
|
struct Factory {
|
||||||
// typedefs for readability, for types derived from traits
|
// typedefs for readability, for types derived from traits
|
||||||
@ -1322,25 +1422,6 @@ struct Factory {
|
|||||||
typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
|
typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
|
||||||
typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
|
typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
|
||||||
|
|
||||||
static
|
|
||||||
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
|
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
const ue2::unordered_set<NFAVertex> &tugs) {
|
|
||||||
NFAVertex from = source(e, h);
|
|
||||||
NFAVertex to = target(e, h);
|
|
||||||
u32 f = state_ids.at(from);
|
|
||||||
u32 t = state_ids.at(to);
|
|
||||||
if (!isLimitedTransition(f, t, NFATraits<dtype>::maxShift)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// All transitions out of a tug trigger are exceptional.
|
|
||||||
if (contains(tugs, from)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
||||||
u32 repeatStreamState) {
|
u32 repeatStreamState) {
|
||||||
@ -1504,6 +1585,9 @@ struct Factory {
|
|||||||
static
|
static
|
||||||
void writeShiftMasks(const build_info &args, implNFA_t *limex) {
|
void writeShiftMasks(const build_info &args, implNFA_t *limex) {
|
||||||
const NGHolder &h = args.h;
|
const NGHolder &h = args.h;
|
||||||
|
u32 maxShift = findMaxVarShift(args, limex->shiftCount);
|
||||||
|
u32 shiftMask = 0;
|
||||||
|
int shiftMaskIdx = 0;
|
||||||
|
|
||||||
for (const auto &e : edges_range(h)) {
|
for (const auto &e : edges_range(h)) {
|
||||||
u32 from = args.state_ids.at(source(e, h));
|
u32 from = args.state_ids.at(source(e, h));
|
||||||
@ -1515,15 +1599,32 @@ struct Factory {
|
|||||||
// We check for exceptional transitions here, as we don't want tug
|
// We check for exceptional transitions here, as we don't want tug
|
||||||
// trigger transitions emitted as limited transitions (even if they
|
// trigger transitions emitted as limited transitions (even if they
|
||||||
// could be in this model).
|
// could be in this model).
|
||||||
if (!isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
if (!isExceptionalTransition(h, e, args, maxShift)) {
|
||||||
maskSetBit(limex->shift[to - from], from);
|
u32 shift = to - from;
|
||||||
|
if ((shiftMask & (1UL << shift)) == 0UL) {
|
||||||
|
shiftMask |= (1UL << shift);
|
||||||
|
limex->shiftAmount[shiftMaskIdx++] = (u8)shift;
|
||||||
|
}
|
||||||
|
assert(limex->shiftCount <= MAX_SHIFT_COUNT);
|
||||||
|
for (u32 i = 0; i < limex->shiftCount; i++) {
|
||||||
|
if (limex->shiftAmount[i] == (u8)shift) {
|
||||||
|
maskSetBit(limex->shift[i], from);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (maxShift && limex->shiftCount > 1) {
|
||||||
|
for (u32 i = 0; i < limex->shiftCount; i++) {
|
||||||
|
assert(!isMaskZero(limex->shift[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void findExceptionalTransitions(const build_info &args,
|
void findExceptionalTransitions(const build_info &args,
|
||||||
ue2::unordered_set<NFAEdge> &exceptional) {
|
ue2::unordered_set<NFAEdge> &exceptional,
|
||||||
|
u32 maxShift) {
|
||||||
const NGHolder &h = args.h;
|
const NGHolder &h = args.h;
|
||||||
|
|
||||||
for (const auto &e : edges_range(h)) {
|
for (const auto &e : edges_range(h)) {
|
||||||
@ -1533,7 +1634,7 @@ struct Factory {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
if (isExceptionalTransition(h, e, args, maxShift)) {
|
||||||
exceptional.insert(e);
|
exceptional.insert(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1545,19 +1646,25 @@ struct Factory {
|
|||||||
implNFA_t *limex, const u32 exceptionsOffset) {
|
implNFA_t *limex, const u32 exceptionsOffset) {
|
||||||
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
|
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
|
||||||
|
|
||||||
// to make testing easier, we pre-set the exceptionMap to all invalid
|
|
||||||
// values
|
|
||||||
memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap));
|
|
||||||
|
|
||||||
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
|
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
|
||||||
assert(ISALIGNED(etable));
|
assert(ISALIGNED(etable));
|
||||||
|
|
||||||
u32 ecount = 0;
|
map<u32, ExceptionProto> exception_by_state;
|
||||||
for (const auto &m : exceptionMap) {
|
for (const auto &m : exceptionMap) {
|
||||||
const ExceptionProto &proto = m.first;
|
const ExceptionProto &proto = m.first;
|
||||||
const vector<u32> &states = m.second;
|
const vector<u32> &states = m.second;
|
||||||
DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount,
|
for (u32 i : states) {
|
||||||
states.size());
|
assert(!contains(exception_by_state, i));
|
||||||
|
exception_by_state.emplace(i, proto);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 ecount = 0;
|
||||||
|
for (const auto &m : exception_by_state) {
|
||||||
|
const ExceptionProto &proto = m.second;
|
||||||
|
u32 state_id = m.first;
|
||||||
|
DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount,
|
||||||
|
state_id);
|
||||||
|
|
||||||
// Write the exception entry.
|
// Write the exception entry.
|
||||||
exception_t &e = etable[ecount];
|
exception_t &e = etable[ecount];
|
||||||
@ -1571,13 +1678,10 @@ struct Factory {
|
|||||||
: repeatOffsets[proto.repeat_index];
|
: repeatOffsets[proto.repeat_index];
|
||||||
e.repeatOffset = repeat_offset;
|
e.repeatOffset = repeat_offset;
|
||||||
|
|
||||||
// for each state that can switch it on
|
// for the state that can switch it on
|
||||||
for (auto state_id : states) {
|
|
||||||
// set this bit in the exception mask
|
// set this bit in the exception mask
|
||||||
maskSetBit(limex->exceptionMask, state_id);
|
maskSetBit(limex->exceptionMask, state_id);
|
||||||
// set this index in the exception map
|
|
||||||
limex->exceptionMap[state_id] = ecount;
|
|
||||||
}
|
|
||||||
ecount++;
|
ecount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1778,16 +1882,17 @@ struct Factory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ue2::unordered_set<NFAEdge> exceptional;
|
ue2::unordered_set<NFAEdge> exceptional;
|
||||||
findExceptionalTransitions(args, exceptional);
|
u32 shiftCount = findBestNumOfVarShifts(args);
|
||||||
|
assert(shiftCount);
|
||||||
|
u32 maxShift = findMaxVarShift(args, shiftCount);
|
||||||
|
findExceptionalTransitions(args, exceptional, maxShift);
|
||||||
|
|
||||||
map<ExceptionProto, vector<u32> > exceptionMap;
|
map<ExceptionProto, vector<u32> > exceptionMap;
|
||||||
vector<ReportID> exceptionReports;
|
vector<ReportID> exceptionReports;
|
||||||
buildExceptionMap(args, exceptional, exceptionMap, exceptionReports);
|
u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
|
||||||
|
exceptionReports);
|
||||||
|
|
||||||
if (exceptionMap.size() > ~0U) {
|
assert(exceptionCount <= args.num_states);
|
||||||
DEBUG_PRINTF("too many exceptions!\n");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build reach table and character mapping.
|
// Build reach table and character mapping.
|
||||||
vector<NFAStateSet> reach;
|
vector<NFAStateSet> reach;
|
||||||
@ -1842,7 +1947,7 @@ struct Factory {
|
|||||||
|
|
||||||
offset = ROUNDUP_CL(offset);
|
offset = ROUNDUP_CL(offset);
|
||||||
const u32 exceptionsOffset = offset;
|
const u32 exceptionsOffset = offset;
|
||||||
offset += sizeof(exception_t) * exceptionMap.size();
|
offset += sizeof(exception_t) * exceptionCount;
|
||||||
|
|
||||||
const u32 exceptionReportsOffset = offset;
|
const u32 exceptionReportsOffset = offset;
|
||||||
offset += sizeof(ReportID) * exceptionReports.size();
|
offset += sizeof(ReportID) * exceptionReports.size();
|
||||||
@ -1874,6 +1979,7 @@ struct Factory {
|
|||||||
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
|
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
|
||||||
limex, acceptsOffset, acceptsEodOffset, squashOffset);
|
limex, acceptsOffset, acceptsEodOffset, squashOffset);
|
||||||
|
|
||||||
|
limex->shiftCount = shiftCount;
|
||||||
writeShiftMasks(args, limex);
|
writeShiftMasks(args, limex);
|
||||||
|
|
||||||
// Determine the state required for our state vector.
|
// Determine the state required for our state vector.
|
||||||
@ -1907,8 +2013,6 @@ struct Factory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int score(const build_info &args) {
|
static int score(const build_info &args) {
|
||||||
const NGHolder &h = args.h;
|
|
||||||
|
|
||||||
// LimEx NFAs are available in sizes from 32 to 512-bit.
|
// LimEx NFAs are available in sizes from 32 to 512-bit.
|
||||||
size_t num_states = args.num_states;
|
size_t num_states = args.num_states;
|
||||||
|
|
||||||
@ -1928,45 +2032,17 @@ struct Factory {
|
|||||||
sz = args.cc.grey.nfaForceSize;
|
sz = args.cc.grey.nfaForceSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args.cc.grey.nfaForceShifts &&
|
|
||||||
NFATraits<dtype>::maxShift != args.cc.grey.nfaForceShifts) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sz != NFATraits<dtype>::maxStates) {
|
if (sz != NFATraits<dtype>::maxStates) {
|
||||||
return -1; // fail, size not appropriate
|
return -1; // fail, size not appropriate
|
||||||
}
|
}
|
||||||
|
|
||||||
// We are of the right size, calculate a score based on the number
|
// We are of the right size, calculate a score based on the number
|
||||||
// of exceptions and the number of shifts used by this LimEx.
|
// of exceptions and the number of shifts used by this LimEx.
|
||||||
int score = LIMEX_INITIAL_SCORE;
|
int score;
|
||||||
if (NFATraits<dtype>::maxShift != 0) {
|
u32 shiftCount = findBestNumOfVarShifts(args, &score);
|
||||||
score -= SHIFT_COST / 2; // first shift mask is cheap
|
if (shiftCount == 0) {
|
||||||
score -= SHIFT_COST * (NFATraits<dtype>::maxShift - 1);
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
NFAStateSet exceptionalStates(num_states); // outbound exc trans
|
|
||||||
|
|
||||||
for (const auto &e : edges_range(h)) {
|
|
||||||
u32 from = args.state_ids.at(source(e, h));
|
|
||||||
u32 to = args.state_ids.at(target(e, h));
|
|
||||||
if (from == NO_STATE || to == NO_STATE) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
|
||||||
exceptionalStates.set(from);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("%zu exceptional states\n", exceptionalStates.count());
|
|
||||||
score -= EXCEPTION_COST * exceptionalStates.count();
|
|
||||||
|
|
||||||
/* ensure that we always report a valid score if have the right number
|
|
||||||
* of states */
|
|
||||||
if (score < 0) {
|
|
||||||
score = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -1985,50 +2061,19 @@ struct scoreNfa {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
|
#define MAKE_LIMEX_TRAITS(mlt_size) \
|
||||||
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
|
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
|
||||||
typedef LimExNFA##mlt_size implNFA_t; \
|
typedef LimExNFA##mlt_size implNFA_t; \
|
||||||
typedef u_##mlt_size tableRow_t; \
|
typedef u_##mlt_size tableRow_t; \
|
||||||
typedef NFAException##mlt_size exception_t; \
|
typedef NFAException##mlt_size exception_t; \
|
||||||
static const size_t maxStates = mlt_size; \
|
static const size_t maxStates = mlt_size; \
|
||||||
static const u32 maxShift = mlt_shift; \
|
};
|
||||||
}; \
|
|
||||||
|
|
||||||
MAKE_LIMEX_TRAITS(32, 1)
|
MAKE_LIMEX_TRAITS(32)
|
||||||
MAKE_LIMEX_TRAITS(32, 2)
|
MAKE_LIMEX_TRAITS(128)
|
||||||
MAKE_LIMEX_TRAITS(32, 3)
|
MAKE_LIMEX_TRAITS(256)
|
||||||
MAKE_LIMEX_TRAITS(32, 4)
|
MAKE_LIMEX_TRAITS(384)
|
||||||
MAKE_LIMEX_TRAITS(32, 5)
|
MAKE_LIMEX_TRAITS(512)
|
||||||
MAKE_LIMEX_TRAITS(32, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(32, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 7)
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
@ -2133,20 +2178,18 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
|
|||||||
// Acceleration analysis.
|
// Acceleration analysis.
|
||||||
fillAccelInfo(arg);
|
fillAccelInfo(arg);
|
||||||
|
|
||||||
typedef pair<int, NFAEngineType> EngineScore;
|
vector<pair<int, NFAEngineType>> scores;
|
||||||
vector<EngineScore> scores;
|
|
||||||
|
|
||||||
if (hint != INVALID_NFA) {
|
if (hint != INVALID_NFA) {
|
||||||
// The caller has told us what to (attempt to) build.
|
// The caller has told us what to (attempt to) build.
|
||||||
scores.push_back(make_pair(0, (NFAEngineType)hint));
|
scores.emplace_back(0, (NFAEngineType)hint);
|
||||||
} else {
|
} else {
|
||||||
for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
|
for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
|
||||||
NFAEngineType ntype = (NFAEngineType)i;
|
NFAEngineType ntype = (NFAEngineType)i;
|
||||||
|
|
||||||
int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
|
int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
|
||||||
if (score >= 0) {
|
if (score >= 0) {
|
||||||
DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
|
DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
|
||||||
scores.push_back(make_pair(score, ntype));
|
scores.emplace_back(score, ntype);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2156,22 +2199,22 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
sort(scores.begin(), scores.end(), greater<EngineScore>());
|
// Sort acceptable models in priority order, lowest score first.
|
||||||
|
sort(scores.begin(), scores.end());
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> nfa;
|
for (const auto &elem : scores) {
|
||||||
for (auto i = scores.begin(); !nfa && i != scores.end(); ++i) {
|
assert(elem.first >= 0);
|
||||||
assert(i->first >= 0);
|
NFAEngineType limex_model = elem.second;
|
||||||
nfa = DISPATCH_BY_LIMEX_TYPE(i->second, generateNfa, arg);
|
auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg);
|
||||||
|
if (nfa) {
|
||||||
|
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
||||||
|
nfa_type_name(limex_model));
|
||||||
|
return nfa;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!nfa) {
|
|
||||||
DEBUG_PRINTF("NFA build failed.\n");
|
DEBUG_PRINTF("NFA build failed.\n");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
|
||||||
nfa_type_name((NFAEngineType)nfa->type));
|
|
||||||
return nfa;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 countAccelStates(NGHolder &h,
|
u32 countAccelStates(NGHolder &h,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -80,6 +80,23 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
|
|||||||
fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
|
fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename mask_t>
|
||||||
|
static
|
||||||
|
u32 rank_in_mask(mask_t mask, u32 bit) {
|
||||||
|
assert(bit < 8 * sizeof(mask));
|
||||||
|
|
||||||
|
u32 chunks[sizeof(mask)/sizeof(u32)];
|
||||||
|
memcpy(chunks, &mask, sizeof(mask));
|
||||||
|
u32 base_rank = 0;
|
||||||
|
for (u32 i = 0; i < bit / 32; i++) {
|
||||||
|
base_rank += popcount32(chunks[i]);
|
||||||
|
}
|
||||||
|
u32 chunk = chunks[bit / 32];
|
||||||
|
u32 local_bit = bit % 32;
|
||||||
|
assert(chunk & (1U << local_bit));
|
||||||
|
return base_rank + popcount32(chunk & ((1U << local_bit) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
template <typename limex_type>
|
template <typename limex_type>
|
||||||
static
|
static
|
||||||
void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
|
void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
|
||||||
@ -244,6 +261,16 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename limex_type>
|
||||||
|
static
|
||||||
|
void dumpLimexShifts(const limex_type *limex, FILE *f) {
|
||||||
|
u32 size = limex_traits<limex_type>::size;
|
||||||
|
fprintf(f, "Shift Masks:\n");
|
||||||
|
for(u32 i = 0; i < limex->shiftCount; i++) {
|
||||||
|
fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
|
||||||
|
dumpMask((const u8 *)&limex->shift[i], size).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
template<typename limex_type>
|
template<typename limex_type>
|
||||||
static
|
static
|
||||||
void dumpLimexText(const limex_type *limex, FILE *f) {
|
void dumpLimexText(const limex_type *limex, FILE *f) {
|
||||||
@ -270,6 +297,9 @@ void dumpLimexText(const limex_type *limex, FILE *f) {
|
|||||||
topMask += size / 8;
|
topMask += size / 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dump shift masks
|
||||||
|
dumpLimexShifts(limex, f);
|
||||||
|
|
||||||
dumpSquash(limex, f);
|
dumpSquash(limex, f);
|
||||||
|
|
||||||
dumpLimexReachMap(limex->reachMap, f);
|
dumpLimexReachMap(limex->reachMap, f);
|
||||||
@ -325,7 +355,7 @@ struct limex_labeller : public nfa_labeller {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 ex_index = limex->exceptionMap[state];
|
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
|
||||||
const typename limex_traits<limex_type>::exception_type *e
|
const typename limex_traits<limex_type>::exception_type *e
|
||||||
= &exceptions[ex_index];
|
= &exceptions[ex_index];
|
||||||
|
|
||||||
@ -396,7 +426,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
|||||||
const typename limex_traits<limex_type>::exception_type *exceptions
|
const typename limex_traits<limex_type>::exception_type *exceptions
|
||||||
= getExceptionTable(limex);
|
= getExceptionTable(limex);
|
||||||
|
|
||||||
u32 ex_index = limex->exceptionMap[state];
|
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
|
||||||
const typename limex_traits<limex_type>::exception_type *e
|
const typename limex_traits<limex_type>::exception_type *e
|
||||||
= &exceptions[ex_index];
|
= &exceptions[ex_index];
|
||||||
|
|
||||||
@ -420,21 +450,23 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
|||||||
template<typename limex_type>
|
template<typename limex_type>
|
||||||
static
|
static
|
||||||
void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
||||||
for (u32 j = 0; j < MAX_MAX_SHIFT; j++) {
|
for (u32 j = 0; j < limex->shiftCount; j++) {
|
||||||
|
const u32 shift_amount = limex->shiftAmount[j];
|
||||||
if (testbit((const u8 *)&limex->shift[j],
|
if (testbit((const u8 *)&limex->shift[j],
|
||||||
limex_traits<limex_type>::size, state)) {
|
limex_traits<limex_type>::size, state)) {
|
||||||
fprintf(f, "%u -> %u;\n", state, state + j);
|
fprintf(f, "%u -> %u;\n", state, state + shift_amount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DUMP_TEXT_FN(ddf_u, ddf_n, ddf_s) \
|
#define DUMP_TEXT_FN(ddf_n) \
|
||||||
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpText(const NFA *nfa, FILE *f) { \
|
void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
|
||||||
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
|
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DUMP_DOT_FN(ddf_u, ddf_n, ddf_s) \
|
#define DUMP_DOT_FN(ddf_n) \
|
||||||
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpDot(const NFA *nfa, FILE *f) { \
|
void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
|
||||||
|
UNUSED const string &base) { \
|
||||||
const LimExNFA##ddf_n *limex = \
|
const LimExNFA##ddf_n *limex = \
|
||||||
(const LimExNFA##ddf_n *)getImplNfa(nfa); \
|
(const LimExNFA##ddf_n *)getImplNfa(nfa); \
|
||||||
\
|
\
|
||||||
@ -446,52 +478,17 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
|||||||
dumpLimDotInfo(limex, i, f); \
|
dumpLimDotInfo(limex, i, f); \
|
||||||
dumpExDotInfo(limex, i, f); \
|
dumpExDotInfo(limex, i, f); \
|
||||||
} \
|
} \
|
||||||
\
|
|
||||||
dumpDotTrailer(f); \
|
dumpDotTrailer(f); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define LIMEX_DUMP_FNS(ntype, size, shifts) \
|
#define LIMEX_DUMP_FNS(size) \
|
||||||
DUMP_TEXT_FN(ntype, size, shifts) \
|
DUMP_TEXT_FN(size) \
|
||||||
DUMP_DOT_FN(ntype, size, shifts)
|
DUMP_DOT_FN(size)
|
||||||
|
|
||||||
LIMEX_DUMP_FNS(u32, 32, 1)
|
LIMEX_DUMP_FNS(32)
|
||||||
LIMEX_DUMP_FNS(u32, 32, 2)
|
LIMEX_DUMP_FNS(128)
|
||||||
LIMEX_DUMP_FNS(u32, 32, 3)
|
LIMEX_DUMP_FNS(256)
|
||||||
LIMEX_DUMP_FNS(u32, 32, 4)
|
LIMEX_DUMP_FNS(384)
|
||||||
LIMEX_DUMP_FNS(u32, 32, 5)
|
LIMEX_DUMP_FNS(512)
|
||||||
LIMEX_DUMP_FNS(u32, 32, 6)
|
|
||||||
LIMEX_DUMP_FNS(u32, 32, 7)
|
|
||||||
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 1)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 2)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 3)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 4)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 5)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 6)
|
|
||||||
LIMEX_DUMP_FNS(m128, 128, 7)
|
|
||||||
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 1)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 2)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 3)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 4)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 5)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 6)
|
|
||||||
LIMEX_DUMP_FNS(m256, 256, 7)
|
|
||||||
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 1)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 2)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 3)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 4)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 5)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 6)
|
|
||||||
LIMEX_DUMP_FNS(m384, 384, 7)
|
|
||||||
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 1)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 2)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 3)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 4)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 5)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 6)
|
|
||||||
LIMEX_DUMP_FNS(m512, 512, 7)
|
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -79,9 +79,13 @@
|
|||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
#define CHUNK_T u64a
|
#define CHUNK_T u64a
|
||||||
#define FIND_AND_CLEAR_FN findAndClearLSB_64
|
#define FIND_AND_CLEAR_FN findAndClearLSB_64
|
||||||
|
#define POPCOUNT_FN popcount64
|
||||||
|
#define RANK_IN_MASK_FN rank_in_mask64
|
||||||
#else
|
#else
|
||||||
#define CHUNK_T u32
|
#define CHUNK_T u32
|
||||||
#define FIND_AND_CLEAR_FN findAndClearLSB_32
|
#define FIND_AND_CLEAR_FN findAndClearLSB_32
|
||||||
|
#define POPCOUNT_FN popcount32
|
||||||
|
#define RANK_IN_MASK_FN rank_in_mask32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** \brief Process a single exception. Returns 1 if exception handling should
|
/** \brief Process a single exception. Returns 1 if exception handling should
|
||||||
@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
|
|||||||
|
|
||||||
#ifndef RUN_EXCEPTION_FN_ONLY
|
#ifndef RUN_EXCEPTION_FN_ONLY
|
||||||
|
|
||||||
/** \brief Process all of the exceptions associated with the states in the \a estate. */
|
/** \brief Process all of the exceptions associated with the states in the \a
|
||||||
|
* estate. */
|
||||||
static really_inline
|
static really_inline
|
||||||
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
||||||
const struct IMPL_NFA_T *limex,
|
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
||||||
const u32 *exceptionMap, const EXCEPTION_T *exceptions,
|
const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx,
|
||||||
const ReportID *exReports,
|
char in_rev, char flags) {
|
||||||
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
|
|
||||||
assert(diffmask > 0); // guaranteed by caller macro
|
assert(diffmask > 0); // guaranteed by caller macro
|
||||||
|
|
||||||
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
|
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
|
||||||
@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
|
|
||||||
// A copy of the estate as an array of GPR-sized chunks.
|
// A copy of the estate as an array of GPR-sized chunks.
|
||||||
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
|
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
#ifdef ESTATE_ON_STACK
|
#ifdef ESTATE_ON_STACK
|
||||||
memcpy(chunks, &estate, sizeof(STATE_T));
|
memcpy(chunks, &estate, sizeof(STATE_T));
|
||||||
#else
|
#else
|
||||||
memcpy(chunks, estatep, sizeof(STATE_T));
|
memcpy(chunks, estatep, sizeof(STATE_T));
|
||||||
#endif
|
#endif
|
||||||
|
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
|
||||||
|
|
||||||
struct proto_cache new_cache = {0, NULL};
|
struct proto_cache new_cache = {0, NULL};
|
||||||
enum CacheResult cacheable = CACHE_RESULT;
|
enum CacheResult cacheable = CACHE_RESULT;
|
||||||
|
|
||||||
|
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
|
||||||
|
base_index[0] = 0;
|
||||||
|
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
|
||||||
|
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
u32 t = findAndClearLSB_32(&diffmask);
|
u32 t = findAndClearLSB_32(&diffmask);
|
||||||
#ifdef ARCH_64_BIT
|
#ifdef ARCH_64_BIT
|
||||||
@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
|
|||||||
assert(t < ARRAY_LENGTH(chunks));
|
assert(t < ARRAY_LENGTH(chunks));
|
||||||
CHUNK_T word = chunks[t];
|
CHUNK_T word = chunks[t];
|
||||||
assert(word != 0);
|
assert(word != 0);
|
||||||
u32 base = t * sizeof(CHUNK_T) * 8;
|
|
||||||
do {
|
do {
|
||||||
u32 bit = FIND_AND_CLEAR_FN(&word) + base;
|
u32 bit = FIND_AND_CLEAR_FN(&word);
|
||||||
u32 idx = exceptionMap[bit];
|
u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
|
||||||
|
u32 idx = local_index + base_index[t];
|
||||||
const EXCEPTION_T *e = &exceptions[idx];
|
const EXCEPTION_T *e = &exceptions[idx];
|
||||||
|
|
||||||
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
|
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -68,6 +68,9 @@
|
|||||||
The value of NFA.stateSize gives the total state size in bytes (the sum of
|
The value of NFA.stateSize gives the total state size in bytes (the sum of
|
||||||
all the above).
|
all the above).
|
||||||
|
|
||||||
|
Number of shifts should be always greater or equal to 1
|
||||||
|
Number of shifts 0 means that no appropriate NFA engine was found.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LIMEX_INTERNAL_H
|
#ifndef LIMEX_INTERNAL_H
|
||||||
@ -77,7 +80,8 @@
|
|||||||
#include "repeat_internal.h"
|
#include "repeat_internal.h"
|
||||||
|
|
||||||
// Constants
|
// Constants
|
||||||
#define MAX_MAX_SHIFT 8 /**< largest maxshift used by a LimEx NFA */
|
#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */
|
||||||
|
#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
|
||||||
|
|
||||||
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
||||||
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
||||||
@ -95,24 +99,6 @@ enum LimExSquash {
|
|||||||
LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
|
LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LimExNFABase {
|
|
||||||
u8 reachMap[N_CHARS];
|
|
||||||
u32 reachSize;
|
|
||||||
u32 accelCount;
|
|
||||||
u32 accelTableOffset;
|
|
||||||
u32 accelAuxCount;
|
|
||||||
u32 accelAuxOffset;
|
|
||||||
u32 acceptCount;
|
|
||||||
u32 acceptOffset;
|
|
||||||
u32 acceptEodCount;
|
|
||||||
u32 acceptEodOffset;
|
|
||||||
u32 exceptionCount;
|
|
||||||
u32 exceptionOffset;
|
|
||||||
u32 exReportOffset;
|
|
||||||
u32 repeatCount;
|
|
||||||
u32 repeatOffset;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* uniform looking types for the macros */
|
/* uniform looking types for the macros */
|
||||||
typedef u8 u_8;
|
typedef u8 u_8;
|
||||||
typedef u16 u_16;
|
typedef u16 u_16;
|
||||||
@ -133,7 +119,7 @@ struct NFAException##size { \
|
|||||||
u8 trigger; /**< from enum LimExTrigger */ \
|
u8 trigger; /**< from enum LimExTrigger */ \
|
||||||
}; \
|
}; \
|
||||||
\
|
\
|
||||||
struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
struct LimExNFA##size { \
|
||||||
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
|
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
|
||||||
u32 reachSize; /**< number of reach masks */ \
|
u32 reachSize; /**< number of reach masks */ \
|
||||||
u32 accelCount; /**< number of entries in accel table */ \
|
u32 accelCount; /**< number of entries in accel table */ \
|
||||||
@ -149,7 +135,6 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
|||||||
u32 exReportOffset; /* rel. to start of LimExNFA */ \
|
u32 exReportOffset; /* rel. to start of LimExNFA */ \
|
||||||
u32 repeatCount; \
|
u32 repeatCount; \
|
||||||
u32 repeatOffset; \
|
u32 repeatOffset; \
|
||||||
u32 exceptionMap[size]; \
|
|
||||||
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
|
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
|
||||||
u32 squashCount; \
|
u32 squashCount; \
|
||||||
u32 topCount; \
|
u32 topCount; \
|
||||||
@ -168,8 +153,10 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
|||||||
u_##size compressMask; /**< switch off before compress */ \
|
u_##size compressMask; /**< switch off before compress */ \
|
||||||
u_##size exceptionMask; \
|
u_##size exceptionMask; \
|
||||||
u_##size repeatCyclicMask; \
|
u_##size repeatCyclicMask; \
|
||||||
u_##size shift[MAX_MAX_SHIFT]; \
|
|
||||||
u_##size zombieMask; /**< zombie if in any of the set states */ \
|
u_##size zombieMask; /**< zombie if in any of the set states */ \
|
||||||
|
u_##size shift[MAX_SHIFT_COUNT]; \
|
||||||
|
u32 shiftCount; /**< number of shift masks used */ \
|
||||||
|
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
|
||||||
};
|
};
|
||||||
|
|
||||||
CREATE_NFA_LIMEX(32)
|
CREATE_NFA_LIMEX(32)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -74,7 +74,6 @@
|
|||||||
static really_inline
|
static really_inline
|
||||||
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
||||||
const struct LimExNFA32 *limex,
|
const struct LimExNFA32 *limex,
|
||||||
const u32 *exceptionMap,
|
|
||||||
const struct NFAException32 *exceptions,
|
const struct NFAException32 *exceptions,
|
||||||
const ReportID *exReports, u64a offset,
|
const ReportID *exReports, u64a offset,
|
||||||
struct NFAContext32 *ctx, char in_rev, char flags) {
|
struct NFAContext32 *ctx, char in_rev, char flags) {
|
||||||
@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
u32 bit = findAndClearLSB_32(&estate);
|
u32 bit = findAndClearLSB_32(&estate);
|
||||||
u32 idx = exceptionMap[bit];
|
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
|
||||||
const struct NFAException32 *e = &exceptions[idx];
|
const struct NFAException32 *e = &exceptions[idx];
|
||||||
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
|
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
|
||||||
ctx, &new_cache, &cacheable, in_rev, flags)) {
|
ctx, &new_cache, &cacheable, in_rev, flags)) {
|
||||||
@ -132,35 +131,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
|||||||
|
|
||||||
#define SIZE 32
|
#define SIZE 32
|
||||||
#define STATE_T u32
|
#define STATE_T u32
|
||||||
#define SHIFT 1
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 2
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 3
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 4
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 5
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 6
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 32
|
|
||||||
#define STATE_T u32
|
|
||||||
#define SHIFT 7
|
|
||||||
#include "limex_runtime_impl.h"
|
#include "limex_runtime_impl.h"
|
||||||
|
@ -73,34 +73,35 @@ struct proto_cache {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
|
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
|
||||||
|
// LimExNFAxxx ptr in 'limex' and the current state in 's'
|
||||||
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
|
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
|
||||||
(JOIN(shift_, nels_type)( \
|
(JOIN(lshift_, nels_type)( \
|
||||||
JOIN(and_, nels_type)(s, \
|
JOIN(and_, nels_type)(s, \
|
||||||
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
|
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
|
||||||
nels_i))
|
limex->shiftAmount[nels_i]))
|
||||||
|
|
||||||
// Calculate the (limited model) successors for a given max shift. Assumes
|
// Calculate the (limited model) successors for a number of variable shifts.
|
||||||
// LimExNFAxxx ptr in 'l', current state in 's' and successors in 'succ'.
|
// Assumes current state in 's' and successors in 'succ'.
|
||||||
|
|
||||||
#define NFA_EXEC_GET_LIM_SUCC(gls_type, gls_shift) \
|
#define NFA_EXEC_GET_LIM_SUCC(gls_type) \
|
||||||
do { \
|
do { \
|
||||||
succ = \
|
succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
|
||||||
JOIN(and_, gls_type)(s, JOIN(load_, gls_type)(&limex->shift[0])); \
|
switch (limex->shiftCount) { \
|
||||||
switch (gls_shift) { \
|
case 8: \
|
||||||
case 7: \
|
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
|
||||||
case 6: \
|
case 7: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
|
||||||
case 5: \
|
case 6: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
|
||||||
case 4: \
|
case 5: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
|
||||||
case 3: \
|
case 4: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
|
||||||
case 2: \
|
case 3: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
|
||||||
case 1: \
|
case 2: \
|
||||||
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
|
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
|
||||||
|
case 1: \
|
||||||
case 0: \
|
case 0: \
|
||||||
; \
|
; \
|
||||||
} \
|
} \
|
||||||
@ -129,7 +130,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
|
|||||||
for (; *reports != MO_INVALID_IDX; ++reports) {
|
for (; *reports != MO_INVALID_IDX; ++reports) {
|
||||||
DEBUG_PRINTF("firing report for id %u at offset %llu\n",
|
DEBUG_PRINTF("firing report for id %u at offset %llu\n",
|
||||||
*reports, offset);
|
*reports, offset);
|
||||||
int rv = callback(offset, *reports, context);
|
int rv = callback(0, offset, *reports, context);
|
||||||
if (rv == MO_HALT_MATCHING) {
|
if (rv == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
@ -37,11 +37,11 @@
|
|||||||
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
|
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if !defined(SIZE) || !defined(STATE_T) || !defined(SHIFT)
|
#if !defined(SIZE) || !defined(STATE_T)
|
||||||
# error Must define SIZE and STATE_T and SHIFT in includer.
|
# error Must define SIZE and STATE_T in includer.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LIMEX_API_ROOT JOIN(JOIN(JOIN(nfaExecLimEx, SIZE), _), SHIFT)
|
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
|
||||||
|
|
||||||
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
|
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
|
||||||
|
|
||||||
@ -73,6 +73,7 @@
|
|||||||
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
|
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
|
||||||
#define OR_STATE JOIN(or_, STATE_T)
|
#define OR_STATE JOIN(or_, STATE_T)
|
||||||
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
|
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
|
||||||
|
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
|
||||||
#define ZERO_STATE JOIN(zero_, STATE_T)
|
#define ZERO_STATE JOIN(zero_, STATE_T)
|
||||||
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
|
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
|
||||||
#define ISZERO_STATE JOIN(isZero_, STATE_T)
|
#define ISZERO_STATE JOIN(isZero_, STATE_T)
|
||||||
@ -104,8 +105,8 @@
|
|||||||
// continue, 1 if an accept was fired and the user instructed us to halt.
|
// continue, 1 if an accept was fired and the user instructed us to halt.
|
||||||
static really_inline
|
static really_inline
|
||||||
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
||||||
const ReportID *exReports, const u32 *exceptionMap,
|
const ReportID *exReports, STATE_T s,
|
||||||
STATE_T s, const STATE_T emask, size_t i, u64a offset,
|
const STATE_T emask, size_t i, u64a offset,
|
||||||
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
|
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
|
||||||
const char flags, const char in_rev,
|
const char flags, const char in_rev,
|
||||||
const char first_match) {
|
const char first_match) {
|
||||||
@ -132,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
|||||||
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
|
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
|
||||||
|
|
||||||
int rv = JOIN(processExceptional, SIZE)(
|
int rv = JOIN(processExceptional, SIZE)(
|
||||||
pass_state, pass_estate, diffmask, succ, limex, exceptionMap,
|
pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports,
|
||||||
exceptions, exReports, callback_offset, ctx, in_rev, localflags);
|
callback_offset, ctx, in_rev, localflags);
|
||||||
if (rv == PE_RV_HALT) {
|
if (rv == PE_RV_HALT) {
|
||||||
return 1; // Halt matching.
|
return 1; // Halt matching.
|
||||||
}
|
}
|
||||||
@ -175,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
|
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
|
||||||
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||||
const ReportID *exReports = getExReports(limex);
|
const ReportID *exReports = getExReports(limex);
|
||||||
const u32 *exceptionMap = limex->exceptionMap;
|
|
||||||
STATE_T s = LOAD_STATE(&ctx->s);
|
STATE_T s = LOAD_STATE(&ctx->s);
|
||||||
|
|
||||||
/* assert(ISALIGNED_16(exceptions)); */
|
/* assert(ISALIGNED_16(exceptions)); */
|
||||||
@ -201,11 +201,11 @@ without_accel:
|
|||||||
|
|
||||||
u8 c = input[i];
|
u8 c = input[i];
|
||||||
STATE_T succ;
|
STATE_T succ;
|
||||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||||
|
|
||||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
|
||||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
i, offset, &succ, final_loc, ctx, flags, 0,
|
||||||
flags, 0, first_match)) {
|
first_match)) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,11 +252,11 @@ with_accel:
|
|||||||
|
|
||||||
u8 c = input[i];
|
u8 c = input[i];
|
||||||
STATE_T succ;
|
STATE_T succ;
|
||||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||||
|
|
||||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
|
||||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
i, offset, &succ, final_loc, ctx, flags, 0,
|
||||||
flags, 0, first_match)) {
|
first_match)) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -300,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
#endif
|
#endif
|
||||||
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||||
const ReportID *exReports = getExReports(limex);
|
const ReportID *exReports = getExReports(limex);
|
||||||
const u32 *exceptionMap = limex->exceptionMap;
|
|
||||||
STATE_T s = LOAD_STATE(&ctx->s);
|
STATE_T s = LOAD_STATE(&ctx->s);
|
||||||
|
|
||||||
/* assert(ISALIGNED_16(exceptions)); */
|
/* assert(ISALIGNED_16(exceptions)); */
|
||||||
@ -318,9 +317,9 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
|
|
||||||
u8 c = input[i-1];
|
u8 c = input[i-1];
|
||||||
STATE_T succ;
|
STATE_T succ;
|
||||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||||
|
|
||||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
|
||||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
||||||
flags, 1, 0)) {
|
flags, 1, 0)) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
@ -349,36 +348,57 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
|
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
|
||||||
u64a offset) {
|
u64a offset) {
|
||||||
if (!limex->repeatCount) {
|
if (!limex->repeatCount) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: we compress all repeats, as they may have *just* had their
|
STATE_T s = LOAD_STATE(src);
|
||||||
// cyclic states switched off a moment ago. TODO: is this required
|
|
||||||
|
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
|
||||||
|
DEBUG_PRINTF("no cyclics are on\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const union RepeatControl *ctrl =
|
const union RepeatControl *ctrl =
|
||||||
getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
|
getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
|
||||||
char *state_base = (char *)dest + limex->stateSize;
|
char *state_base = (char *)dest + limex->stateSize;
|
||||||
|
|
||||||
for (u32 i = 0; i < limex->repeatCount; i++) {
|
for (u32 i = 0; i < limex->repeatCount; i++) {
|
||||||
|
DEBUG_PRINTF("repeat %u\n", i);
|
||||||
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
||||||
|
|
||||||
|
if (!TESTBIT_STATE(&s, info->cyclicState)) {
|
||||||
|
DEBUG_PRINTF("is dead\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
||||||
|
if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset,
|
||||||
|
offset) == REPEAT_STALE) {
|
||||||
|
DEBUG_PRINTF("is stale, clearing state\n");
|
||||||
|
CLEARBIT_STATE(&s, info->cyclicState);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n",
|
||||||
|
info->packedCtrlOffset);
|
||||||
repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
|
repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
|
||||||
offset);
|
offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
STORE_STATE(src, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
|
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
|
||||||
const struct mq *q,
|
const struct mq *q, s64a loc) {
|
||||||
s64a loc) {
|
|
||||||
void *dest = q->streamState;
|
void *dest = q->streamState;
|
||||||
const void *src = q->state;
|
void *src = q->state;
|
||||||
u8 key = queue_prev_byte(q, loc);
|
u8 key = queue_prev_byte(q, loc);
|
||||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||||
COMPRESS_FN(limex, dest, src, key);
|
|
||||||
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
|
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
|
||||||
|
COMPRESS_FN(limex, dest, src, key);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,15 +409,29 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: we expand all repeats, as they may have *just* had their
|
// Note: state has already been expanded into 'dest'.
|
||||||
// cyclic states switched off a moment ago. TODO: is this required?
|
const STATE_T cyclics =
|
||||||
|
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
|
||||||
|
if (ISZERO_STATE(cyclics)) {
|
||||||
|
DEBUG_PRINTF("no cyclics are on\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
union RepeatControl *ctrl =
|
union RepeatControl *ctrl =
|
||||||
getRepeatControlBase((char *)dest, sizeof(STATE_T));
|
getRepeatControlBase((char *)dest, sizeof(STATE_T));
|
||||||
const char *state_base = (const char *)src + limex->stateSize;
|
const char *state_base = (const char *)src + limex->stateSize;
|
||||||
|
|
||||||
for (u32 i = 0; i < limex->repeatCount; i++) {
|
for (u32 i = 0; i < limex->repeatCount; i++) {
|
||||||
|
DEBUG_PRINTF("repeat %u\n", i);
|
||||||
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
||||||
|
|
||||||
|
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
|
||||||
|
DEBUG_PRINTF("is dead\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n",
|
||||||
|
info->packedCtrlOffset);
|
||||||
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
||||||
repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
|
repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
|
||||||
&ctrl[i]);
|
&ctrl[i]);
|
||||||
@ -650,7 +684,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
ep = MIN(ep, end_abs);
|
ep = MIN(ep, end_abs);
|
||||||
assert(ep >= sp);
|
assert(ep >= sp);
|
||||||
|
|
||||||
assert(sp >= offset); // We no longer do history buffer scans here.
|
if (sp < offset) {
|
||||||
|
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
|
||||||
|
assert(offset - sp <= q->hlength);
|
||||||
|
u64a local_ep = MIN(offset, ep);
|
||||||
|
u64a final_look = 0;
|
||||||
|
/* we are starting inside the history buffer */
|
||||||
|
if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
|
||||||
|
local_ep - sp, &ctx, sp,
|
||||||
|
&final_look) == MO_HALT_MATCHING) {
|
||||||
|
DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
|
||||||
|
"offset:%llu\n", final_look, sp, end_abs, offset);
|
||||||
|
assert(q->cur);
|
||||||
|
q->cur--;
|
||||||
|
q->items[q->cur].type = MQE_START;
|
||||||
|
q->items[q->cur].location = sp + final_look - offset;
|
||||||
|
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
sp = local_ep;
|
||||||
|
}
|
||||||
|
|
||||||
if (sp >= ep) {
|
if (sp >= ep) {
|
||||||
goto scan_done;
|
goto scan_done;
|
||||||
@ -790,9 +844,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
|
|||||||
|
|
||||||
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
|
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback,
|
NfaCallback callback, void *context) {
|
||||||
UNUSED SomNfaCallback som_callback,
|
|
||||||
void *context) {
|
|
||||||
assert(n && state);
|
assert(n && state);
|
||||||
|
|
||||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||||
@ -868,6 +920,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
|
|||||||
offset, report);
|
offset, report);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
||||||
|
assert(nfa && q);
|
||||||
|
assert(q->state && q->streamState);
|
||||||
|
|
||||||
|
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||||
|
union RepeatControl *repeat_ctrl =
|
||||||
|
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||||
|
char *repeat_state = q->streamState + limex->stateSize;
|
||||||
|
STATE_T state = LOAD_STATE(q->state);
|
||||||
|
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||||
|
|
||||||
|
return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
||||||
const struct NFA *nfa,
|
const struct NFA *nfa,
|
||||||
struct mq *q,
|
struct mq *q,
|
||||||
@ -920,6 +987,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
|||||||
#undef ANDNOT_STATE
|
#undef ANDNOT_STATE
|
||||||
#undef OR_STATE
|
#undef OR_STATE
|
||||||
#undef TESTBIT_STATE
|
#undef TESTBIT_STATE
|
||||||
|
#undef CLEARBIT_STATE
|
||||||
#undef ZERO_STATE
|
#undef ZERO_STATE
|
||||||
#undef ISNONZERO_STATE
|
#undef ISNONZERO_STATE
|
||||||
#undef ISZERO_STATE
|
#undef ISZERO_STATE
|
||||||
@ -935,5 +1003,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
|||||||
// Parameters.
|
// Parameters.
|
||||||
#undef SIZE
|
#undef SIZE
|
||||||
#undef STATE_T
|
#undef STATE_T
|
||||||
#undef SHIFT
|
|
||||||
#undef LIMEX_API_ROOT
|
#undef LIMEX_API_ROOT
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,20 +34,19 @@
|
|||||||
* be faster and actually correct if these assumptions don't hold true.
|
* be faster and actually correct if these assumptions don't hold true.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SHUFFLE_H
|
#ifndef LIMEX_SHUFFLE_H
|
||||||
#define SHUFFLE_H
|
#define LIMEX_SHUFFLE_H
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "bitutils.h"
|
|
||||||
#include "simd_utils.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
#include "util/bitutils.h"
|
||||||
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
|
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
|
||||||
#define HAVE_PEXT
|
#define HAVE_PEXT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 shuffleDynamic32(u32 x, u32 mask) {
|
u32 packedExtract32(u32 x, u32 mask) {
|
||||||
#if defined(HAVE_PEXT)
|
#if defined(HAVE_PEXT)
|
||||||
// Intel BMI2 can do this operation in one instruction.
|
// Intel BMI2 can do this operation in one instruction.
|
||||||
return _pext_u32(x, mask);
|
return _pext_u32(x, mask);
|
||||||
@ -67,7 +66,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 shuffleDynamic64(u64a x, u64a mask) {
|
u32 packedExtract64(u64a x, u64a mask) {
|
||||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
||||||
// Intel BMI2 can do this operation in one instruction.
|
// Intel BMI2 can do this operation in one instruction.
|
||||||
return _pext_u64(x, mask);
|
return _pext_u64(x, mask);
|
||||||
@ -88,4 +87,24 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
|
|||||||
|
|
||||||
#undef HAVE_PEXT
|
#undef HAVE_PEXT
|
||||||
|
|
||||||
#endif // SHUFFLE_H
|
static really_inline
|
||||||
|
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
|
||||||
|
m128 shuffled = pshufb(s, permute);
|
||||||
|
m128 compared = and128(shuffled, compare);
|
||||||
|
u16 rv = ~movemask128(eq128(compared, shuffled));
|
||||||
|
return (u32)rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
static really_inline
|
||||||
|
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
|
||||||
|
// vpshufb doesn't cross lanes, so this is a bit of a cheat
|
||||||
|
m256 shuffled = vpshufb(s, permute);
|
||||||
|
m256 compared = and256(shuffled, compare);
|
||||||
|
u32 rv = ~movemask256(eq256(compared, shuffled));
|
||||||
|
// stitch the lane-wise results back together
|
||||||
|
return (u32)((rv >> 16) | (rv & 0xffffU));
|
||||||
|
}
|
||||||
|
#endif // AVX2
|
||||||
|
|
||||||
|
#endif // LIMEX_SHUFFLE_H
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -63,35 +63,4 @@
|
|||||||
|
|
||||||
#define SIZE 128
|
#define SIZE 128
|
||||||
#define STATE_T m128
|
#define STATE_T m128
|
||||||
#define SHIFT 1
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 2
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 3
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 4
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 5
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 6
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 128
|
|
||||||
#define STATE_T m128
|
|
||||||
#define SHIFT 7
|
|
||||||
#include "limex_runtime_impl.h"
|
#include "limex_runtime_impl.h"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -60,35 +60,4 @@
|
|||||||
|
|
||||||
#define SIZE 256
|
#define SIZE 256
|
||||||
#define STATE_T m256
|
#define STATE_T m256
|
||||||
#define SHIFT 1
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 2
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 3
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 4
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 5
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 6
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 256
|
|
||||||
#define STATE_T m256
|
|
||||||
#define SHIFT 7
|
|
||||||
#include "limex_runtime_impl.h"
|
#include "limex_runtime_impl.h"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -60,35 +60,4 @@
|
|||||||
|
|
||||||
#define SIZE 384
|
#define SIZE 384
|
||||||
#define STATE_T m384
|
#define STATE_T m384
|
||||||
#define SHIFT 1
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 2
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 3
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 4
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 5
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 6
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 384
|
|
||||||
#define STATE_T m384
|
|
||||||
#define SHIFT 7
|
|
||||||
#include "limex_runtime_impl.h"
|
#include "limex_runtime_impl.h"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -60,10 +60,4 @@
|
|||||||
|
|
||||||
#define SIZE 512
|
#define SIZE 512
|
||||||
#define STATE_T m512
|
#define STATE_T m512
|
||||||
#define SHIFT 4
|
|
||||||
#include "limex_runtime_impl.h"
|
|
||||||
|
|
||||||
#define SIZE 512
|
|
||||||
#define STATE_T m512
|
|
||||||
#define SHIFT 5
|
|
||||||
#include "limex_runtime_impl.h"
|
#include "limex_runtime_impl.h"
|
@ -42,13 +42,13 @@
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
||||||
u16 s, u64a loc, char eod, u16 * const cached_accept_state,
|
u16 s, u64a loc, char eod, u16 *const cached_accept_state,
|
||||||
u32 * const cached_accept_id) {
|
u32 *const cached_accept_id) {
|
||||||
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
|
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
|
||||||
(u16)(s & STATE_MASK), loc, eod);
|
(u16)(s & STATE_MASK), loc, eod);
|
||||||
|
|
||||||
if (!eod && s == *cached_accept_state) {
|
if (!eod && s == *cached_accept_state) {
|
||||||
if (cb(loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING; /* termination requested */
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
|||||||
*cached_accept_id = rl->report[0];
|
*cached_accept_id = rl->report[0];
|
||||||
|
|
||||||
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
|
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
|
||||||
if (cb(loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING; /* termination requested */
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
|||||||
|
|
||||||
for (u32 i = 0; i < count; i++) {
|
for (u32 i = 0; i < count; i++) {
|
||||||
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||||
if (cb(loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING; /* termination requested */
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -146,7 +146,7 @@ without_accel:
|
|||||||
|
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING; /* termination requested */
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
}
|
}
|
||||||
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
|
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
|
||||||
@ -186,7 +186,7 @@ with_accel:
|
|||||||
|
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING; /* termination requested */
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
}
|
}
|
||||||
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
|
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
|
||||||
@ -328,7 +328,7 @@ without_accel:
|
|||||||
u64a loc = (c - 1) - buf + offAdj + 1;
|
u64a loc = (c - 1) - buf + offAdj + 1;
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
|
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
|
||||||
@ -360,7 +360,7 @@ with_accel:
|
|||||||
u64a loc = (c - 1) - buf + offAdj + 1;
|
u64a loc = (c - 1) - buf + offAdj + 1;
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
|
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
|
||||||
@ -475,7 +475,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
|||||||
int rv;
|
int rv;
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
rv = cb(q_cur_offset(q), m->arb_report, context);
|
rv = cb(0, q_cur_offset(q), m->arb_report, context);
|
||||||
} else {
|
} else {
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
@ -632,7 +632,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
|||||||
int rv;
|
int rv;
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
rv = cb(q_cur_offset(q), m->arb_report, context);
|
rv = cb(0, q_cur_offset(q), m->arb_report, context);
|
||||||
} else {
|
} else {
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
@ -836,7 +836,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
if (s >= m->accept_limit_8) {
|
if (s >= m->accept_limit_8) {
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
cb(offset, m->arb_report, ctxt);
|
cb(0, offset, m->arb_report, ctxt);
|
||||||
} else {
|
} else {
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
|
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *ctxt = q->context;
|
void *ctxt = q->context;
|
||||||
u16 s = *(u16 *)q->state;
|
u16 s = *(u16 *)q->state;
|
||||||
@ -864,7 +864,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
|
|||||||
if (aux->accept) {
|
if (aux->accept) {
|
||||||
if (single) {
|
if (single) {
|
||||||
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
DEBUG_PRINTF("reporting %u\n", m->arb_report);
|
||||||
cb(offset, m->arb_report, ctxt);
|
cb(0, offset, m->arb_report, ctxt);
|
||||||
} else {
|
} else {
|
||||||
u32 cached_accept_id = 0;
|
u32 cached_accept_id = 0;
|
||||||
u16 cached_accept_state = 0;
|
u16 cached_accept_state = 0;
|
||||||
@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
|||||||
struct mq *q) {
|
struct mq *q) {
|
||||||
assert(n && q);
|
assert(n && q);
|
||||||
|
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
u8 s = *(u8 *)q->state;
|
u8 s = *(u8 *)q->state;
|
||||||
DEBUG_PRINTF("checking accepts for %hhu\n", s);
|
DEBUG_PRINTF("checking accepts for %hhu\n", s);
|
||||||
if (s < m->accept_limit_8) {
|
if (s < m->accept_limit_8) {
|
||||||
@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
|||||||
return mcclellanHasAccept(m, get_aux(m, s), report);
|
return mcclellanHasAccept(m, get_aux(m, s), report);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
assert(n && q);
|
||||||
|
|
||||||
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
|
u8 s = *(u8 *)q->state;
|
||||||
|
DEBUG_PRINTF("checking accepts for %hhu\n", s);
|
||||||
|
assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
|
||||||
|
|
||||||
|
return s >= m->accept_limit_8;
|
||||||
|
}
|
||||||
|
|
||||||
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q) {
|
struct mq *q) {
|
||||||
assert(n && q);
|
assert(n && q);
|
||||||
|
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
u16 s = *(u16 *)q->state;
|
u16 s = *(u16 *)q->state;
|
||||||
DEBUG_PRINTF("checking accepts for %hu\n", s);
|
DEBUG_PRINTF("checking accepts for %hu\n", s);
|
||||||
|
|
||||||
return mcclellanHasAccept(m, get_aux(m, s), report);
|
return mcclellanHasAccept(m, get_aux(m, s), report);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
assert(n && q);
|
||||||
|
|
||||||
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
|
u16 s = *(u16 *)q->state;
|
||||||
|
DEBUG_PRINTF("checking accepts for %hu\n", s);
|
||||||
|
|
||||||
|
return !!get_aux(m, s)->accept;
|
||||||
|
}
|
||||||
|
|
||||||
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||||
u64a offset = q->offset;
|
u64a offset = q->offset;
|
||||||
const u8 *buffer = q->buffer;
|
const u8 *buffer = q->buffer;
|
||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == MCCLELLAN_NFA_8);
|
assert(n->type == MCCLELLAN_NFA_8);
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
|
|
||||||
return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
|
return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
|
||||||
@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
|||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == MCCLELLAN_NFA_16);
|
assert(n->type == MCCLELLAN_NFA_16);
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
|
|
||||||
return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
|
return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
|
||||||
@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
|||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == MCCLELLAN_NFA_8);
|
assert(n->type == MCCLELLAN_NFA_8);
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
|
|
||||||
char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
|
char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
|
||||||
@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
|||||||
NfaCallback cb = q->cb;
|
NfaCallback cb = q->cb;
|
||||||
void *context = q->context;
|
void *context = q->context;
|
||||||
assert(n->type == MCCLELLAN_NFA_16);
|
assert(n->type == MCCLELLAN_NFA_16);
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
const struct mcclellan *m = getImplNfa(n);
|
||||||
const u8 *hend = q->history + q->hlength;
|
const u8 *hend = q->history + q->hlength;
|
||||||
|
|
||||||
char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
|
char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
|
||||||
@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
|||||||
|
|
||||||
char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
|
char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||||
void *state, UNUSED u8 key) {
|
void *state, UNUSED u8 key) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
const struct mcclellan *m = getImplNfa(nfa);
|
||||||
u8 s = offset ? m->start_floating : m->start_anchored;
|
u8 s = offset ? m->start_floating : m->start_anchored;
|
||||||
if (s) {
|
if (s) {
|
||||||
*(u8 *)state = s;
|
*(u8 *)state = s;
|
||||||
@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
|
|||||||
|
|
||||||
char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
|
char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||||
void *state, UNUSED u8 key) {
|
void *state, UNUSED u8 key) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
const struct mcclellan *m = getImplNfa(nfa);
|
||||||
u16 s = offset ? m->start_floating : m->start_anchored;
|
u16 s = offset ? m->start_floating : m->start_anchored;
|
||||||
if (s) {
|
if (s) {
|
||||||
unaligned_store_u16(state, s);
|
unaligned_store_u16(state, s);
|
||||||
@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
|
|||||||
void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
|
void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
|
||||||
const u8 *buf, char top, size_t start_off,
|
const u8 *buf, char top, size_t start_off,
|
||||||
size_t len, NfaCallback cb, void *ctxt) {
|
size_t len, NfaCallback cb, void *ctxt) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
const struct mcclellan *m = getImplNfa(nfa);
|
||||||
|
|
||||||
u8 s = top ? m->start_anchored : *(u8 *)state;
|
u8 s = top ? m->start_anchored : *(u8 *)state;
|
||||||
|
|
||||||
@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
|
|||||||
void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
|
void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
|
||||||
const u8 *buf, char top, size_t start_off,
|
const u8 *buf, char top, size_t start_off,
|
||||||
size_t len, NfaCallback cb, void *ctxt) {
|
size_t len, NfaCallback cb, void *ctxt) {
|
||||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
const struct mcclellan *m = getImplNfa(nfa);
|
||||||
|
|
||||||
u16 s = top ? m->start_anchored : unaligned_load_u16(state);
|
u16 s = top ? m->start_anchored : unaligned_load_u16(state);
|
||||||
|
|
||||||
@ -1053,17 +1073,15 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
|
|||||||
}
|
}
|
||||||
|
|
||||||
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
UNUSED const char *streamState,
|
UNUSED const char *streamState, u64a offset,
|
||||||
u64a offset, NfaCallback callback,
|
NfaCallback callback, void *context) {
|
||||||
UNUSED SomNfaCallback som_cb, void *context) {
|
|
||||||
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
|
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
|
||||||
context);
|
context);
|
||||||
}
|
}
|
||||||
|
|
||||||
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
UNUSED const char *streamState,
|
UNUSED const char *streamState, u64a offset,
|
||||||
u64a offset, NfaCallback callback,
|
NfaCallback callback, void *context) {
|
||||||
UNUSED SomNfaCallback som_cb, void *context) {
|
|
||||||
assert(ISALIGNED_N(state, 2));
|
assert(ISALIGNED_N(state, 2));
|
||||||
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
|
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
|
||||||
context);
|
context);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -39,14 +39,14 @@ struct NFA;
|
|||||||
|
|
||||||
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context);
|
||||||
void *context);
|
|
||||||
char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
|
|||||||
|
|
||||||
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
|
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context);
|
||||||
void *context);
|
|
||||||
char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||||
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
|
||||||
struct mq *q);
|
struct mq *q);
|
||||||
|
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
#include "accelcompile.h"
|
#include "accelcompile.h"
|
||||||
#include "grey.h"
|
#include "grey.h"
|
||||||
#include "mcclellan_internal.h"
|
#include "mcclellan_internal.h"
|
||||||
#include "mcclellancompile_accel.h"
|
|
||||||
#include "mcclellancompile_util.h"
|
#include "mcclellancompile_util.h"
|
||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "shufticompile.h"
|
#include "shufticompile.h"
|
||||||
@ -65,6 +64,17 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using boost::adaptors::map_keys;
|
using boost::adaptors::map_keys;
|
||||||
|
|
||||||
|
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
|
||||||
|
|
||||||
|
/** Maximum tolerated number of escape character from an accel state.
|
||||||
|
* This is larger than nfa, as we don't have a budget and the nfa cheats on stop
|
||||||
|
* characters for sets of states */
|
||||||
|
#define ACCEL_DFA_MAX_STOP_CHAR 160
|
||||||
|
|
||||||
|
/** Maximum tolerated number of escape character from a sds accel state. Larger
|
||||||
|
* than normal states as accelerating sds is important. Matches NFA value */
|
||||||
|
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
namespace /* anon */ {
|
namespace /* anon */ {
|
||||||
@ -75,7 +85,7 @@ struct dstate_extra {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct dfa_info {
|
struct dfa_info {
|
||||||
dfa_build_strat &strat;
|
accel_dfa_build_strat &strat;
|
||||||
raw_dfa &raw;
|
raw_dfa &raw;
|
||||||
vector<dstate> &states;
|
vector<dstate> &states;
|
||||||
vector<dstate_extra> extra;
|
vector<dstate_extra> extra;
|
||||||
@ -85,7 +95,7 @@ struct dfa_info {
|
|||||||
|
|
||||||
u8 getAlphaShift() const;
|
u8 getAlphaShift() const;
|
||||||
|
|
||||||
explicit dfa_info(dfa_build_strat &s)
|
explicit dfa_info(accel_dfa_build_strat &s)
|
||||||
: strat(s),
|
: strat(s),
|
||||||
raw(s.get_raw()),
|
raw(s.get_raw()),
|
||||||
states(raw.states),
|
states(raw.states),
|
||||||
@ -128,13 +138,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
|
|||||||
return aux;
|
return aux;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
bool double_byte_ok(const AccelScheme &info) {
|
|
||||||
return !info.double_byte.empty()
|
|
||||||
&& info.double_cr.count() < info.double_byte.size()
|
|
||||||
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
|
||||||
assert((size_t)succ_table % 2 == 0);
|
assert((size_t)succ_table % 2 == 0);
|
||||||
@ -190,120 +193,12 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const {
|
|||||||
return ACCEL_DFA_MAX_OFFSET_DEPTH;
|
return ACCEL_DFA_MAX_OFFSET_DEPTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
|
u32 mcclellan_build_strat::max_stop_char() const {
|
||||||
const {
|
return ACCEL_DFA_MAX_STOP_CHAR;
|
||||||
return find_mcclellan_escape_info(rdfa, this_idx,
|
|
||||||
max_allowed_offset_accel());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** builds acceleration schemes for states */
|
u32 mcclellan_build_strat::max_floating_stop_char() const {
|
||||||
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
|
||||||
const AccelScheme &info,
|
|
||||||
void *accel_out) {
|
|
||||||
AccelAux *accel = (AccelAux *)accel_out;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
|
||||||
info.double_offset);
|
|
||||||
accel->generic.offset = verify_u8(info.offset);
|
|
||||||
|
|
||||||
if (double_byte_ok(info) && info.double_cr.none()
|
|
||||||
&& info.double_byte.size() == 1) {
|
|
||||||
accel->accel_type = ACCEL_DVERM;
|
|
||||||
accel->dverm.c1 = info.double_byte.begin()->first;
|
|
||||||
accel->dverm.c2 = info.double_byte.begin()->second;
|
|
||||||
accel->dverm.offset = verify_u8(info.double_offset);
|
|
||||||
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (double_byte_ok(info) && info.double_cr.none()
|
|
||||||
&& (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
|
|
||||||
bool ok = true;
|
|
||||||
|
|
||||||
assert(!info.double_byte.empty());
|
|
||||||
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
|
|
||||||
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
|
|
||||||
|
|
||||||
for (const pair<u8, u8> &p : info.double_byte) {
|
|
||||||
if ((p.first & CASE_CLEAR) != firstC
|
|
||||||
|| (p.second & CASE_CLEAR) != secondC) {
|
|
||||||
ok = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ok) {
|
|
||||||
accel->accel_type = ACCEL_DVERM_NOCASE;
|
|
||||||
accel->dverm.c1 = firstC;
|
|
||||||
accel->dverm.c2 = secondC;
|
|
||||||
accel->dverm.offset = verify_u8(info.double_offset);
|
|
||||||
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
u8 m1;
|
|
||||||
u8 m2;
|
|
||||||
if (buildDvermMask(info.double_byte, &m1, &m2)) {
|
|
||||||
accel->accel_type = ACCEL_DVERM_MASKED;
|
|
||||||
accel->dverm.offset = verify_u8(info.double_offset);
|
|
||||||
accel->dverm.c1 = info.double_byte.begin()->first & m1;
|
|
||||||
accel->dverm.c2 = info.double_byte.begin()->second & m2;
|
|
||||||
accel->dverm.m1 = m1;
|
|
||||||
accel->dverm.m2 = m2;
|
|
||||||
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
|
|
||||||
accel->dverm.c1, accel->dverm.c2);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (double_byte_ok(info)
|
|
||||||
&& shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
|
|
||||||
&accel->dshufti.lo1, &accel->dshufti.hi1,
|
|
||||||
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
|
|
||||||
accel->accel_type = ACCEL_DSHUFTI;
|
|
||||||
accel->dshufti.offset = verify_u8(info.double_offset);
|
|
||||||
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.cr.none()) {
|
|
||||||
accel->accel_type = ACCEL_RED_TAPE;
|
|
||||||
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
|
|
||||||
" from which there is no escape\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.cr.count() == 1) {
|
|
||||||
accel->accel_type = ACCEL_VERM;
|
|
||||||
accel->verm.c = info.cr.find_first();
|
|
||||||
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
|
|
||||||
accel->accel_type = ACCEL_VERM_NOCASE;
|
|
||||||
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
|
|
||||||
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
|
|
||||||
accel->accel_type = ACCEL_NONE;
|
|
||||||
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
accel->accel_type = ACCEL_SHUFTI;
|
|
||||||
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
|
|
||||||
&accel->shufti.hi)) {
|
|
||||||
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(!info.cr.none());
|
|
||||||
accel->accel_type = ACCEL_TRUFFLE;
|
|
||||||
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
|
|
||||||
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -343,15 +238,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_dfa::~raw_dfa() {
|
|
||||||
}
|
|
||||||
|
|
||||||
raw_report_info::raw_report_info() {
|
|
||||||
}
|
|
||||||
|
|
||||||
raw_report_info::~raw_report_info() {
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct raw_report_list {
|
struct raw_report_list {
|
||||||
@ -592,7 +478,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
|
|||||||
|
|
||||||
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
|
||||||
size_t tran_size = (1 << info.getAlphaShift())
|
size_t tran_size = (1 << info.getAlphaShift())
|
||||||
* sizeof(u16) * count_real_states;
|
* sizeof(u16) * count_real_states;
|
||||||
@ -811,7 +697,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
|
|||||||
|
|
||||||
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
|
||||||
map<dstate_id_t, AccelScheme> accel_escape_info
|
map<dstate_id_t, AccelScheme> accel_escape_info
|
||||||
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
|
= info.strat.getAccelInfo(cc.grey);
|
||||||
|
|
||||||
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
|
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
|
||||||
size_t aux_size = sizeof(mstate_aux) * info.size();
|
size_t aux_size = sizeof(mstate_aux) * info.size();
|
||||||
@ -1053,7 +939,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
|
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
set<dstate_id_t> *accel_states) {
|
set<dstate_id_t> *accel_states) {
|
||||||
u16 total_daddy = 0;
|
u16 total_daddy = 0;
|
||||||
@ -1123,12 +1009,9 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) {
|
|||||||
return out.count();
|
return out.count();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_accel_dfa(const NFA *nfa) {
|
bool has_accel_mcclellan(const NFA *nfa) {
|
||||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||||
return m->has_accel;
|
return m->has_accel;
|
||||||
}
|
}
|
||||||
|
|
||||||
dfa_build_strat::~dfa_build_strat() {
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#ifndef MCCLELLANCOMPILE_H
|
#ifndef MCCLELLANCOMPILE_H
|
||||||
#define MCCLELLANCOMPILE_H
|
#define MCCLELLANCOMPILE_H
|
||||||
|
|
||||||
|
#include "accel_dfa_build_strat.h"
|
||||||
#include "rdfa.h"
|
#include "rdfa.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/accel_scheme.h"
|
#include "util/accel_scheme.h"
|
||||||
@ -47,48 +48,20 @@ namespace ue2 {
|
|||||||
class ReportManager;
|
class ReportManager;
|
||||||
struct CompileContext;
|
struct CompileContext;
|
||||||
|
|
||||||
struct raw_report_info {
|
class mcclellan_build_strat : public accel_dfa_build_strat {
|
||||||
raw_report_info();
|
|
||||||
virtual ~raw_report_info();
|
|
||||||
virtual u32 getReportListSize() const = 0; /* in bytes */
|
|
||||||
virtual size_t size() const = 0; /* number of lists */
|
|
||||||
virtual void fillReportLists(NFA *n, size_t base_offset,
|
|
||||||
std::vector<u32> &ro /* out */) const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
class dfa_build_strat {
|
|
||||||
public:
|
|
||||||
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
|
|
||||||
virtual ~dfa_build_strat();
|
|
||||||
virtual raw_dfa &get_raw() const = 0;
|
|
||||||
virtual std::unique_ptr<raw_report_info> gatherReports(
|
|
||||||
std::vector<u32> &reports /* out */,
|
|
||||||
std::vector<u32> &reports_eod /* out */,
|
|
||||||
u8 *isSingleReport /* out */,
|
|
||||||
ReportID *arbReport /* out */) const = 0;
|
|
||||||
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
|
|
||||||
virtual size_t accelSize(void) const = 0;
|
|
||||||
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
|
||||||
void *accel_out) = 0;
|
|
||||||
protected:
|
|
||||||
const ReportManager &rm;
|
|
||||||
};
|
|
||||||
|
|
||||||
class mcclellan_build_strat : public dfa_build_strat {
|
|
||||||
public:
|
public:
|
||||||
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
||||||
: dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
||||||
raw_dfa &get_raw() const override { return rdfa; }
|
raw_dfa &get_raw() const override { return rdfa; }
|
||||||
std::unique_ptr<raw_report_info> gatherReports(
|
std::unique_ptr<raw_report_info> gatherReports(
|
||||||
std::vector<u32> &reports /* out */,
|
std::vector<u32> &reports /* out */,
|
||||||
std::vector<u32> &reports_eod /* out */,
|
std::vector<u32> &reports_eod /* out */,
|
||||||
u8 *isSingleReport /* out */,
|
u8 *isSingleReport /* out */,
|
||||||
ReportID *arbReport /* out */) const override;
|
ReportID *arbReport /* out */) const override;
|
||||||
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
|
|
||||||
size_t accelSize(void) const override;
|
size_t accelSize(void) const override;
|
||||||
void buildAccel(dstate_id_t this_idx,const AccelScheme &info,
|
u32 max_allowed_offset_accel() const override;
|
||||||
void *accel_out) override;
|
u32 max_stop_char() const override;
|
||||||
virtual u32 max_allowed_offset_accel() const;
|
u32 max_floating_stop_char() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
raw_dfa &rdfa;
|
raw_dfa &rdfa;
|
||||||
@ -103,7 +76,7 @@ mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
|
|||||||
|
|
||||||
/* used internally by mcclellan/haig/gough compile process */
|
/* used internally by mcclellan/haig/gough compile process */
|
||||||
ue2::aligned_unique_ptr<NFA>
|
ue2::aligned_unique_ptr<NFA>
|
||||||
mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
|
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||||
const CompileContext &cc,
|
const CompileContext &cc,
|
||||||
std::set<dstate_id_t> *accel_states = nullptr);
|
std::set<dstate_id_t> *accel_states = nullptr);
|
||||||
|
|
||||||
@ -114,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw);
|
|||||||
|
|
||||||
std::set<ReportID> all_reports(const raw_dfa &rdfa);
|
std::set<ReportID> all_reports(const raw_dfa &rdfa);
|
||||||
|
|
||||||
bool has_accel_dfa(const NFA *nfa);
|
bool has_accel_mcclellan(const NFA *nfa);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -337,62 +337,35 @@ size_t hash_dfa(const raw_dfa &rdfa) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
|
bool can_die_early(const raw_dfa &raw, dstate_id_t s,
|
||||||
u16 top_remap = raw.alpha_remap[TOP];
|
map<dstate_id_t, u32> &visited, u32 age_limit) {
|
||||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
if (contains(visited, s) && visited[s] >= age_limit) {
|
||||||
if (i != top_remap && raw.states[s].next[i] == s) {
|
/* we have already visited (or are in the process of visiting) here with
|
||||||
|
* a looser limit. */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
visited[s] = age_limit;
|
||||||
|
|
||||||
|
if (s == DEAD_STATE) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (age_limit == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &next : raw.states[s].next) {
|
||||||
|
if (can_die_early(raw, next, visited, age_limit - 1)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
|
bool can_die_early(const raw_dfa &raw, u32 age_limit) {
|
||||||
if (raw.start_floating != DEAD_STATE) {
|
map<dstate_id_t, u32> visited;
|
||||||
DEBUG_PRINTF("has floating start\n");
|
return can_die_early(raw, raw.start_anchored, visited, age_limit);
|
||||||
return raw.start_floating;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("looking for SDS proxy\n");
|
|
||||||
|
|
||||||
dstate_id_t s = raw.start_anchored;
|
|
||||||
|
|
||||||
if (has_self_loop(s, raw)) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
u16 top_remap = raw.alpha_remap[TOP];
|
|
||||||
|
|
||||||
ue2::unordered_set<dstate_id_t> seen;
|
|
||||||
while (true) {
|
|
||||||
seen.insert(s);
|
|
||||||
DEBUG_PRINTF("basis %hu\n", s);
|
|
||||||
|
|
||||||
/* check if we are connected to a state with a self loop */
|
|
||||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
|
||||||
dstate_id_t t = raw.states[s].next[i];
|
|
||||||
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find a neighbour to use as a basis for looking for the sds proxy */
|
|
||||||
dstate_id_t t = DEAD_STATE;
|
|
||||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
|
||||||
dstate_id_t tt = raw.states[s].next[i];
|
|
||||||
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
|
|
||||||
t = tt;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (t == DEAD_STATE) {
|
|
||||||
/* we were unable to find a state to use as a SDS proxy */
|
|
||||||
return DEAD_STATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
s = t;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -55,7 +55,7 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
|
|||||||
/** \brief Compute a simple hash of this raw_dfa, including its reports. */
|
/** \brief Compute a simple hash of this raw_dfa, including its reports. */
|
||||||
size_t hash_dfa(const raw_dfa &rdfa);
|
size_t hash_dfa(const raw_dfa &rdfa);
|
||||||
|
|
||||||
dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
|
bool can_die_early(const raw_dfa &raw, u32 age_limit);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) {
|
|||||||
fprintf(f, "0 [style=invis];\n");
|
fprintf(f, "0 [style=invis];\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
|
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
|
||||||
|
UNUSED const string &base) {
|
||||||
assert(nfa->type == MCCLELLAN_NFA_16);
|
assert(nfa->type == MCCLELLAN_NFA_16);
|
||||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||||
|
|
||||||
@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
|
|||||||
fprintf(f, "}\n");
|
fprintf(f, "}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
|
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
|
||||||
|
UNUSED const string &base) {
|
||||||
assert(nfa->type == MCCLELLAN_NFA_8);
|
assert(nfa->type == MCCLELLAN_NFA_8);
|
||||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,6 +34,7 @@
|
|||||||
#include "rdfa.h"
|
#include "rdfa.h"
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct mcclellan;
|
struct mcclellan;
|
||||||
struct mstate_aux;
|
struct mstate_aux;
|
||||||
@ -42,8 +43,10 @@ union AccelAux;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file);
|
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file);
|
const std::string &base);
|
||||||
|
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
|
|
||||||
|
@ -131,7 +131,8 @@ char processReports(const struct mpv *m, u8 *reporters,
|
|||||||
rl_count++;
|
rl_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cb(report_offset, curr->report, ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, report_offset, curr->report, ctxt) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
DEBUG_PRINTF("bailing\n");
|
DEBUG_PRINTF("bailing\n");
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
@ -180,7 +181,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters,
|
|||||||
|
|
||||||
for (size_t i = 2; i <= length; i++) {
|
for (size_t i = 2; i <= length; i++) {
|
||||||
for (u32 j = 0; j < rl_count; j++) {
|
for (u32 j = 0; j < rl_count; j++) {
|
||||||
if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
|
if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
|
||||||
DEBUG_PRINTF("bailing\n");
|
DEBUG_PRINTF("bailing\n");
|
||||||
return MO_HALT_MATCHING;
|
return MO_HALT_MATCHING;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -36,7 +36,6 @@ struct NFA;
|
|||||||
|
|
||||||
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
|
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||||
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
|
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
|
||||||
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
|
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
|
||||||
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
|
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
|
||||||
void *state, u8 key);
|
void *state, u8 key);
|
||||||
@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
|
|||||||
|
|
||||||
#define nfaExecMpv0_testEOD NFA_API_NO_IMPL
|
#define nfaExecMpv0_testEOD NFA_API_NO_IMPL
|
||||||
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL
|
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL
|
||||||
|
#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
|
||||||
#define nfaExecMpv0_QR NFA_API_NO_IMPL
|
#define nfaExecMpv0_QR NFA_API_NO_IMPL
|
||||||
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
|
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
|
||||||
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
|
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
|
||||||
|
@ -48,7 +48,8 @@
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) {
|
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
|
||||||
|
UNUSED const std::string &base) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,12 +32,14 @@
|
|||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file);
|
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);
|
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -347,9 +347,9 @@ void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
|
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
|
||||||
unsigned max_len) :
|
u32 off, unsigned max_length)
|
||||||
cr(ref_cr), offset(off), max_len(max_len) {
|
: cr(ref_cr), offset(off), max_len(max_length) {
|
||||||
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
|
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
|
||||||
accels.resize(accel_num);
|
accels.resize(accel_num);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -31,7 +31,6 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
|
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -31,7 +31,6 @@
|
|||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
/* Normal SSSE3 shufti */
|
/* Normal SSSE3 shufti */
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -32,7 +32,6 @@
|
|||||||
#include "multitruffle.h"
|
#include "multitruffle.h"
|
||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
#include "multiaccel_common.h"
|
#include "multiaccel_common.h"
|
||||||
|
|
||||||
|
@ -120,6 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
|
|||||||
*/
|
*/
|
||||||
char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
|
char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main execution function that doesn't perform the checks and optimisations of
|
||||||
|
* nfaQueueExec() and just dispatches directly to the nfa implementations. It is
|
||||||
|
* intended to be used by the Tamarama engine.
|
||||||
|
*/
|
||||||
|
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
|
||||||
|
|
||||||
|
/** Return value indicating that the engine is dead. */
|
||||||
|
#define MO_DEAD 0
|
||||||
|
|
||||||
/** Return value indicating that the engine is alive. */
|
/** Return value indicating that the engine is alive. */
|
||||||
#define MO_ALIVE 1
|
#define MO_ALIVE 1
|
||||||
|
|
||||||
@ -155,6 +165,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
|
|||||||
*/
|
*/
|
||||||
char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
|
char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main execution function that doesn't perform the checks and optimisations of
|
||||||
|
* nfaQueueExecToMatch() and just dispatches directly to the nfa
|
||||||
|
* implementations. It is intended to be used by the Tamarama engine.
|
||||||
|
*/
|
||||||
|
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Report matches at the current queue location.
|
* Report matches at the current queue location.
|
||||||
*
|
*
|
||||||
@ -175,10 +192,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
|
|||||||
*/
|
*/
|
||||||
char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
|
char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns non-zero if the NFA is in any accept state regardless of report
|
||||||
|
* ID.
|
||||||
|
*/
|
||||||
|
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process the queued commands on the given NFA up to end or the first match.
|
* Process the queued commands on the given NFA up to end or the first match.
|
||||||
*
|
*
|
||||||
* Note: This version is meant for rose prefix NFAs:
|
* Note: This version is meant for rose prefix/infix NFAs:
|
||||||
* - never uses a callback
|
* - never uses a callback
|
||||||
* - loading of state at a point in history is not special cased
|
* - loading of state at a point in history is not special cased
|
||||||
*
|
*
|
||||||
@ -187,9 +210,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
|
|||||||
* end with some variant of end. The location field of the events must
|
* end with some variant of end. The location field of the events must
|
||||||
* be monotonically increasing. If not all the data was processed during
|
* be monotonically increasing. If not all the data was processed during
|
||||||
* the call, the queue is updated to reflect the remaining work.
|
* the call, the queue is updated to reflect the remaining work.
|
||||||
* @param report we are interested in, if set at the end of the scan returns
|
* @param report we are interested in. If the given report will be raised at
|
||||||
* @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
|
* the end location, the function returns @ref MO_MATCHES_PENDING. If no
|
||||||
* be passed in.
|
* match information is desired, MO_INVALID_IDX should be passed in.
|
||||||
* @return @ref MO_ALIVE if the nfa is still active with no matches pending,
|
* @return @ref MO_ALIVE if the nfa is still active with no matches pending,
|
||||||
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
|
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
|
||||||
* alive
|
* alive
|
||||||
@ -205,6 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
|
|||||||
* Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
|
* Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
|
||||||
* to hbuf (main buffer and history buffer).
|
* to hbuf (main buffer and history buffer).
|
||||||
*
|
*
|
||||||
|
* Note: provides the match location as the "end" offset when the callback is
|
||||||
|
* called.
|
||||||
|
*
|
||||||
* @param nfa engine to run
|
* @param nfa engine to run
|
||||||
* @param offset base offset of buf
|
* @param offset base offset of buf
|
||||||
* @param buf main buffer
|
* @param buf main buffer
|
||||||
@ -229,7 +255,6 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
|
|||||||
* (including br region)
|
* (including br region)
|
||||||
* @param offset the offset to return (via the callback) with each match
|
* @param offset the offset to return (via the callback) with each match
|
||||||
* @param callback the callback to call for each match raised
|
* @param callback the callback to call for each match raised
|
||||||
* @param som_cb the callback to call for each match raised (Haig)
|
|
||||||
* @param context context pointer passed to each callback
|
* @param context context pointer passed to each callback
|
||||||
*
|
*
|
||||||
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
|
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
|
||||||
@ -237,8 +262,7 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
|
|||||||
*/
|
*/
|
||||||
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
|
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context);
|
||||||
void *context);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates if an engine is a zombie.
|
* Indicates if an engine is a zombie.
|
||||||
|
@ -42,6 +42,8 @@
|
|||||||
#include "limex.h"
|
#include "limex.h"
|
||||||
#include "mcclellan.h"
|
#include "mcclellan.h"
|
||||||
#include "mpv.h"
|
#include "mpv.h"
|
||||||
|
#include "sheng.h"
|
||||||
|
#include "tamarama.h"
|
||||||
|
|
||||||
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
|
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
|
||||||
case dc_ltype##_NFA_##dc_subtype: \
|
case dc_ltype##_NFA_##dc_subtype: \
|
||||||
@ -52,41 +54,11 @@
|
|||||||
|
|
||||||
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
|
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
|
||||||
switch (nfa->type) { \
|
switch (nfa->type) { \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
|
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
|
||||||
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
|
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
|
||||||
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
|
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
|
||||||
@ -98,21 +70,22 @@
|
|||||||
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
||||||
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
||||||
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
|
||||||
|
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
|
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
|
||||||
const char *streamState, u64a offset,
|
const char *streamState, u64a offset,
|
||||||
NfaCallback callback, SomNfaCallback som_cb,
|
NfaCallback callback, void *context) {
|
||||||
void *context) {
|
|
||||||
assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
|
assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
|
||||||
|
|
||||||
// Caller should avoid calling us if we can never produce matches.
|
// Caller should avoid calling us if we can never produce matches.
|
||||||
assert(nfaAcceptsEod(nfa));
|
assert(nfaAcceptsEod(nfa));
|
||||||
|
|
||||||
DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
|
DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
|
||||||
som_cb, context));
|
context));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,6 +108,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
|
||||||
|
return nfaQueueExec_i(nfa, q, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
|
||||||
|
return nfaQueueExec2_i(nfa, q, end);
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
|
char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
|
||||||
DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
|
DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
|
||||||
@ -258,7 +239,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
|
|||||||
|
|
||||||
assert(q);
|
assert(q);
|
||||||
assert(end >= 0);
|
assert(end >= 0);
|
||||||
assert(q->context);
|
|
||||||
assert(q->state);
|
assert(q->state);
|
||||||
assert(q->cur < q->end);
|
assert(q->cur < q->end);
|
||||||
assert(q->end <= MAX_MQE_LEN);
|
assert(q->end <= MAX_MQE_LEN);
|
||||||
@ -315,6 +295,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
|
||||||
|
DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
|
char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
|
||||||
DEBUG_PRINTF("nfa=%p\n", nfa);
|
DEBUG_PRINTF("nfa=%p\n", nfa);
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -91,12 +91,12 @@ struct mq {
|
|||||||
* history buffer; (logically) immediately before the
|
* history buffer; (logically) immediately before the
|
||||||
* main buffer */
|
* main buffer */
|
||||||
size_t hlength; /**< length of the history buffer */
|
size_t hlength; /**< length of the history buffer */
|
||||||
|
struct hs_scratch *scratch; /**< global scratch space */
|
||||||
char report_current; /**<
|
char report_current; /**<
|
||||||
* report_current matches at starting offset through
|
* report_current matches at starting offset through
|
||||||
* callback. If true, the queue must be located at a
|
* callback. If true, the queue must be located at a
|
||||||
* point where MO_MATCHES_PENDING was returned */
|
* point where MO_MATCHES_PENDING was returned */
|
||||||
NfaCallback cb; /**< callback to trigger on matches */
|
NfaCallback cb; /**< callback to trigger on matches */
|
||||||
SomNfaCallback som_cb; /**< callback with som info; used by haig */
|
|
||||||
void *context; /**< context to pass along with a callback */
|
void *context; /**< context to pass along with a callback */
|
||||||
struct mq_item items[MAX_MQE_LEN]; /**< queue items */
|
struct mq_item items[MAX_MQE_LEN]; /**< queue items */
|
||||||
};
|
};
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -30,6 +30,7 @@
|
|||||||
|
|
||||||
#include "limex_internal.h"
|
#include "limex_internal.h"
|
||||||
#include "mcclellancompile.h"
|
#include "mcclellancompile.h"
|
||||||
|
#include "shengcompile.h"
|
||||||
#include "nfa_internal.h"
|
#include "nfa_internal.h"
|
||||||
#include "repeat_internal.h"
|
#include "repeat_internal.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
@ -78,7 +79,7 @@ struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
|
|||||||
decltype(arg), (NFAEngineType)0>::doOp(i, arg)
|
decltype(arg), (NFAEngineType)0>::doOp(i, arg)
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef bool (*has_accel_fn)(const NFA *nfa);
|
typedef bool (*nfa_dispatch_fn)(const NFA *nfa);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static
|
static
|
||||||
@ -87,8 +88,37 @@ bool has_accel_limex(const NFA *nfa) {
|
|||||||
return limex->accelCount;
|
return limex->accelCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
static
|
static
|
||||||
bool has_accel_generic(const NFA *) {
|
bool has_repeats_limex(const NFA *nfa) {
|
||||||
|
const T *limex = (const T *)getImplNfa(nfa);
|
||||||
|
return limex->repeatCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static
|
||||||
|
bool has_repeats_other_than_firsts_limex(const NFA *nfa) {
|
||||||
|
const T *limex = (const T *)getImplNfa(nfa);
|
||||||
|
const char *ptr = (const char *)limex;
|
||||||
|
|
||||||
|
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < limex->repeatCount; i++) {
|
||||||
|
u32 offset = repeatOffset[i];
|
||||||
|
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
|
||||||
|
const RepeatInfo *repeat =
|
||||||
|
(const RepeatInfo *)((const char *)info + sizeof(*info));
|
||||||
|
if (repeat->type != REPEAT_FIRST) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool dispatch_false(const NFA *) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,72 +170,53 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
|
|||||||
#define DO_IF_DUMP_SUPPORT(a)
|
#define DO_IF_DUMP_SUPPORT(a)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
|
#define MAKE_LIMEX_TRAITS(mlt_size) \
|
||||||
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
|
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
|
||||||
static UNUSED const char *name; \
|
static UNUSED const char *name; \
|
||||||
static const NFACategory category = NFA_LIMEX; \
|
static const NFACategory category = NFA_LIMEX; \
|
||||||
typedef LimExNFA##mlt_size implNFA_t; \
|
typedef LimExNFA##mlt_size implNFA_t; \
|
||||||
typedef u_##mlt_size tableRow_t; \
|
typedef u_##mlt_size tableRow_t; \
|
||||||
static const has_accel_fn has_accel; \
|
static const nfa_dispatch_fn has_accel; \
|
||||||
|
static const nfa_dispatch_fn has_repeats; \
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
|
||||||
static const u32 stateAlign = \
|
static const u32 stateAlign = \
|
||||||
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
|
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
|
||||||
static const bool fast = mlt_size <= 64; \
|
static const bool fast = mlt_size <= 64; \
|
||||||
}; \
|
}; \
|
||||||
const has_accel_fn NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::has_accel \
|
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
|
||||||
= has_accel_limex<LimExNFA##mlt_size>; \
|
= has_accel_limex<LimExNFA##mlt_size>; \
|
||||||
|
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \
|
||||||
|
= has_repeats_limex<LimExNFA##mlt_size>; \
|
||||||
|
const nfa_dispatch_fn \
|
||||||
|
NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \
|
||||||
|
= has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \
|
||||||
DO_IF_DUMP_SUPPORT( \
|
DO_IF_DUMP_SUPPORT( \
|
||||||
const char *NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::name \
|
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
|
||||||
= "LimEx (0-"#mlt_shift") "#mlt_size; \
|
= "LimEx "#mlt_size; \
|
||||||
template<> struct getDescription<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
|
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
|
||||||
static string call(const void *ptr) { \
|
static string call(const void *ptr) { \
|
||||||
return getDescriptionLimEx<LIMEX_NFA_##mlt_size##_##mlt_shift>((const NFA *)ptr); \
|
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
|
||||||
} \
|
} \
|
||||||
};)
|
};)
|
||||||
|
|
||||||
MAKE_LIMEX_TRAITS(32, 1)
|
MAKE_LIMEX_TRAITS(32)
|
||||||
MAKE_LIMEX_TRAITS(32, 2)
|
MAKE_LIMEX_TRAITS(128)
|
||||||
MAKE_LIMEX_TRAITS(32, 3)
|
MAKE_LIMEX_TRAITS(256)
|
||||||
MAKE_LIMEX_TRAITS(32, 4)
|
MAKE_LIMEX_TRAITS(384)
|
||||||
MAKE_LIMEX_TRAITS(32, 5)
|
MAKE_LIMEX_TRAITS(512)
|
||||||
MAKE_LIMEX_TRAITS(32, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(32, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(128, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(256, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(384, 7)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 1)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 2)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 3)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 4)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 5)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 6)
|
|
||||||
MAKE_LIMEX_TRAITS(512, 7)
|
|
||||||
|
|
||||||
template<> struct NFATraits<MCCLELLAN_NFA_8> {
|
template<> struct NFATraits<MCCLELLAN_NFA_8> {
|
||||||
UNUSED static const char *name;
|
UNUSED static const char *name;
|
||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 1;
|
static const u32 stateAlign = 1;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_dfa;
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
|
const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
|
||||||
#endif
|
#endif
|
||||||
@ -215,9 +226,13 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 2;
|
static const u32 stateAlign = 2;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_dfa;
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
|
const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
|
||||||
#endif
|
#endif
|
||||||
@ -227,9 +242,13 @@ template<> struct NFATraits<GOUGH_NFA_8> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_dfa;
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
|
||||||
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
|
const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
|
||||||
#endif
|
#endif
|
||||||
@ -239,9 +258,13 @@ template<> struct NFATraits<GOUGH_NFA_16> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_dfa;
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
|
||||||
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
|
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
|
||||||
#endif
|
#endif
|
||||||
@ -251,9 +274,13 @@ template<> struct NFATraits<MPV_NFA_0> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<MPV_NFA_0>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
|
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
|
||||||
#endif
|
#endif
|
||||||
@ -263,9 +290,13 @@ template<> struct NFATraits<CASTLE_NFA_0> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<CASTLE_NFA_0>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
|
const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
|
||||||
#endif
|
#endif
|
||||||
@ -275,9 +306,13 @@ template<> struct NFATraits<LBR_NFA_Dot> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<LBR_NFA_Dot>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
|
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
|
||||||
#endif
|
#endif
|
||||||
@ -287,9 +322,13 @@ template<> struct NFATraits<LBR_NFA_Verm> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<LBR_NFA_Verm>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
|
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
|
||||||
#endif
|
#endif
|
||||||
@ -299,9 +338,13 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<LBR_NFA_NVerm>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
|
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
|
||||||
#endif
|
#endif
|
||||||
@ -311,9 +354,13 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<LBR_NFA_Shuf>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
|
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
|
||||||
#endif
|
#endif
|
||||||
@ -323,13 +370,49 @@ template<> struct NFATraits<LBR_NFA_Truf> {
|
|||||||
static const NFACategory category = NFA_OTHER;
|
static const NFACategory category = NFA_OTHER;
|
||||||
static const u32 stateAlign = 8;
|
static const u32 stateAlign = 8;
|
||||||
static const bool fast = true;
|
static const bool fast = true;
|
||||||
static const has_accel_fn has_accel;
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
};
|
};
|
||||||
const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic;
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
|
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<SHENG_NFA_0> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 1;
|
||||||
|
static const bool fast = true;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<> struct NFATraits<TAMARAMA_NFA_0> {
|
||||||
|
UNUSED static const char *name;
|
||||||
|
static const NFACategory category = NFA_OTHER;
|
||||||
|
static const u32 stateAlign = 32;
|
||||||
|
static const bool fast = true;
|
||||||
|
static const nfa_dispatch_fn has_accel;
|
||||||
|
static const nfa_dispatch_fn has_repeats;
|
||||||
|
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||||
|
};
|
||||||
|
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
|
||||||
|
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||||
|
#if defined(DUMP_SUPPORT)
|
||||||
|
const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
@ -380,42 +463,39 @@ struct is_limex {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
template<NFAEngineType t>
|
||||||
|
struct has_repeats_other_than_firsts_dispatch {
|
||||||
|
static nfa_dispatch_fn call(const void *) {
|
||||||
|
return NFATraits<t>::has_repeats_other_than_firsts;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
|
bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
|
||||||
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
|
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
|
||||||
return false;
|
has_repeats_other_than_firsts_dispatch,
|
||||||
|
&nfa)(&nfa);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
template<NFAEngineType t>
|
||||||
|
struct has_repeats_dispatch {
|
||||||
|
static nfa_dispatch_fn call(const void *) {
|
||||||
|
return NFATraits<t>::has_repeats;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
|
|
||||||
const char *ptr = (const char *)limex;
|
|
||||||
|
|
||||||
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
|
|
||||||
|
|
||||||
for (u32 i = 0; i < limex->repeatCount; i++) {
|
|
||||||
u32 offset = repeatOffset[i];
|
|
||||||
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
|
|
||||||
const RepeatInfo *repeat =
|
|
||||||
(const RepeatInfo *)((const char *)info + sizeof(*info));
|
|
||||||
if (repeat->type != REPEAT_FIRST) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_bounded_repeats(const NFA &nfa) {
|
bool has_bounded_repeats(const NFA &nfa) {
|
||||||
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
|
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch,
|
||||||
return false;
|
&nfa)(&nfa);
|
||||||
}
|
|
||||||
|
|
||||||
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
|
|
||||||
return limex->repeatCount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
template<NFAEngineType t>
|
template<NFAEngineType t>
|
||||||
struct has_accel_dispatch {
|
struct has_accel_dispatch {
|
||||||
static has_accel_fn call(const void *) {
|
static nfa_dispatch_fn call(const void *) {
|
||||||
return NFATraits<t>::has_accel;
|
return NFATraits<t>::has_accel;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -423,8 +503,7 @@ struct has_accel_dispatch {
|
|||||||
|
|
||||||
bool has_accel(const NFA &nfa) {
|
bool has_accel(const NFA &nfa) {
|
||||||
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
|
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
|
||||||
&nfa)
|
&nfa)(&nfa);
|
||||||
(&nfa);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool requires_decompress_key(const NFA &nfa) {
|
bool requires_decompress_key(const NFA &nfa) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -36,6 +36,7 @@
|
|||||||
#if defined(DUMP_SUPPORT)
|
#if defined(DUMP_SUPPORT)
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
struct NFA;
|
struct NFA;
|
||||||
|
|
||||||
@ -45,7 +46,7 @@ namespace ue2 {
|
|||||||
* \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
|
* \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
|
||||||
* file pointed to by dotFile.
|
* file pointed to by dotFile.
|
||||||
*/
|
*/
|
||||||
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile);
|
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);
|
||||||
|
|
||||||
/** \brief Dump a textual representation of the NFA. */
|
/** \brief Dump a textual representation of the NFA. */
|
||||||
void nfaDumpText(const struct NFA *fact, FILE *textFile);
|
void nfaDumpText(const struct NFA *fact, FILE *textFile);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -40,6 +40,8 @@
|
|||||||
#include "limex.h"
|
#include "limex.h"
|
||||||
#include "mcclellandump.h"
|
#include "mcclellandump.h"
|
||||||
#include "mpv_dump.h"
|
#include "mpv_dump.h"
|
||||||
|
#include "shengdump.h"
|
||||||
|
#include "tamarama_dump.h"
|
||||||
|
|
||||||
#ifndef DUMP_SUPPORT
|
#ifndef DUMP_SUPPORT
|
||||||
#error "no dump support"
|
#error "no dump support"
|
||||||
@ -57,41 +59,11 @@ namespace ue2 {
|
|||||||
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
|
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
|
||||||
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
|
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
|
||||||
switch (nfa->type) { \
|
switch (nfa->type) { \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
|
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
|
|
||||||
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
|
|
||||||
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
|
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
|
||||||
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
|
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
|
||||||
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
|
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
|
||||||
@ -103,12 +75,15 @@ namespace ue2 {
|
|||||||
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
|
||||||
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
|
||||||
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
|
||||||
|
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
|
||||||
|
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
|
||||||
default: \
|
default: \
|
||||||
assert(0); \
|
assert(0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) {
|
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile,
|
||||||
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile));
|
const std::string &base) {
|
||||||
|
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base));
|
||||||
}
|
}
|
||||||
|
|
||||||
void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {
|
void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {
|
||||||
|
@ -51,41 +51,11 @@ extern "C"
|
|||||||
// Common data structures for NFAs
|
// Common data structures for NFAs
|
||||||
|
|
||||||
enum NFAEngineType {
|
enum NFAEngineType {
|
||||||
LIMEX_NFA_32_1,
|
LIMEX_NFA_32,
|
||||||
LIMEX_NFA_32_2,
|
LIMEX_NFA_128,
|
||||||
LIMEX_NFA_32_3,
|
LIMEX_NFA_256,
|
||||||
LIMEX_NFA_32_4,
|
LIMEX_NFA_384,
|
||||||
LIMEX_NFA_32_5,
|
LIMEX_NFA_512,
|
||||||
LIMEX_NFA_32_6,
|
|
||||||
LIMEX_NFA_32_7,
|
|
||||||
LIMEX_NFA_128_1,
|
|
||||||
LIMEX_NFA_128_2,
|
|
||||||
LIMEX_NFA_128_3,
|
|
||||||
LIMEX_NFA_128_4,
|
|
||||||
LIMEX_NFA_128_5,
|
|
||||||
LIMEX_NFA_128_6,
|
|
||||||
LIMEX_NFA_128_7,
|
|
||||||
LIMEX_NFA_256_1,
|
|
||||||
LIMEX_NFA_256_2,
|
|
||||||
LIMEX_NFA_256_3,
|
|
||||||
LIMEX_NFA_256_4,
|
|
||||||
LIMEX_NFA_256_5,
|
|
||||||
LIMEX_NFA_256_6,
|
|
||||||
LIMEX_NFA_256_7,
|
|
||||||
LIMEX_NFA_384_1,
|
|
||||||
LIMEX_NFA_384_2,
|
|
||||||
LIMEX_NFA_384_3,
|
|
||||||
LIMEX_NFA_384_4,
|
|
||||||
LIMEX_NFA_384_5,
|
|
||||||
LIMEX_NFA_384_6,
|
|
||||||
LIMEX_NFA_384_7,
|
|
||||||
LIMEX_NFA_512_1,
|
|
||||||
LIMEX_NFA_512_2,
|
|
||||||
LIMEX_NFA_512_3,
|
|
||||||
LIMEX_NFA_512_4,
|
|
||||||
LIMEX_NFA_512_5,
|
|
||||||
LIMEX_NFA_512_6,
|
|
||||||
LIMEX_NFA_512_7,
|
|
||||||
MCCLELLAN_NFA_8, /**< magic pseudo nfa */
|
MCCLELLAN_NFA_8, /**< magic pseudo nfa */
|
||||||
MCCLELLAN_NFA_16, /**< magic pseudo nfa */
|
MCCLELLAN_NFA_16, /**< magic pseudo nfa */
|
||||||
GOUGH_NFA_8, /**< magic pseudo nfa */
|
GOUGH_NFA_8, /**< magic pseudo nfa */
|
||||||
@ -97,6 +67,8 @@ enum NFAEngineType {
|
|||||||
LBR_NFA_Shuf, /**< magic pseudo nfa */
|
LBR_NFA_Shuf, /**< magic pseudo nfa */
|
||||||
LBR_NFA_Truf, /**< magic pseudo nfa */
|
LBR_NFA_Truf, /**< magic pseudo nfa */
|
||||||
CASTLE_NFA_0, /**< magic pseudo nfa */
|
CASTLE_NFA_0, /**< magic pseudo nfa */
|
||||||
|
SHENG_NFA_0, /**< magic pseudo nfa */
|
||||||
|
TAMARAMA_NFA_0, /**< magic nfa container */
|
||||||
/** \brief bogus NFA - not used */
|
/** \brief bogus NFA - not used */
|
||||||
INVALID_NFA
|
INVALID_NFA
|
||||||
};
|
};
|
||||||
@ -175,50 +147,27 @@ static really_inline int isGoughType(u8 t) {
|
|||||||
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
|
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA.
|
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
|
||||||
* */
|
static really_inline int isShengType(u8 t) {
    /* SHENG_NFA_0 is the only Sheng engine type in NFAEngineType. */
    if (t == SHENG_NFA_0) {
        return 1;
    }
    return 0;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
|
||||||
|
* Sheng DFA.
|
||||||
|
*/
|
||||||
static really_inline int isDfaType(u8 t) {
|
static really_inline int isDfaType(u8 t) {
|
||||||
return isMcClellanType(t) || isGoughType(t);
|
return isMcClellanType(t) || isGoughType(t) || isShengType(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \brief True if the given type (from NFA::type) is an NFA. */
|
/** \brief True if the given type (from NFA::type) is an NFA. */
|
||||||
static really_inline int isNfaType(u8 t) {
|
static really_inline int isNfaType(u8 t) {
|
||||||
switch (t) {
|
switch (t) {
|
||||||
case LIMEX_NFA_32_1:
|
case LIMEX_NFA_32:
|
||||||
case LIMEX_NFA_32_2:
|
case LIMEX_NFA_128:
|
||||||
case LIMEX_NFA_32_3:
|
case LIMEX_NFA_256:
|
||||||
case LIMEX_NFA_32_4:
|
case LIMEX_NFA_384:
|
||||||
case LIMEX_NFA_32_5:
|
case LIMEX_NFA_512:
|
||||||
case LIMEX_NFA_32_6:
|
|
||||||
case LIMEX_NFA_32_7:
|
|
||||||
case LIMEX_NFA_128_1:
|
|
||||||
case LIMEX_NFA_128_2:
|
|
||||||
case LIMEX_NFA_128_3:
|
|
||||||
case LIMEX_NFA_128_4:
|
|
||||||
case LIMEX_NFA_128_5:
|
|
||||||
case LIMEX_NFA_128_6:
|
|
||||||
case LIMEX_NFA_128_7:
|
|
||||||
case LIMEX_NFA_256_1:
|
|
||||||
case LIMEX_NFA_256_2:
|
|
||||||
case LIMEX_NFA_256_3:
|
|
||||||
case LIMEX_NFA_256_4:
|
|
||||||
case LIMEX_NFA_256_5:
|
|
||||||
case LIMEX_NFA_256_6:
|
|
||||||
case LIMEX_NFA_256_7:
|
|
||||||
case LIMEX_NFA_384_1:
|
|
||||||
case LIMEX_NFA_384_2:
|
|
||||||
case LIMEX_NFA_384_3:
|
|
||||||
case LIMEX_NFA_384_4:
|
|
||||||
case LIMEX_NFA_384_5:
|
|
||||||
case LIMEX_NFA_384_6:
|
|
||||||
case LIMEX_NFA_384_7:
|
|
||||||
case LIMEX_NFA_512_1:
|
|
||||||
case LIMEX_NFA_512_2:
|
|
||||||
case LIMEX_NFA_512_3:
|
|
||||||
case LIMEX_NFA_512_4:
|
|
||||||
case LIMEX_NFA_512_5:
|
|
||||||
case LIMEX_NFA_512_6:
|
|
||||||
case LIMEX_NFA_512_7:
|
|
||||||
return 1;
|
return 1;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -233,6 +182,12 @@ int isLbrType(u8 t) {
|
|||||||
t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
|
t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \brief True if the given type (from NFA::type) is a container engine. */
|
||||||
|
static really_inline
|
||||||
|
int isContainerType(u8 t) {
|
||||||
|
return t == TAMARAMA_NFA_0;
|
||||||
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
int isMultiTopType(u8 t) {
|
int isMultiTopType(u8 t) {
|
||||||
return !isDfaType(t) && !isLbrType(t);
|
return !isDfaType(t) && !isLbrType(t);
|
||||||
|
@ -37,6 +37,8 @@
|
|||||||
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
/** \brief Specify the use-case for an nfa engine. */
|
/** \brief Specify the use-case for an nfa engine. */
|
||||||
@ -47,6 +49,7 @@ enum nfa_kind {
|
|||||||
NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
|
NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
|
||||||
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
|
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
|
||||||
NFA_REV_PREFIX, //! reverse running prefixes (for som)
|
NFA_REV_PREFIX, //! reverse running prefixes (for som)
|
||||||
|
NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief True if this kind of engine is triggered by a top event. */
|
/** \brief True if this kind of engine is triggered by a top event. */
|
||||||
@ -63,8 +66,10 @@ bool is_triggered(enum nfa_kind k) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief True if this kind of engine generates callback events when it
|
* \brief True if this kind of engine actively checks for accept
|
||||||
* enters accept states.
|
* states either to halt matching or to raise a callback. Only engines
|
||||||
|
* generated with this property should call nfaQueueExec() or
|
||||||
|
* nfaQueueExecToMatch().
|
||||||
*/
|
*/
|
||||||
inline
|
inline
|
||||||
bool generates_callbacks(enum nfa_kind k) {
|
bool generates_callbacks(enum nfa_kind k) {
|
||||||
@ -73,6 +78,24 @@ bool generates_callbacks(enum nfa_kind k) {
|
|||||||
case NFA_OUTFIX:
|
case NFA_OUTFIX:
|
||||||
case NFA_OUTFIX_RAW:
|
case NFA_OUTFIX_RAW:
|
||||||
case NFA_REV_PREFIX:
|
case NFA_REV_PREFIX:
|
||||||
|
case NFA_EAGER_PREFIX:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief True if this kind of engine has its state inspected to see if it is in
|
||||||
|
* an accept state. Engines generated with this property will commonly call
|
||||||
|
* nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
|
||||||
|
*/
|
||||||
|
inline
|
||||||
|
bool inspects_states_for_accepts(enum nfa_kind k) {
|
||||||
|
switch (k) {
|
||||||
|
case NFA_PREFIX:
|
||||||
|
case NFA_INFIX:
|
||||||
|
case NFA_EAGER_PREFIX:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
@ -94,6 +117,32 @@ bool has_managed_reports(enum nfa_kind k) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||||
|
|
||||||
|
inline
|
||||||
|
std::string to_string(nfa_kind k) {
|
||||||
|
switch (k) {
|
||||||
|
case NFA_PREFIX:
|
||||||
|
return "PREFIX";
|
||||||
|
case NFA_INFIX:
|
||||||
|
return "INFIX";
|
||||||
|
case NFA_SUFFIX:
|
||||||
|
return "SUFFIX";
|
||||||
|
case NFA_OUTFIX:
|
||||||
|
return "OUTFIX";
|
||||||
|
case NFA_REV_PREFIX:
|
||||||
|
return "REV_PREFIX";
|
||||||
|
case NFA_OUTFIX_RAW:
|
||||||
|
return "OUTFIX_RAW";
|
||||||
|
case NFA_EAGER_PREFIX:
|
||||||
|
return "EAGER_PREFIX";
|
||||||
|
}
|
||||||
|
assert(0);
|
||||||
|
return "?";
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
676
src/nfa/sheng.c
Normal file
676
src/nfa/sheng.c
Normal file
@ -0,0 +1,676 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sheng.h"
|
||||||
|
|
||||||
|
#include "accel.h"
|
||||||
|
#include "sheng_internal.h"
|
||||||
|
#include "nfa_api.h"
|
||||||
|
#include "nfa_api_queue.h"
|
||||||
|
#include "nfa_internal.h"
|
||||||
|
#include "util/bitutils.h"
|
||||||
|
#include "util/compare.h"
|
||||||
|
#include "util/join.h"
|
||||||
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
|
/** \brief Selects how runSheng() treats matches found while scanning. */
enum MatchMode {
    CALLBACK_OUTPUT = 0, /**< scan and report all matches (nfaExecSheng0_Q) */
    STOP_AT_MATCH = 1,   /**< scan until the first match (nfaExecSheng0_Q2) */
    NO_MATCHES = 2       /**< just scan the buffer (nfaExecSheng0_QR) */
};
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const struct sheng *get_sheng(const struct NFA *n) {
|
||||||
|
return (const struct sheng *)getImplNfa(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) {
|
||||||
|
u32 offset = sh->aux_offset - sizeof(struct NFA) +
|
||||||
|
(id & SHENG_STATE_MASK) * sizeof(struct sstate_aux);
|
||||||
|
DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
|
||||||
|
id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA));
|
||||||
|
return (const struct sstate_aux *)((const char *) sh + offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const union AccelAux *get_accel(const struct sheng *sh, u8 id) {
|
||||||
|
const struct sstate_aux *saux = get_aux(sh, id);
|
||||||
|
DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
|
||||||
|
const union AccelAux *aux = (const union AccelAux *)
|
||||||
|
((const char *)sh + saux->accel - sizeof(struct NFA));
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const struct report_list *get_rl(const struct sheng *sh,
|
||||||
|
const struct sstate_aux *aux) {
|
||||||
|
DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
|
||||||
|
return (const struct report_list *)
|
||||||
|
((const char *)sh + aux->accept - sizeof(struct NFA));
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const struct report_list *get_eod_rl(const struct sheng *sh,
|
||||||
|
const struct sstate_aux *aux) {
|
||||||
|
DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
|
||||||
|
return (const struct report_list *)
|
||||||
|
((const char *)sh + aux->accept_eod - sizeof(struct NFA));
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
|
||||||
|
ReportID report) {
|
||||||
|
assert(sh && aux);
|
||||||
|
|
||||||
|
const struct report_list *rl = get_rl(sh, aux);
|
||||||
|
assert(ISALIGNED_N(rl, 4));
|
||||||
|
|
||||||
|
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < rl->count; i++) {
|
||||||
|
if (rl->report[i] == report) {
|
||||||
|
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) {
|
||||||
|
DEBUG_PRINTF("reporting %u\n", r);
|
||||||
|
if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
|
}
|
||||||
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||||
|
const u8 state, u64a loc, u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id, char eod) {
|
||||||
|
DEBUG_PRINTF("reporting matches @ %llu\n", loc);
|
||||||
|
|
||||||
|
if (!eod && state == *cached_accept_state) {
|
||||||
|
DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
|
||||||
|
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
|
}
|
||||||
|
|
||||||
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
|
}
|
||||||
|
const struct sstate_aux *aux = get_aux(sh, state);
|
||||||
|
const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux);
|
||||||
|
assert(ISALIGNED(rl));
|
||||||
|
|
||||||
|
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||||
|
u32 count = rl->count;
|
||||||
|
|
||||||
|
if (!eod && count == 1) {
|
||||||
|
*cached_accept_state = state;
|
||||||
|
*cached_accept_id = rl->report[0];
|
||||||
|
|
||||||
|
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
|
||||||
|
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
|
}
|
||||||
|
|
||||||
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u32 i = 0; i < count; i++) {
|
||||||
|
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||||
|
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING; /* termination requested */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* include Sheng function definitions */
|
||||||
|
#include "sheng_defs.h"
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||||
|
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||||
|
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||||
|
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
|
||||||
|
(u64a)(end - start), offset);
|
||||||
|
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||||
|
(s64a)(end - cur_buf));
|
||||||
|
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||||
|
!!has_accel, !!single);
|
||||||
|
int rv;
|
||||||
|
/* scan and report all matches */
|
||||||
|
if (can_die) {
|
||||||
|
if (has_accel) {
|
||||||
|
rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
} else {
|
||||||
|
rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||||
|
scanned);
|
||||||
|
} else {
|
||||||
|
if (has_accel) {
|
||||||
|
rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
} else {
|
||||||
|
rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
rv = sheng_co(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||||
|
scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
return MO_ALIVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||||
|
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||||
|
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||||
|
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
|
||||||
|
(u64a)(end - start), offset);
|
||||||
|
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||||
|
(s64a)(end - cur_buf));
|
||||||
|
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||||
|
!!has_accel, !!single);
|
||||||
|
/* just scan the buffer */
|
||||||
|
if (can_die) {
|
||||||
|
if (has_accel) {
|
||||||
|
sheng4_nmda(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start, end,
|
||||||
|
scanned);
|
||||||
|
} else {
|
||||||
|
sheng4_nmd(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start, end,
|
||||||
|
scanned);
|
||||||
|
}
|
||||||
|
sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||||
|
single, offset, cur_buf, *scanned, end, scanned);
|
||||||
|
} else {
|
||||||
|
sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||||
|
single, offset, cur_buf, start, end, scanned);
|
||||||
|
sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||||
|
single, offset, cur_buf, *scanned, end, scanned);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||||
|
u64a offset, u8 *const cached_accept_state,
|
||||||
|
ReportID *const cached_accept_id, const u8 *cur_buf,
|
||||||
|
const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
|
||||||
|
u8 single, const u8 **scanned, u8 *state) {
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
|
||||||
|
(u64a)(end - start), offset);
|
||||||
|
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||||
|
(s64a)(end - cur_buf));
|
||||||
|
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||||
|
!!has_accel, !!single);
|
||||||
|
int rv;
|
||||||
|
/* scan until first match */
|
||||||
|
if (can_die) {
|
||||||
|
if (has_accel) {
|
||||||
|
rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
} else {
|
||||||
|
rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
/* if we stopped before we expected, we found a match */
|
||||||
|
if (rv == MO_MATCHES_PENDING) {
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, *scanned,
|
||||||
|
end, scanned);
|
||||||
|
} else {
|
||||||
|
if (has_accel) {
|
||||||
|
rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
} else {
|
||||||
|
rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, start,
|
||||||
|
end, scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
/* if we stopped before we expected, we found a match */
|
||||||
|
if (rv == MO_MATCHES_PENDING) {
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
|
||||||
|
rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||||
|
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||||
|
scanned);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
/* if we stopped before we expected, we found a match */
|
||||||
|
if (rv == MO_MATCHES_PENDING) {
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
return MO_ALIVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static never_inline
|
||||||
|
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
|
||||||
|
enum MatchMode mode) {
|
||||||
|
u8 state = *(u8 *)q->state;
|
||||||
|
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
|
||||||
|
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
|
||||||
|
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
|
||||||
|
|
||||||
|
u8 cached_accept_state = 0;
|
||||||
|
ReportID cached_accept_id = 0;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("starting Sheng execution in state %u\n",
|
||||||
|
state & SHENG_STATE_MASK);
|
||||||
|
|
||||||
|
if (q->report_current) {
|
||||||
|
DEBUG_PRINTF("reporting current pending matches\n");
|
||||||
|
assert(sh);
|
||||||
|
|
||||||
|
q->report_current = 0;
|
||||||
|
|
||||||
|
int rv;
|
||||||
|
if (single) {
|
||||||
|
rv = fireSingleReport(q->cb, q->context, sh->report,
|
||||||
|
q_cur_offset(q));
|
||||||
|
} else {
|
||||||
|
rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
|
||||||
|
&cached_accept_state, &cached_accept_id, 0);
|
||||||
|
}
|
||||||
|
if (rv == MO_HALT_MATCHING) {
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("proceeding with matching\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(q_cur_type(q) == MQE_START);
|
||||||
|
s64a start = q_cur_loc(q);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
|
||||||
|
mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
|
||||||
|
mode == NO_MATCHES ? "NO MATCHES" :
|
||||||
|
mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
|
||||||
|
|
||||||
|
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||||
|
q_cur_type(q) == MQE_START ? "START" :
|
||||||
|
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||||
|
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||||
|
|
||||||
|
const u8* cur_buf;
|
||||||
|
if (start < 0) {
|
||||||
|
DEBUG_PRINTF("negative location, scanning history\n");
|
||||||
|
DEBUG_PRINTF("min location: %zd\n", -q->hlength);
|
||||||
|
cur_buf = q->history + q->hlength;
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||||
|
DEBUG_PRINTF("max location: %lli\n", b_end);
|
||||||
|
cur_buf = q->buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if we our queue event is past our end */
|
||||||
|
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||||
|
DEBUG_PRINTF("current location past buffer end\n");
|
||||||
|
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||||
|
q->items[q->cur].location = b_end;
|
||||||
|
return MO_ALIVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
q->cur++;
|
||||||
|
|
||||||
|
s64a cur_start = start;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||||
|
q_cur_type(q) == MQE_START ? "START" :
|
||||||
|
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||||
|
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||||
|
s64a end = q_cur_loc(q);
|
||||||
|
if (mode != NO_MATCHES) {
|
||||||
|
end = MIN(end, b_end);
|
||||||
|
}
|
||||||
|
assert(end <= (s64a) q->length);
|
||||||
|
s64a cur_end = end;
|
||||||
|
|
||||||
|
/* we may cross the border between history and current buffer */
|
||||||
|
if (cur_start < 0) {
|
||||||
|
cur_end = MIN(0, cur_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
|
||||||
|
|
||||||
|
/* don't scan zero length buffer */
|
||||||
|
if (cur_start != cur_end) {
|
||||||
|
const u8 * scanned = cur_buf;
|
||||||
|
char rv;
|
||||||
|
|
||||||
|
/* if we're in nomatch mode or if we're scanning history buffer */
|
||||||
|
if (mode == NO_MATCHES ||
|
||||||
|
(cur_start < 0 && mode == CALLBACK_OUTPUT)) {
|
||||||
|
runShengNm(sh, q->cb, q->context, q->offset,
|
||||||
|
&cached_accept_state, &cached_accept_id, cur_buf,
|
||||||
|
cur_buf + cur_start, cur_buf + cur_end, can_die,
|
||||||
|
has_accel, single, &scanned, &state);
|
||||||
|
} else if (mode == CALLBACK_OUTPUT) {
|
||||||
|
rv = runShengCb(sh, q->cb, q->context, q->offset,
|
||||||
|
&cached_accept_state, &cached_accept_id,
|
||||||
|
cur_buf, cur_buf + cur_start, cur_buf + cur_end,
|
||||||
|
can_die, has_accel, single, &scanned, &state);
|
||||||
|
if (rv == MO_DEAD) {
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n",
|
||||||
|
state & SHENG_STATE_MASK);
|
||||||
|
return MO_DEAD;
|
||||||
|
}
|
||||||
|
} else if (mode == STOP_AT_MATCH) {
|
||||||
|
rv = runShengSam(sh, q->cb, q->context, q->offset,
|
||||||
|
&cached_accept_state, &cached_accept_id,
|
||||||
|
cur_buf, cur_buf + cur_start,
|
||||||
|
cur_buf + cur_end, can_die, has_accel, single,
|
||||||
|
&scanned, &state);
|
||||||
|
if (rv == MO_DEAD) {
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n",
|
||||||
|
state & SHENG_STATE_MASK);
|
||||||
|
return rv;
|
||||||
|
} else if (rv == MO_MATCHES_PENDING) {
|
||||||
|
assert(q->cur);
|
||||||
|
DEBUG_PRINTF("found a match, setting q location to %zd\n",
|
||||||
|
scanned - cur_buf + 1);
|
||||||
|
q->cur--;
|
||||||
|
q->items[q->cur].type = MQE_START;
|
||||||
|
q->items[q->cur].location =
|
||||||
|
scanned - cur_buf + 1; /* due to exiting early */
|
||||||
|
*(u8 *)q->state = state;
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n",
|
||||||
|
state & SHENG_STATE_MASK);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert(!"invalid scanning mode!");
|
||||||
|
}
|
||||||
|
assert(scanned == cur_buf + cur_end);
|
||||||
|
|
||||||
|
cur_start = cur_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if we our queue event is past our end */
|
||||||
|
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||||
|
DEBUG_PRINTF("current location past buffer end\n");
|
||||||
|
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||||
|
q->cur--;
|
||||||
|
q->items[q->cur].type = MQE_START;
|
||||||
|
q->items[q->cur].location = b_end;
|
||||||
|
*(u8 *)q->state = state;
|
||||||
|
return MO_ALIVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* crossing over into actual buffer */
|
||||||
|
if (cur_start == 0) {
|
||||||
|
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||||
|
DEBUG_PRINTF("max offset: %lli\n", b_end);
|
||||||
|
cur_buf = q->buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* continue scanning the same buffer */
|
||||||
|
if (end != cur_end) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (q_cur_type(q)) {
|
||||||
|
case MQE_END:
|
||||||
|
*(u8 *)q->state = state;
|
||||||
|
q->cur++;
|
||||||
|
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||||
|
if (can_die) {
|
||||||
|
return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
|
||||||
|
}
|
||||||
|
return MO_ALIVE;
|
||||||
|
case MQE_TOP:
|
||||||
|
if (q->offset + cur_start == 0) {
|
||||||
|
DEBUG_PRINTF("Anchored start, going to state %u\n",
|
||||||
|
sh->anchored);
|
||||||
|
state = sh->anchored;
|
||||||
|
} else {
|
||||||
|
u8 new_state = get_aux(sh, state)->top;
|
||||||
|
DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
|
||||||
|
new_state & SHENG_STATE_MASK);
|
||||||
|
state = new_state;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(!"invalid queue event");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
q->cur++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Block-mode scan of \p buffer from the anchored start state.
 *
 * Runs the whole buffer in callback mode. If the scan is not halted, fires the
 * EOD reports of the final state when it has any; the result of that EOD
 * callback is not inspected.
 *
 * \return MO_DEAD if the callback halted matching or the final state has
 * SHENG_STATE_DEAD set, otherwise MO_ALIVE.
 */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context) {
    DEBUG_PRINTF("smallwrite Sheng\n");
    assert(n->type == SHENG_NFA_0);

    const struct sheng *sh = get_sheng(n);
    u8 state = sh->anchored;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    s64a end = length;
    const u8 *scanned;

    /* scan and report all matches */
    int rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
                        &cached_accept_id, buffer, buffer, buffer + end,
                        can_die, has_accel, single, &scanned, &state);
    if (rv == MO_DEAD) {
        DEBUG_PRINTF("exiting in state %u\n",
                     state & SHENG_STATE_MASK);
        return MO_DEAD;
    }

    DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);

    const struct sstate_aux *aux = get_aux(sh, state);
    if (aux->accept_eod) {
        DEBUG_PRINTF("Reporting EOD matches\n");
        fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
                    &cached_accept_id, 1);
    }

    if (state & SHENG_STATE_DEAD) {
        return MO_DEAD;
    }
    return MO_ALIVE;
}
|
||||||
|
|
||||||
|
/* Queue execution up to end, run through runSheng in CALLBACK_OUTPUT mode. */
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) {
    return runSheng(get_sheng(n), q, end, CALLBACK_OUTPUT);
}
|
||||||
|
|
||||||
|
/* Queue execution up to end, run through runSheng in STOP_AT_MATCH mode. */
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) {
    return runSheng(get_sheng(n), q, end, STOP_AT_MATCH);
}
|
||||||
|
|
||||||
|
/*
 * Runs the queue through runSheng in NO_MATCHES mode (end location 0), then
 * checks whether the resulting state accepts the given report.
 *
 * Returns MO_MATCHES_PENDING if runSheng returned non-zero and the state is in
 * an accept for report; otherwise returns runSheng's own result.
 */
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) {
    assert(q_cur_type(q) == MQE_START);

    const char run_rv = runSheng(get_sheng(n), q, 0 /* end */, NO_MATCHES);
    if (!run_rv) {
        return run_rv;
    }

    if (nfaExecSheng0_inAccept(n, report, q)) {
        return MO_MATCHES_PENDING;
    }
    return run_rv;
}
|
||||||
|
|
||||||
|
/*
 * Tests whether the state currently stored in q->state accepts report.
 *
 * Returns 0 when the state's aux record has no accept entry; otherwise
 * returns whatever shengHasAccept yields for that record and report.
 */
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
                            struct mq *q) {
    assert(n && q);

    const u8 s = *(const u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));

    const struct sheng *sh = get_sheng(n);
    const struct sstate_aux *aux = get_aux(sh, s);

    if (aux->accept) {
        return shengHasAccept(sh, aux, report);
    }
    return 0;
}
|
||||||
|
|
||||||
|
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||||
|
assert(n && q);
|
||||||
|
|
||||||
|
const struct sheng *sh = get_sheng(n);
|
||||||
|
u8 s = *(const u8 *)q->state;
|
||||||
|
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||||
|
|
||||||
|
const struct sstate_aux *aux = get_aux(sh, s);
|
||||||
|
return !!aux->accept;
|
||||||
|
}
|
||||||
|
|
||||||
|
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
|
||||||
|
UNUSED const char *streamState, u64a offset,
|
||||||
|
NfaCallback cb, void *ctxt) {
|
||||||
|
assert(nfa);
|
||||||
|
|
||||||
|
const struct sheng *sh = get_sheng(nfa);
|
||||||
|
u8 s = *(const u8 *)state;
|
||||||
|
DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||||
|
|
||||||
|
const struct sstate_aux *aux = get_aux(sh, s);
|
||||||
|
|
||||||
|
if (!aux->accept_eod) {
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||||
|
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
||||||
|
NfaCallback cb = q->cb;
|
||||||
|
void *ctxt = q->context;
|
||||||
|
u8 s = *(u8 *)q->state;
|
||||||
|
const struct sstate_aux *aux = get_aux(sh, s);
|
||||||
|
u64a offset = q_cur_offset(q);
|
||||||
|
u8 cached_state_id = 0;
|
||||||
|
ReportID cached_report_id = 0;
|
||||||
|
assert(q_cur_type(q) == MQE_START);
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
||||||
|
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||||
|
} else {
|
||||||
|
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
|
||||||
|
&cached_report_id, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Writes the initial state byte into *state: the anchored start state when
 * offset is zero, the floating start state otherwise. key is not used.
 *
 * Returns 1 unless the chosen start state carries SHENG_STATE_DEAD, in which
 * case it returns 0.
 */
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, UNUSED u8 key) {
    const struct sheng *sh = get_sheng(nfa);
    u8 *s = (u8 *)state;

    if (offset) {
        *s = sh->floating;
    } else {
        *s = sh->anchored;
    }

    return (*s & SHENG_STATE_DEAD) ? 0 : 1;
}
|
||||||
|
|
||||||
|
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) {
|
||||||
|
assert(nfa->scratchStateSize == 1);
|
||||||
|
|
||||||
|
/* starting in floating state */
|
||||||
|
const struct sheng *sh = get_sheng(nfa);
|
||||||
|
*(u8 *)q->state = sh->floating;
|
||||||
|
DEBUG_PRINTF("starting in floating state\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Saves the queue's scratch state into its stream state. Both are asserted to
 * be a single byte, so this is a plain byte copy. loc is not used.
 * Always returns 0.
 */
char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa,
                                      const struct mq *q, UNUSED s64a loc) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);

    u8 *stream_byte = (u8 *)q->streamState;
    const u8 *scratch_byte = (const u8 *)q->state;
    *stream_byte = *scratch_byte;
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Restores scratch state (dest) from stream state (src). Both are asserted to
 * be a single byte, so this is a plain byte copy. offset and key are not used.
 * Always returns 0.
 */
char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest,
                               const void *src, UNUSED u64a offset,
                               UNUSED u8 key) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);

    u8 *out_byte = (u8 *)dest;
    const u8 *in_byte = (const u8 *)src;
    *out_byte = *in_byte;
    return 0;
}
|
61
src/nfa/sheng.h
Normal file
61
src/nfa/sheng.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHENG_H_
#define SHENG_H_

#include "callback.h"
#include "ue2common.h"

struct NFA;
struct mq;

/* API entries that Sheng does not implement */
#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL
#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL

/* block-mode scan over a single buffer */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context);

/* queue-driven execution */
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report);

/* accept queries and reporting for the current queue state */
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
                           const char *streamState, u64a offset,
                           NfaCallback callback, void *context);

/* state initialisation, compression and expansion */
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q);
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, u8 key);
char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q,
                                      s64a loc);
char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest,
                               const void *src, u64a offset, u8 key);

#endif /* SHENG_H_ */
|
353
src/nfa/sheng_defs.h
Normal file
353
src/nfa/sheng_defs.h
Normal file
@ -0,0 +1,353 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHENG_DEFS_H
|
||||||
|
#define SHENG_DEFS_H
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Utility functions used by various versions of Sheng engine
|
||||||
|
*/
|
||||||
|
static really_inline
|
||||||
|
u8 isDeadState(const u8 a) {
|
||||||
|
return a & SHENG_STATE_DEAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isAcceptState(const u8 a) {
|
||||||
|
return a & SHENG_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 isAccelState(const u8 a) {
|
||||||
|
return a & SHENG_STATE_ACCEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||||
|
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* these functions should be optimized out, used by NO_MATCHES mode */
|
||||||
|
static really_inline
|
||||||
|
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
|
||||||
|
UNUSED const u8 d) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
u8 dummyFunc(UNUSED const u8 a) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Sheng function definitions for single byte loops.
 *
 * Each stanza sets the parameter macros read by sheng_impl.h, includes that
 * header to emit one function named by SHENG_IMPL, and then undefines the
 * parameters again so the next stanza starts clean.
 */

/* sheng_cod: callback output, can die */
#define SHENG_IMPL sheng_cod
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define DEAD_FUNC isDeadState
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng_co: callback output, can't die */
#define SHENG_IMPL sheng_co
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define DEAD_FUNC dummyFunc
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng_samd: stop at match, can die */
#define SHENG_IMPL sheng_samd
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define DEAD_FUNC isDeadState
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng_sam: stop at match, can't die */
#define SHENG_IMPL sheng_sam
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define DEAD_FUNC dummyFunc
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng_nmd: no match, can die */
#define SHENG_IMPL sheng_nmd
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC dummyFunc
#define DEAD_FUNC isDeadState
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng_nm: no match, can't die */
#define SHENG_IMPL sheng_nm
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC dummyFunc
#define DEAD_FUNC dummyFunc
#include "sheng_impl.h"
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL
|
||||||
|
|
||||||
|
/*
 * Sheng function definitions for 4-byte loops.
 *
 * Each stanza sets the parameter macros read by sheng_impl4.h, includes that
 * header to emit one function named by SHENG_IMPL, and then undefines the
 * parameters again so the next stanza starts clean.
 */

/* sheng4_coda: callback output, can die, accelerated */
#define SHENG_IMPL sheng4_coda
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_cod: callback output, can die, not accelerated */
#define SHENG_IMPL sheng4_cod
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_coa: callback output, can't die, accelerated */
#define SHENG_IMPL sheng4_coa
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_co: callback output, can't die, not accelerated */
#define SHENG_IMPL sheng4_co
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_samda: stop at match, can die, accelerated */
#define SHENG_IMPL sheng4_samda
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_samd: stop at match, can die, not accelerated */
#define SHENG_IMPL sheng4_samd
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_sama: stop at match, can't die, accelerated */
#define SHENG_IMPL sheng4_sama
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_sam: stop at match, can't die, not accelerated */
#define SHENG_IMPL sheng4_sam
#define STOP_AT_MATCH 1
#define ACCEPT_FUNC isAcceptState
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/*
 * The no-match variants use the dummy "interesting" predicate, so their dead
 * and accel checks are bound to the OUTER_* hooks instead of the INNER_* ones.
 */

/* sheng4_nmda: no match, can die, accelerated */
#define SHENG_IMPL sheng4_nmda
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC dummyFunc
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* sheng4_nmd: no match, can die, not accelerated */
#define SHENG_IMPL sheng4_nmd
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC dummyFunc
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL

/* there is no performance benefit in accelerating a no-match case that can't
 * die */

/* sheng4_nm: no match, can't die */
#define SHENG_IMPL sheng4_nm
#define STOP_AT_MATCH 0
#define ACCEPT_FUNC dummyFunc
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#include "sheng_impl4.h"
#undef OUTER_ACCEL_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_DEAD_FUNC
#undef INTERESTING_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#undef SHENG_IMPL
|
||||||
|
|
||||||
|
#endif // SHENG_DEFS_H
|
97
src/nfa/sheng_impl.h
Normal file
97
src/nfa/sheng_impl.h
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
 * Before including this template header, the following macros must be defined:
|
||||||
|
*
|
||||||
|
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||||
|
* - DEAD_FUNC (name of the function checking for dead states)
|
||||||
|
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||||
|
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* byte-by-byte version. we don't do byte-by-byte death checking as it's
|
||||||
|
* pretty pointless to do it over a buffer that's at most 3 bytes long */
|
||||||
|
static really_inline
|
||||||
|
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||||
|
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFA execution in state %u\n",
|
||||||
|
*state & SHENG_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
if (DEAD_FUNC(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||||
|
|
||||||
|
m128 cur_state = set16x8(*state);
|
||||||
|
const m128 *masks = s->shuffle_masks;
|
||||||
|
|
||||||
|
while (likely(cur_buf != end)) {
|
||||||
|
const u8 c = *cur_buf;
|
||||||
|
const m128 shuffle_mask = masks[c];
|
||||||
|
cur_state = pshufb(shuffle_mask, cur_state);
|
||||||
|
const u8 tmp = movd(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4,
|
||||||
|
tmp & 0xF);
|
||||||
|
|
||||||
|
if (unlikely(ACCEPT_FUNC(tmp))) {
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK);
|
||||||
|
u64a match_offset = base_offset + (cur_buf - buf) + 1;
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(u64a)(cur_buf - start));
|
||||||
|
*state = tmp;
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports(s, cb, ctxt, tmp, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cur_buf++;
|
||||||
|
}
|
||||||
|
*state = movd(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
284
src/nfa/sheng_impl4.h
Normal file
284
src/nfa/sheng_impl4.h
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
 * Before including this template header, the following macros must be defined:
|
||||||
|
*
|
||||||
|
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||||
|
* - INTERESTING_FUNC (name of the function checking for accept, accel or dead
|
||||||
|
* states)
|
||||||
|
* - INNER_DEAD_FUNC (name of the inner function checking for dead states)
|
||||||
|
* - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
|
||||||
|
* - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
|
||||||
|
* - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
|
||||||
|
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||||
|
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* unrolled 4-byte-at-a-time version.
|
||||||
|
*
|
||||||
|
* we put innerDeadFunc inside interestingFunc() block so that we don't pay for
|
||||||
|
* dead states checking. however, if interestingFunc is dummy, innerDeadFunc
|
||||||
|
* gets lost with it, so we need an additional check outside the
|
||||||
|
* interestingFunc() branch - it's normally dummy so we don't pay for it, but
|
||||||
|
* when interestingFunc is dummy, outerDeadFunc should be set if we want to
|
||||||
|
* check for dead states.
|
||||||
|
*
|
||||||
|
* also, deadFunc only checks the last known state, but since we can't ever get
|
||||||
|
* out of the dead state and we don't really care where we died, it's not a
|
||||||
|
* problem.
|
||||||
|
*/
|
||||||
|
static really_inline
|
||||||
|
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||||
|
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||||
|
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||||
|
const u8 *end, const u8 **scan_end) {
|
||||||
|
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
|
||||||
|
*state & SHENG_STATE_MASK);
|
||||||
|
const u8 *cur_buf = start;
|
||||||
|
const u8 *min_accel_dist = start;
|
||||||
|
base_offset++;
|
||||||
|
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
|
||||||
|
|
||||||
|
if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ 0\n");
|
||||||
|
const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf, end);
|
||||||
|
if (new_offset < cur_buf + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
|
||||||
|
}
|
||||||
|
if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
|
||||||
|
DEBUG_PRINTF("Dead on arrival\n");
|
||||||
|
*scan_end = end;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
|
||||||
|
m128 cur_state = set16x8(*state);
|
||||||
|
const m128 *masks = s->shuffle_masks;
|
||||||
|
|
||||||
|
while (likely(end - cur_buf >= 4)) {
|
||||||
|
const u8 *b1 = cur_buf;
|
||||||
|
const u8 *b2 = cur_buf + 1;
|
||||||
|
const u8 *b3 = cur_buf + 2;
|
||||||
|
const u8 *b4 = cur_buf + 3;
|
||||||
|
const u8 c1 = *b1;
|
||||||
|
const u8 c2 = *b2;
|
||||||
|
const u8 c3 = *b3;
|
||||||
|
const u8 c4 = *b4;
|
||||||
|
|
||||||
|
const m128 shuffle_mask1 = masks[c1];
|
||||||
|
cur_state = pshufb(shuffle_mask1, cur_state);
|
||||||
|
const u8 a1 = movd(cur_state);
|
||||||
|
|
||||||
|
const m128 shuffle_mask2 = masks[c2];
|
||||||
|
cur_state = pshufb(shuffle_mask2, cur_state);
|
||||||
|
const u8 a2 = movd(cur_state);
|
||||||
|
|
||||||
|
const m128 shuffle_mask3 = masks[c3];
|
||||||
|
cur_state = pshufb(shuffle_mask3, cur_state);
|
||||||
|
const u8 a3 = movd(cur_state);
|
||||||
|
|
||||||
|
const m128 shuffle_mask4 = masks[c4];
|
||||||
|
cur_state = pshufb(shuffle_mask4, cur_state);
|
||||||
|
const u8 a4 = movd(cur_state);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
|
||||||
|
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
|
||||||
|
|
||||||
|
if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
|
||||||
|
if (ACCEPT_FUNC(a1)) {
|
||||||
|
u64a match_offset = base_offset + b1 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a1 & SHENG_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b1 - start));
|
||||||
|
*scan_end = b1;
|
||||||
|
*state = a1;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports(s, cb, ctxt, a1, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC(a2)) {
|
||||||
|
u64a match_offset = base_offset + b2 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a2 & SHENG_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b2 - start));
|
||||||
|
*scan_end = b2;
|
||||||
|
*state = a2;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports(s, cb, ctxt, a2, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC(a3)) {
|
||||||
|
u64a match_offset = base_offset + b3 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a3 & SHENG_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b3 - start));
|
||||||
|
*scan_end = b3;
|
||||||
|
*state = a3;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports(s, cb, ctxt, a3, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ACCEPT_FUNC(a4)) {
|
||||||
|
u64a match_offset = base_offset + b4 - buf;
|
||||||
|
DEBUG_PRINTF("Accept state %u reached\n",
|
||||||
|
a4 & SHENG_STATE_MASK);
|
||||||
|
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||||
|
if (STOP_AT_MATCH) {
|
||||||
|
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||||
|
(s64a)(b4 - start));
|
||||||
|
*scan_end = b4;
|
||||||
|
*state = a4;
|
||||||
|
return MO_MATCHES_PENDING;
|
||||||
|
}
|
||||||
|
if (single) {
|
||||||
|
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||||
|
MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (fireReports(s, cb, ctxt, a4, match_offset,
|
||||||
|
cached_accept_state, cached_accept_id,
|
||||||
|
0) == MO_HALT_MATCHING) {
|
||||||
|
return MO_HALT_MATCHING;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (INNER_DEAD_FUNC(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
||||||
|
if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
const union AccelAux *aaux =
|
||||||
|
get_accel(s, a4 & SHENG_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||||
|
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||||
|
(u64a)(new_offset - cur_buf - 4));
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (OUTER_DEAD_FUNC(a4)) {
|
||||||
|
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
|
||||||
|
*scan_end = end;
|
||||||
|
*state = a4;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
};
|
||||||
|
if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
|
||||||
|
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||||
|
const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
|
||||||
|
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||||
|
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||||
|
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||||
|
} else {
|
||||||
|
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||||
|
(u64a)(min_accel_dist - start));
|
||||||
|
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||||
|
(u64a)(new_offset - cur_buf - 4));
|
||||||
|
cur_buf = new_offset;
|
||||||
|
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
cur_buf += 4;
|
||||||
|
}
|
||||||
|
*state = movd(cur_state);
|
||||||
|
*scan_end = cur_buf;
|
||||||
|
return MO_CONTINUE_MATCHING;
|
||||||
|
}
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,44 +26,45 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
#ifndef SHENG_INTERNAL_H_
|
||||||
* \brief LimEx NFA: 512-bit SIMD runtime implementations.
|
#define SHENG_INTERNAL_H_
|
||||||
*/
|
|
||||||
|
|
||||||
//#define DEBUG_INPUT
|
|
||||||
//#define DEBUG_EXCEPTIONS
|
|
||||||
|
|
||||||
#include "limex.h"
|
|
||||||
|
|
||||||
#include "accel.h"
|
|
||||||
#include "limex_internal.h"
|
|
||||||
#include "nfa_internal.h"
|
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
#include "util/bitutils.h"
|
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
// Common code
|
#define SHENG_STATE_ACCEPT 0x10
|
||||||
#include "limex_runtime.h"
|
#define SHENG_STATE_DEAD 0x20
|
||||||
|
#define SHENG_STATE_ACCEL 0x40
|
||||||
|
#define SHENG_STATE_MASK 0xF
|
||||||
|
#define SHENG_STATE_FLAG_MASK 0x70
|
||||||
|
|
||||||
#define SIZE 512
|
#define SHENG_FLAG_SINGLE_REPORT 0x1
|
||||||
#define STATE_T m512
|
#define SHENG_FLAG_CAN_DIE 0x2
|
||||||
#include "limex_exceptional.h"
|
#define SHENG_FLAG_HAS_ACCEL 0x4
|
||||||
|
|
||||||
#define SIZE 512
|
struct report_list {
|
||||||
#define STATE_T m512
|
u32 count;
|
||||||
#include "limex_state_impl.h"
|
ReportID report[];
|
||||||
|
};
|
||||||
|
|
||||||
#define SIZE 512
|
struct sstate_aux {
|
||||||
#define STATE_T m512
|
u32 accept;
|
||||||
#define INLINE_ATTR really_inline
|
u32 accept_eod;
|
||||||
#include "limex_common_impl.h"
|
u32 accel;
|
||||||
|
u32 top;
|
||||||
|
};
|
||||||
|
|
||||||
#define SIZE 512
|
struct sheng {
|
||||||
#define STATE_T m512
|
m128 shuffle_masks[256];
|
||||||
#define SHIFT 6
|
u32 length;
|
||||||
#include "limex_runtime_impl.h"
|
u32 aux_offset;
|
||||||
|
u32 report_offset;
|
||||||
|
u32 accel_offset;
|
||||||
|
u8 n_states;
|
||||||
|
u8 anchored;
|
||||||
|
u8 floating;
|
||||||
|
u8 flags;
|
||||||
|
ReportID report;
|
||||||
|
};
|
||||||
|
|
||||||
#define SIZE 512
|
#endif /* SHENG_INTERNAL_H_ */
|
||||||
#define STATE_T m512
|
|
||||||
#define SHIFT 7
|
|
||||||
#include "limex_runtime_impl.h"
|
|
541
src/nfa/shengcompile.cpp
Normal file
541
src/nfa/shengcompile.cpp
Normal file
@ -0,0 +1,541 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shengcompile.h"
|
||||||
|
|
||||||
|
#include "accel.h"
|
||||||
|
#include "accelcompile.h"
|
||||||
|
#include "shufticompile.h"
|
||||||
|
#include "trufflecompile.h"
|
||||||
|
#include "util/alloc.h"
|
||||||
|
#include "util/bitutils.h"
|
||||||
|
#include "util/charreach.h"
|
||||||
|
#include "util/compare.h"
|
||||||
|
#include "util/container.h"
|
||||||
|
#include "util/order_check.h"
|
||||||
|
#include "util/report_manager.h"
|
||||||
|
#include "util/unaligned.h"
|
||||||
|
|
||||||
|
#include "grey.h"
|
||||||
|
#include "nfa_internal.h"
|
||||||
|
#include "sheng_internal.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
#include "util/compile_context.h"
|
||||||
|
#include "util/make_unique.h"
|
||||||
|
#include "util/verify_types.h"
|
||||||
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using boost::adaptors::map_keys;
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
|
||||||
|
|
||||||
|
/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than for the NFA, as we don't have a budget and the NFA
 * cheats on stop characters for sets of states. */
|
||||||
|
#define ACCEL_DFA_MAX_STOP_CHAR 160
|
||||||
|
|
||||||
|
/** Maximum tolerated number of escape characters from an sds accel state.
 * Larger than for normal states, as accelerating sds is important. Matches
 * the NFA value. */
|
||||||
|
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
||||||
|
|
||||||
|
struct dfa_info {
|
||||||
|
accel_dfa_build_strat &strat;
|
||||||
|
raw_dfa &raw;
|
||||||
|
vector<dstate> &states;
|
||||||
|
dstate &floating;
|
||||||
|
dstate &anchored;
|
||||||
|
bool can_die;
|
||||||
|
|
||||||
|
explicit dfa_info(accel_dfa_build_strat &s)
|
||||||
|
: strat(s), raw(strat.get_raw()), states(raw.states),
|
||||||
|
floating(states[raw.start_floating]),
|
||||||
|
anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
|
||||||
|
|
||||||
|
// returns adjusted size
|
||||||
|
size_t size() const {
|
||||||
|
return can_die ? states.size() : states.size() - 1;
|
||||||
|
}
|
||||||
|
// expects adjusted index
|
||||||
|
dstate &operator[](dstate_id_t idx) {
|
||||||
|
return states[raw_id(idx)];
|
||||||
|
}
|
||||||
|
dstate &top(dstate_id_t idx) {
|
||||||
|
if (isDead(idx)) {
|
||||||
|
return floating;
|
||||||
|
}
|
||||||
|
return next(idx, TOP);
|
||||||
|
}
|
||||||
|
dstate &next(dstate_id_t idx, u16 chr) {
|
||||||
|
auto &src = (*this)[idx];
|
||||||
|
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||||
|
return states[next_id];
|
||||||
|
}
|
||||||
|
// get original idx from adjusted idx
|
||||||
|
dstate_id_t raw_id(dstate_id_t idx) {
|
||||||
|
assert(idx < size());
|
||||||
|
// if DFA can't die, shift all indices left by 1
|
||||||
|
return can_die ? idx : idx + 1;
|
||||||
|
}
|
||||||
|
bool isDead(dstate &state) {
|
||||||
|
return raw_id(state.impl_id) == DEAD_STATE;
|
||||||
|
}
|
||||||
|
bool isDead(dstate_id_t idx) {
|
||||||
|
return raw_id(idx) == DEAD_STATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static bool dfaCanDie(raw_dfa &rdfa) {
|
||||||
|
for (unsigned chr = 0; chr < 256; chr++) {
|
||||||
|
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||||
|
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||||
|
if (succ == DEAD_STATE) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct raw_report_list {
|
||||||
|
flat_set<ReportID> reports;
|
||||||
|
|
||||||
|
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||||
|
const ReportManager &rm, bool do_remap) {
|
||||||
|
if (do_remap) {
|
||||||
|
for (auto &id : reports_in) {
|
||||||
|
reports.insert(rm.getProgramOffset(id));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
reports = reports_in;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator<(const raw_report_list &b) const {
|
||||||
|
return reports < b.reports;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct raw_report_info_impl : public raw_report_info {
|
||||||
|
vector<raw_report_list> rl;
|
||||||
|
u32 getReportListSize() const override;
|
||||||
|
size_t size() const override;
|
||||||
|
void fillReportLists(NFA *n, size_t base_offset,
|
||||||
|
std::vector<u32> &ro /* out */) const override;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Total bytes needed to serialise every report list: one report_list header
 * plus one ReportID per report, for each list. */
u32 raw_report_info_impl::getReportListSize() const {
    u32 total = 0;
    for (const auto &list : rl) {
        total += sizeof(report_list) + sizeof(ReportID) * list.reports.size();
    }
    return total;
}
|
||||||
|
|
||||||
|
size_t raw_report_info_impl::size() const {
|
||||||
|
return rl.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Serialise each report list into the NFA at consecutive offsets starting at
 * \p base_offset (bytes from \p n), appending each list's offset to \p ro.
 */
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
                                           vector<u32> &ro) const {
    size_t offset = base_offset;
    for (const auto &list : rl) {
        ro.push_back(offset);

        report_list *out = (report_list *)((char *)n + offset);
        out->count = verify_u32(list.reports.size());

        u32 slot = 0;
        for (const ReportID report : list.reports) {
            out->report[slot] = report;
            slot++;
        }

        // header followed by the flexible array of report ids
        offset += sizeof(report_list) + sizeof(ReportID) * list.reports.size();
    }
}
|
||||||
|
|
||||||
|
/**
 * Collect the distinct report lists used by the DFA's states.
 *
 * For every state in rdfa.states one entry is appended to \p reports and one
 * to \p reports_eod: the index of that state's (EOD) report list within the
 * returned info, or MO_INVALID_IDX when the state has no such reports.
 * \p isSingleReport is set to 1 when all non-EOD accepts share exactly one
 * report id (which is then stored in \p arbReport), and to 0 otherwise.
 */
unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
                                                  vector<u32> &reports,
                                                  vector<u32> &reports_eod,
                                                  u8 *isSingleReport,
                                                  ReportID *arbReport) const {
    DEBUG_PRINTF("gathering reports\n");

    const bool remap_reports = has_managed_reports(rdfa.kind);

    auto ri = ue2::make_unique<raw_report_info_impl>();
    // report list -> index in ri->rl; shared by the accept and EOD passes
    map<raw_report_list, u32> rev;

    for (const dstate &s : rdfa.states) {
        if (s.reports.empty()) {
            reports.push_back(MO_INVALID_IDX);
            continue;
        }

        raw_report_list rrl(s.reports, rm, remap_reports);
        DEBUG_PRINTF("non empty r\n");
        auto known = rev.find(rrl);
        if (known != rev.end()) {
            reports.push_back(known->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl %zu\n", ri->size());
        const u32 new_index = ri->size();
        rev[rrl] = new_index;
        reports.push_back(new_index);
        ri->rl.push_back(rrl);
    }

    for (const dstate &s : rdfa.states) {
        if (s.reports_eod.empty()) {
            reports_eod.push_back(MO_INVALID_IDX);
            continue;
        }

        DEBUG_PRINTF("non empty r eod\n");
        raw_report_list rrl(s.reports_eod, rm, remap_reports);
        auto known = rev.find(rrl);
        if (known != rev.end()) {
            reports_eod.push_back(known->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
        const u32 new_index = ri->size();
        rev[rrl] = new_index;
        reports_eod.push_back(new_index);
        ri->rl.push_back(rrl);
    }

    assert(!ri->rl.empty()); /* all components should be able to generate
                                reports */
    *arbReport = ri->rl.empty() ? 0 : *ri->rl.begin()->reports.begin();

    /* if we have only a single report id generated from all accepts (not eod)
     * we can take some short cuts */
    set<ReportID> reps;

    for (u32 rl_index : reports) {
        if (rl_index != MO_INVALID_IDX) {
            assert(rl_index < ri->size());
            insert(&reps, ri->rl[rl_index].reports);
        }
    }

    const bool only_one = reps.size() == 1;
    *isSingleReport = only_one ? 1 : 0;
    if (only_one) {
        *arbReport = *reps.begin();
        DEBUG_PRINTF("single -- %u\n", *arbReport);
    }

    return move(ri);
}
|
||||||
|
|
||||||
|
/** Returns ACCEL_DFA_MAX_OFFSET_DEPTH. */
u32 sheng_build_strat::max_allowed_offset_accel() const {
    const u32 depth = ACCEL_DFA_MAX_OFFSET_DEPTH;
    return depth;
}
|
||||||
|
|
||||||
|
/** Returns ACCEL_DFA_MAX_STOP_CHAR. */
u32 sheng_build_strat::max_stop_char() const {
    const u32 limit = ACCEL_DFA_MAX_STOP_CHAR;
    return limit;
}
|
||||||
|
|
||||||
|
/** Returns ACCEL_DFA_MAX_FLOATING_STOP_CHAR. */
u32 sheng_build_strat::max_floating_stop_char() const {
    const u32 limit = ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
    return limit;
}
|
||||||
|
|
||||||
|
size_t sheng_build_strat::accelSize() const {
|
||||||
|
return sizeof(AccelAux);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG
/** Debug helper: print the state id (flag bits masked off) held in each of
 * the \p sz bytes of the shuffle mask built for byte value \p chr. */
static really_inline
void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
    stringstream line;

    for (const u8 *p = buf; p != buf + sz; ++p) {
        line.width(2);
        line << (*p & SHENG_STATE_MASK) << " ";
    }
    DEBUG_PRINTF("chr %3u: %s\n", chr, line.str().c_str());
}
#endif
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||||
|
set<dstate_id_t> *accel_states) {
|
||||||
|
for (dstate_id_t i : accel_escape_info | map_keys) {
|
||||||
|
accel_states->insert(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u8 getShengState(dstate &state, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
u8 s = state.impl_id;
|
||||||
|
if (!state.reports.empty()) {
|
||||||
|
s |= SHENG_STATE_ACCEPT;
|
||||||
|
}
|
||||||
|
if (info.isDead(state)) {
|
||||||
|
s |= SHENG_STATE_DEAD;
|
||||||
|
}
|
||||||
|
if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
|
||||||
|
s |= SHENG_STATE_ACCEL;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillAccelAux(struct NFA *n, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
DEBUG_PRINTF("Filling accel aux structures\n");
|
||||||
|
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||||
|
u32 offset = s->accel_offset;
|
||||||
|
|
||||||
|
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||||
|
dstate_id_t state_id = info.raw_id(i);
|
||||||
|
if (accelInfo.find(state_id) != accelInfo.end()) {
|
||||||
|
s->flags |= SHENG_FLAG_HAS_ACCEL;
|
||||||
|
AccelAux *aux = (AccelAux *)((char *)n + offset);
|
||||||
|
info.strat.buildAccel(state_id, accelInfo[state_id], aux);
|
||||||
|
sstate_aux *saux =
|
||||||
|
(sstate_aux *)((char *)n + s->aux_offset) + state_id;
|
||||||
|
saux->accel = offset;
|
||||||
|
DEBUG_PRINTF("Accel offset: %u\n", offset);
|
||||||
|
offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void populateBasicInfo(struct NFA *n, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
|
||||||
|
u32 report_offset, u32 accel_offset, u32 total_size,
|
||||||
|
u32 dfa_size) {
|
||||||
|
n->length = total_size;
|
||||||
|
n->scratchStateSize = 1;
|
||||||
|
n->streamStateSize = 1;
|
||||||
|
n->nPositions = info.size();
|
||||||
|
n->type = SHENG_NFA_0;
|
||||||
|
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
|
||||||
|
|
||||||
|
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||||
|
s->aux_offset = aux_offset;
|
||||||
|
s->report_offset = report_offset;
|
||||||
|
s->accel_offset = accel_offset;
|
||||||
|
s->n_states = info.size();
|
||||||
|
s->length = dfa_size;
|
||||||
|
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||||
|
|
||||||
|
s->anchored = getShengState(info.anchored, info, accelInfo);
|
||||||
|
s->floating = getShengState(info.floating, info, accelInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||||
|
u32 aux_base = s->aux_offset;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Filling tops for state %u\n", id);
|
||||||
|
|
||||||
|
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||||
|
(char *)aux - (char *)n);
|
||||||
|
|
||||||
|
/* we could conceivably end up in an accept/dead state on a top event,
|
||||||
|
* so mark top as accept/dead state if it indeed is.
|
||||||
|
*/
|
||||||
|
auto &top_state = info.top(id);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
|
||||||
|
|
||||||
|
aux->top = getShengState(top_state, info, accelInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
||||||
|
vector<u32> &reports_eod, vector<u32> &report_offsets) {
|
||||||
|
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||||
|
u32 aux_base = s->aux_offset;
|
||||||
|
auto raw_id = info.raw_id(id);
|
||||||
|
|
||||||
|
auto &state = info[id];
|
||||||
|
|
||||||
|
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
|
||||||
|
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||||
|
(char *)aux - (char *)n);
|
||||||
|
|
||||||
|
aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]];
|
||||||
|
aux->accept_eod =
|
||||||
|
state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]];
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
|
||||||
|
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void fillSingleReport(NFA *n, ReportID r_id) {
|
||||||
|
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||||
|
|
||||||
|
DEBUG_PRINTF("Single report ID: %u\n", r_id);
|
||||||
|
s->report = r_id;
|
||||||
|
s->flags |= SHENG_FLAG_SINGLE_REPORT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void createShuffleMasks(sheng *s, dfa_info &info,
|
||||||
|
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||||
|
for (u16 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[16] = {0};
|
||||||
|
|
||||||
|
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||||
|
auto &succ_state = info.next(idx, chr);
|
||||||
|
|
||||||
|
buf[idx] = getShengState(succ_state, info, accelInfo);
|
||||||
|
}
|
||||||
|
#ifdef DEBUG
|
||||||
|
dumpShuffleMask(chr, buf, sizeof(buf));
|
||||||
|
#endif
|
||||||
|
m128 mask = loadu128(buf);
|
||||||
|
s->shuffle_masks[chr] = mask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_accel_sheng(const NFA *nfa) {
|
||||||
|
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||||
|
return s->flags & SHENG_FLAG_HAS_ACCEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Compile \p raw into a Sheng NFA.
 *
 * Returns nullptr when Sheng is disabled in the grey box or when the DFA has
 * more than 16 effective states. When \p accel_states is non-null it receives
 * the ids of the accelerated states.
 *
 * Layout of the result, in order: NFA header + sheng struct (rounded up to
 * 16 bytes), one sstate_aux per state, the report lists, then the AccelAux
 * records; the whole allocation is rounded up to 64 bytes.
 */
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
                                     const CompileContext &cc,
                                     const ReportManager &rm,
                                     set<dstate_id_t> *accel_states) {
    if (!cc.grey.allowSheng) {
        DEBUG_PRINTF("Sheng is not allowed!\n");
        return nullptr;
    }

    sheng_build_strat strat(raw, rm);
    dfa_info info(strat);

    DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());

    DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
                 raw.start_anchored, raw.start_floating);

    DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
                 info.can_die ? "can" : "cannot", info.size());
    // a shuffle mask has 16 byte lanes, one per state
    if (info.size() > 16) {
        DEBUG_PRINTF("Too many states\n");
        return nullptr;
    }

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }
    auto accelInfo = strat.getAccelInfo(cc.grey);

    // number the states: impl_id is the adjusted index
    for (dstate_id_t idx = 0; idx < info.size(); idx++) {
        info[idx].impl_id = idx;
    }

    DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
                 info.anchored.impl_id, info.floating.impl_id);

    vector<u32> reports, eod_reports, report_offsets;
    u8 isSingle = 0;
    ReportID single_report = 0;

    auto ri =
        strat.gatherReports(reports, eod_reports, &isSingle, &single_report);

    // section sizes
    u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
    u32 total_aux = sizeof(sstate_aux) * info.size();
    u32 total_reports = ri->getReportListSize();
    u32 total_accel = strat.accelSize() * accelInfo.size();

    // section offsets, measured from the start of the NFA
    u32 reports_offset = nfa_size + total_aux;
    u32 accel_offset =
        ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
    u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);

    DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
                 nfa_size, total_aux, total_reports, total_accel, total_size);

    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
    NFA *out = nfa.get();

    // the aux table starts right after the rounded-up headers
    populateBasicInfo(out, info, accelInfo, nfa_size, reports_offset,
                      accel_offset, total_size, total_size - sizeof(NFA));

    DEBUG_PRINTF("Setting up aux and report structures\n");

    ri->fillReportLists(out, reports_offset, report_offsets);

    for (dstate_id_t idx = 0; idx < info.size(); idx++) {
        fillTops(out, info, idx, accelInfo);
        fillAux(out, info, idx, reports, eod_reports, report_offsets);
    }
    if (isSingle) {
        fillSingleReport(out, single_report);
    }

    fillAccelAux(out, info, accelInfo);

    if (accel_states) {
        fillAccelOut(accelInfo, accel_states);
    }

    createShuffleMasks((sheng *)getMutableImplNfa(out), info, accelInfo);

    return nfa;
}
|
||||||
|
|
||||||
|
} // namespace ue2
|
80
src/nfa/shengcompile.h
Normal file
80
src/nfa/shengcompile.h
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHENGCOMPILE_H_
|
||||||
|
#define SHENGCOMPILE_H_
|
||||||
|
|
||||||
|
#include "accel_dfa_build_strat.h"
|
||||||
|
#include "rdfa.h"
|
||||||
|
#include "util/alloc.h"
|
||||||
|
#include "util/charreach.h"
|
||||||
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
|
struct NFA;
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
class ReportManager;
|
||||||
|
struct CompileContext;
|
||||||
|
struct raw_dfa;
|
||||||
|
|
||||||
|
class sheng_build_strat : public accel_dfa_build_strat {
|
||||||
|
public:
|
||||||
|
sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
||||||
|
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
||||||
|
raw_dfa &get_raw() const override { return rdfa; }
|
||||||
|
std::unique_ptr<raw_report_info> gatherReports(
|
||||||
|
std::vector<u32> &reports /* out */,
|
||||||
|
std::vector<u32> &reports_eod /* out */,
|
||||||
|
u8 *isSingleReport /* out */,
|
||||||
|
ReportID *arbReport /* out */) const override;
|
||||||
|
size_t accelSize(void) const override;
|
||||||
|
u32 max_allowed_offset_accel() const override;
|
||||||
|
u32 max_stop_char() const override;
|
||||||
|
u32 max_floating_stop_char() const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
raw_dfa &rdfa;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * \brief Sheng compile entry point (declaration only).
 *
 * Takes the raw DFA, the compile context and the report manager, and returns
 * an aligned_unique_ptr<NFA>. \p accel_states is optional and defaults to
 * nullptr.
 */
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
                                     const ReportManager &rm,
                                     std::set<dstate_id_t> *accel_states
                                     = nullptr);
|
||||||
|
|
||||||
|
struct sheng_escape_info {
|
||||||
|
CharReach outs;
|
||||||
|
CharReach outs2_single;
|
||||||
|
flat_set<std::pair<u8, u8>> outs2;
|
||||||
|
bool outs2_broken = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** \brief Predicate over an NFA; declared here, defined in another file. */
bool has_accel_sheng(const NFA *nfa);
|
||||||
|
|
||||||
|
} // namespace ue2
|
||||||
|
|
||||||
|
#endif /* SHENGCOMPILE_H_ */
|
265
src/nfa/shengdump.cpp
Normal file
265
src/nfa/shengdump.cpp
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "shengdump.h"
|
||||||
|
|
||||||
|
#include "accel_dump.h"
|
||||||
|
#include "nfa_dump_internal.h"
|
||||||
|
#include "nfa_internal.h"
|
||||||
|
#include "sheng_internal.h"
|
||||||
|
#include "rdfa.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
#include "util/charreach.h"
|
||||||
|
#include "util/dump_charclass.h"
|
||||||
|
#include "util/simd_utils.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef DUMP_SUPPORT
|
||||||
|
#error No dump support!
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
|
||||||
|
static
|
||||||
|
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
||||||
|
assert(n && isShengType(n->type));
|
||||||
|
|
||||||
|
const sheng *s = (const sheng *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux_base =
|
||||||
|
(const sstate_aux *)((const char *)n + s->aux_offset);
|
||||||
|
|
||||||
|
const sstate_aux *aux = aux_base + i;
|
||||||
|
|
||||||
|
assert((const char *)aux < (const char *)s + s->length);
|
||||||
|
|
||||||
|
return aux;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpHeader(FILE *f, const sheng *s) {
|
||||||
|
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||||
|
s->length);
|
||||||
|
fprintf(f, "aux base offset: %u, reports base offset: %u, "
|
||||||
|
"accel offset: %u\n",
|
||||||
|
s->aux_offset, s->report_offset, s->accel_offset);
|
||||||
|
fprintf(f, "anchored start state: %u, floating start state: %u\n",
|
||||||
|
s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK);
|
||||||
|
fprintf(f, "has accel: %u can die: %u single report: %u\n",
|
||||||
|
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
|
||||||
|
!!(s->flags & SHENG_FLAG_CAN_DIE),
|
||||||
|
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
||||||
|
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||||
|
"accel offset: %u, top: %u\n",
|
||||||
|
state, aux->accept, aux->accept_eod, aux->accel,
|
||||||
|
aux->top & SHENG_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpReports(FILE *f, const report_list *rl) {
|
||||||
|
fprintf(f, "reports count: %u\n", rl->count);
|
||||||
|
for (u32 i = 0; i < rl->count; i++) {
|
||||||
|
fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpMasks(FILE *f, const sheng *s) {
|
||||||
|
for (u32 chr = 0; chr < 256; chr++) {
|
||||||
|
u8 buf[16];
|
||||||
|
m128 shuffle_mask = s->shuffle_masks[chr];
|
||||||
|
store128(buf, shuffle_mask);
|
||||||
|
|
||||||
|
fprintf(f, "%3u: ", chr);
|
||||||
|
for (u32 pos = 0; pos < 16; pos++) {
|
||||||
|
u8 c = buf[pos];
|
||||||
|
if (c & SHENG_STATE_FLAG_MASK) {
|
||||||
|
fprintf(f, "%2u* ", c & SHENG_STATE_MASK);
|
||||||
|
} else {
|
||||||
|
fprintf(f, "%2u ", c & SHENG_STATE_MASK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) {
|
||||||
|
assert(nfa->type == SHENG_NFA_0);
|
||||||
|
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
fprintf(f, "sheng DFA\n");
|
||||||
|
dumpHeader(f, s);
|
||||||
|
|
||||||
|
for (u32 state = 0; state < s->n_states; state++) {
|
||||||
|
const sstate_aux *aux = get_aux(nfa, state);
|
||||||
|
dumpAux(f, state, aux);
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "EOD report list:\n");
|
||||||
|
const report_list *rl =
|
||||||
|
(const report_list *)((const char *)nfa + aux->accept_eod);
|
||||||
|
dumpReports(f, rl);
|
||||||
|
}
|
||||||
|
if (aux->accel) {
|
||||||
|
fprintf(f, "accel:\n");
|
||||||
|
const AccelAux *accel =
|
||||||
|
(const AccelAux *)((const char *)nfa + aux->accel);
|
||||||
|
dumpAccelInfo(f, *accel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
|
||||||
|
dumpMasks(f, s);
|
||||||
|
|
||||||
|
fprintf(f, "\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dumpDotPreambleDfa(FILE *f) {
|
||||||
|
dumpDotPreamble(f);
|
||||||
|
|
||||||
|
// DFA specific additions.
|
||||||
|
fprintf(f, "STARTF [style=invis];\n");
|
||||||
|
fprintf(f, "STARTA [style=invis];\n");
|
||||||
|
fprintf(f, "0 [style=invis];\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
||||||
|
const sstate_aux *aux = get_aux(n, i);
|
||||||
|
|
||||||
|
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||||
|
"label = \"%u\" ]; \n",
|
||||||
|
i, i);
|
||||||
|
|
||||||
|
if (aux->accept_eod) {
|
||||||
|
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->accept) {
|
||||||
|
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aux->top && (aux->top & SHENG_STATE_MASK) != i) {
|
||||||
|
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||||
|
aux->top & SHENG_STATE_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->anchored & SHENG_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == (s->floating & SHENG_STATE_MASK)) {
|
||||||
|
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void describeEdge(FILE *f, const u16 *t, u16 i) {
|
||||||
|
for (u16 s = 0; s < N_CHARS; s++) {
|
||||||
|
if (!t[s]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
u16 ss;
|
||||||
|
for (ss = 0; ss < s; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ss != s) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
CharReach reach;
|
||||||
|
for (ss = s; ss < 256; ss++) {
|
||||||
|
if (t[s] == t[ss]) {
|
||||||
|
reach.set(ss);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "%u -> %u [ label = \"", i, t[s]);
|
||||||
|
|
||||||
|
describeClass(f, reach, 5, CC_OUT_DOT);
|
||||||
|
|
||||||
|
fprintf(f, "\" ];\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||||
|
assert(isShengType(n->type));
|
||||||
|
const sheng *s = (const sheng *)getImplNfa(n);
|
||||||
|
const sstate_aux *aux = get_aux(n, state);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||||
|
u8 buf[16];
|
||||||
|
m128 shuffle_mask = s->shuffle_masks[i];
|
||||||
|
|
||||||
|
store128(buf, shuffle_mask);
|
||||||
|
|
||||||
|
t[i] = buf[state] & SHENG_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
t[TOP] = aux->top & SHENG_STATE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) {
|
||||||
|
assert(nfa->type == SHENG_NFA_0);
|
||||||
|
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||||
|
|
||||||
|
dumpDotPreambleDfa(f);
|
||||||
|
|
||||||
|
for (u16 i = 1; i < s->n_states; i++) {
|
||||||
|
describeNode(nfa, s, i, f);
|
||||||
|
|
||||||
|
u16 t[ALPHABET_SIZE];
|
||||||
|
|
||||||
|
shengGetTransitions(nfa, i, t);
|
||||||
|
|
||||||
|
describeEdge(f, t, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(f, "}\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ue2
|
@ -26,15 +26,24 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "simd_utils_ssse3.h"
|
#ifndef SHENGDUMP_H_
|
||||||
|
#define SHENGDUMP_H_
|
||||||
|
|
||||||
const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
|
#ifdef DUMP_SUPPORT
|
||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
|
||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
|
||||||
|
|
||||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
#include <cstdio>
|
||||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
#include <string>
|
||||||
|
|
||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
struct NFA;
|
||||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
|
||||||
};
|
namespace ue2 {
|
||||||
|
|
||||||
|
void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file,
|
||||||
|
const std::string &base);
|
||||||
|
void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file);
|
||||||
|
|
||||||
|
} // namespace ue2
|
||||||
|
|
||||||
|
#endif // DUMP_SUPPORT
|
||||||
|
|
||||||
|
#endif /* SHENGDUMP_H_ */
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -40,8 +40,6 @@
|
|||||||
|
|
||||||
#include "shufti_common.h"
|
#include "shufti_common.h"
|
||||||
|
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
/** \brief Naive byte-by-byte implementation. */
|
/** \brief Naive byte-by-byte implementation. */
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
||||||
@ -235,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
|
|||||||
|
|
||||||
m128 c2_lo = pshufb(mask2_lo, chars_lo);
|
m128 c2_lo = pshufb(mask2_lo, chars_lo);
|
||||||
m128 c2_hi = pshufb(mask2_hi, chars_hi);
|
m128 c2_hi = pshufb(mask2_hi, chars_hi);
|
||||||
m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi)));
|
m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
|
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
|
||||||
@ -472,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
|
|||||||
|
|
||||||
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
|
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
|
||||||
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
|
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
|
||||||
m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi)));
|
m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");
|
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -34,7 +34,6 @@
|
|||||||
#include "util/bitutils.h"
|
#include "util/bitutils.h"
|
||||||
#include "util/simd_utils.h"
|
#include "util/simd_utils.h"
|
||||||
#include "util/unaligned.h"
|
#include "util/unaligned.h"
|
||||||
#include "util/simd_utils_ssse3.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Common stuff for all versions of shufti (single, multi and multidouble)
|
* Common stuff for all versions of shufti (single, multi and multidouble)
|
||||||
@ -94,7 +93,7 @@ DUMP_MSK(128)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GET_LO_4(chars) and128(chars, low4bits)
|
#define GET_LO_4(chars) and128(chars, low4bits)
|
||||||
#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
|
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
|
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
|
||||||
@ -120,7 +119,7 @@ DUMP_MSK(256)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GET_LO_4(chars) and256(chars, low4bits)
|
#define GET_LO_4(chars) and256(chars, low4bits)
|
||||||
#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4)
|
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user