Merge branch develop into master

This commit is contained in:
Matthew Barr 2017-01-20 14:16:41 +11:00
commit f4c2fc3b9b
304 changed files with 22271 additions and 7404 deletions

View File

@ -2,8 +2,40 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.3.2] 2016-11-15
## [4.4.0] 2017-01-20
- Introduce the "fat runtime" build. This will build several variants of the
Hyperscan scanning engine specialised for different processor feature sets,
and use the appropriate one for the host at runtime. This uses the "ifunc"
indirect function attribute provided by GCC and is currently available on
Linux only, where it is the default for release builds.
- New API function: add the `hs_valid_platform()` function. This function tests
whether the host provides the SSSE3 instruction set required by Hyperscan.
- Introduce a new standard benchmarking tool, "hsbench". This provides an easy
way to measure Hyperscan's performance for a particular set of patterns and
corpus of data to be scanned.
- Introduce a 64-bit GPR LimEx NFA model, which uses 64-bit GPRs on 64-bit
hosts and SSE registers on 32-bit hosts.
- Introduce a new DFA model ("McSheng") which is a hybrid of the existing
McClellan and Sheng models. This improves scanning performance for some
cases.
- Introduce lookaround specialisations to improve scanning performance.
- Improve the handling of long literals by moving confirmation to the Rose
interpreter and simplifying the hash table used to track them in streaming
mode.
- Improve compile time optimisation for removing redundant paths from
expression graphs.
- Build: improve support for building with MSVC toolchain.
- Reduce the size of small write DFAs used for small scans in block mode.
- Introduce a custom graph type (`ue2_graph`) used in place of the Boost Graph
Library's `adjacency_list` type. Improves compile time performance and type
safety.
- Improve scanning performance of the McClellan DFA.
- Bugfix for a very unusual SOM case where the incorrect start offset was
reported for a match.
- Bugfix for issue #37, removing execute permissions from some source files.
- Bugfix for issue #41, handle Windows line endings in pattern files.
## [4.3.2] 2016-11-15
- Bugfix for issue #39. This small change is a workaround for an issue in
Boost 1.62. The fix has been submitted to Boost for inclusion in a future
release.
@ -11,7 +43,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.3.1] 2016-08-29
- Bugfix for issue #30. In recent versions of Clang, a write to a variable was
being elided, resulting in corrupted stream state after calling
hs_reset_stream().
`hs_reset_stream()`.
## [4.3.0] 2016-08-24
- Introduce a new analysis pass ("Violet") used for decomposition of patterns

View File

@ -1,19 +1,11 @@
cmake_minimum_required (VERSION 2.8.11)
# don't use the built-in default configs
set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE)
project (Hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 2)
set (HS_MINOR_VERSION 4)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
# since we are doing this manually, we only have three types
set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo"
CACHE STRING "" FORCE)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
@ -70,7 +62,14 @@ include_directories(SYSTEM include)
set(BOOST_USE_STATIC_LIBS OFF)
set(BOOST_USE_MULTITHREADED OFF)
set(BOOST_USE_STATIC_RUNTIME OFF)
set(BOOST_MINVERSION 1.57.0)
if (CMAKE_SYSTEM_NAME MATCHES "Darwin"
OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD"
AND CMAKE_C_COMPILER_ID MATCHES "Clang"))
# we need a more recent boost for libc++ used by clang on OSX and FreeBSD
set(BOOST_MINVERSION 1.61.0)
else ()
set(BOOST_MINVERSION 1.57.0)
endif ()
set(BOOST_NO_BOOST_CMAKE ON)
# first check for Boost installed on the system
@ -85,6 +84,7 @@ if(NOT Boost_FOUND)
endif()
endif()
include (${CMAKE_MODULE_PATH}/boost.cmake)
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
@ -151,27 +151,21 @@ if(MSVC OR MSVC_IDE)
if (MSVC_VERSION LESS 1700)
message(FATAL_ERROR "The project requires C++11 features.")
else()
# set base flags
set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3")
set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc")
set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
if (WINDOWS_ICC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
else()
#TODO: don't hardcode arch
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
endif()
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
if (DISABLE_ASSERTS)
set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}")
set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}")
endif ()
endif()
else()
@ -192,6 +186,12 @@ else()
unset(_GXX_OUTPUT)
endif()
# remove CMake's idea of optimisation
foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES})
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}")
string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}")
endforeach ()
if(OPTIMISE)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O2")
@ -200,32 +200,28 @@ else()
set(OPT_CXX_FLAG "-O0")
endif(OPTIMISE)
# set up base flags for build types
set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}")
set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}")
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}")
set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}")
if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
endif()
if (DISABLE_ASSERTS)
# usually true for release builds, false for debug
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG")
endif()
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
message(STATUS "Building for current host CPU")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -march=native -mtune=native")
set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native")
endif()
if(CMAKE_COMPILER_IS_GNUCC)
@ -242,12 +238,17 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
if (RELEASE_BUILD)
# we don't need the noise of ABI warnings in a release build
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
endif ()
endif()
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(tmmintrin.h HAVE_TMMINTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
@ -267,9 +268,36 @@ if (RELEASE_BUILD)
endif()
endif()
# ensure we are building for the right target arch
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
# This is a Linux-only feature for now - requires platform support
# elsewhere
message(STATUS "generator is ${CMAKE_GENERATOR}")
if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND
CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9")
message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE)
elseif (NOT (CMAKE_GENERATOR MATCHES "Unix Makefiles" OR
(CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja")))
message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher")
set (FAT_RUNTIME_REQUISITES FALSE)
else()
include (${CMAKE_MODULE_PATH}/attrib.cmake)
if (NOT HAS_C_ATTR_IFUNC)
message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE)
else ()
set (FAT_RUNTIME_REQUISITES TRUE)
endif()
endif()
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif ()
include (${CMAKE_MODULE_PATH}/arch.cmake)
if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3)
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
endif ()
# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
@ -375,6 +403,16 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
endif()
endif()
if (NOT FAT_RUNTIME)
message(STATUS "Building for current host CPU")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}")
else()
message(STATUS "Building runtime for multiple microarchitectures")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
add_subdirectory(util)
add_subdirectory(unit)
add_subdirectory(doc/dev-reference)
@ -401,8 +439,13 @@ if (NOT WIN32)
endif()
# only set these after all tests are done
if (NOT FAT_RUNTIME)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
endif()
if(NOT WIN32)
@ -424,12 +467,21 @@ SET(hs_HEADERS
)
install(FILES ${hs_HEADERS} DESTINATION include/hs)
set (hs_exec_common_SRCS
src/alloc.c
src/scratch.c
src/util/cpuid_flags.c
src/util/cpuid_flags.h
src/util/multibit.c
)
set (hs_exec_SRCS
${hs_HEADERS}
src/hs_version.h
src/ue2common.h
src/alloc.c
src/allocator.h
src/crc32.c
src/crc32.h
src/report.h
src/runtime.c
src/fdr/fdr.c
@ -437,7 +489,6 @@ set (hs_exec_SRCS
src/fdr/fdr_internal.h
src/fdr/fdr_confirm.h
src/fdr/fdr_confirm_runtime.h
src/fdr/fdr_streaming_runtime.h
src/fdr/flood_runtime.h
src/fdr/fdr_loadval.h
src/fdr/teddy.c
@ -461,15 +512,12 @@ set (hs_exec_SRCS
src/nfa/lbr.h
src/nfa/lbr_common_impl.h
src/nfa/lbr_internal.h
src/nfa/mcclellan.c
src/nfa/mcclellan.h
src/nfa/mcclellan_common_impl.h
src/nfa/mcclellan_internal.h
src/nfa/limex_accel.c
src/nfa/limex_accel.h
src/nfa/limex_exceptional.h
src/nfa/limex_native.c
src/nfa/limex_ring.h
src/nfa/limex_64.c
src/nfa/limex_simd128.c
src/nfa/limex_simd256.c
src/nfa/limex_simd384.c
@ -482,6 +530,14 @@ set (hs_exec_SRCS
src/nfa/limex_runtime_impl.h
src/nfa/limex_shuffle.h
src/nfa/limex_state_impl.h
src/nfa/mcclellan.c
src/nfa/mcclellan.h
src/nfa/mcclellan_common_impl.h
src/nfa/mcclellan_internal.h
src/nfa/mcsheng.c
src/nfa/mcsheng_data.c
src/nfa/mcsheng.h
src/nfa/mcsheng_internal.h
src/nfa/mpv.h
src/nfa/mpv.c
src/nfa/mpv_internal.h
@ -542,6 +598,8 @@ set (hs_exec_SRCS
src/rose/init.h
src/rose/init.c
src/rose/stream.c
src/rose/stream_long_lit.h
src/rose/stream_long_lit_hash.h
src/rose/match.h
src/rose/match.c
src/rose/miracle.h
@ -554,15 +612,16 @@ set (hs_exec_SRCS
src/rose/rose_types.h
src/rose/rose_common.h
src/rose/validate_mask.h
src/rose/validate_shufti.h
src/util/bitutils.h
src/util/copybytes.h
src/util/exhaust.h
src/util/fatbit.h
src/util/fatbit.c
src/util/join.h
src/util/masked_move.h
src/util/multibit.h
src/util/multibit_internal.h
src/util/multibit.c
src/util/multibit_internal.h
src/util/pack_bits.h
src/util/popcount.h
src/util/pqueue.h
@ -574,21 +633,14 @@ set (hs_exec_SRCS
src/util/state_compress.c
src/util/unaligned.h
src/util/uniform_ops.h
src/scratch.h
src/scratch.c
src/crc32.c
src/crc32.h
src/database.c
src/database.h
)
if (HAVE_AVX2)
set (hs_exec_SRCS
${hs_exec_SRCS}
src/fdr/teddy_avx2.c
src/util/masked_move.c
)
endif ()
set (hs_exec_avx2_SRCS
src/fdr/teddy_avx2.c
src/util/masked_move.c
)
SET (hs_SRCS
@ -621,8 +673,6 @@ SET (hs_SRCS
src/fdr/fdr_engine_description.cpp
src/fdr/fdr_engine_description.h
src/fdr/fdr_internal.h
src/fdr/fdr_streaming_compile.cpp
src/fdr/fdr_streaming_internal.h
src/fdr/flood_compile.cpp
src/fdr/teddy_compile.cpp
src/fdr/teddy_compile.h
@ -660,6 +710,8 @@ SET (hs_SRCS
src/nfa/mcclellancompile.h
src/nfa/mcclellancompile_util.cpp
src/nfa/mcclellancompile_util.h
src/nfa/mcsheng_compile.cpp
src/nfa/mcsheng_compile.h
src/nfa/limex_compile.cpp
src/nfa/limex_compile.h
src/nfa/limex_accel.h
@ -677,6 +729,8 @@ SET (hs_SRCS
src/nfa/nfa_internal.h
src/nfa/nfa_kind.h
src/nfa/rdfa.h
src/nfa/rdfa_graph.cpp
src/nfa/rdfa_graph.h
src/nfa/rdfa_merge.cpp
src/nfa/rdfa_merge.h
src/nfa/repeat_internal.h
@ -721,7 +775,6 @@ SET (hs_SRCS
src/nfagraph/ng_extparam.h
src/nfagraph/ng_fixed_width.cpp
src/nfagraph/ng_fixed_width.h
src/nfagraph/ng_graph.h
src/nfagraph/ng_haig.cpp
src/nfagraph/ng_haig.h
src/nfagraph/ng_holder.cpp
@ -875,6 +928,7 @@ SET (hs_SRCS
src/rose/rose_build_compile.cpp
src/rose/rose_build_convert.cpp
src/rose/rose_build_convert.h
src/rose/rose_build_engine_blob.h
src/rose/rose_build_exclusive.cpp
src/rose/rose_build_exclusive.h
src/rose/rose_build_groups.cpp
@ -882,6 +936,8 @@ SET (hs_SRCS
src/rose/rose_build_impl.h
src/rose/rose_build_infix.cpp
src/rose/rose_build_infix.h
src/rose/rose_build_long_lit.cpp
src/rose/rose_build_long_lit.h
src/rose/rose_build_lookaround.cpp
src/rose/rose_build_lookaround.h
src/rose/rose_build_matchers.cpp
@ -889,6 +945,8 @@ SET (hs_SRCS
src/rose/rose_build_merge.cpp
src/rose/rose_build_merge.h
src/rose/rose_build_misc.cpp
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_build_role_aliasing.cpp
src/rose/rose_build_scatter.cpp
src/rose/rose_build_scatter.h
@ -915,14 +973,15 @@ SET (hs_SRCS
src/util/compile_error.cpp
src/util/compile_error.h
src/util/container.h
src/util/cpuid_flags.c
src/util/cpuid_flags.h
src/util/depth.cpp
src/util/depth.h
src/util/determinise.h
src/util/dump_mask.cpp
src/util/dump_mask.h
src/util/fatbit_build.cpp
src/util/fatbit_build.h
src/util/graph.h
src/util/hash.h
src/util/multibit_build.cpp
src/util/multibit_build.h
src/util/order_check.h
@ -937,6 +996,7 @@ SET (hs_SRCS
src/util/target_info.cpp
src/util/target_info.h
src/util/ue2_containers.h
src/util/ue2_graph.h
src/util/ue2string.cpp
src/util/ue2string.h
src/util/unaligned.h
@ -966,6 +1026,8 @@ set(hs_dump_SRCS
src/nfa/limex_dump.cpp
src/nfa/mcclellandump.cpp
src/nfa/mcclellandump.h
src/nfa/mcsheng_dump.cpp
src/nfa/mcsheng_dump.h
src/nfa/mpv_dump.cpp
src/nfa/nfa_dump_api.h
src/nfa/nfa_dump_dispatch.cpp
@ -990,6 +1052,8 @@ set(hs_dump_SRCS
src/rose/rose_dump.h
src/util/dump_charclass.cpp
src/util/dump_charclass.h
src/util/dump_util.cpp
src/util/dump_util.h
)
if (DUMP_SUPPORT)
@ -1002,27 +1066,106 @@ endif()
set (LIB_VERSION ${HS_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION})
add_library(hs_exec OBJECT ${hs_exec_SRCS})
if (NOT FAT_RUNTIME)
set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_common_SRCS})
if (HAVE_AVX2)
set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
endif()
add_library(hs_exec OBJECT ${hs_exec_SRCS})
add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
endif()
else (FAT_RUNTIME)
set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh")
add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_core2 PROPERTIES
COMPILE_FLAGS "-march=core2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
set_target_properties(hs_exec_avx2 PROPERTIES
COMPILE_FLAGS "-march=core-avx2"
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_common OBJECT
${hs_exec_common_SRCS}
src/dispatcher.c
)
set_source_files_properties(src/dispatcher.c PROPERTIES
COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function")
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_shared_core2 PROPERTIES
COMPILE_FLAGS "-march=core2"
POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_shared_corei7 PROPERTIES
COMPILE_FLAGS "-march=corei7"
POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
set_target_properties(hs_exec_shared_avx2 PROPERTIES
COMPILE_FLAGS "-march=core-avx2"
POSITION_INDEPENDENT_CODE TRUE
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in"
)
add_library(hs_exec_common_shared OBJECT
${hs_exec_common_SRCS}
src/dispatcher.c
)
set_target_properties(hs_exec_common_shared PROPERTIES
OUTPUT_NAME hs_exec_common
POSITION_INDEPENDENT_CODE TRUE)
endif() # SHARED
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
set_target_properties(hs_exec_shared PROPERTIES
POSITION_INDEPENDENT_CODE TRUE)
endif()
# hs_version.c is added explicitly to avoid some build systems that refuse to
# create a lib without any src (I'm looking at you Xcode)
add_library(hs_runtime STATIC src/hs_version.c $<TARGET_OBJECTS:hs_exec>)
add_library(hs_runtime STATIC src/hs_version.c
$<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2>
$<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
endif (NOT FAT_RUNTIME)
set_target_properties(hs_runtime PROPERTIES
LINKER_LANGUAGE C)
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
if (NOT BUILD_SHARED_LIBS)
install(TARGETS hs_runtime DESTINATION lib)
endif()
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_shared>)
if (NOT FAT_RUNTIME)
add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_exec_shared>)
else()
add_library(hs_runtime_shared SHARED src/hs_version.c
src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_exec_common_shared>
$<TARGET_OBJECTS:hs_exec_shared_core2>
$<TARGET_OBJECTS:hs_exec_shared_corei7>
$<TARGET_OBJECTS:hs_exec_shared_avx2>)
endif()
set_target_properties(hs_runtime_shared PROPERTIES
VERSION ${LIB_VERSION}
SOVERSION ${LIB_SOVERSION}
@ -1035,8 +1178,14 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
LIBRARY DESTINATION lib)
endif()
# we want the static lib for testing
add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>)
if (NOT FAT_RUNTIME)
add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
else()
# we want the static lib for testing
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2>
$<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>)
endif()
add_dependencies(hs ragel_Parser)
@ -1045,7 +1194,17 @@ install(TARGETS hs DESTINATION lib)
endif()
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
add_library(hs_shared SHARED ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
if (NOT FAT_RUNTIME)
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
else()
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared>
$<TARGET_OBJECTS:hs_exec_shared_core2>
$<TARGET_OBJECTS:hs_exec_shared_corei7>
$<TARGET_OBJECTS:hs_exec_shared_avx2>)
endif()
add_dependencies(hs_shared ragel_Parser)
set_target_properties(hs_shared PROPERTIES
OUTPUT_NAME hs

View File

@ -11,7 +11,8 @@ else ()
endif ()
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}")
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
@ -19,10 +20,6 @@ int main() {
(void)_mm_shuffle_epi8(a, a);
}" HAVE_SSSE3)
if (NOT HAVE_SSSE3)
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
endif ()
# now look for AVX2
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
#if !defined(__AVX2__)
@ -34,9 +31,5 @@ int main(){
(void)_mm256_xor_si256(z, z);
}" HAVE_AVX2)
if (NOT HAVE_AVX2)
message(STATUS "Building without AVX2 support")
endif ()
unset (CMAKE_REQUIRED_FLAGS)
unset (INTRIN_INC_H)

13
cmake/attrib.cmake Normal file
View File

@ -0,0 +1,13 @@
# tests for compiler properties
# set -Werror so we can't ignore unused attribute warnings
# (an unknown attribute is normally only a warning, which would make the
# probe below "succeed" even when ifunc is not supported)
set (CMAKE_REQUIRED_FLAGS "-Werror")
# Probe for the GNU "ifunc" (indirect function) attribute: foo() is bound at
# load time by the resolver foo_i(), which returns f1. The result variable
# HAS_C_ATTR_IFUNC is used by the main build to gate the fat runtime.
CHECK_C_SOURCE_COMPILES("
int foo(int) __attribute__ ((ifunc(\"foo_i\")));
int f1(int i) { return i; }
void (*foo_i()) { return f1; }
int main(void) { return 0; }
" HAS_C_ATTR_IFUNC)
unset(CMAKE_REQUIRED_FLAGS)

41
cmake/boost.cmake Normal file
View File

@ -0,0 +1,41 @@
# Boost 1.62 has a bug that we've patched around, check if it is required
if (Boost_VERSION EQUAL 106200)
# Compile the probe against both the detected Boost headers and our own
# include tree, which carries the patched reverse_graph header.
set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include")
# Minimal reproducer: with a stock Boost 1.62.0, reverse_graph does not
# satisfy BidirectionalGraphConcept (degree() is missing), so this concept
# assertion fails to compile.
set (BOOST_REV_TEST "
#include <boost/graph/graph_concepts.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/concept/assert.hpp>
int main(int,char*[])
{
using namespace boost;
// Check const reverse_graph
{
typedef adjacency_list< vecS, vecS, bidirectionalS,
property<vertex_color_t, int>,
property<edge_weight_t, int>,
property<graph_name_t, std::string>
> AdjList;
typedef reverse_graph<AdjList> Graph;
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
}
return 0;
}
")
# First try the stock header...
CHECK_CXX_SOURCE_COMPILES("${BOOST_REV_TEST}" BOOST_REVGRAPH_OK)
if (NOT BOOST_REVGRAPH_OK)
# ...and if that fails, retry with our patched reverse_graph header.
# BOOST_REVGRAPH_PATCH is exported to the generated config header so the
# source can select the patched include at compile time.
message(STATUS "trying patched")
CHECK_CXX_SOURCE_COMPILES("
#include <boost-patched/graph/reverse_graph.hpp>
${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH)
endif()
# Neither the stock nor the patched header compiled: this Boost install is
# unusable for us.
if (NOT BOOST_REVGRAPH_OK AND NOT BOOST_REVGRAPH_PATCH)
message(FATAL_ERROR "Something is wrong with this copy of boost::reverse_graph")
endif()
unset (CMAKE_REQUIRED_INCLUDES)
endif () # Boost 1.62.0

27
cmake/build_wrapper.sh Executable file
View File

@ -0,0 +1,27 @@
#!/bin/sh -e
# This is used for renaming symbols for the fat runtime, don't call directly
# TODO: make this a lot less fragile!
#
# Invoked by CMake as a RULE_LAUNCH_COMPILE wrapper:
#   build_wrapper.sh <prefix> <keep-syms-file> <compile command...>
# It runs the compile command, then renames every global symbol in the
# produced object to <prefix>_<symbol>, except for symbols listed in the
# keep file and symbols exported by libc.
PREFIX=$1
KEEPSYMS_IN=$2
shift 2
# everything remaining is the original compiler command line
BUILD=$@
# recover the output object path from the "-o <file>.o" argument
# NOTE(review): assumes exactly one "-o ...o" appears in the command — confirm
OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/')
# scratch files, made unique per invocation via the shell PID ($$)
SYMSFILE=/tmp/${PREFIX}_rename.syms.$$
KEEPSYMS=/tmp/keep.syms.$$
# grab the command without the target obj or src file flags
# we don't just call gcc directly as there may be flags modifying the arch
CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;')
# find me a libc
LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6)
cp ${KEEPSYMS_IN} ${KEEPSYMS}
# get all symbols from libc and turn them into patterns
# (anchored ^...$ regexes, appended to the keep list consumed by grep -f below)
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
# build the object
${BUILD}
# rename the symbols in the object
# each line of SYMSFILE is "<old> <prefix>_<old>", the format objcopy expects
nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE}
# only rewrite the object if we actually collected symbols to rename
if test -s ${SYMSFILE}
then
objcopy --redefine-syms=${SYMSFILE} ${OUT}
fi
# clean up scratch files
rm -f ${SYMSFILE} ${KEEPSYMS}

View File

@ -15,6 +15,9 @@
/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT
/* Define if building "fat" runtime. */
#cmakedefine FAT_RUNTIME
/* Define to 1 if `backtrace' works. */
#cmakedefine HAVE_BACKTRACE
@ -67,9 +70,6 @@
/* Define if the sqlite3_open_v2 call is available */
#cmakedefine HAVE_SQLITE3_OPEN_V2
/* Define to 1 if you have the <tmmintrin.h> header file. */
#cmakedefine HAVE_TMMINTRIN_H
/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H
@ -89,3 +89,5 @@
/* define if this is a release build. */
#cmakedefine RELEASE_BUILD
/* define if reverse_graph requires patch for boost 1.62.0 */
#cmakedefine BOOST_REVGRAPH_PATCH

11
cmake/keep.syms.in Normal file
View File

@ -0,0 +1,11 @@
# names to exclude
hs_misc_alloc
hs_misc_free
hs_free_scratch
hs_stream_alloc
hs_stream_free
hs_scratch_alloc
hs_scratch_free
hs_database_alloc
hs_database_free
^_

53
cmake/sqlite3.cmake Normal file
View File

@ -0,0 +1,53 @@
#
# a lot of noise to find sqlite
#
# SQLITE_PREFER_STATIC forces building sqlite3 from the in-tree source
# instead of linking an installed copy.
option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF)
if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC)
find_package(PkgConfig QUIET)
# first check for sqlite on the system
pkg_check_modules(SQLITE3 sqlite3)
endif()
if (NOT SQLITE3_FOUND)
message(STATUS "looking for sqlite3 in source tree")
# look in the source tree
# (requires both the header and the amalgamated source file)
if (EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND
EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
message(STATUS " found sqlite3 in source tree")
set(SQLITE3_FOUND TRUE)
set(SQLITE3_BUILD_SOURCE TRUE)
set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
set(SQLITE3_LDFLAGS sqlite3_static)
else()
message(FATAL_ERROR " no sqlite3 in source tree")
endif()
endif()
# now do version checks
if (SQLITE3_FOUND)
list(INSERT CMAKE_REQUIRED_INCLUDES 0 "${SQLITE3_INCLUDE_DIRS}")
# reject sqlite versions in the known-broken range [3.8.7, 3.8.10)
CHECK_C_SOURCE_COMPILES("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK)
if (NOT SQLITE_VERSION_OK)
message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version")
endif()
if (NOT SQLITE3_BUILD_SOURCE)
# NOTE(review): _SAVED_FLAGS is captured here but never restored or read
# again in this file — confirm whether the save/restore was left unfinished
set(_SAVED_FLAGS ${CMAKE_REQUIRED_FLAGS})
# temporarily link against the found sqlite to probe for sqlite3_open_v2,
# then undo the additions to the check state
list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS})
CHECK_SYMBOL_EXISTS(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2)
list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES "${SQLITE3_INCLUDE_DIRS}")
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${SQLITE3_LDFLAGS})
else()
# guard so repeated includes of this module don't redefine the target
if (NOT TARGET sqlite3_static)
# build sqlite as a static lib to compile into our test programs
add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
if (NOT WIN32)
set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
endif()
endif()
endif()
endif()
# that's enough about sqlite

View File

@ -169,6 +169,9 @@ Common options for CMake include:
+------------------------+----------------------------------------------------+
| DEBUG_OUTPUT | Enable very verbose debug output. Default off. |
+------------------------+----------------------------------------------------+
| FAT_RUNTIME | Build the :ref:`fat runtime<fat_runtime>`. Default |
| | true on Linux, not available elsewhere. |
+------------------------+----------------------------------------------------+
For example, to generate a ``Debug`` build: ::
@ -199,11 +202,11 @@ The other types of builds are:
Target Architecture
-------------------
By default, Hyperscan will be compiled to target the instruction set of the
processor of the machine that being used for compilation. This is done via
the use of ``-march=native``. The result of this means that a library built on
one machine may not work on a different machine if they differ in supported
instruction subsets.
Unless using the :ref:`fat runtime<fat_runtime>`, by default Hyperscan will be
compiled to target the instruction set of the processor of the machine that is
being used for compilation. This is done via the use of ``-march=native``. As a
result, a library built on one machine may not work on a different machine if
they differ in supported instruction subsets.
To override the use of ``-march=native``, set appropriate flags for the
compiler in ``CFLAGS`` and ``CXXFLAGS`` environment variables before invoking
@ -215,3 +218,57 @@ example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
For more information, refer to :ref:`instr_specialization`.
.. _fat_runtime:
Fat Runtime
-----------
A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan
library to dispatch the most appropriate runtime code for the host processor.
This feature is called the "fat runtime", as a single Hyperscan library
contains multiple copies of the runtime code for different instruction sets.
.. note::
The fat runtime feature is only available on Linux. Release builds of
Hyperscan will default to having the fat runtime enabled where supported.
When building the library with the fat runtime, the Hyperscan runtime code
will be compiled multiple times for these different instruction sets, and
these compiled objects are combined into one library. There are no changes to
how user applications are built against this library.
When applications are executed, the correct version of the runtime is selected
for the machine that it is running on. This is done using a ``CPUID`` check
for the presence of the instruction set, and then an indirect function is
resolved so that the right version of each API function is used. There is no
impact on function call performance, as this check and resolution is performed
by the ELF loader once when the binary is loaded.
If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime
API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR`
instead of potentially executing illegal instructions. The API function
:c:func:`hs_valid_platform` can be used by application writers to determine if
the current platform is supported by Hyperscan.
As of this release, the variants of the runtime that are built, and the CPU
capabilities that are required, are the following:
+----------+-------------------------------+---------------------+
| Variant | CPU Feature Flag(s) Required | gcc arch flag |
+==========+===============================+=====================+
| Core 2 | ``SSSE3`` | ``-march=core2`` |
+----------+-------------------------------+---------------------+
| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` |
+----------+-------------------------------+---------------------+
| AVX 2 | ``AVX2`` | ``-march=core-avx2`` |
+----------+-------------------------------+---------------------+
As this requires compiler, libc, and binutils support, at this time the fat
runtime will only be enabled for Linux builds where the compiler supports the
`indirect function "ifunc" function attribute
<https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-indirect-functions-3321>`_.
This attribute should be available on all supported versions of GCC, and
recent versions of Clang and ICC. There is currently no operating system
support for this feature on non-Linux systems.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -833,6 +833,8 @@ static unsigned parseFlags(const string &flagsStr) {
flags |= HS_FLAG_UTF8; break;
case 'W':
flags |= HS_FLAG_UCP; break;
case '\r': // stray carriage-return
break;
default:
cerr << "Unsupported flag \'" << c << "\'" << endl;
exit(-1);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -621,6 +621,8 @@ static unsigned parseFlags(const string &flagsStr) {
flags |= HS_FLAG_UTF8; break;
case 'W':
flags |= HS_FLAG_UCP; break;
case '\r': // stray carriage-return
break;
default:
cerr << "Unsupported flag \'" << c << "\'" << endl;
exit(-1);

View File

@ -5,7 +5,7 @@
#include <boost/graph/reverse_graph.hpp>
#if (BOOST_VERSION == 106200)
#if defined(BOOST_REVGRAPH_PATCH)
// Boost 1.62.0 does not implement degree() in reverse_graph which is required
// by BidirectionalGraph, so add it.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -117,11 +117,11 @@ typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
static
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index);
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
const u32 flags = g[t].assert_flags;
DEBUG_PRINTF("consider assert vertex %u with flags %u\n",
g[t].index, flags);
DEBUG_PRINTF("consider assert vertex %zu with flags %u\n", g[t].index,
flags);
// Wire up all the predecessors to all the successors.
@ -142,7 +142,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
for (const auto &outEdge : out_edges_range(t, g)) {
NFAVertex v = target(outEdge, g);
DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index,
DEBUG_PRINTF("consider path [%zu,%zu,%zu]\n", g[u].index,
g[t].index, g[v].index);
if (v == t) {
@ -173,9 +173,8 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
auto cache_key = make_pair(u, v);
auto ecit = edge_cache.find(cache_key);
if (ecit == edge_cache.end()) {
DEBUG_PRINTF("adding edge %u %u\n", g[u].index,
g[v].index);
NFAEdge e = add_edge(u, v, g).first;
DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index);
NFAEdge e = add_edge(u, v, g);
edge_cache.emplace(cache_key, e);
g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) {
@ -184,7 +183,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
}
} else {
NFAEdge e = ecit->second;
DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index,
DEBUG_PRINTF("updating edge %zu %zu [a %zu]\n", g[u].index,
g[v].index, g[t].index);
// Edge already exists.
u32 &e_flags = g[e].assert_flags;
@ -211,8 +210,7 @@ void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
Report r = rm.getBasicInternalReport(g, adj);
g[v].reports.insert(rm.getInternalId(r));
DEBUG_PRINTF("set report id for vertex %u, adj %d\n",
g[v].index, adj);
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
}
static
@ -222,8 +220,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
continue;
}
DEBUG_PRINTF("mls %u %08x\n", g[v].index,
g[v].assert_flags);
DEBUG_PRINTF("mls %zu %08x\n", g[v].index, g[v].assert_flags);
/* we have found a multi-line start (maybe more than one) */
@ -299,8 +296,8 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
pruneUseless(g);
pruneEmptyVertices(g);
g.renumberVertices();
g.renumberEdges();
renumber_vertices(g);
renumber_edges(g);
}
DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));

View File

@ -29,8 +29,10 @@
/** \file
* \brief Compiler front-end interface.
*/
#include "allocator.h"
#include "asserts.h"
#include "compiler.h"
#include "crc32.h"
#include "database.h"
#include "grey.h"
#include "hs_internal.h"
@ -321,6 +323,45 @@ platform_t target_to_platform(const target_t &target_info) {
return p;
}
/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
 * \ref hs_database, ensuring that it is padded correctly to give cacheline
 * alignment. */
static
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
    size_t db_len = sizeof(struct hs_database) + len;
    DEBUG_PRINTF("db size %zu\n", db_len);
    DEBUG_PRINTF("db platform %llx\n", platform);

    struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
    if (hs_check_alloc(db) != HS_SUCCESS) {
        hs_database_free(db);
        return nullptr;
    }

    // So that none of our database is uninitialized
    memset(db, 0, db_len);

    // we need to align things manually: 0x3f masks the low 6 bits, i.e. the
    // offset within a 64-byte cacheline, and the bytecode offset is adjusted
    // so that the bytecode itself lands on a cacheline boundary.
    size_t shift = (uintptr_t)db->bytes & 0x3f;
    DEBUG_PRINTF("shift is %zu\n", shift);

    db->bytecode = offsetof(struct hs_database, bytes) - shift;
    char *bytecode = (char *)db + db->bytecode;
    assert(ISALIGNED_CL(bytecode));

    db->magic = HS_DB_MAGIC;
    db->version = HS_DB_VERSION;
    db->length = len;
    db->platform = platform;

    // Copy bytecode
    memcpy(bytecode, in_bytecode, len);

    // checksum over the bytecode region only; used later to validate the db
    db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
    return db;
}
struct hs_database *build(NG &ng, unsigned int *length) {
assert(length);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -348,43 +348,6 @@ hs_error_t dbIsValid(const hs_database_t *db) {
return HS_SUCCESS;
}
/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
* \ref hs_database, ensuring that it is padded correctly to give cacheline
* alignment. */
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
size_t db_len = sizeof(struct hs_database) + len;
DEBUG_PRINTF("db size %zu\n", db_len);
DEBUG_PRINTF("db platform %llx\n", platform);
struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
if (hs_check_alloc(db) != HS_SUCCESS) {
hs_database_free(db);
return NULL;
}
// So that none of our database is uninitialized
memset(db, 0, db_len);
// we need to align things manually
size_t shift = (uintptr_t)db->bytes & 0x3f;
DEBUG_PRINTF("shift is %zu\n", shift);
db->bytecode = offsetof(struct hs_database, bytes) - shift;
char *bytecode = (char *)db + db->bytecode;
assert(ISALIGNED_CL(bytecode));
db->magic = HS_DB_MAGIC;
db->version = HS_DB_VERSION;
db->length = len;
db->platform = platform;
// Copy bytecode
memcpy(bytecode, in_bytecode, len);
db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
return db;
}
#if defined(_WIN32)
#define SNPRINTF_COMPAT _snprintf
#else

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -110,7 +110,6 @@ hs_error_t validDatabase(const hs_database_t *db) {
}
hs_error_t dbIsValid(const struct hs_database *db);
struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
#ifdef __cplusplus
} /* extern "C" */

123
src/dispatcher.c Normal file
View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "hs_common.h"
#include "hs_runtime.h"
#include "ue2common.h"
#include "util/cpuid_flags.h"
#include "util/join.h"
#define CREATE_DISPATCH(RTYPE, NAME, ...) \
/* create defns */ \
RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \
RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \
RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \
\
/* error func */ \
static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \
return (RTYPE)HS_ARCH_ERROR; \
} \
\
/* resolver */ \
static void(*JOIN(resolve_, NAME)(void)) { \
if (check_avx2()) { \
return JOIN(avx2_, NAME); \
} \
if (check_sse42() && check_popcnt()) { \
return JOIN(corei7_, NAME); \
} \
if (check_ssse3()) { \
return JOIN(core2_, NAME); \
} \
/* anything else is fail */ \
return JOIN(error_, NAME); \
} \
\
/* function */ \
HS_PUBLIC_API \
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
unsigned length, unsigned flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *userCtx);
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
size_t *stream_size);
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
size_t *size);
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
unsigned int flags, hs_stream_t **stream);
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
const char *const *data, const unsigned int *length,
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
match_event_handler onevent, void *context);
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
const hs_stream_t *from_id);
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
const hs_stream_t *from_id, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
char **bytes, size_t *length);
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
const size_t length, hs_database_t **db);
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
const size_t length, hs_database_t *db);
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
size_t length, char **info);
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
const size_t length, size_t *deserialized_size);
/** INTERNALS **/
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);

View File

@ -31,7 +31,6 @@
#include "fdr_confirm_runtime.h"
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "fdr_streaming_runtime.h"
#include "flood_runtime.h"
#include "teddy.h"
#include "teddy_internal.h"
@ -809,8 +808,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
len,
hbuf,
0,
hbuf, // nocase
0,
start,
cb,
ctxt,
@ -828,14 +825,12 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state) {
hwlm_group_t groups) {
struct FDR_Runtime_Args a = {
buf,
len,
hbuf,
hlen,
hbuf, // nocase - start same as caseful, override later if needed
hlen, // nocase
start,
cb,
ctxt,
@ -844,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
* the history buffer (they may be garbage). */
hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0
};
fdrUnpackState(fdr, &a, stream_state);
hwlm_error_t ret;
if (unlikely(a.start_offset >= a.len)) {
@ -854,6 +848,5 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
ret = funcs[fdr->engineID](fdr, &a, groups);
}
fdrPackState(fdr, &a, stream_state);
return ret;
}

View File

@ -43,10 +43,6 @@ extern "C" {
struct FDR;
/** \brief Returns non-zero if the contents of the stream state indicate that
* there is active FDR history beyond the regularly used history. */
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
/**
* \brief Block-mode scan.
*
@ -74,12 +70,11 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
* \param stream_state Persistent stream state for use by FDR.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state);
hwlm_group_t groups);
#ifdef __cplusplus
}

View File

@ -39,6 +39,7 @@
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/dump_mask.h"
@ -495,14 +496,34 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
} // namespace
// Length of the longest mask (msk vector) over all given literals; zero when
// no literal carries a mask.
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
    size_t longest = 0;
    for (const auto &lit : lits) {
        if (lit.msk.size() > longest) {
            longest = lit.msk.size();
        }
    }
    return longest;
}
// Record in the streaming control block how much history is needed to
// confirm matches: enough for the longest literal or the longest mask,
// whichever is greater.
static
void setHistoryRequired(hwlmStreamingControl &stream_ctl,
                        const vector<hwlmLiteral> &lits) {
    size_t max_mask_len = maxMaskLen(lits);

    // we want enough history to manage the longest literal and the longest
    // mask.
    stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
}
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) {
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
if (stream_control) {
link = fdrBuildTableStreaming(lits, *stream_control);
setHistoryRequired(*stream_control, lits);
}
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -52,19 +52,18 @@ typedef enum LitInfoFlags {
/**
* \brief Structure describing a literal, linked to by FDRConfirm.
*
* This structure is followed in memory by a variable-sized string prefix at
* LitInfo::s, for strings that are longer than CONF_TYPE.
* This structure is followed in memory by a variable-sized string prefix, for
* strings that are longer than CONF_TYPE.
*/
struct LitInfo {
CONF_TYPE v;
CONF_TYPE msk;
hwlm_group_t groups;
u32 size;
u32 id; // literal ID as passed in
u8 size;
u8 flags; /* LitInfoFlags */
u8 next;
u8 extended_size;
u8 s[1]; // literal prefix, which continues "beyond" this struct.
};
#define FDRC_FLAG_NO_CONFIRM 1

View File

@ -107,7 +107,7 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
info.extended_size = verify_u8(lit.msk.size());
}
info.flags = flags;
info.size = verify_u32(lit.s.size());
info.size = verify_u8(lit.s.size());
info.groups = lit.groups;
// these are built up assuming a LE machine
@ -333,13 +333,13 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
const string &t = lits[litIdx].s;
if (t.size() > sizeof(CONF_TYPE)) {
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
ptr = &finalLI.s[0] + prefix_len;
memcpy(ptr, t.c_str(), prefix_len);
ptr += prefix_len;
}
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (next(i) == e) {
finalLI.next = 0x0;
finalLI.next = 0;
} else {
// our next field represents an adjustment on top of
// current address + the actual size of the literal

View File

@ -74,10 +74,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
if (loc < buf) {
u32 full_overhang = buf - loc;
const u8 *history = caseless ? a->buf_history_nocase
: a->buf_history;
size_t len_history = caseless ? a->len_history_nocase
: a->len_history;
const u8 *history = a->buf_history;
size_t len_history = a->len_history;
// can't do a vectored confirm either if we don't have
// the bytes
@ -88,7 +86,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
// as for the regular case, no need to do a full confirm if
// we're a short literal
if (unlikely(li->size > sizeof(CONF_TYPE))) {
const u8 *s1 = li->s;
const u8 *s1 = (const u8 *)li + sizeof(*li);
const u8 *s2 = s1 + full_overhang;
const u8 *loc1 = history + len_history - full_overhang;
const u8 *loc2 = buf;
@ -108,7 +106,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
// if string < conf_type we don't need regular string cmp
if (unlikely(li->size > sizeof(CONF_TYPE))) {
if (cmpForward(loc, li->s, li->size - sizeof(CONF_TYPE),
const u8 *s = (const u8 *)li + sizeof(*li);
if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE),
caseless)) {
goto out;
}
@ -123,8 +122,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = caseless ? a->len_history_nocase
: a->len_history;
size_t len_history = a->len_history;
if (full_overhang > len_history) {
goto out;
}

View File

@ -100,8 +100,6 @@ struct FDR_Runtime_Args {
size_t len;
const u8 *buf_history;
size_t len_history;
const u8 *buf_history_nocase;
size_t len_history_nocase;
size_t start_offset;
HWLMCallback cb;
void *ctxt;

View File

@ -1,425 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_internal.h"
#include "fdr_streaming_internal.h"
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <deque>
#include <set>
#include <boost/dynamic_bitset.hpp>
using namespace std;
using boost::dynamic_bitset;
namespace ue2 {
namespace {

// Ordering for long literals: caseful (nocase == false) literals sort before
// caseless ones, and within each group literals sort lexicographically.
struct LongLitOrder {
    bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
        if (i1.nocase != i2.nocase) {
            return i1.nocase < i2.nocase;
        } else {
            return i1.s < i2.s;
        }
    }
};

}
static
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
return l1.s == l2.s && l1.nocase == l2.nocase;
}
// Round x up to the next power of two (returns x itself when it already is
// one). Smears the highest set bit of (x - 1) into every lower position,
// then adds one.
static
u32 roundUpToPowerOfTwo(u32 x) {
    u32 v = x - 1;
    for (u32 shift = 1; shift < 32; shift <<= 1) {
        v |= v >> shift;
    }
    return v + 1;
}
/**
 * \brief Creates a long literals vector containing all literals of length > max_len.
 *
 * The last char of each literal is trimmed as we're not interested in full
 * matches, only partial matches.
 *
 * Literals are sorted (by caseful/caseless, then lexicographical order) and
 * made unique.
 *
 * The ID of each literal is set to its position in the vector.
 *
 * \return False if there aren't any long literals.
 */
static
bool setupLongLits(const vector<hwlmLiteral> &lits,
                   vector<hwlmLiteral> &long_lits, size_t max_len) {
    long_lits.reserve(lits.size());
    for (const auto &lit : lits) {
        if (lit.s.length() > max_len) {
            hwlmLiteral tmp = lit; // copy
            tmp.s.pop_back(); // trim last char: only partial matches needed
            tmp.id = 0; // recalc later
            tmp.groups = 0; // filled in later by hash bucket(s)
            long_lits.push_back(move(tmp));
        }
    }

    if (long_lits.empty()) {
        return false;
    }

    // sort long_literals by caseful/caseless and in lexicographical order,
    // remove duplicates
    stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
    auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
    long_lits.erase(new_end, long_lits.end());

    // fill in ids; not currently used
    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
        i->id = distance(long_lits.begin(), i);
    }
    return true;
}
// boundaries are the 'start' boundaries for each 'mode'
// so boundary[CASEFUL] is the index one above the largest caseful index
// positions[CASEFUL] is the # of positions in caseful strings (stream)
// hashedPositions[CASEFUL] is the # of positions in caseful strings
// (not returned - a temporary)
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
// (rounded up to the nearest power of two)
static
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
                 u32 *boundaries, u32 *positions, u32 *hashEntries) {
    u32 hashedPositions[MAX_MODES];

    // default: treat everything as caseful until a caseless literal is seen
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        boundaries[m] = verify_u32(long_lits.size());
        positions[m] = 0;
        hashedPositions[m] = 0;
    }

    // long_lits is sorted caseful-first (LongLitOrder), so the first caseless
    // literal marks the caseful/caseless boundary.
    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
        if (i->nocase) {
            boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
            break;
        }
    }

    for (const auto &lit : long_lits) {
        Modes m = lit.nocase ? CASELESS : CASEFUL;
        // one hashed position per offset 1 .. (len - max_len) into the string
        for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
            hashedPositions[m]++;
        }
        positions[m] += lit.s.size();
    }

    for (u32 m = CASEFUL; m < MAX_MODES; m++) {
        hashEntries[m] = hashedPositions[m]
                ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
                : 0;
    }

#ifdef DEBUG_COMPILE
    printf("analyzeLits:\n");
    for (Modes m = CASEFUL; m < MAX_MODES; m++) {
        printf("mode %s boundary %d positions %d hashedPositions %d "
               "hashEntries %d\n",
               (m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
               positions[m], hashedPositions[m], hashEntries[m]);
    }
    printf("\n");
#endif
}
// Hash max_len bytes of literal l starting at the given byte offset, using
// the mode-specific (caseful/caseless) streaming hash.
static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
    return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
}
// sort by 'distance from start'
namespace {

// Orders (literal id, offset) pairs so that larger offsets (positions deeper
// into the literal) come first; ties break on smaller literal id.
struct OffsetIDFromEndOrder {
    const vector<hwlmLiteral> &lits; // not currently used
    explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
        : lits(lits_in) {}
    bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
        if (i1.second != i2.second) {
            // longest is 'first', so > not <
            return i1.second > i2.second;
        }
        return i1.first < i2.first;
    }
};

}
// Populate one mode's hash table (tab, numEntries slots) with the hashed
// positions of all long literals in that mode. Colliding entries are chained
// through nearby free slots via the 'link' field.
static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
                FDRSHashEntry *tab, size_t numEntries, Modes mode,
                map<u32, u32> &litToOffsetVal) {
    const u32 nbits = lg2(numEntries);
    map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
    map<u32, u64a> bucketToBitfield;

    for (const auto &lit : long_lits) {
        // skip literals belonging to the other mode
        if ((mode == CASELESS) != lit.nocase) {
            continue;
        }
        for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
            u32 h = hashLit(lit, j, max_len, mode);
            u32 h_ent = h & ((1U << nbits) - 1); // low bits select the bucket
            u32 h_low = (h >> nbits) & 63;       // next 6 bits -> bitfield bit
            bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
            bucketToBitfield[h_ent] |= (1ULL << h_low);
        }
    }

    // this used to be a set<u32>, but a bitset is much much faster given that
    // we're using it only for membership testing.
    dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.

    // sweep out bitfield entries and save the results swapped accordingly
    // also, anything with bitfield entries is put in filledBuckets
    for (const auto &m : bucketToBitfield) {
        const u32 &bucket = m.first;
        const u64a &contents = m.second;
        tab[bucket].bitfield = contents;
        filledBuckets.set(bucket);
    }

    // store out all our chains based on free values in our hash table.
    // find nearest free locations that are empty (there will always be more
    // entries than strings, at present)
    for (auto &m : bucketToLitOffPairs) {
        u32 bucket = m.first;
        deque<pair<u32, u32>> &d = m.second;

        // sort d by distance of the residual string (len minus our depth into
        // the string). We need to put the 'furthest back' string first...
        stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));

        while (1) {
            // first time through is always at bucket, then we fill in links
            filledBuckets.set(bucket);
            FDRSHashEntry *ent = &tab[bucket];
            u32 lit_id = d.front().first;
            u32 offset = d.front().second;

            ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
            ent->link = (u32)LINK_INVALID;

            d.pop_front();
            if (d.empty()) {
                break;
            }
            // now, if there is another value
            // find a bucket for it and put in 'bucket' and repeat
            // all we really need to do is find something not in filledBuckets,
            // ideally something close to bucket
            // we search backward and forward from bucket, trying to stay as
            // close as possible.
            UNUSED bool found = false;
            int bucket_candidate = 0;
            for (u32 k = 1; k < numEntries * 2; k++) {
                // alternate -1, +1, -2, +2, ... around the home bucket
                bucket_candidate = bucket + (((k & 1) == 0)
                        ? (-(int)k / 2) : (k / 2));
                if (bucket_candidate < 0 ||
                    (size_t)bucket_candidate >= numEntries) {
                    continue;
                }
                if (!filledBuckets.test(bucket_candidate)) {
                    found = true;
                    break;
                }
            }

            assert(found);
            bucket = bucket_candidate;
            ent->link = bucket;
        }
    }
}
// Longest mask (msk) length over all the given literals; zero if none carry
// a mask.
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
    size_t longest = 0;
    for (const auto &lit : lits) {
        if (lit.msk.size() > longest) {
            longest = lit.msk.size();
        }
    }
    return longest;
}
// Build the streaming long-literal secondary table: a header, a literal
// offset table, the literal strings themselves, and one hash table per mode
// (caseful/caseless). Returns the table buffer and its size; returns a null
// buffer when no literal is long enough to need one. Also reports the
// required history and stream state sizes via stream_control.
pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
                       hwlmStreamingControl &stream_control) {
    // refuse to compile if we are forced to have smaller than minimum
    // history required for long-literal support, full stop
    // otherwise, choose the maximum of the preferred history quantity
    // (currently a fairly extravagant 32) or the already used history
    // quantity - subject to the limitation of stream_control.history_max
    const size_t MIN_HISTORY_REQUIRED = 32;

    if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
        throw std::logic_error("Cannot set history to minimum history required");
    }

    size_t max_len =
        MIN(stream_control.history_max,
            MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
    assert(max_len >= MIN_HISTORY_REQUIRED);
    size_t max_mask_len = maxMaskLen(lits);

    vector<hwlmLiteral> long_lits;
    if (!setupLongLits(lits, long_lits, max_len) || false) {
        // "Don't need to do anything" path, not really a fail
        DEBUG_PRINTF("Streaming literal path produces no table\n");

        // we want enough history to manage the longest literal and the longest
        // mask.
        stream_control.literal_history_required =
                    max(maxLen(lits), max_mask_len) - 1;
        stream_control.literal_stream_state_required = 0;

        return {nullptr, size_t{0}};
    }

    // Ensure that we have enough room for the longest mask.
    if (max_mask_len) {
        max_len = max(max_len, max_mask_len - 1);
    }

    u32 boundary[MAX_MODES];
    u32 positions[MAX_MODES];
    u32 hashEntries[MAX_MODES];

    analyzeLits(long_lits, max_len, boundary, positions, hashEntries);

    // first assess the size and find our caseless threshold
    size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));

    size_t litTabOffset = headerSize;

    // +1 entry: a sentinel at the end serves as the end-offset marker below
    size_t litTabNumEntries = long_lits.size() + 1;
    size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));

    size_t wholeLitTabOffset = litTabOffset + litTabSize;
    size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
                                             positions[CASELESS]);

    size_t htOffset[MAX_MODES];
    size_t htSize[MAX_MODES];

    htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
    htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
    htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
    htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);

    size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);

    // need to add +2 to both of these to allow space for the actual largest
    // value as well as handling the fact that we add one to the space when
    // storing out a position to allow zero to mean "no stream state value"
    u8 streamBits[MAX_MODES];
    streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
    streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
    u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;

    auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
    assert(secondaryTable); // otherwise would have thrown std::bad_alloc

    // then fill it in
    u8 * ptr = secondaryTable.get();
    FDRSTableHeader * header = (FDRSTableHeader *)ptr;

    // fill in header
    header->pseudoEngineID = (u32)0xffffffff;
    header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        header->boundary[m] = boundary[m];
        header->hashOffset[m] = verify_u32(htOffset[m]);
        header->hashNBits[m] = lg2(hashEntries[m]);
        header->streamStateBits[m] = streamBits[m];
    }
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8

    ptr += headerSize;

    // now fill in the rest

    FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
    ptr += litTabSize;

    map<u32, u32> litToOffsetVal;
    for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
        u32 entry = verify_u32(i - long_lits.begin());
        u32 offset = verify_u32(ptr - secondaryTable.get());

        // point the table entry to the string location
        litTabPtr[entry].offset = offset;
        litToOffsetVal[entry] = offset;

        // copy the string into the string location
        memcpy(ptr, i->s.c_str(), i->s.size());

        ptr += i->s.size(); // and the string location
    }

    // fill in final lit table entry with current ptr (serves as end value)
    litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());

    // fill hash tables
    ptr = secondaryTable.get() + htOffset[CASEFUL];
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
                   (Modes)m, litToOffsetVal);
        ptr += htSize[m];
    }

    // tell the world what we did
    stream_control.literal_history_required = max_len;
    stream_control.literal_stream_state_required = tot_state_bytes;
    return {move(secondaryTable), tabSize};
}
} // namespace ue2

View File

@ -1,152 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_INTERNAL_H
#define FDR_STREAMING_INTERNAL_H
#include "ue2common.h"
#include "fdr_internal.h"
#include "util/unaligned.h"
// tertiary table:
// a header (FDRSTableHeader)
// long_lits.size()+1 entries holding an offset to the string in the
// 'whole literal table' (FDRSLiteral structure)
// the whole literal table - every string packed in (freeform)
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)
// Case-sensitivity modes for the streaming literal tables. Values index the
// per-mode arrays in FDRSTableHeader (boundary[], hashOffset[], etc.).
enum Modes {
CASEFUL = 0, // case-sensitive literals
CASELESS = 1, // case-insensitive literals
MAX_MODES = 2 // array dimension only; not a real mode
};
// We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings
struct FDRSTableHeader {
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
// string id one beyond the maximum entry for this type of literal
// boundary[CASEFUL] is the end of the caseful literals
// boundary[CASELESS] is the end of the caseless literals and one beyond
// the largest literal id (the size of the littab)
u32 boundary[MAX_MODES];
// offsets are 0 if no such table exists
// offset from the base of the tertiary structure to the hash table
u32 hashOffset[MAX_MODES];
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
// bits of packed stream state per mode; the caseful value occupies the low
// bits and the caseless value the bits above it (see getStreamStates)
u8 streamStateBits[MAX_MODES];
u8 streamStateBytes; // total size of packed stream state in bytes
u8 N; // prefix lengths
u16 pad; // explicit padding to keep the layout stable
};
// One of these structures per literal entry in our secondary FDR table.
struct FDRSLiteral {
// offset from the base of the tertiary structure to this literal's bytes
// in the whole-literal table; the table holds long_lits.size()+1 entries,
// the final one acting as an end sentinel so lengths can be derived by
// subtracting adjacent offsets
u32 offset;
// potentially - another u32 to point to the 'next lesser included literal'
// which would be a literal that overlaps this one in such a way that a
// failure to match _this_ literal can leave us in a state that we might
// still match that literal. Offset information might also be called for,
// in which case we might be wanting to use a FDRSLiteralOffset
};
typedef u32 FDRSLiteralOffset;
#define LINK_INVALID 0xffffffff
// One of these structures per hash table entry in our secondary FDR table
struct FDRSHashEntry {
// 64-bit presence filter: 6 further bits of the hash select a bit here,
// allowing a quick reject before any string comparison (see has_bit)
u64a bitfield;
// candidate state value: offset one past the end of a literal prefix in
// the whole-literal table
FDRSLiteralOffset state;
// index of the next chained entry in the same hash table, or LINK_INVALID
// to terminate the chain
u32 link;
};
static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
    // Caseful literals start at index 0; caseless literals begin where the
    // caseful range ends (boundary of the previous mode).
    if (m == CASEFUL) {
        return 0;
    }
    return h->boundary[m - 1];
}
static really_inline
// Returns one past the last literal index for mode m (see the boundary[]
// comments in FDRSTableHeader).
u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return h->boundary[m];
}
static really_inline
// Returns the literal offset table, which immediately follows the header
// (rounded up to a 16-byte boundary).
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
return (const struct FDRSLiteral *) (((const u8 *)h) +
ROUNDUP_16(sizeof(struct FDRSTableHeader)));
}
static really_inline
// Offset of the first literal belonging to mode m; used as the bias when
// packing/unpacking stream state values for that mode.
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
}
static really_inline
// Packs a state value for storage: rebase against the mode's first literal
// offset and add one, so that a stored zero can mean "no stream state value".
u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v - getBaseOffsetOfLits(h, m) + 1;
}
static really_inline
// Inverse of packStateVal: restore the absolute state value from the packed
// form (undo the +1 zero-reservation and the per-mode base offset).
u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v + getBaseOffsetOfLits(h, m) - 1;
}
static really_inline
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
    // Test one bit of the 64-bit presence filter; yields exactly 0 or 1.
    return (u32)((ent->bitfield & (1ULL << bit)) != 0);
}
static really_inline
// Multiplicative hash over the first 24 bytes at ptr (three unaligned u64a
// loads at offsets 0, 8 and 16). In CASELESS mode each lane is first masked
// with 0xdf bytes, clearing the 0x20 ASCII case bit so upper/lower case hash
// identically. NOTE(review): only 24 bytes are read but the assert demands
// len >= 32 — presumably a caller-side guarantee; confirm before reuse.
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
assert(len >= 32);
u64a v1 = unaligned_load_u64a(ptr);
u64a v2 = unaligned_load_u64a(ptr + 8);
u64a v3 = unaligned_load_u64a(ptr + 16);
if (mode == CASELESS) {
v1 &= CASEMASK;
v2 &= CASEMASK;
v3 &= CASEMASK;
}
// Each lane gets a distinct power of the multiplier, then the mixed high
// 32 bits of all three lanes are combined.
v1 *= MULTIPLIER;
v2 *= (MULTIPLIER*MULTIPLIER);
v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
v1 >>= 32;
v2 >>= 32;
v3 >>= 32;
return v1 ^ v2 ^ v3;
}
#endif

View File

@ -1,368 +0,0 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_RUNTIME_H
#define FDR_STREAMING_RUNTIME_H
#include "fdr_streaming_internal.h"
#include "util/partial_store.h"
#include <string.h>
static really_inline
// Returns the streaming table header hanging off the FDR structure at byte
// offset fdr->link.
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
    const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
    // Fix: check the pointer before dereferencing it. The original asserted
    // on *linkPtr first, so a NULL/garbage link would fault before the
    // assert(linkPtr) sanity check ever ran in debug builds.
    assert(linkPtr);
    // test if it's not really a engineID, but a 'pseudo engine id'
    assert(*(const u32 *)linkPtr == 0xffffffff);
    return (const struct FDRSTableHeader *)linkPtr;
}
// Reads from stream state and unpacks values into stream state table.
// Reads from stream state and unpacks values into stream state table.
static really_inline
// Layout: the caseful value occupies the low streamStateBits[CASEFUL] bits of
// the packed word, the caseless value the bits above it. Total packed size is
// streamStateBytes (asserted consistent below).
void getStreamStates(const struct FDRSTableHeader * streamingTable,
const u8 * stream_state, u32 * table) {
assert(streamingTable);
assert(stream_state);
assert(table);
u8 ss_bytes = streamingTable->streamStateBytes;
u8 ssb = streamingTable->streamStateBits[CASEFUL];
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 ssb_mask = (1U << ssb) - 1;
u32 streamVal = partial_load_u32(stream_state, ss_bytes);
table[CASEFUL] = (u32)(streamVal & ssb_mask);
table[CASELESS] = (u32)(streamVal >> ssb);
return;
}
#endif
// General path: load up to 8 bytes and split into the two mode values.
u64a ssb_mask = (1ULL << ssb) - 1;
u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
table[CASEFUL] = (u32)(streamVal & ssb_mask);
table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
}
#ifndef NDEBUG
// Defensive checking (used in assert) that these table values don't overflow
// outside the range available.
// Defensive checking (used in assert) that these table values don't overflow
// outside the range available.
static really_inline UNUSED
// Returns 1 if either mode's unpacked value has bits set above its allotted
// stream-state bit width (ssb for caseful, ssb_nc for caseless), else 0.
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
u32 ssb_mask = (1ULL << (ssb)) - 1;
if (table[CASEFUL] & ~ssb_mask) {
return 1;
}
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
if (table[CASELESS] & ~ssb_nc_mask) {
return 1;
}
return 0;
}
#endif
// Reads from stream state table and packs values into stream state.
// Reads from stream state table and packs values into stream state.
static really_inline
// Inverse of getStreamStates: caseful value into the low ssb bits, caseless
// shifted above it, stored in streamStateBytes bytes.
void setStreamStates(const struct FDRSTableHeader * streamingTable,
u8 * stream_state, u32 * table) {
assert(streamingTable);
assert(stream_state);
assert(table);
u8 ss_bytes = streamingTable->streamStateBytes;
u8 ssb = streamingTable->streamStateBits[CASEFUL];
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
assert(!streamingTableOverflow(table, ssb, ssb_nc));
#if defined(ARCH_32_BIT)
// On 32-bit hosts, we may be able to avoid having to do any u64a
// manipulation at all.
if (ss_bytes <= 4) {
u32 stagingStreamState = table[CASEFUL];
stagingStreamState |= (table[CASELESS] << ssb);
partial_store_u32(stream_state, stagingStreamState, ss_bytes);
return;
}
#endif
u64a stagingStreamState = (u64a)table[CASEFUL];
stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
}
// Returns 1 if any packed stream state is present, 0 otherwise (including
// when no stream state buffer was supplied at all).
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
    if (!stream_state) {
        return 0;
    }
    const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
    u8 ss_bytes = streamingTable->streamStateBytes;
    // Any non-zero byte means a state value is packed in. A byte-wise scan
    // is faster than a partial_load_u64a, especially on 32-bit hosts.
    for (u32 i = 0; i < ss_bytes; i++) {
        if (stream_state[i]) {
            return 1;
        }
    }
    return 0;
}
// binary search for the literal index that contains the current state
// binary search for the literal index that contains the current state
static really_inline
// Given a state value (an offset one past the matched prefix bytes), find the
// literal table index e in mode m's range such that
// litTab[e].offset <= stateValue-1 < litTab[e+1].offset.
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
u32 stateValue, enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 lo = get_start_lit_idx(streamingTable, m);
u32 hi = get_end_lit_idx(streamingTable, m);
// Now move stateValue back by one so that we're looking for the
// litTab entry that includes it the string, not the one 'one past' it
stateValue -= 1;
assert(lo != hi);
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
// binary search to find the entry e such that:
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
while (lo + 1 < hi) {
u32 mid = (lo + hi) / 2;
if (litTab[mid].offset <= stateValue) {
lo = mid;
} else { //(litTab[mid].offset > stateValue) {
hi = mid;
}
}
assert(litTab[lo].offset <= stateValue);
assert(litTab[hi].offset > stateValue);
return lo;
}
static really_inline
// Restores one mode's history pointer/length in the runtime args from its
// packed state value: locate the literal containing the state, point the
// history buffer at that literal's bytes within the streaming table, with
// length equal to the number of prefix bytes already matched. A zero state
// means "no stream state value" and leaves the args untouched.
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSLiteral * litTab,
const u32 *state_table,
const enum Modes m) {
if (!state_table[m]) {
return;
}
u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
u32 idx = findLitTabEntry(streamingTable, stateValue, m);
size_t found_offset = litTab[idx].offset;
const u8 * found_buf = found_offset + (const u8 *)streamingTable;
size_t found_sz = stateValue - found_offset;
// Caseful and caseless histories live in separate fields of the args.
if (m == CASEFUL) {
a->buf_history = found_buf;
a->len_history = found_sz;
} else {
a->buf_history_nocase = found_buf;
a->len_history_nocase = found_sz;
}
}
static really_inline
// Unpacks both modes' stream state (if any) into the runtime args' history
// buffers before a scan.
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
const u8 * stream_state) {
// nothing to do if there's no stream state for the case
if (!stream_state) {
return;
}
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 state_table[MAX_MODES];
getStreamStates(streamingTable, stream_state, state_table);
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
}
static really_inline
// Verifies a candidate state value from the hash table: compares the literal
// prefix bytes (length = hashState - literal offset) against the tail of the
// scanned buffer, spilling into the appropriate history buffer if the prefix
// is longer than the current block. Returns hashState on a match, 0 on
// mismatch or insufficient total history. cmpForward() != 0 is treated as a
// mismatch here.
u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
const struct FDR_Runtime_Args *a, u32 hashState,
enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 idx = findLitTabEntry(streamingTable, hashState, m);
size_t found_offset = litTab[idx].offset;
const u8 * s1 = found_offset + (const u8 *)streamingTable;
assert(hashState > found_offset);
size_t l1 = hashState - found_offset;
const u8 * buf = a->buf;
size_t len = a->len;
const char nocase = m != CASEFUL;
if (l1 > len) {
// Prefix extends before the current block: compare the overhang
// against the end of the (mode-appropriate) history first.
const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
if (l1 > len+hist_len) {
return 0; // Break out - not enough total history
}
size_t overhang = l1 - len;
assert(overhang <= hist_len);
if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
return 0;
}
s1 += overhang;
l1 -= overhang;
}
// if we got here, we don't need history or we compared ok out of history
assert(l1 <= len);
if (cmpForward(buf + len - l1, s1, l1, nocase)) {
return 0;
}
return hashState; // our new state
}
static really_inline
// Computes the per-mode streaming hashes over the final hash_len bytes of the
// logical stream. If those bytes span the history/buffer boundary they are
// first assembled contiguously in a 128-byte temp buffer (hash_len <= 128 is
// asserted). A mode's hash is only computed if its hash table exists.
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
u8 hash_len, u32 *hashes) {
u8 tempbuf[128];
const u8 *base;
if (hash_len > a->len) {
assert(hash_len <= 128);
size_t overhang = hash_len - a->len;
assert(overhang <= a->len_history);
memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
memcpy(tempbuf + overhang, a->buf, a->len);
base = tempbuf;
} else {
assert(hash_len <= a->len);
base = a->buf + a->len - hash_len;
}
if (streamingTable->hashNBits[CASEFUL]) {
hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
}
if (streamingTable->hashNBits[CASELESS]) {
hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
}
}
static really_inline
// Looks up hash value h in mode m's hash table. The low hashNBits of h index
// the table; the next 6 bits select a bit in the entry's 64-bit presence
// filter. Returns NULL if the mode has no table or the filter bit is clear.
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
u32 h, const enum Modes m) {
u32 nbits = streamingTable->hashNBits[m];
if (!nbits) {
return NULL;
}
u32 h_ent = h & ((1 << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
const struct FDRSHashEntry *tab =
(const struct FDRSHashEntry *)((const u8 *)streamingTable
+ streamingTable->hashOffset[m]);
const struct FDRSHashEntry *ent = tab + h_ent;
if (!has_bit(ent, h_low)) {
return NULL;
}
return ent;
}
static really_inline
// Walks the hash chain starting at ent, confirming each candidate state
// against the actual stream bytes; the first confirmed candidate is stored
// (packed) into state_table[m]. Chains terminate at LINK_INVALID; if no
// candidate confirms, state_table[m] is left unchanged.
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSHashEntry *ent, const enum Modes m) {
assert(ent);
assert(streamingTable->hashNBits[m]);
const struct FDRSHashEntry *tab =
(const struct FDRSHashEntry *)((const u8 *)streamingTable
+ streamingTable->hashOffset[m]);
while (1) {
u32 tmp = 0;
if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
state_table[m] = packStateVal(streamingTable, m, tmp);
break;
}
if (ent->link == LINK_INVALID) {
break;
}
ent = tab + ent->link;
}
}
static really_inline
// At the end of a scan, computes the stream state to carry forward: hash the
// last N bytes of the stream, look up per-mode hash table entries, confirm
// candidates against the data, and pack the confirmed states into
// stream_state. If fewer than N bytes exist in total, zero states are stored.
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
u8 *stream_state) {
// nothing to do if there's no stream state for the case
if (!stream_state) {
return;
}
// get pointers to the streamer FDR and the tertiary structure
const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
assert(streamingTable->N);
u32 state_table[MAX_MODES] = {0, 0};
// if we don't have enough history, we don't need to do anything
if (streamingTable->N <= a->len + a->len_history) {
u32 hashes[MAX_MODES] = {0, 0};
fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
hashes[CASEFUL], CASEFUL);
const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
hashes[CASELESS], CASELESS);
if (ent_ful) {
fdrPackStateMode(state_table, a, streamingTable, ent_ful,
CASEFUL);
}
if (ent_less) {
fdrPackStateMode(state_table, a, streamingTable, ent_less,
CASELESS);
}
}
// Always store (possibly zero) states so stale state is cleared.
setStreamStates(streamingTable, stream_state, state_table);
}
#endif

View File

@ -51,6 +51,7 @@ Grey::Grey(void) :
allowLbr(true),
allowMcClellan(true),
allowSheng(true),
allowMcSheng(true),
allowPuff(true),
allowLiteral(true),
allowRose(true),
@ -217,6 +218,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
G_UPDATE(allowMcSheng);
G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowRose);

View File

@ -51,6 +51,7 @@ struct Grey {
bool allowLbr;
bool allowMcClellan;
bool allowSheng;
bool allowMcSheng;
bool allowPuff;
bool allowLiteral;
bool allowRose;

View File

@ -192,6 +192,14 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
return HS_COMPILER_ERROR;
}
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
*db = nullptr;
*comp_error = generateCompileError("Unsupported architecture", -1);
return HS_ARCH_ERROR;
}
#endif
if (!checkMode(mode, comp_error)) {
*db = nullptr;
assert(*comp_error); // set by checkMode.
@ -319,6 +327,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
return HS_COMPILER_ERROR;
}
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
*error = generateCompileError("Unsupported architecture", -1);
return HS_ARCH_ERROR;
}
#endif
if (!info) {
*error = generateCompileError("Invalid parameter: info is NULL", -1);
return HS_COMPILER_ERROR;
@ -426,6 +441,11 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
extern "C" HS_PUBLIC_API
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
return HS_ARCH_ERROR;
}
#endif
freeCompileError(error);
return HS_SUCCESS;
}

View File

@ -435,6 +435,23 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
*/
const char *hs_version(void);
/**
* Utility function to test the current system architecture.
*
* Hyperscan requires the Supplemental Streaming SIMD Extensions 3 instruction
* set. This function can be called on any x86 platform to determine if the
* system provides the required instruction set.
*
* This function does not test for more advanced features if Hyperscan has
* been built for a more specific architecture, for example the AVX2
* instruction set.
*
* @return
* @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not
* support Hyperscan.
*/
hs_error_t hs_valid_platform(void);
/**
* @defgroup HS_ERROR hs_error_t values
*
@ -519,6 +536,17 @@ const char *hs_version(void);
*/
#define HS_SCRATCH_IN_USE (-10)
/**
* Unsupported CPU architecture.
*
* This error is returned when Hyperscan is able to detect that the current
* system does not support the required instruction set.
*
* At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3
* (SSSE3).
*/
#define HS_ARCH_ERROR (-11)
/** @} */
#ifdef __cplusplus

40
src/hs_valid_platform.c Normal file
View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "hs_common.h"
#include "util/cpuid_flags.h"
HS_PUBLIC_API
hs_error_t hs_valid_platform(void) {
    /* SSSE3 is Hyperscan's minimum requirement; anything beyond is a bonus. */
    return check_ssse3() ? HS_SUCCESS : HS_ARCH_ERROR;
}

View File

@ -200,8 +200,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
size_t len, size_t start, HWLMCallback cb,
void *ctxt, hwlm_group_t groups,
u8 *stream_state) {
void *ctxt, hwlm_group_t groups) {
const u8 *hbuf = scratch->core_info.hbuf;
const size_t hlen = scratch->core_info.hlen;
const u8 *buf = scratch->core_info.buf;
@ -234,13 +233,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
// if no active stream state, use acceleration
if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
}
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
start, cb, ctxt, groups, stream_state);
start, cb, ctxt, groups);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -132,8 +132,7 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
void *context, hwlm_group_t groups,
u8 *stream_state);
void *context, hwlm_group_t groups);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -461,7 +461,8 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
}
const CharReach &cr = reach[min_offset];
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
if (-1 !=
shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->shufti.accel_type = ACCEL_SHUFTI;
@ -469,7 +470,7 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
return;
}
truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->truffle.accel_type = ACCEL_TRUFFLE;
@ -523,7 +524,7 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
}
if (stream_control) { // nullptr if in block mode
if (lits.front().s.length() + 1 > stream_control->history_max) {
if (lits.front().s.length() > stream_control->history_max + 1) {
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
lits.front().s.length(),
stream_control->history_max);
@ -552,6 +553,12 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
// We should not have been passed any literals that are too long to
// match with a maximally-sized history buffer.
assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
return lit.s.length() <= stream_control->history_max + 1;
}));
}
// Check that we haven't exceeded the maximum number of literals.
@ -602,7 +609,6 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
stream_control->literal_stream_state_required = 0;
}
eng = move(noodle);
} else {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -63,10 +63,6 @@ struct hwlmStreamingControl {
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals. */
size_t literal_history_required;
/** OUT parameter: Stream state required by literal matcher in bytes. Can
* be zero, and generally will be small (0-8 bytes). */
size_t literal_stream_state_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of

View File

@ -86,6 +86,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());

View File

@ -41,6 +41,9 @@
namespace ue2 {
/** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 255
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8

22
src/nfa/accel_dfa_build_strat.cpp Executable file → Normal file
View File

@ -56,15 +56,6 @@ struct path {
};
};
static UNUSED
string describeClasses(const vector<CharReach> &v) {
std::ostringstream oss;
for (const auto &cr : v) {
describeClass(oss, cr);
}
return oss.str();
}
static
void dump_paths(const vector<path> &paths) {
for (UNUSED const auto &p : paths) {
@ -482,9 +473,10 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
}
if (double_byte_ok(info) &&
shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
shuftiBuildDoubleMasks(
info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
(u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
(u8 *)&accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
@ -520,14 +512,16 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
}
accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
(u8 *)&accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}
assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
(u8 *)&accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}

0
src/nfa/accel_dfa_build_strat.h Executable file → Normal file
View File

View File

@ -41,7 +41,7 @@
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_mask.h"
#include "util/simd_utils.h"
#include "util/simd_types.h"
#include <cstdio>
#include <vector>
@ -147,16 +147,20 @@ const char *accelName(u8 accel_type) {
}
static
void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) {
void dumpShuftiCharReach(FILE *f, const u8 *lo, const u8 *hi) {
CharReach cr = shufti2cr(lo, hi);
fprintf(f, "count %zu class %s\n", cr.count(),
describeClass(cr).c_str());
}
static
vector<CharReach> shufti2cr_array(const m128 lo_in, const m128 hi_in) {
const u8 *lo = (const u8 *)&lo_in;
const u8 *hi = (const u8 *)&hi_in;
vector<CharReach> dshufti2cr_array(const u8 *lo_in, const u8 *hi_in) {
u8 lo[16];
u8 hi[16];
for (u32 i = 0; i < 16; i++) {
lo[i] = ~lo_in[i];
hi[i] = ~hi_in[i];
}
vector<CharReach> crs(8);
for (u32 i = 0; i < 256; i++) {
u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4];
@ -169,10 +173,10 @@ vector<CharReach> shufti2cr_array(const m128 lo_in, const m128 hi_in) {
}
static
void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1,
const m128 &lo2, const m128 &hi2) {
vector<CharReach> cr1 = shufti2cr_array(not128(lo1), not128(hi1));
vector<CharReach> cr2 = shufti2cr_array(not128(lo2), not128(hi2));
void dumpDShuftiCharReach(FILE *f, const u8 *lo1, const u8 *hi1,
const u8 *lo2, const u8 *hi2) {
vector<CharReach> cr1 = dshufti2cr_array(lo1, hi1);
vector<CharReach> cr2 = dshufti2cr_array(lo2, hi2);
map<CharReach, set<u32> > cr1_group;
assert(cr1.size() == 8 && cr2.size() == 8);
for (u32 i = 0; i < 8; i++) {
@ -208,26 +212,22 @@ void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1,
}
static
void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) {
fprintf(f, "lo %s\n",
dumpMask((const u8 *)&lo, 128).c_str());
fprintf(f, "hi %s\n",
dumpMask((const u8 *)&hi, 128).c_str());
void dumpShuftiMasks(FILE *f, const u8 *lo, const u8 *hi) {
fprintf(f, "lo %s\n", dumpMask(lo, 128).c_str());
fprintf(f, "hi %s\n", dumpMask(hi, 128).c_str());
}
static
void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) {
void dumpTruffleCharReach(FILE *f, const u8 *hiset, const u8 *hiclear) {
CharReach cr = truffle2cr(hiset, hiclear);
fprintf(f, "count %zu class %s\n", cr.count(),
describeClass(cr).c_str());
}
static
void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) {
fprintf(f, "lo %s\n",
dumpMask((const u8 *)&hiset, 128).c_str());
fprintf(f, "hi %s\n",
dumpMask((const u8 *)&hiclear, 128).c_str());
void dumpTruffleMasks(FILE *f, const u8 *hiset, const u8 *hiclear) {
fprintf(f, "lo %s\n", dumpMask(hiset, 128).c_str());
fprintf(f, "hi %s\n", dumpMask(hiclear, 128).c_str());
}
@ -256,23 +256,31 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
break;
case ACCEL_SHUFTI: {
fprintf(f, "\n");
dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi);
dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi);
dumpShuftiMasks(f, (const u8 *)&accel.shufti.lo,
(const u8 *)&accel.shufti.hi);
dumpShuftiCharReach(f, (const u8 *)&accel.shufti.lo,
(const u8 *)&accel.shufti.hi);
break;
}
case ACCEL_DSHUFTI:
fprintf(f, "\n");
fprintf(f, "mask 1\n");
dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1);
dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo1,
(const u8 *)&accel.dshufti.hi1);
fprintf(f, "mask 2\n");
dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2);
dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1,
accel.dshufti.lo2, accel.dshufti.hi2);
dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo2,
(const u8 *)&accel.dshufti.hi2);
dumpDShuftiCharReach(f, (const u8 *)&accel.dshufti.lo1,
(const u8 *)&accel.dshufti.hi1,
(const u8 *)&accel.dshufti.lo2,
(const u8 *)&accel.dshufti.hi2);
break;
case ACCEL_TRUFFLE: {
fprintf(f, "\n");
dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2);
dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2);
dumpTruffleMasks(f, (const u8 *)&accel.truffle.mask1,
(const u8 *)&accel.truffle.mask2);
dumpTruffleCharReach(f, (const u8 *)&accel.truffle.mask1,
(const u8 *)&accel.truffle.mask2);
break;
}
case ACCEL_MLVERM:
@ -297,28 +305,36 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
case ACCEL_MSSHUFTI:
case ACCEL_MSGSHUFTI:
fprintf(f, " len:%u\n", accel.mshufti.len);
dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi);
dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi);
dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo,
(const u8 *)&accel.mshufti.hi);
dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo,
(const u8 *)&accel.mshufti.hi);
break;
case ACCEL_MDSSHUFTI:
case ACCEL_MDSGSHUFTI:
fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2);
dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi);
dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi);
dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo,
(const u8 *)&accel.mdshufti.hi);
dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo,
(const u8 *)&accel.mdshufti.hi);
break;
case ACCEL_MLTRUFFLE:
case ACCEL_MLGTRUFFLE:
case ACCEL_MSTRUFFLE:
case ACCEL_MSGTRUFFLE:
fprintf(f, " len:%u\n", accel.mtruffle.len);
dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2);
dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1,
(const u8 *)&accel.mtruffle.mask2);
dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1,
(const u8 *)&accel.mtruffle.mask2);
break;
case ACCEL_MDSTRUFFLE:
case ACCEL_MDSGTRUFFLE:
fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2);
dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2);
dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1,
(const u8 *)&accel.mdtruffle.mask2);
dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1,
(const u8 *)&accel.mdtruffle.mask2);
break;
default:
fprintf(f, "\n");

View File

@ -72,8 +72,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
}
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo,
&aux->shufti.hi)) {
if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
(u8 *)&aux->shufti.hi)) {
aux->accel_type = ACCEL_SHUFTI;
aux->shufti.offset = offset;
DEBUG_PRINTF("shufti built OK\n");
@ -86,8 +86,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
aux->accel_type = ACCEL_TRUFFLE;
aux->truffle.offset = offset;
truffleBuildMasks(info.single_stops, &aux->truffle.mask1,
&aux->truffle.mask2);
truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
(u8 *)&aux->truffle.mask2);
return;
}
@ -212,9 +212,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
" two-byte literals\n", outs1, outs2);
aux->accel_type = ACCEL_DSHUFTI;
aux->dshufti.offset = offset;
if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
&aux->dshufti.lo1, &aux->dshufti.hi1,
&aux->dshufti.lo2, &aux->dshufti.hi2)) {
if (shuftiBuildDoubleMasks(
info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1,
(u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2,
(u8 *)&aux->dshufti.hi2)) {
return;
}
}
@ -372,8 +373,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
switch (info.ma_type) {
case MultibyteAccelInfo::MAT_LONG:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLSHUFTI;
@ -381,8 +382,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_LONGGRAB:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MLGSHUFTI;
@ -390,8 +391,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFT:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSSHUFTI;
@ -399,8 +400,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
if (shuftiBuildMasks(stops, &aux->mshufti.lo,
&aux->mshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo,
(u8 *)&aux->mshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MSGSHUFTI;
@ -408,8 +409,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->mshufti.len = info.ma_len1;
return;
case MultibyteAccelInfo::MAT_DSHIFT:
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
&aux->mdshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
(u8 *)&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSSHUFTI;
@ -418,8 +419,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->mdshufti.len2 = info.ma_len2;
return;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
if (shuftiBuildMasks(stops, &aux->mdshufti.lo,
&aux->mdshufti.hi) == -1) {
if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo,
(u8 *)&aux->mdshufti.hi) == -1) {
break;
}
aux->accel_type = ACCEL_MDSGSHUFTI;
@ -441,45 +442,45 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) {
aux->accel_type = ACCEL_MLTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_LONGGRAB:
aux->accel_type = ACCEL_MLGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFT:
aux->accel_type = ACCEL_MSTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_SHIFTGRAB:
aux->accel_type = ACCEL_MSGTRUFFLE;
aux->mtruffle.offset = offset;
aux->mtruffle.len = info.ma_len1;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFT:
aux->accel_type = ACCEL_MDSTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mdtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mdtruffle.mask2);
break;
case MultibyteAccelInfo::MAT_DSHIFTGRAB:
aux->accel_type = ACCEL_MDSGTRUFFLE;
aux->mdtruffle.offset = offset;
aux->mdtruffle.len1 = info.ma_len1;
aux->mdtruffle.len2 = info.ma_len2;
truffleBuildMasks(stops, &aux->mtruffle.mask1,
&aux->mdtruffle.mask2);
truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1,
(u8 *)&aux->mdtruffle.mask2);
break;
default:
// shouldn't happen

View File

@ -745,10 +745,10 @@ void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
}
static really_inline
char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
enum MatchMode mode) {
char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
enum MatchMode mode) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState);
@ -856,14 +856,14 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end,
return mmbit_any_precise(active, c->numRepeats);
}
char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) {
DEBUG_PRINTF("entry\n");
return nfaExecCastle0_Q_i(n, q, end, CALLBACK_OUTPUT);
return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT);
}
char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
DEBUG_PRINTF("entry\n");
return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH);
return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH);
}
static
@ -896,9 +896,9 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
return sp - 1; /* the repeats are never killed */
}
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
if (q->cur == q->end) {
@ -959,9 +959,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) {
return 1;
}
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) {
char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
@ -969,19 +969,19 @@ char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) {
return 0;
}
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
char nfaExecCastle_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
return castleInAccept(c, q, report, q_cur_offset(q));
}
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
@ -1019,9 +1019,9 @@ char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
}
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
@ -1038,10 +1038,10 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
return 0;
}
char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset,
void *state, UNUSED u8 key) {
char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset,
void *state, UNUSED u8 key) {
assert(n && state);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
@ -1070,10 +1070,10 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
repeatPack(packed, info, rctrl, offset);
}
char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q,
s64a loc) {
char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
s64a loc) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry, loc=%lld\n", loc);
const struct Castle *c = getImplNfa(n);
@ -1118,11 +1118,10 @@ void subCastleExpandState(const struct Castle *c, const u32 subIdx,
packed + info->packedCtrlSize, offset));
}
char nfaExecCastle0_expandState(const struct NFA *n, void *dest,
const void *src, u64a offset,
UNUSED u8 key) {
char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src,
u64a offset, UNUSED u8 key) {
assert(n && dest && src);
assert(n->type == CASTLE_NFA_0);
assert(n->type == CASTLE_NFA);
DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset);
const struct Castle *c = getImplNfa(n);

View File

@ -38,24 +38,24 @@ extern "C" {
struct mq;
struct NFA;
char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecCastle0_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecCastle_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecCastle_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecCastle_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
#define nfaExecCastle0_testEOD NFA_API_NO_IMPL
#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL
#define nfaExecCastle_testEOD NFA_API_NO_IMPL
#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL
#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL
#ifdef __cplusplus
}

View File

@ -40,18 +40,18 @@
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/charreach.h"
#include "util/dump_util.h"
#include "util/dump_charclass.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
namespace ue2 {
/* Note: No dot files for castle */
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
UNUSED const std::string &base) {
// No GraphViz output for Castles.
}
using namespace std;
namespace ue2 {
static
void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
@ -68,9 +68,11 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
fprintf(f, "\n");
}
void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
const Castle *c = (const Castle *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
fprintf(f, "Castle multi-tenant repeat engine\n");
fprintf(f, "\n");
fprintf(f, "Number of repeat tenants: %u\n", c->numRepeats);
@ -86,13 +88,15 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c);
break;
case CASTLE_SHUFTI: {
const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi);
const CharReach cr = shufti2cr((const u8 *)&c->u.shuf.mask_lo,
(const u8 *)&c->u.shuf.mask_hi);
fprintf(f, "shufti, scanning for %s (%zu chars)\n",
describeClass(cr).c_str(), cr.count());
break;
}
case CASTLE_TRUFFLE: {
const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2);
const CharReach cr = truffle2cr((const u8 *)&c->u.truffle.mask1,
(const u8 *)&c->u.truffle.mask2);
fprintf(f, "truffle, scanning for %s (%zu chars)\n",
describeClass(cr).c_str(), cr.count());
break;
@ -113,6 +117,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
fprintf(f, "Sub %u:\n", i);
dumpTextSubCastle(sub[i], f);
}
fclose(f);
}
} // namespace ue2

View File

@ -31,16 +31,13 @@
#if defined(DUMP_SUPPORT)
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
void nfaExecCastle_dump(const NFA *nfa, const std::string &base);
} // namespace ue2

View File

@ -49,7 +49,6 @@
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/multibit_internal.h"
#include "util/report_manager.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
@ -58,6 +57,7 @@
#include <stack>
#include <cassert>
#include <boost/graph/adjacency_list.hpp>
#include <boost/range/adaptor/map.hpp>
using namespace std;
@ -100,13 +100,15 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) {
return;
}
if (shuftiBuildMasks(negated, &c->u.shuf.mask_lo, &c->u.shuf.mask_hi) != -1) {
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
(u8 *)&c->u.shuf.mask_hi) != -1) {
c->type = CASTLE_SHUFTI;
return;
}
c->type = CASTLE_TRUFFLE;
truffleBuildMasks(negated, &c->u.truffle.mask1, &c->u.truffle.mask2);
truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1,
(u8 *)&c->u.truffle.mask2);
}
static
@ -576,7 +578,7 @@ buildCastle(const CastleProto &proto,
total_size += byte_length(stale_iter); // stale sparse iter
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
nfa->type = verify_u8(CASTLE_NFA_0);
nfa->type = verify_u8(CASTLE_NFA);
nfa->length = verify_u32(total_size);
nfa->nPositions = verify_u32(subs.size());
nfa->streamStateSize = streamStateSize;
@ -903,8 +905,8 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
u32 min_bound = pr.bounds.min; // always finite
if (min_bound == 0) { // Vacuous case, we can only do this once.
assert(!edge(g.start, g.accept, g).second);
NFAEdge e = add_edge(g.start, g.accept, g).first;
g[e].top = top;
NFAEdge e = add_edge(g.start, g.accept, g);
g[e].tops.insert(top);
g[u].reports.insert(pr.reports.begin(), pr.reports.end());
min_bound = 1;
}
@ -912,9 +914,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
for (u32 i = 0; i < min_bound; i++) {
NFAVertex v = add_vertex(g);
g[v].char_reach = pr.reach;
NFAEdge e = add_edge(u, v, g).first;
NFAEdge e = add_edge(u, v, g);
if (u == g.start) {
g[e].top = top;
g[e].tops.insert(top);
}
u = v;
}
@ -931,9 +933,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
if (head != u) {
add_edge(head, v, g);
}
NFAEdge e = add_edge(u, v, g).first;
NFAEdge e = add_edge(u, v, g);
if (u == g.start) {
g[e].top = top;
g[e].tops.insert(top);
}
u = v;
}
@ -978,15 +980,10 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto,
auto g = ue2::make_unique<NGHolder>(proto.kind);
for (const auto &m : proto.repeats) {
if (m.first >= NFA_MAX_TOP_MASKS) {
DEBUG_PRINTF("top %u too big for an NFA\n", m.first);
return nullptr;
}
addToHolder(*g, m.first, m.second);
}
//dumpGraph("castle_holder.dot", g->g);
//dumpGraph("castle_holder.dot", *g);
// Sanity checks.
assert(allMatchStatesHaveReports(*g));

0
src/nfa/dfa_build_strat.cpp Executable file → Normal file
View File

View File

@ -655,12 +655,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *cur_buf = sp < 0 ? hend : buffer;
char report = 1;
if (mode == CALLBACK_OUTPUT) {
/* we are starting inside the history buffer: matches are suppressed */
report = !(sp < 0);
}
if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
/* this is as far as we go */
q->cur--;
@ -691,8 +685,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *final_look;
if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, &final_look,
report ? mode : NO_MATCHES)
offset + sp, cb, context, &final_look, mode)
== MO_HALT_MATCHING) {
*(u8 *)q->state = 0;
return 0;
@ -724,7 +717,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
if (sp == 0) {
cur_buf = buffer;
report = 1;
}
if (sp != ep) {
@ -789,12 +781,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *cur_buf = sp < 0 ? hend : buffer;
char report = 1;
if (mode == CALLBACK_OUTPUT) {
/* we are starting inside the history buffer: matches are suppressed */
report = !(sp < 0);
}
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
/* this is as far as we go */
@ -822,10 +808,8 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
/* do main buffer region */
const u8 *final_look;
if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, &final_look,
report ? mode : NO_MATCHES)
offset + sp, cb, context, &final_look, mode)
== MO_HALT_MATCHING) {
assert(report);
*(u16 *)q->state = 0;
return 0;
}
@ -856,7 +840,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
if (sp == 0) {
cur_buf = buffer;
report = 1;
}
if (sp != ep) {

View File

@ -275,7 +275,7 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
}
void dump(const GoughGraph &g, const string &base, const Grey &grey) {
if (!grey.dumpFlags) {
if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) {
return;
}
@ -311,9 +311,9 @@ void dump_block(FILE *f, const gough_edge_id &e,
}
}
void dump_blocks(const map<gough_edge_id, vector<gough_ins> > &blocks,
void dump_blocks(const map<gough_edge_id, vector<gough_ins>> &blocks,
const string &base, const Grey &grey) {
if (!grey.dumpFlags) {
if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) {
return;
}

View File

@ -37,6 +37,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/unaligned.h"
#include <cctype>
@ -259,8 +260,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
fprintf(f, "\n");
}
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
static
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -279,6 +280,7 @@ void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
fprintf(f, "}\n");
}
static
void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_8);
@ -303,8 +305,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f);
}
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
static
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -323,6 +325,7 @@ void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
fprintf(f, "}\n");
}
static
void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -348,4 +351,24 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f);
}
void nfaExecGough16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == GOUGH_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecGough16_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecGough16_dumpDot(nfa, f);
fclose(f);
}
void nfaExecGough8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == GOUGH_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecGough8_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecGough8_dumpDot(nfa, f);
fclose(f);
}
} // namespace ue2

View File

@ -39,12 +39,8 @@ struct NFA;
namespace ue2 {
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);
void nfaExecGough8_dump(const NFA *nfa, const std::string &base);
void nfaExecGough16_dump(const NFA *nfa, const std::string &base);
} // namespace ue2

View File

@ -307,7 +307,7 @@ char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
UNUSED size_t begin, UNUSED size_t end,
UNUSED size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Dot);
assert(nfa->type == LBR_NFA_DOT);
// Nothing can kill a dot!
return 0;
}
@ -316,7 +316,7 @@ static really_inline
char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end, size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Verm);
assert(nfa->type == LBR_NFA_VERM);
const struct lbr_verm *l = getImplNfa(nfa);
if (begin == end) {
@ -340,7 +340,7 @@ static really_inline
char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end, size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_NVerm);
assert(nfa->type == LBR_NFA_NVERM);
const struct lbr_verm *l = getImplNfa(nfa);
if (begin == end) {
@ -365,7 +365,7 @@ char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end,
size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Shuf);
assert(nfa->type == LBR_NFA_SHUF);
const struct lbr_shuf *l = getImplNfa(nfa);
if (begin == end) {
@ -389,7 +389,7 @@ char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end,
size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Truf);
assert(nfa->type == LBR_NFA_TRUF);
const struct lbr_truf *l = getImplNfa(nfa);
if (begin == end) {
@ -413,7 +413,7 @@ char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
UNUSED size_t begin, UNUSED size_t end,
UNUSED size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Dot);
assert(nfa->type == LBR_NFA_DOT);
// Nothing can kill a dot!
return 0;
}
@ -422,7 +422,7 @@ static really_inline
char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end, size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Verm);
assert(nfa->type == LBR_NFA_VERM);
const struct lbr_verm *l = getImplNfa(nfa);
if (begin == end) {
@ -446,7 +446,7 @@ static really_inline
char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end, size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_NVerm);
assert(nfa->type == LBR_NFA_NVERM);
const struct lbr_verm *l = getImplNfa(nfa);
if (begin == end) {
@ -471,7 +471,7 @@ char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end,
size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Shuf);
assert(nfa->type == LBR_NFA_SHUF);
const struct lbr_shuf *l = getImplNfa(nfa);
if (begin == end) {
@ -495,7 +495,7 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf,
size_t begin, size_t end,
size_t *loc) {
assert(begin <= end);
assert(nfa->type == LBR_NFA_Truf);
assert(nfa->type == LBR_NFA_TRUF);
const struct lbr_truf *l = getImplNfa(nfa);
if (begin == end) {

View File

@ -42,38 +42,17 @@
#include "trufflecompile.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
/* Note: No dot files for LBR */
using namespace std;
namespace ue2 {
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
static
void lbrDumpCommon(const lbr_common *lc, FILE *f) {
const RepeatInfo *info
@ -88,60 +67,80 @@ void lbrDumpCommon(const lbr_common *lc, FILE *f) {
fprintf(f, "min period: %u\n", info->minPeriod);
}
void nfaExecLbrDot_dumpText(const NFA *nfa, FILE *f) {
void nfaExecLbrDot_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_Dot);
assert(nfa->type == LBR_NFA_DOT);
const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
lbrDumpCommon(&ld->common, f);
fprintf(f, "DOT model\n");
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrVerm_dumpText(const NFA *nfa, FILE *f) {
void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_Verm);
assert(nfa->type == LBR_NFA_VERM);
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
lbrDumpCommon(&lv->common, f);
fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrNVerm_dumpText(const NFA *nfa, FILE *f) {
void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_NVerm);
assert(nfa->type == LBR_NFA_NVERM);
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
lbrDumpCommon(&lv->common, f);
fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) {
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_Shuf);
assert(nfa->type == LBR_NFA_SHUF);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
lbrDumpCommon(&ls->common, f);
CharReach cr = shufti2cr(ls->mask_lo, ls->mask_hi);
CharReach cr = shufti2cr((const u8 *)&ls->mask_lo,
(const u8 *)&ls->mask_hi);
fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n",
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
void nfaExecLbrTruf_dumpText(const NFA *nfa, FILE *f) {
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
assert(nfa);
assert(nfa->type == LBR_NFA_Truf);
assert(nfa->type == LBR_NFA_TRUF);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
lbrDumpCommon(&lt->common, f);
CharReach cr = truffle2cr(lt->mask1, lt->mask2);
CharReach cr = truffle2cr((const u8 *)&lt->mask1,
(const u8 *)&lt->mask2);
fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n",
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fclose(f);
}
} // namespace ue2

View File

@ -31,28 +31,17 @@
#ifdef DUMP_SUPPORT
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrTruf_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrShuf_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrDot_dump(const struct NFA *nfa, const std::string &base);
void nfaExecLbrVerm_dump(const struct NFA *nfa, const std::string &base);
void nfaExecLbrNVerm_dump(const struct NFA *nfa, const std::string &base);
void nfaExecLbrShuf_dump(const struct NFA *nfa, const std::string &base);
void nfaExecLbrTruf_dump(const struct NFA *nfa, const std::string &base);
} // namespace ue2

View File

@ -41,9 +41,7 @@ extern "C"
#define GENERATE_NFA_DUMP_DECL(gf_name) \
} /* extern "C" */ \
namespace ue2 { \
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
const std::string &base); \
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
void gf_name##_dump(const struct NFA *nfa, const std::string &base); \
} /* namespace ue2 */ \
extern "C" {
@ -77,6 +75,7 @@ extern "C"
GENERATE_NFA_DUMP_DECL(gf_name)
GENERATE_NFA_DECL(nfaExecLimEx32)
GENERATE_NFA_DECL(nfaExecLimEx64)
GENERATE_NFA_DECL(nfaExecLimEx128)
GENERATE_NFA_DECL(nfaExecLimEx256)
GENERATE_NFA_DECL(nfaExecLimEx384)

73
src/nfa/limex_64.c Normal file
View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief LimEx NFA: 128-bit SIMD runtime implementations.
*/
/* Limex64 is unusual on as on 32 bit platforms, at runtime it uses an m128 for
* state calculations.
*/
//#define DEBUG_INPUT
//#define DEBUG_EXCEPTIONS
#include "limex.h"
#include "accel.h"
#include "limex_internal.h"
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
// Common code
#define STATE_ON_STACK
#define ESTATE_ON_STACK
#include "limex_runtime.h"
#define SIZE 64
#define ENG_STATE_T u64a
#ifdef ARCH_64_BIT
#define STATE_T u64a
#define LOAD_FROM_ENG load_u64a
#else
#define STATE_T m128
#define LOAD_FROM_ENG load_m128_from_u64a
#endif
#include "limex_exceptional.h"
#include "limex_state_impl.h"
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#include "limex_runtime_impl.h"

View File

@ -78,10 +78,26 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = packedExtract32(s, accel);
u32 idx = pext32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = pext64(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = pext64(movq(s), movq(accel));
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
#endif
size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
const u8 *accelTable, const union AccelAux *aux,
const u8 *input, size_t i, size_t end) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,6 +40,7 @@
#include "util/simd_utils.h" // for m128 etc
union AccelAux;
struct LimExNFA64;
struct LimExNFA128;
struct LimExNFA256;
struct LimExNFA384;
@ -49,6 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#else
size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end);
#endif
size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex,
const u8 *accelTable, const union AccelAux *aux,
const u8 *input, size_t i, size_t end);

View File

@ -31,14 +31,14 @@
/* impl of limex functions which depend only on state size */
#if !defined(SIZE) || !defined(STATE_T) || !defined(INLINE_ATTR)
# error Must define SIZE and STATE_T and INLINE_ATTR in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \
|| !defined(INLINE_ATTR)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer.
#endif
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
@ -46,12 +46,11 @@
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
#define TOP_FN JOIN(moNfaTop, SIZE)
#define TOPN_FN JOIN(moNfaTopN, SIZE)
#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE)
#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
#define CONTEXT_T JOIN(NFAContext, SIZE)
#define ONES_STATE JOIN(ones_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
@ -62,6 +61,20 @@
#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE)
#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
#if defined(ARCH_64_BIT) && (SIZE >= 64)
#define CHUNK_T u64a
#define FIND_AND_CLEAR_FN findAndClearLSB_64
#define POPCOUNT_FN popcount64
#define RANK_IN_MASK_FN rank_in_mask64
#else
#define CHUNK_T u32
#define FIND_AND_CLEAR_FN findAndClearLSB_32
#define POPCOUNT_FN popcount32
#define RANK_IN_MASK_FN rank_in_mask32
#endif
#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T))
static really_inline
void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
const union RepeatControl *repeat_ctrl,
@ -83,7 +96,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(accstate, cyclicState)) {
if (!TESTBIT_STATE(*accstate, cyclicState)) {
continue;
}
@ -100,70 +113,85 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
}
}
static never_inline
char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
const struct NFAAccept *acceptTable, u32 acceptCount,
u64a offset, NfaCallback callback, void *context) {
static really_inline
char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s,
STATE_T *squash, const STATE_T *acceptMask,
const struct NFAAccept *acceptTable, u64a offset,
NfaCallback callback, void *context) {
assert(s);
assert(limex);
assert(callback);
assert(acceptCount);
// We have squash masks we might have to apply after firing reports.
STATE_T squash = ONES_STATE;
const STATE_T *squashMasks = (const STATE_T *)
((const char *)limex + limex->squashOffset);
const STATE_T accept_mask = *acceptMask;
STATE_T accepts = AND_STATE(*s, accept_mask);
for (u32 i = 0; i < acceptCount; i++) {
const struct NFAAccept *a = &acceptTable[i];
if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(0, offset, a->externalId, context);
// Caller must ensure that we have at least one accept state on.
assert(ISNONZERO_STATE(accepts));
CHUNK_T chunks[NUM_STATE_CHUNKS];
memcpy(chunks, &accepts, sizeof(accepts));
CHUNK_T mask_chunks[NUM_STATE_CHUNKS];
memcpy(mask_chunks, &accept_mask, sizeof(accept_mask));
u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk.
for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) {
CHUNK_T chunk = chunks[i];
while (chunk != 0) {
u32 bit = FIND_AND_CLEAR_FN(&chunk);
u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit);
u32 idx = local_idx + base_index;
const struct NFAAccept *a = &acceptTable[idx];
DEBUG_PRINTF("state %u: firing report list=%u, offset=%llu\n",
bit + i * (u32)sizeof(chunk) * 8, a->reports, offset);
int rv = limexRunAccept((const char *)limex, a, callback, context,
offset);
if (unlikely(rv == MO_HALT_MATCHING)) {
return 1;
}
if (a->squash != MO_INVALID_IDX) {
assert(a->squash < limex->squashCount);
const STATE_T *sq = &squashMasks[a->squash];
DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq);
squash = AND_STATE(squash, LOAD_STATE(sq));
if (squash != NULL && a->squash != MO_INVALID_IDX) {
DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash);
const ENG_STATE_T *sq =
(const ENG_STATE_T *)((const char *)limex + a->squash);
*squash = AND_STATE(*squash, LOAD_FROM_ENG(sq));
}
}
base_index += POPCOUNT_FN(mask_chunks[i]);
}
STORE_STATE(s, AND_STATE(LOAD_STATE(s), squash));
return 0;
}
static never_inline
char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
const struct NFAAccept *acceptTable,
u32 acceptCount, u64a offset,
NfaCallback callback, void *context) {
assert(s);
assert(callback);
assert(acceptCount);
char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
const STATE_T *acceptMask,
const struct NFAAccept *acceptTable, u64a offset,
NfaCallback callback, void *context) {
// We have squash masks we might have to apply after firing reports.
STATE_T squash = ONES_STATE;
return PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, acceptTable,
offset, callback, context);
for (u32 i = 0; i < acceptCount; i++) {
const struct NFAAccept *a = &acceptTable[i];
if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(0, offset, a->externalId, context);
if (unlikely(rv == MO_HALT_MATCHING)) {
return 1;
}
}
}
return 0;
*s = AND_STATE(*s, squash);
}
// Run EOD accepts.
static never_inline
char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s,
const STATE_T *acceptMask,
const struct NFAAccept *acceptTable,
u64a offset, NfaCallback callback,
void *context) {
STATE_T *squash = NULL;
return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, acceptMask, acceptTable,
offset, callback, context);
}
// Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this
// LimEx contains no repeat structures.
static really_inline
char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
const union RepeatControl *repeat_ctrl,
const char *repeat_state, u64a offset, char do_br,
const char *repeat_state, u64a offset,
NfaCallback callback, void *context) {
assert(limex && s);
@ -172,47 +200,16 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
return MO_CONTINUE_MATCHING;
}
const STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask);
const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(*s, acceptEodMask);
if (do_br) {
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state,
offset + 1 /* EOD 'symbol' */, &foundAccepts);
} else {
assert(!limex->repeatCount);
}
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state,
offset + 1 /* EOD 'symbol' */, &foundAccepts);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex);
if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable,
limex->acceptEodCount, offset, callback,
context)) {
return MO_HALT_MATCHING;
}
}
return MO_CONTINUE_MATCHING;
}
static really_inline
char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset,
NfaCallback callback, void *context) {
assert(limex && s);
// There may not be any EOD accepts in this NFA.
if (!limex->acceptEodCount) {
return MO_CONTINUE_MATCHING;
}
STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask);
assert(!limex->repeatCount);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex);
if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable,
limex->acceptEodCount, offset, callback,
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask,
acceptEodTable, offset, callback,
context)) {
return MO_HALT_MATCHING;
}
@ -228,8 +225,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
assert(q->state);
assert(q_cur_type(q) == MQE_START);
STATE_T s = LOAD_STATE(q->state);
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T s = *(STATE_T *)q->state;
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
@ -238,8 +235,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
const struct NFAAccept *acceptTable = getAcceptTable(limex);
u64a offset = q_cur_offset(q);
if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptTable,
limex->acceptCount, offset, q->cb,
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask,
acceptTable, offset, q->cb,
q->context)) {
return MO_HALT_MATCHING;
}
@ -250,7 +247,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
static really_inline
STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) {
return LOAD_STATE(onlyDs ? &impl->initDS : &impl->init);
return LOAD_FROM_ENG(onlyDs ? &impl->initDS : &impl->init);
}
static really_inline
@ -261,9 +258,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) {
static really_inline
STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) {
assert(n < limex->topCount);
const STATE_T *topsptr =
(const STATE_T *)((const char *)limex + limex->topOffset);
STATE_T top = LOAD_STATE(&topsptr[n]);
const ENG_STATE_T *topsptr =
(const ENG_STATE_T *)((const char *)limex + limex->topOffset);
STATE_T top = LOAD_FROM_ENG(&topsptr[n]);
return OR_STATE(top, state);
}
@ -279,8 +276,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
DEBUG_PRINTF("expire estate at offset %llu\n", offset);
const STATE_T cyclics =
AND_STATE(LOAD_STATE(&ctx->s), LOAD_STATE(&limex->repeatCyclicMask));
const STATE_T cyclics
= AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclic states are on\n");
return;
@ -290,7 +287,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(&cyclics, cyclicState)) {
if (!TESTBIT_STATE(cyclics, cyclicState)) {
continue;
}
@ -310,14 +307,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
last_top, repeat->repeatMax);
u64a adj = 0;
/* if the cycle's tugs are active at repeat max, it is still alive */
if (TESTBIT_STATE((const STATE_T *)&limex->accept, cyclicState) ||
TESTBIT_STATE((const STATE_T *)&limex->acceptAtEOD, cyclicState)) {
if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) ||
TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) {
DEBUG_PRINTF("lazy tug possible - may still be inspected\n");
adj = 1;
} else {
const STATE_T *tug_mask =
(const STATE_T *)((const char *)info + info->tugMaskOffset);
if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_STATE(tug_mask)))) {
const ENG_STATE_T *tug_mask =
(const ENG_STATE_T *)((const char *)info + info->tugMaskOffset);
if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) {
DEBUG_PRINTF("tug possible - may still be inspected\n");
adj = 1;
}
@ -339,37 +336,45 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
u64a offset, ReportID report) {
assert(limex);
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T accstate = AND_STATE(state, acceptMask);
const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept);
STATE_T accepts = AND_STATE(state, accept_mask);
// Are we in an accept state?
if (ISZERO_STATE(accstate)) {
if (ISZERO_STATE(accepts)) {
DEBUG_PRINTF("no accept states are on\n");
return 0;
}
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts);
DEBUG_PRINTF("looking for report %u\n", report);
#ifdef DEBUG
DEBUG_PRINTF("accept states that are on: ");
for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) {
if (TESTBIT_STATE(&accstate, i)) printf("%u ", i);
}
printf("\n");
#endif
// Does one of our states match the given report ID?
const struct NFAAccept *acceptTable = getAcceptTable(limex);
for (u32 i = 0; i < limex->acceptCount; i++) {
const struct NFAAccept *a = &acceptTable[i];
DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state,
a->externalId);
if (a->externalId == report && TESTBIT_STATE(&accstate, a->state)) {
DEBUG_PRINTF("report is on!\n");
return 1;
CHUNK_T chunks[NUM_STATE_CHUNKS];
memcpy(chunks, &accepts, sizeof(accepts));
CHUNK_T mask_chunks[NUM_STATE_CHUNKS];
memcpy(mask_chunks, &accept_mask, sizeof(accept_mask));
u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk.
for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) {
CHUNK_T chunk = chunks[i];
while (chunk != 0) {
u32 bit = FIND_AND_CLEAR_FN(&chunk);
u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit);
u32 idx = local_idx + base_index;
assert(idx < limex->acceptCount);
const struct NFAAccept *a = &acceptTable[idx];
DEBUG_PRINTF("state %u is on, report list at %u\n",
bit + i * (u32)sizeof(chunk) * 8, a->reports);
if (limexAcceptHasReport((const char *)limex, a, report)) {
DEBUG_PRINTF("report %u is on\n", report);
return 1;
}
}
base_index += POPCOUNT_FN(mask_chunks[i]);
}
return 0;
@ -381,7 +386,7 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
u64a offset) {
assert(limex);
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T accstate = AND_STATE(state, acceptMask);
// Are we in an accept state?
@ -396,7 +401,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
}
#undef TESTEOD_FN
#undef TESTEOD_REV_FN
#undef REPORTCURRENT_FN
#undef EXPIRE_ESTATE_FN
#undef LIMEX_INACCEPT_FN
@ -407,8 +411,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
#undef CONTEXT_T
#undef IMPL_NFA_T
#undef ONES_STATE
#undef LOAD_STATE
#undef STORE_STATE
#undef AND_STATE
#undef OR_STATE
#undef ANDNOT_STATE
@ -416,11 +418,14 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
#undef TESTBIT_STATE
#undef ISNONZERO_STATE
#undef ISZERO_STATE
#undef PROCESS_ACCEPTS_IMPL_FN
#undef PROCESS_ACCEPTS_FN
#undef PROCESS_ACCEPTS_NOSQUASH_FN
#undef SQUASH_UNTUG_BR_FN
#undef GET_NFA_REPEAT_INFO_FN
#undef SIZE
#undef STATE_T
#undef INLINE_ATTR
#undef CHUNK_T
#undef FIND_AND_CLEAR_FN
#undef POPCOUNT_FN
#undef RANK_IN_MASK_FN
#undef NUM_STATE_CHUNKS

View File

@ -37,10 +37,10 @@
#include "limex_internal.h"
#include "limex_limits.h"
#include "nfa_build_util.h"
#include "nfagraph/ng_dominators.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_limex_accel.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_restructuring.h"
#include "nfagraph/ng_squash.h"
#include "nfagraph/ng_util.h"
#include "ue2common.h"
@ -64,12 +64,21 @@
#include <map>
#include <set>
#include <vector>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using boost::adaptors::map_values;
namespace ue2 {
/**
* \brief Special state index value meaning that the vertex will not
* participate in an (NFA/DFA/etc) implementation.
*/
static constexpr u32 NO_STATE = ~0;
namespace {
struct precalcAccel {
@ -87,7 +96,7 @@ struct precalcAccel {
struct limex_accel_info {
ue2::unordered_set<NFAVertex> accelerable;
map<NFAStateSet, precalcAccel> precalc;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex> > friends;
ue2::unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
ue2::unordered_map<NFAVertex, AccelScheme> accel_map;
};
@ -130,7 +139,7 @@ struct build_info {
const vector<BoundedRepeatData> &ri,
const map<NFAVertex, NFAStateSet> &rsmi,
const map<NFAVertex, NFAStateSet> &smi,
const map<u32, NFAVertex> &ti, const set<NFAVertex> &zi,
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
bool dai, bool sci, const CompileContext &cci,
u32 nsi)
: h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi),
@ -156,7 +165,7 @@ struct build_info {
map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap;
const map<u32, NFAVertex> &tops;
const map<u32, set<NFAVertex>> &tops;
ue2::unordered_set<NFAVertex> tugs;
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
const set<NFAVertex> &zombies;
@ -485,7 +494,7 @@ void nfaFindAccelSchemes(const NGHolder &g,
// We want to skip any vertices that don't lead to at least one other
// (self-loops don't count) vertex.
if (!has_proper_successor(v, g)) {
DEBUG_PRINTF("skipping vertex %u\n", g[v].index);
DEBUG_PRINTF("skipping vertex %zu\n", g[v].index);
continue;
}
@ -493,7 +502,7 @@ void nfaFindAccelSchemes(const NGHolder &g,
AccelScheme as;
if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) {
DEBUG_PRINTF("graph vertex %u is accelerable with offset %u.\n",
DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n",
g[v].index, as.offset);
(*out)[v] = as;
}
@ -505,7 +514,7 @@ struct fas_visitor : public boost::default_bfs_visitor {
ue2::unordered_map<NFAVertex, AccelScheme> *out_in)
: accel_map(am_in), out(out_in) {}
void discover_vertex(NFAVertex v, const NFAGraph &) {
void discover_vertex(NFAVertex v, const NGHolder &) {
if (accel_map.find(v) != accel_map.end()) {
(*out)[v] = accel_map.find(v)->second;
}
@ -518,36 +527,40 @@ struct fas_visitor : public boost::default_bfs_visitor {
};
static
void filterAccelStates(NGHolder &g, const map<u32, NFAVertex> &tops,
void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
ue2::unordered_map<NFAVertex, AccelScheme> *accel_map) {
/* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
* else should be ditched. We use a simple BFS to choose accel states near
* the start. */
// Temporarily wire start to each top for the BFS.
vector<NFAEdge> topEdges;
wireStartToTops(g, tops, topEdges);
vector<NFAEdge> tempEdges;
for (const auto &vv : tops | map_values) {
for (NFAVertex v : vv) {
if (!edge(g.start, v, g).second) {
tempEdges.push_back(add_edge(g.start, v, g).first);
}
}
}
// Similarly, connect (start, startDs) if necessary.
if (!edge(g.start, g.startDs, g).second) {
auto e = add_edge(g.start, g.startDs, g).first;
topEdges.push_back(e); // Remove edge later.
NFAEdge e = add_edge(g.start, g.startDs, g);
tempEdges.push_back(e); // Remove edge later.
}
ue2::unordered_map<NFAVertex, AccelScheme> out;
try {
vector<boost::default_color_type> colour(num_vertices(g));
breadth_first_search(
g.g, g.start,
boost::breadth_first_search(g, g.start,
visitor(fas_visitor(*accel_map, &out))
.color_map(make_iterator_property_map(
colour.begin(), get(&NFAGraphVertexProps::index, g.g))));
.color_map(make_iterator_property_map(colour.begin(),
get(vertex_index, g))));
} catch (fas_visitor *) {
; /* found max accel_states */
}
remove_edges(topEdges, g);
remove_edges(tempEdges, g);
assert(out.size() <= NFA_MAX_ACCEL_STATES);
accel_map->swap(out);
@ -614,7 +627,7 @@ void fillAccelInfo(build_info &bi) {
/* for each subset of the accel keys need to find an accel scheme */
assert(astates.size() < 32);
sort(astates.begin(), astates.end(), make_index_ordering(g));
sort(astates.begin(), astates.end());
for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) {
DEBUG_PRINTF("saving info for accel %u\n", i);
@ -701,9 +714,157 @@ void fillAccelInfo(build_info &bi) {
/** The AccelAux structure has large alignment specified, and this makes some
* compilers do odd things unless we specify a custom allocator. */
typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)> >
typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>>
AccelAuxVector;
#define IMPOSSIBLE_ACCEL_MASK (~0U)
static
u32 getEffectiveAccelStates(const build_info &args,
u32 active_accel_mask,
const vector<AccelBuild> &accelStates) {
/* accelStates is indexed by the acceleration bit index and contains a
* reference to the original vertex & state_id */
/* Cases to consider:
*
* 1: Accel states a and b are on and b can squash a
* --> we can ignore a. This will result in a no longer being accurately
* modelled - we may miss escapes turning it off and we may also miss
* its successors being activated.
*
* 2: Accel state b is on but accel state a is off and a is .* and must be
* seen before b is reached (and would not be covered by (1))
* --> if a is squashable (or may die unexpectedly) we should continue
* as is
* --> if a is not squashable we can treat this as a+b or as a no accel,
* impossible case
* --> this case could be extended to handle non dot reaches by
* effectively creating something similar to squash masks for the
* reverse graph
*
*
* Other cases:
*
* 3: Accel states a and b are on but have incompatible reaches
* --> we should treat this as an impossible case. Actually, this case
* is unlikely to arise as we pick states with wide reaches to
* accelerate so an empty intersection is unlikely.
*
* Note: we need to be careful when dealing with accel states corresponding
* to bounded repeat cyclics - they may 'turn off' based on a max bound and
* so we may still require on earlier states to be accurately modelled.
*/
const NGHolder &h = args.h;
auto dom_map = findDominators(h);
/* map from accel_id to mask of accel_ids that it is dominated by */
vector<u32> dominated_by(accelStates.size());
map<NFAVertex, u32> accel_id_map;
for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) {
NFAVertex v = accelStates[accel_id].v;
accel_id_map[v] = accel_id;
}
/* Note: we want a slightly less strict defn of dominate as skip edges
* prevent .* 'truly' dominating */
for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
u32 accel_id = findAndClearLSB_32(&local_accel_mask);
assert(accel_id < accelStates.size());
NFAVertex v = accelStates[accel_id].v;
while (dom_map[v]) {
v = dom_map[v];
if (contains(accel_id_map, v)) {
dominated_by[accel_id] |= 1U << accel_id_map[v];
}
/* TODO: could also look at inv_adj vertices to handle fan-in */
for (NFAVertex a : adjacent_vertices_range(v, h)) {
if (a == v || !contains(accel_id_map, a)
|| a == accelStates[accel_id].v /* not likely */) {
continue;
}
if (!is_subset_of(h[v].reports, h[a].reports)) {
continue;
}
auto v_succ = succs(v, h);
auto a_succ = succs(a, h);
if (is_subset_of(v_succ, a_succ)) {
dominated_by[accel_id] |= 1U << accel_id_map[a];
}
}
}
}
u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */
for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
u32 accel_id = findAndClearLSB_32(&local_accel_mask);
NFAVertex v = accelStates[accel_id].v;
u32 state_id = accelStates[accel_id].state;
assert(contains(args.accel.accelerable, v));
if (!h[v].char_reach.all()) {
may_turn_off |= 1U << accel_id;
continue;
}
if (contains(args.br_cyclic, v)
&& args.br_cyclic.at(v).repeatMax != depth::infinity()) {
may_turn_off |= 1U << accel_id;
continue;
}
for (const auto &s_mask : args.squashMap | map_values) {
if (!s_mask.test(state_id)) {
may_turn_off |= 1U << accel_id;
break;
}
}
for (const auto &s_mask : args.reportSquashMap | map_values) {
if (!s_mask.test(state_id)) {
may_turn_off |= 1U << accel_id;
break;
}
}
}
/* Case 1: */
u32 ignored = 0;
for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
u32 accel_id_b = findAndClearLSB_32(&local_accel_mask);
NFAVertex v = accelStates[accel_id_b].v;
if (!contains(args.squashMap, v)) {
continue;
}
assert(!contains(args.br_cyclic, v)
|| args.br_cyclic.at(v).repeatMax == depth::infinity());
NFAStateSet squashed = args.squashMap.at(v);
squashed.flip(); /* default sense for mask of survivors */
for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) {
u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2);
if (squashed.test(accelStates[accel_id_a].state)) {
ignored |= 1U << accel_id_a;
}
}
}
/* Case 2: */
for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
u32 accel_id = findAndClearLSB_32(&local_accel_mask);
u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off;
if ((stuck_dominators & active_accel_mask) != stuck_dominators) {
DEBUG_PRINTF("only %08x on, but we require %08x\n",
active_accel_mask, stuck_dominators);
return IMPOSSIBLE_ACCEL_MASK;
}
}
if (ignored) {
DEBUG_PRINTF("in %08x, ignoring %08x\n", active_accel_mask, ignored);
}
return active_accel_mask & ~ignored;
}
static
void buildAccel(const build_info &args, NFAStateSet &accelMask,
NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
@ -735,11 +896,22 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
// Set up a unioned AccelBuild for every possible combination of the set
// bits in accelStates.
vector<AccelBuild> accelOuts(accelCount);
vector<u32> effective_accel_set;
effective_accel_set.push_back(0); /* empty is effectively empty */
for (u32 i = 1; i < accelCount; i++) {
for (u32 j = 0, j_end = accelStates.size(); j < j_end; j++) {
if (i & (1U << j)) {
combineAccel(accelStates[j], accelOuts[i]);
}
u32 effective_i = getEffectiveAccelStates(args, i, accelStates);
effective_accel_set.push_back(effective_i);
if (effective_i == IMPOSSIBLE_ACCEL_MASK) {
DEBUG_PRINTF("this combination of accel states is not possible\n");
accelOuts[i].stop1 = CharReach::dot();
continue;
}
while (effective_i) {
u32 base_accel_state = findAndClearLSB_32(&effective_i);
combineAccel(accelStates[base_accel_state], accelOuts[i]);
}
minimiseAccel(accelOuts[i]);
}
@ -759,29 +931,32 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
for (u32 i = 1; i < accelCount; i++) {
memset(&aux, 0, sizeof(aux));
NFAStateSet states(args.num_states);
for (u32 j = 0; j < accelStates.size(); j++) {
if (i & (1U << j)) {
states.set(accelStates[j].state);
}
}
NFAStateSet effective_states(args.num_states);
u32 effective_i = effective_accel_set[i];
AccelInfo ainfo;
ainfo.double_offset = accelOuts[i].offset;
ainfo.double_stop1 = accelOuts[i].stop1;
ainfo.double_stop2 = accelOuts[i].stop2;
if (contains(accel.precalc, states)) {
const precalcAccel &precalc = accel.precalc.at(states);
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
ainfo.ma_len1 = precalc.ma_info.len1;
ainfo.ma_len2 = precalc.ma_info.len2;
ainfo.multiaccel_offset = precalc.ma_info.offset;
ainfo.multiaccel_stops = precalc.ma_info.cr;
ainfo.ma_type = precalc.ma_info.type;
} else {
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
if (effective_i != IMPOSSIBLE_ACCEL_MASK) {
while (effective_i) {
u32 base_accel_id = findAndClearLSB_32(&effective_i);
effective_states.set(accelStates[base_accel_id].state);
}
if (contains(accel.precalc, effective_states)) {
const auto &precalc = accel.precalc.at(effective_states);
if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) {
ainfo.ma_len1 = precalc.ma_info.len1;
ainfo.ma_len2 = precalc.ma_info.len2;
ainfo.multiaccel_offset = precalc.ma_info.offset;
ainfo.multiaccel_stops = precalc.ma_info.cr;
ainfo.ma_type = precalc.ma_info.type;
} else {
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
}
}
}
@ -824,14 +999,105 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
}
static
void buildAccepts(const build_info &args, NFAStateSet &acceptMask,
NFAStateSet &acceptEodMask, vector<NFAAccept> &accepts,
vector<NFAAccept> &acceptsEod, vector<NFAStateSet> &squash) {
u32 addSquashMask(const build_info &args, const NFAVertex &v,
vector<NFAStateSet> &squash) {
auto sit = args.reportSquashMap.find(v);
if (sit == args.reportSquashMap.end()) {
return MO_INVALID_IDX;
}
// This state has a squash mask. Paw through the existing vector to
// see if we've already seen it, otherwise add a new one.
auto it = find(squash.begin(), squash.end(), sit->second);
if (it != squash.end()) {
return verify_u32(distance(squash.begin(), it));
}
u32 idx = verify_u32(squash.size());
squash.push_back(sit->second);
return idx;
}
static
u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
unordered_map<vector<ReportID>, u32> &reportListCache) {
assert(!r.empty());
vector<ReportID> my_reports(begin(r), end(r));
my_reports.push_back(MO_INVALID_IDX); // sentinel
auto cache_it = reportListCache.find(my_reports);
if (cache_it != end(reportListCache)) {
u32 offset = cache_it->second;
DEBUG_PRINTF("reusing cached report list at %u\n", offset);
return offset;
}
auto it = search(begin(reports), end(reports), begin(my_reports),
end(my_reports));
if (it != end(reports)) {
u32 offset = verify_u32(distance(begin(reports), it));
DEBUG_PRINTF("reusing found report list at %u\n", offset);
return offset;
}
u32 offset = verify_u32(reports.size());
insert(&reports, reports.end(), my_reports);
reportListCache.emplace(move(my_reports), offset);
return offset;
}
static
void buildAcceptsList(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
vector<NFAVertex> &verts, vector<NFAAccept> &accepts,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
if (verts.empty()) {
return;
}
DEBUG_PRINTF("building accept lists for %zu states\n", verts.size());
auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) {
u32 a_state = args.state_ids.at(a);
u32 b_state = args.state_ids.at(b);
assert(a_state != b_state || a == b);
return a_state < b_state;
};
sort(begin(verts), end(verts), cmp_state_id);
const NGHolder &h = args.h;
for (const auto &v : verts) {
DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v),
as_string_list(h[v].reports).c_str());
NFAAccept a;
memset(&a, 0, sizeof(a));
assert(!h[v].reports.empty());
if (h[v].reports.size() == 1) {
a.single_report = 1;
a.reports = *h[v].reports.begin();
} else {
a.single_report = 0;
a.reports = addReports(h[v].reports, reports, reports_cache);
}
a.squash = addSquashMask(args, v, squash);
accepts.push_back(move(a));
}
}
static
void buildAccepts(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
const NGHolder &h = args.h;
acceptMask.resize(args.num_states);
acceptEodMask.resize(args.num_states);
vector<NFAVertex> verts_accept, verts_accept_eod;
for (auto v : vertices_range(h)) {
u32 state_id = args.state_ids.at(v);
@ -839,41 +1105,20 @@ void buildAccepts(const build_info &args, NFAStateSet &acceptMask,
continue;
}
u32 squashMaskOffset = MO_INVALID_IDX;
auto sit = args.reportSquashMap.find(v);
if (sit != args.reportSquashMap.end()) {
// This state has a squash mask. Paw through the existing vector to
// see if we've already seen it, otherwise add a new one.
auto it = find(squash.begin(), squash.end(), sit->second);
if (it != squash.end()) {
squashMaskOffset = verify_u32(distance(squash.begin(), it));
} else {
squashMaskOffset = verify_u32(squash.size());
squash.push_back(sit->second);
}
}
// Add an accept (or acceptEod) per report ID.
vector<NFAAccept> *accepts_out;
if (edge(v, h.accept, h).second) {
acceptMask.set(state_id);
accepts_out = &accepts;
verts_accept.push_back(v);
} else {
assert(edge(v, h.acceptEod, h).second);
acceptEodMask.set(state_id);
accepts_out = &acceptsEod;
}
for (auto report : h[v].reports) {
accepts_out->push_back(NFAAccept());
NFAAccept &a = accepts_out->back();
a.state = state_id;
a.externalId = report;
a.squash = squashMaskOffset;
DEBUG_PRINTF("Accept: state=%u, externalId=%u\n", state_id, report);
verts_accept_eod.push_back(v);
}
}
buildAcceptsList(args, reports_cache, verts_accept, accepts, reports,
squash);
buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports,
squash);
}
static
@ -884,19 +1129,20 @@ void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) {
u32 numMasks = args.tops.rbegin()->first + 1; // max mask index
DEBUG_PRINTF("we have %u top masks\n", numMasks);
assert(numMasks <= NFA_MAX_TOP_MASKS);
topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes
for (const auto &m : args.tops) {
u32 mask_idx = m.first;
u32 state_id = args.state_ids.at(m.second);
DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx);
for (NFAVertex v : m.second) {
u32 state_id = args.state_ids.at(v);
DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx);
assert(mask_idx < numMasks);
assert(state_id != NO_STATE);
assert(mask_idx < numMasks);
assert(state_id != NO_STATE);
topMasks[mask_idx].set(state_id);
topMasks[mask_idx].set(state_id);
}
}
}
@ -1146,36 +1392,12 @@ struct ExceptionProto {
}
};
static
u32 getReportListIndex(const flat_set<ReportID> &reports,
vector<ReportID> &exceptionReports,
map<vector<ReportID>, u32> &reportListCache) {
if (reports.empty()) {
return MO_INVALID_IDX;
}
const vector<ReportID> r(reports.begin(), reports.end());
auto it = reportListCache.find(r);
if (it != reportListCache.end()) {
u32 idx = it->second;
assert(idx < exceptionReports.size());
assert(equal(r.begin(), r.end(), exceptionReports.begin() + idx));
return idx;
}
u32 idx = verify_u32(exceptionReports.size());
reportListCache[r] = idx;
exceptionReports.insert(exceptionReports.end(), r.begin(), r.end());
exceptionReports.push_back(MO_INVALID_IDX); // terminator
return idx;
}
static
u32 buildExceptionMap(const build_info &args,
unordered_map<vector<ReportID>, u32> &reports_cache,
const ue2::unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32> > &exceptionMap,
vector<ReportID> &exceptionReports) {
map<ExceptionProto, vector<u32>> &exceptionMap,
vector<ReportID> &reportList) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
u32 exceptionCount = 0;
@ -1193,10 +1415,6 @@ u32 buildExceptionMap(const build_info &args,
}
}
// We track report lists that have already been written into the global
// list in case we can reuse them.
map<vector<ReportID>, u32> reportListCache;
for (auto v : vertices_range(h)) {
const u32 i = args.state_ids.at(v);
@ -1215,8 +1433,12 @@ u32 buildExceptionMap(const build_info &args,
DEBUG_PRINTF("state %u is exceptional due to accept "
"(%zu reports)\n", i, reports.size());
e.reports_index =
getReportListIndex(reports, exceptionReports, reportListCache);
if (reports.empty()) {
e.reports_index = MO_INVALID_IDX;
} else {
e.reports_index =
addReports(reports, reportList, reports_cache);
}
// We may be applying a report squash too.
auto mi = args.reportSquashMap.find(v);
@ -1438,7 +1660,8 @@ struct Factory {
sizeof(limex->init), stateSize, repeatscratchStateSize,
repeatStreamState);
size_t scratchStateSize = sizeof(limex->init);
size_t scratchStateSize = NFATraits<dtype>::scratch_state_size;
if (repeatscratchStateSize) {
scratchStateSize
= ROUNDUP_N(scratchStateSize, alignof(RepeatControl));
@ -1641,9 +1864,10 @@ struct Factory {
}
static
void writeExceptions(const map<ExceptionProto, vector<u32> > &exceptionMap,
const vector<u32> &repeatOffsets,
implNFA_t *limex, const u32 exceptionsOffset) {
void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap,
const vector<u32> &repeatOffsets, implNFA_t *limex,
const u32 exceptionsOffset,
const u32 reportListOffset) {
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
@ -1670,7 +1894,12 @@ struct Factory {
exception_t &e = etable[ecount];
maskSetBits(e.squash, proto.squash_states);
maskSetBits(e.successors, proto.succ_states);
e.reports = proto.reports_index;
if (proto.reports_index == MO_INVALID_IDX) {
e.reports = MO_INVALID_IDX;
} else {
e.reports = reportListOffset +
proto.reports_index * sizeof(ReportID);
}
e.hasSquash = verify_u8(proto.squash);
e.trigger = verify_u8(proto.trigger);
u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX
@ -1789,7 +2018,9 @@ struct Factory {
const vector<NFAAccept> &acceptsEod,
const vector<NFAStateSet> &squash, implNFA_t *limex,
const u32 acceptsOffset, const u32 acceptsEodOffset,
const u32 squashOffset) {
const u32 squashOffset, const u32 reportListOffset) {
char *limex_base = (char *)limex;
DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n",
acceptsOffset, acceptsEodOffset, squashOffset);
@ -1797,27 +2028,39 @@ struct Factory {
maskSetBits(limex->accept, acceptMask);
maskSetBits(limex->acceptAtEOD, acceptEodMask);
// Transforms the indices (report list, squash mask) into offsets
// relative to the base of the limex.
auto transform_offset_fn = [&](NFAAccept a) {
if (!a.single_report) {
a.reports = reportListOffset + a.reports * sizeof(ReportID);
}
a.squash = squashOffset + a.squash * sizeof(tableRow_t);
return a;
};
// Write accept table.
limex->acceptOffset = acceptsOffset;
limex->acceptCount = verify_u32(accepts.size());
DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size());
NFAAccept *acceptsTable = (NFAAccept *)((char *)limex + acceptsOffset);
NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset);
assert(ISALIGNED(acceptsTable));
copy(accepts.begin(), accepts.end(), acceptsTable);
transform(accepts.begin(), accepts.end(), acceptsTable,
transform_offset_fn);
// Write eod accept table.
limex->acceptEodOffset = acceptsEodOffset;
limex->acceptEodCount = verify_u32(acceptsEod.size());
DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size());
NFAAccept *acceptsEodTable = (NFAAccept *)((char *)limex + acceptsEodOffset);
NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset);
assert(ISALIGNED(acceptsEodTable));
copy(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable);
transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable,
transform_offset_fn);
// Write squash mask table.
limex->squashCount = verify_u32(squash.size());
limex->squashOffset = squashOffset;
DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size());
tableRow_t *mask = (tableRow_t *)((char *)limex + squashOffset);
tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset);
assert(ISALIGNED(mask));
for (size_t i = 0, end = squash.size(); i < end; i++) {
maskSetBits(mask[i], squash[i]);
@ -1854,15 +2097,12 @@ struct Factory {
}
static
void writeExceptionReports(const vector<ReportID> &reports,
implNFA_t *limex,
const u32 exceptionReportsOffset) {
DEBUG_PRINTF("exceptionReportsOffset=%u\n", exceptionReportsOffset);
limex->exReportOffset = exceptionReportsOffset;
assert(ISALIGNED_N((char *)limex + exceptionReportsOffset,
void writeReportList(const vector<ReportID> &reports, implNFA_t *limex,
const u32 reportListOffset) {
DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset);
assert(ISALIGNED_N((char *)limex + reportListOffset,
alignof(ReportID)));
copy_bytes((char *)limex + exceptionReportsOffset, reports);
copy_bytes((char *)limex + reportListOffset, reports);
}
static
@ -1881,16 +2121,21 @@ struct Factory {
repeatSize += repeats[i].second;
}
// We track report lists that have already been written into the global
// list in case we can reuse them.
unordered_map<vector<ReportID>, u32> reports_cache;
ue2::unordered_set<NFAEdge> exceptional;
u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
findExceptionalTransitions(args, exceptional, maxShift);
map<ExceptionProto, vector<u32> > exceptionMap;
vector<ReportID> exceptionReports;
u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
exceptionReports);
map<ExceptionProto, vector<u32>> exceptionMap;
vector<ReportID> reportList;
u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional,
exceptionMap, reportList);
assert(exceptionCount <= args.num_states);
@ -1907,8 +2152,8 @@ struct Factory {
NFAStateSet acceptMask, acceptEodMask;
vector<NFAAccept> accepts, acceptsEod;
vector<NFAStateSet> squash;
buildAccepts(args, acceptMask, acceptEodMask, accepts, acceptsEod,
squash);
buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts,
acceptsEod, reportList, squash);
// Build all our accel info.
NFAStateSet accelMask, accelFriendsMask;
@ -1949,8 +2194,8 @@ struct Factory {
const u32 exceptionsOffset = offset;
offset += sizeof(exception_t) * exceptionCount;
const u32 exceptionReportsOffset = offset;
offset += sizeof(ReportID) * exceptionReports.size();
const u32 reportListOffset = offset;
offset += sizeof(ReportID) * reportList.size();
const u32 repeatOffsetsOffset = offset;
offset += sizeof(u32) * args.repeats.size();
@ -1977,7 +2222,8 @@ struct Factory {
limex, accelTableOffset, accelAuxOffset);
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
limex, acceptsOffset, acceptsEodOffset, squashOffset);
limex, acceptsOffset, acceptsEodOffset, squashOffset,
reportListOffset);
limex->shiftCount = shiftCount;
writeShiftMasks(args, limex);
@ -1985,14 +2231,15 @@ struct Factory {
// Determine the state required for our state vector.
findStateSize(args, limex);
writeExceptionReports(exceptionReports, limex, exceptionReportsOffset);
writeReportList(reportList, limex, reportListOffset);
// Repeat structures and offset table.
vector<u32> repeatOffsets;
writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
repeatsOffset);
writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset);
writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset,
reportListOffset);
writeLimexMasks(args, limex);
@ -2021,13 +2268,6 @@ struct Factory {
sz = 32;
}
// Special case: with SIMD available, we definitely prefer using
// 128-bit NFAs over 64-bit ones given the paucity of registers
// available.
if (sz == 64) {
sz = 128;
}
if (args.cc.grey.nfaForceSize) {
sz = args.cc.grey.nfaForceSize;
}
@ -2067,9 +2307,12 @@ struct scoreNfa {
typedef u_##mlt_size tableRow_t; \
typedef NFAException##mlt_size exception_t; \
static const size_t maxStates = mlt_size; \
static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \
: sizeof(tableRow_t); \
};
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(64)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
@ -2080,19 +2323,18 @@ MAKE_LIMEX_TRAITS(512)
#ifndef NDEBUG
// Some sanity tests, called by an assertion in generate().
static UNUSED
bool isSane(const NGHolder &h, const map<u32, NFAVertex> &tops,
bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
u32 num_states) {
ue2::unordered_set<u32> seen;
ue2::unordered_set<NFAVertex> top_starts;
for (const auto &m : tops) {
top_starts.insert(m.second);
for (const auto &vv : tops | map_values) {
insert(&top_starts, vv);
}
for (auto v : vertices_range(h)) {
if (!contains(state_ids, v)) {
DEBUG_PRINTF("no entry for vertex %u in state map\n",
h[v].index);
DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index);
return false;
}
const u32 i = state_ids.at(v);
@ -2100,8 +2342,7 @@ bool isSane(const NGHolder &h, const map<u32, NFAVertex> &tops,
continue;
}
DEBUG_PRINTF("checking vertex %u (state %u)\n", h[v].index,
i);
DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i);
if (i >= num_states || contains(seen, i)) {
DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states);
@ -2111,7 +2352,7 @@ bool isSane(const NGHolder &h, const map<u32, NFAVertex> &tops,
// All our states should be reachable and have a state assigned.
if (h[v].char_reach.none()) {
DEBUG_PRINTF("vertex %u has empty reachability\n", h[v].index);
DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index);
return false;
}
@ -2119,7 +2360,7 @@ bool isSane(const NGHolder &h, const map<u32, NFAVertex> &tops,
// must have at least one predecessor that is not itself.
if (v != h.start && v != h.startDs && !contains(top_starts, v)
&& !proper_in_degree(v, h)) {
DEBUG_PRINTF("vertex %u has no pred\n", h[v].index);
DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index);
return false;
}
}
@ -2150,7 +2391,7 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, NFAVertex> &tops,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
@ -2222,7 +2463,7 @@ u32 countAccelStates(NGHolder &h,
const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, NFAVertex> &tops,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies,
const CompileContext &cc) {
const u32 num_states = max_state(states) + 1;

View File

@ -71,7 +71,7 @@ aligned_unique_ptr<NFA> generate(NGHolder &g,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, NFAVertex> &tops,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
bool do_accel,
bool stateCompression,
@ -89,7 +89,7 @@ u32 countAccelStates(NGHolder &h,
const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, NFAVertex> &tops,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
const CompileContext &cc);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,6 +39,16 @@
// Runtime context structures.
/* Note: The size of the context structures may vary from platform to platform
* (notably, for the Limex64 structure). As a result, information based on the
* size and other detail of these structures should not be written into the
* bytecode -- really, the details of the structure should not be accessed by
* the ue2 compile side at all.
*/
#ifdef __cplusplus
#error ue2 runtime only file
#endif
/* cached_estate/esucc etc...
*
* If the exception state matches the cached_estate we will apply
@ -66,6 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \
};
GEN_CONTEXT_STRUCT(32, u32)
#ifdef ARCH_64_BIT
GEN_CONTEXT_STRUCT(64, u64a)
#else
GEN_CONTEXT_STRUCT(64, m128)
#endif
GEN_CONTEXT_STRUCT(128, m128)
GEN_CONTEXT_STRUCT(256, m256)
GEN_CONTEXT_STRUCT(384, m384)

View File

@ -35,9 +35,10 @@
#include "limex_internal.h"
#include "nfa_dump_internal.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_mask.h"
#include "util/charreach.h"
#include "util/dump_util.h"
#include <algorithm>
#include <cstdio>
@ -70,6 +71,10 @@ template<> struct limex_traits<LimExNFA128> {
static const u32 size = 128;
typedef NFAException128 exception_type;
};
template<> struct limex_traits<LimExNFA64> {
static const u32 size = 64;
typedef NFAException64 exception_type;
};
template<> struct limex_traits<LimExNFA32> {
static const u32 size = 32;
typedef NFAException32 exception_type;
@ -82,7 +87,7 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
template<typename mask_t>
static
u32 rank_in_mask(mask_t mask, u32 bit) {
u32 rank_in_mask(const mask_t &mask, u32 bit) {
assert(bit < 8 * sizeof(mask));
u32 chunks[sizeof(mask)/sizeof(u32)];
@ -176,26 +181,40 @@ void dumpAccel(const limex_type *limex, FILE *f) {
}
}
static
void dumpAcceptList(const char *limex_base, const struct NFAAccept *accepts,
u32 acceptCount, FILE *f) {
for (u32 i = 0; i < acceptCount; i++) {
const NFAAccept &a = accepts[i];
if (a.single_report) {
fprintf(f, " idx %u fires single report %u\n", i, a.reports);
continue;
}
fprintf(f, " idx %u fires report list %u:", i, a.reports);
const ReportID *report = (const ReportID *)(limex_base + a.reports);
for (; *report != MO_INVALID_IDX; report++) {
fprintf(f, " %u", *report);
}
fprintf(f, "\n");
}
}
template<typename limex_type>
static
void dumpAccepts(const limex_type *limex, FILE *f) {
u32 acceptCount = limex->acceptCount;
u32 acceptEodCount = limex->acceptEodCount;
const char *limex_base = (const char *)limex;
const u32 acceptCount = limex->acceptCount;
const u32 acceptEodCount = limex->acceptEodCount;
fprintf(f, "\n%u accepts.\n", acceptCount);
const struct NFAAccept *accepts
= (const struct NFAAccept *)((const char *)limex + limex->acceptOffset);
for (u32 i = 0; i < acceptCount; i++) {
fprintf(f, " state %u fires report %u\n", accepts[i].state,
accepts[i].externalId);
}
const auto *accepts =
(const struct NFAAccept *)(limex_base + limex->acceptOffset);
dumpAcceptList(limex_base, accepts, acceptCount, f);
fprintf(f, "\n%u accepts at EOD.\n", acceptEodCount);
accepts = (const struct NFAAccept *)((const char *)limex
+ limex->acceptEodOffset);
for (u32 i = 0; i < acceptEodCount; i++) {
fprintf(f, " state %u fires report %u\n", accepts[i].state,
accepts[i].externalId);
}
const auto *accepts_eod =
(const struct NFAAccept *)(limex_base + limex->acceptEodOffset);
dumpAcceptList(limex_base, accepts_eod, acceptEodCount, f);
fprintf(f, "\n");
}
@ -222,20 +241,15 @@ getExceptionTable(const limex_type *limex) {
((const char *)limex + limex->exceptionOffset);
}
template<typename limex_type>
static
const ReportID *getReportList(const limex_type *limex) {
return (const ReportID *)((const char *)limex + limex->exReportOffset);
}
template<typename limex_type>
static
void dumpLimexExceptions(const limex_type *limex, FILE *f) {
const typename limex_traits<limex_type>::exception_type *e =
getExceptionTable(limex);
const ReportID *reports = getReportList(limex);
const u32 size = limex_traits<limex_type>::size;
const char *limex_base = (const char *)limex;
fprintf(f, "\n");
for (u32 i = 0; i < limex->exceptionCount; i++) {
fprintf(f, "exception %u: hasSquash=%u, reports offset=%u\n",
@ -251,7 +265,7 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) {
if (e[i].reports == MO_INVALID_IDX) {
fprintf(f, " <none>\n");
} else {
const ReportID *r = reports + e[i].reports;
const ReportID *r = (const ReportID *)(limex_base + e[i].reports);
while (*r != MO_INVALID_IDX) {
fprintf(f, " %u", *r++);
}
@ -459,36 +473,32 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
}
}
#define DUMP_TEXT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
}
#define DUMP_DOT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
UNUSED const string &base) { \
const LimExNFA##ddf_n *limex = \
(const LimExNFA##ddf_n *)getImplNfa(nfa); \
#define LIMEX_DUMP_FN(size) \
void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \
auto limex = (const LimExNFA##size *)getImplNfa(nfa); \
\
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \
dumpLimexText(limex, f); \
fclose(f); \
\
f = fopen_or_throw((base + ".dot").c_str(), "w"); \
dumpDotPreamble(f); \
u32 state_count = nfa->nPositions; \
dumpVertexDotInfo(limex, state_count, f, \
limex_labeller<LimExNFA##ddf_n>(limex)); \
limex_labeller<LimExNFA##size>(limex)); \
for (u32 i = 0; i < state_count; i++) { \
dumpLimDotInfo(limex, i, f); \
dumpExDotInfo(limex, i, f); \
} \
dumpDotTrailer(f); \
fclose(f); \
}
#define LIMEX_DUMP_FNS(size) \
DUMP_TEXT_FN(size) \
DUMP_DOT_FN(size)
LIMEX_DUMP_FNS(32)
LIMEX_DUMP_FNS(128)
LIMEX_DUMP_FNS(256)
LIMEX_DUMP_FNS(384)
LIMEX_DUMP_FNS(512)
LIMEX_DUMP_FN(32)
LIMEX_DUMP_FN(64)
LIMEX_DUMP_FN(128)
LIMEX_DUMP_FN(256)
LIMEX_DUMP_FN(384)
LIMEX_DUMP_FN(512)
} // namespace ue2

View File

@ -32,8 +32,8 @@
* X-macro generic impl, included into the various LimEx model implementations.
*/
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#include "config.h"
@ -44,8 +44,6 @@
#define PE_FN JOIN(processExceptional, SIZE)
#define RUN_EXCEPTION_FN JOIN(runException, SIZE)
#define ZERO_STATE JOIN(zero_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
#define OR_STATE JOIN(or_, STATE_T)
@ -59,7 +57,7 @@
#define ESTATE_ARG STATE_T estate
#else
#define ESTATE_ARG const STATE_T *estatep
#define estate LOAD_STATE(estatep)
#define estate (*estatep)
#endif
#ifdef STATE_ON_STACK
@ -97,7 +95,6 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
STATE_T *local_succ,
#endif
const struct IMPL_NFA_T *limex,
const ReportID *exReports,
u64a offset,
struct CONTEXT_T *ctx,
struct proto_cache *new_cache,
@ -133,7 +130,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
char *repeat_state = ctx->repeat_state + info->stateOffset;
if (e->trigger == LIMEX_TRIGGER_POS) {
char cyclic_on = TESTBIT_STATE(STATE_ARG_P, info->cyclicState);
char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState);
processPosTrigger(repeat, repeat_ctrl, repeat_state, offset,
cyclic_on);
*cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
@ -149,8 +146,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
*cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
DEBUG_PRINTF("stale history, squashing cyclic state\n");
assert(e->hasSquash == LIMEX_SQUASH_TUG);
STORE_STATE(succ, AND_STATE(LOAD_STATE(succ),
LOAD_STATE(&e->squash)));
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
return 1; // continue
} else if (rv == TRIGGER_SUCCESS_CACHE) {
new_cache->br = 1;
@ -164,7 +160,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
// Some exceptions fire accepts.
if (e->reports != MO_INVALID_IDX) {
if (flags & CALLBACK_OUTPUT) {
const ReportID *reports = exReports + e->reports;
const ReportID *reports =
(const ReportID *)((const char *)limex + e->reports);
if (unlikely(limexRunReports(reports, ctx->callback,
ctx->context, offset)
== MO_HALT_MATCHING)) {
@ -188,18 +185,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
// Most exceptions have a set of successors to switch on. `local_succ' is
// ORed into `succ' at the end of the caller's loop.
#ifndef BIG_MODEL
*local_succ = OR_STATE(*local_succ, LOAD_STATE(&e->successors));
*local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors));
#else
STORE_STATE(&ctx->local_succ, OR_STATE(LOAD_STATE(&ctx->local_succ),
LOAD_STATE(&e->successors)));
ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors));
#endif
// Some exceptions squash states behind them. Note that we squash states in
// 'succ', not local_succ.
if (e->hasSquash == LIMEX_SQUASH_CYCLIC ||
e->hasSquash == LIMEX_SQUASH_REPORT) {
STORE_STATE(succ, AND_STATE(LOAD_STATE(succ),
LOAD_STATE(&e->squash)));
if (e->hasSquash == LIMEX_SQUASH_CYCLIC
|| e->hasSquash == LIMEX_SQUASH_REPORT) {
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
if (*cacheable == CACHE_RESULT) {
*cacheable = DO_NOT_CACHE_RESULT;
}
@ -215,13 +210,12 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
static really_inline
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx,
char in_rev, char flags) {
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
if (EQ_STATE(estate, ctx->cached_estate)) {
DEBUG_PRINTF("using cached succ from previous state\n");
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc)));
*succ = OR_STATE(*succ, ctx->cached_esucc);
if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
DEBUG_PRINTF("firing cached reports from previous state\n");
if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
@ -236,7 +230,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#ifndef BIG_MODEL
STATE_T local_succ = ZERO_STATE;
#else
STORE_STATE(&ctx->local_succ, ZERO_STATE);
ctx->local_succ = ZERO_STATE;
#endif
// A copy of the estate as an array of GPR-sized chunks.
@ -254,7 +248,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}
@ -276,31 +270,31 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#ifndef BIG_MODEL
&local_succ,
#endif
limex, exReports, offset, ctx, &new_cache,
&cacheable, in_rev, flags)) {
limex, offset, ctx, &new_cache, &cacheable,
in_rev, flags)) {
return PE_RV_HALT;
}
} while (word);
} while (diffmask);
#ifndef BIG_MODEL
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), local_succ));
*succ = OR_STATE(*succ, local_succ);
#else
STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), ctx->local_succ));
*succ = OR_STATE(*succ, ctx->local_succ);
#endif
if (cacheable == CACHE_RESULT) {
STORE_STATE(&ctx->cached_estate, estate);
ctx->cached_estate = estate;
#ifndef BIG_MODEL
ctx->cached_esucc = local_succ;
#else
STORE_STATE(&ctx->cached_esucc, LOAD_STATE(&ctx->local_succ));
ctx->cached_esucc = ctx->local_succ;
#endif
ctx->cached_reports = new_cache.reports;
ctx->cached_br = new_cache.br;
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
if (ctx->cached_br) {
STORE_STATE(&ctx->cached_estate, ZERO_STATE);
ctx->cached_estate = ZERO_STATE;
}
}
@ -314,8 +308,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#undef EQ_STATE
#undef OR_STATE
#undef TESTBIT_STATE
#undef LOAD_STATE
#undef STORE_STATE
#undef PE_FN
#undef RUN_EXCEPTION_FN
#undef CONTEXT_T
@ -333,11 +325,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
#undef STATE_ARG_NAME
#undef STATE_ARG_P
#undef IMPL_NFA_T
#undef CHUNK_T
#undef FIND_AND_CLEAR_FN
#undef IMPL_NFA_T
#undef GET_NFA_REPEAT_INFO_FN
// Parameters.
#undef SIZE
#undef STATE_T
#undef POPCOUNT_FN
#undef RANK_IN_MASK_FN

View File

@ -132,7 +132,6 @@ struct LimExNFA##size { \
u32 acceptEodOffset; /* rel. to start of LimExNFA */ \
u32 exceptionCount; \
u32 exceptionOffset; /* rel. to start of LimExNFA */ \
u32 exReportOffset; /* rel. to start of LimExNFA */ \
u32 repeatCount; \
u32 repeatOffset; \
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
@ -160,6 +159,7 @@ struct LimExNFA##size { \
};
CREATE_NFA_LIMEX(32)
CREATE_NFA_LIMEX(64)
CREATE_NFA_LIMEX(128)
CREATE_NFA_LIMEX(256)
CREATE_NFA_LIMEX(384)
@ -183,9 +183,16 @@ struct NFARepeatInfo {
};
struct NFAAccept {
u32 state; //!< state ID of triggering state
ReportID externalId; //!< report ID to raise
u32 squash; //!< offset into masks, or MO_INVALID_IDX
u8 single_report; //!< If true, 'reports' is report id.
/**
* \brief If single report is true, this is the report id to fire.
* Otherwise, it is the offset (relative to the start of the LimExNFA
* structure) of a list of reports, terminated with MO_INVALID_IDX.
*/
u32 reports;
u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX.
};
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,6 +31,5 @@
#define NFA_MAX_STATES 512 /**< max states in an NFA */
#define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */
#define NFA_MAX_TOP_MASKS 32 /**< max number of MQE_TOP_N event types */
#endif

View File

@ -49,12 +49,13 @@
#include "limex_runtime.h"
// Other implementation code from X-Macro impl.
#define SIZE 32
#define STATE_T u32
#define SIZE 32
#define STATE_T u32
#define ENG_STATE_T u32
#define LOAD_FROM_ENG load_u32
#include "limex_state_impl.h"
#define SIZE 32
#define STATE_T u32
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
@ -64,8 +65,6 @@
// Process exceptional states
#define SIZE 32
#define STATE_T u32
#define STATE_ON_STACK
#define ESTATE_ON_STACK
#define RUN_EXCEPTION_FN_ONLY
@ -74,8 +73,7 @@
static really_inline
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
const struct LimExNFA32 *limex,
const struct NFAException32 *exceptions,
const ReportID *exReports, u64a offset,
const struct NFAException32 *exceptions, u64a offset,
struct NFAContext32 *ctx, char in_rev, char flags) {
assert(estate != 0); // guaranteed by calling macro
@ -105,8 +103,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
u32 bit = findAndClearLSB_32(&estate);
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
const struct NFAException32 *e = &exceptions[idx];
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
ctx, &new_cache, &cacheable, in_rev, flags)) {
if (!runException32(e, s, succ, &local_succ, limex, offset, ctx,
&new_cache, &cacheable, in_rev, flags)) {
return PE_RV_HALT;
}
} while (estate != 0);
@ -128,7 +126,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
}
// 32-bit models.
#define SIZE 32
#define STATE_T u32
#include "limex_runtime_impl.h"

View File

@ -30,8 +30,8 @@
\brief Limex Execution Engine Or:
How I Learned To Stop Worrying And Love The Preprocessor
This file includes utility functions which do not depend on the state size or
shift masks directly.
This file includes utility functions which do not depend on the size of the
state or shift masks directly.
*/
#ifndef LIMEX_RUNTIME_H
@ -72,41 +72,6 @@ struct proto_cache {
const ReportID *reports;
};
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
(JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
limex->shiftAmount[nels_i]))
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 's' and successors in 'succ'.
#define NFA_EXEC_GET_LIM_SUCC(gls_type) \
do { \
succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
switch (limex->shiftCount) { \
case 8: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
case 6: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
case 5: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
case 4: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
case 3: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
case 2: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
case 1: \
case 0: \
; \
} \
} while (0)
#define PE_RV_HALT 1
#ifdef STATE_ON_STACK
@ -138,14 +103,42 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
return MO_CONTINUE_MATCHING; // continue
}
static really_inline
int limexRunAccept(const char *limex_base, const struct NFAAccept *accept,
NfaCallback callback, void *context, u64a offset) {
if (accept->single_report) {
const ReportID report = accept->reports;
DEBUG_PRINTF("firing single report for id %u at offset %llu\n", report,
offset);
return callback(0, offset, report, context);
}
const ReportID *reports = (const ReportID *)(limex_base + accept->reports);
return limexRunReports(reports, callback, context, offset);
}
static really_inline
int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept,
ReportID report) {
if (accept->single_report) {
return accept->reports == report;
}
const ReportID *reports = (const ReportID *)(limex_base + accept->reports);
assert(*reports != MO_INVALID_IDX);
do {
if (*reports == report) {
return 1;
}
reports++;
} while (*reports != MO_INVALID_IDX);
return 0;
}
/** \brief Return a (correctly typed) pointer to the exception table. */
#define getExceptionTable(exc_type, lim) \
((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset))
/** \brief Return a pointer to the exceptional reports list. */
#define getExReports(lim) \
((const ReportID *)((const char *)(lim) + (lim)->exReportOffset))
/** \brief Return a pointer to the ordinary accepts table. */
#define getAcceptTable(lim) \
((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset))
@ -170,6 +163,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
}
MAKE_GET_NFA_REPEAT_INFO(32)
MAKE_GET_NFA_REPEAT_INFO(64)
MAKE_GET_NFA_REPEAT_INFO(128)
MAKE_GET_NFA_REPEAT_INFO(256)
MAKE_GET_NFA_REPEAT_INFO(384)

View File

@ -29,7 +29,6 @@
#include "util/join.h"
#include <string.h>
/** \file
* \brief Limex Execution Engine Or:
* How I Learned To Stop Worrying And Love The Preprocessor
@ -37,8 +36,9 @@
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
*/
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
@ -46,7 +46,6 @@
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
#define TOP_FN JOIN(moNfaTop, SIZE)
#define TOPN_FN JOIN(moNfaTopN, SIZE)
@ -67,11 +66,10 @@
#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent)
#define CONTEXT_T JOIN(NFAContext, SIZE)
#define EXCEPTION_T JOIN(struct NFAException, SIZE)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define LSHIFT_STATE JOIN(lshift_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
#define ZERO_STATE JOIN(zero_, STATE_T)
@ -96,17 +94,16 @@
#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask
#define EXCEPTION_MASK exceptionMask
#else
#define ACCEL_MASK LOAD_STATE(&limex->accel)
#define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends)
#define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask)
#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel)
#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends)
#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask)
#endif
// Run exception processing, if necessary. Returns 0 if scanning should
// continue, 1 if an accept was fired and the user instructed us to halt.
static really_inline
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, STATE_T s,
const STATE_T emask, size_t i, u64a offset,
STATE_T s, const STATE_T emask, size_t i, u64a offset,
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
const char flags, const char in_rev,
const char first_match) {
@ -117,13 +114,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
}
if (first_match && i) {
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
DEBUG_PRINTF("first match at %zu\n", i);
DEBUG_PRINTF("for nfa %p\n", limex);
assert(final_loc);
STORE_STATE(&ctx->s, s);
ctx->s = s;
*final_loc = i;
return 1; // Halt matching.
}
@ -133,7 +130,7 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
int rv = JOIN(processExceptional, SIZE)(
pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports,
pass_state, pass_estate, diffmask, succ, limex, exceptions,
callback_offset, ctx, in_rev, localflags);
if (rv == PE_RV_HALT) {
return 1; // Halt matching.
@ -161,22 +158,55 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
return j;
}
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \
LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \
limex_m->shiftAmount[shift_idx])
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 'curr_m' and places the successors in 'succ_m'.
#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \
do { \
succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \
switch (limex_m->shiftCount) { \
case 8: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \
case 7: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \
case 6: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \
case 5: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \
case 4: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \
case 3: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \
case 2: \
succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \
case 1: \
case 0: \
; \
} \
} while (0)
static really_inline
char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
struct CONTEXT_T *ctx, u64a offset, const char flags,
u64a *final_loc, const char first_match) {
const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex));
const ENG_STATE_T *reach = get_reach_table(limex);
#if SIZE < 256
const STATE_T accelMask = LOAD_STATE(&limex->accel);
const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends);
const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask);
const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel);
const STATE_T accel_and_friendsMask
= LOAD_FROM_ENG(&limex->accel_and_friends);
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
#endif
const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset);
const union AccelAux *accelAux =
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
STATE_T s = LOAD_STATE(&ctx->s);
STATE_T s = ctx->s;
/* assert(ISALIGNED_16(exceptions)); */
/* assert(ISALIGNED_16(reach)); */
@ -195,21 +225,20 @@ without_accel:
DUMP_INPUT(i);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("no states are switched on, early exit\n");
STORE_STATE(&ctx->s, s);
ctx->s = s;
return MO_CONTINUE_MATCHING;
}
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
first_match)) {
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
&succ, final_loc, ctx, flags, 0, first_match)) {
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]]));
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
with_accel:
@ -252,33 +281,30 @@ with_accel:
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
first_match)) {
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
&succ, final_loc, ctx, flags, 0, first_match)) {
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]]));
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
STORE_STATE(&ctx->s, s);
ctx->s = s;
if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) {
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
const struct NFAAccept *acceptTable = getAcceptTable(limex);
const u32 acceptCount = limex->acceptCount;
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
if (first_match) {
STORE_STATE(&ctx->s, s);
ctx->s = s;
assert(final_loc);
*final_loc = length;
return MO_HALT_MATCHING;
} else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, acceptTable,
acceptCount, offset + length,
} else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask,
acceptTable, offset + length,
ctx->callback, ctx->context)) {
return MO_HALT_MATCHING;
}
@ -294,13 +320,12 @@ with_accel:
static never_inline
char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
struct CONTEXT_T *ctx, u64a offset) {
const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex));
const ENG_STATE_T *reach = get_reach_table(limex);
#if SIZE < 256
const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask);
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
#endif
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
STATE_T s = LOAD_STATE(&ctx->s);
STATE_T s = ctx->s;
/* assert(ISALIGNED_16(exceptions)); */
/* assert(ISALIGNED_16(reach)); */
@ -311,34 +336,33 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
DUMP_INPUT(i-1);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("no states are switched on, early exit\n");
STORE_STATE(&ctx->s, s);
ctx->s = s;
return MO_CONTINUE_MATCHING;
}
u8 c = input[i-1];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T);
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
flags, 1, 0)) {
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
&succ, final_loc, ctx, flags, 1, 0)) {
return MO_HALT_MATCHING;
}
s = AND_STATE(succ, reach[limex->reachMap[c]]);
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
}
STORE_STATE(&ctx->s, s);
ctx->s = s;
STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
const struct NFAAccept *acceptTable = getAcceptTable(limex);
const u32 acceptCount = limex->acceptCount;
assert(flags & CALLBACK_OUTPUT);
if (acceptCount) {
STATE_T foundAccepts = AND_STATE(s, acceptMask);
if (unlikely(ISNONZERO_STATE(foundAccepts))) {
if (PROCESS_ACCEPTS_NOSQUASH_FN(&ctx->s, acceptTable, acceptCount,
offset, ctx->callback,
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask,
acceptTable, offset, ctx->callback,
ctx->context)) {
return MO_HALT_MATCHING;
}
@ -354,9 +378,9 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
return;
}
STATE_T s = LOAD_STATE(src);
STATE_T s = *(STATE_T *)src;
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
@ -369,7 +393,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&s, info->cyclicState)) {
if (!TESTBIT_STATE(s, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
@ -388,7 +412,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
offset);
}
STORE_STATE(src, s);
*(STATE_T *)src = s;
}
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
@ -411,7 +435,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
// Note: state has already been expanded into 'dest'.
const STATE_T cyclics =
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclics are on\n");
return;
@ -425,7 +449,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
if (!TESTBIT_STATE(cyclics, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
@ -447,9 +471,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest,
return 0;
}
char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n,
struct mq *q) {
STORE_STATE(q->state, ZERO_STATE);
char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) {
*(STATE_T *)q->state = ZERO_STATE;
// Zero every bounded repeat control block in state.
const IMPL_NFA_T *limex = getImplNfa(n);
@ -529,7 +552,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
u32 e = q->items[q->cur].type;
switch (e) {
DEFINE_CASE(MQE_TOP)
STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s)));
ctx->s = TOP_FN(limex, !!sp, ctx->s);
break;
DEFINE_CASE(MQE_START)
break;
@ -539,8 +562,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
assert(e >= MQE_TOP_FIRST);
assert(e < MQE_INVALID);
DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST));
STORE_STATE(&ctx->s,
TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST));
ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST);
}
#undef DEFINE_CASE
}
@ -570,12 +592,12 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
assert(q->items[q->cur].location >= 0);
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -599,7 +621,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
assert(ep - offset <= q->length);
if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp)
== MO_HALT_MATCHING) {
STORE_STATE(q->state, ZERO_STATE);
*(STATE_T *)q->state = ZERO_STATE;
return 0;
}
@ -616,7 +638,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_ALIVE;
}
@ -628,7 +650,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
EXPIRE_ESTATE_FN(limex, &ctx, sp);
DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (q->cur != q->end) {
q->cur--;
@ -637,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
/* used by suffix execution in Rose */
@ -665,11 +687,11 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = q->cb;
ctx.context = q->context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -699,7 +721,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_MATCHES_PENDING;
}
@ -721,7 +743,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_MATCHES_PENDING;
}
@ -737,7 +759,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp - offset;
DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
return MO_ALIVE;
}
@ -749,7 +771,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
EXPIRE_ESTATE_FN(limex, &ctx, sp);
DEBUG_PRINTF("END\n");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (q->cur != q->end) {
q->cur--;
@ -758,7 +780,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
return MO_ALIVE;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
// Used for execution Rose prefix/infixes.
@ -777,11 +799,11 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
ctx.repeat_state = q->streamState + limex->stateSize;
ctx.callback = NULL;
ctx.context = NULL;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
DEBUG_PRINTF("LOAD STATE\n");
STORE_STATE(&ctx.s, LOAD_STATE(q->state));
ctx.s = *(STATE_T *)q->state;
assert(q->items[q->cur].type == MQE_START);
u64a offset = q->offset;
@ -793,7 +815,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
if (n->maxWidth) {
if (ep - sp > n->maxWidth) {
sp = ep - n->maxWidth;
STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp));
ctx.s = INITIAL_FN(limex, !!sp);
}
}
assert(ep >= sp);
@ -832,14 +854,14 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
DEBUG_PRINTF("END, nfa is %s\n",
ISNONZERO_STATE(ctx.s) ? "still alive" : "dead");
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
*(STATE_T *)q->state = ctx.s;
if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl,
if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl,
ctx.repeat_state, sp + 1, report)) {
return MO_MATCHES_PENDING;
}
return ISNONZERO_STATE(LOAD_STATE(&ctx.s));
return ISNONZERO_STATE(ctx.s);
}
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
@ -852,8 +874,8 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
const union RepeatControl *repeat_ctrl =
getRepeatControlBaseConst(state, sizeof(STATE_T));
const char *repeat_state = streamState + limex->stateSize;
return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, 1,
callback, context);
return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback,
context);
}
char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
@ -875,11 +897,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
ctx.repeat_state = NULL;
ctx.callback = cb;
ctx.context = context;
STORE_STATE(&ctx.cached_estate, ZERO_STATE);
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
const IMPL_NFA_T *limex = getImplNfa(n);
STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored
ctx.s = INITIAL_FN(limex, 0); // always anchored
// 'buf' may be null, for example when we're scanning at EOD time.
if (buflen) {
@ -896,8 +918,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset);
}
if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) {
TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context);
if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) {
const union RepeatControl *repeat_ctrl = NULL;
const char *repeat_state = NULL;
TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb,
context);
}
// NOTE: return value is unused.
@ -913,7 +938,7 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = LOAD_STATE(q->state);
STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1;
return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
@ -928,7 +953,7 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = LOAD_STATE(q->state);
STATE_T state = *(STATE_T *)q->state;
u64a offset = q->offset + q_last_loc(q) + 1;
return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
@ -941,8 +966,8 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
s64a loc) {
assert(nfa->flags & NFA_ZOMBIE);
const IMPL_NFA_T *limex = getImplNfa(nfa);
STATE_T state = LOAD_STATE(q->state);
STATE_T zmask = LOAD_STATE(&limex->zombieMask);
STATE_T state = *(STATE_T *)q->state;
STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask);
if (limex->repeatCount) {
u64a offset = q->offset + loc + 1;
@ -960,7 +985,6 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
}
#undef TESTEOD_FN
#undef TESTEOD_REV_FN
#undef INITIAL_FN
#undef TOP_FN
#undef TOPN_FN
@ -981,11 +1005,10 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef STREAMSILENT_FN
#undef CONTEXT_T
#undef EXCEPTION_T
#undef LOAD_STATE
#undef STORE_STATE
#undef AND_STATE
#undef ANDNOT_STATE
#undef OR_STATE
#undef LSHIFT_STATE
#undef TESTBIT_STATE
#undef CLEARBIT_STATE
#undef ZERO_STATE
@ -999,8 +1022,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef ACCEL_MASK
#undef ACCEL_AND_FRIENDS_MASK
#undef EXCEPTION_MASK
// Parameters.
#undef SIZE
#undef STATE_T
#undef LIMEX_API_ROOT

View File

@ -41,52 +41,6 @@
#include "util/bitutils.h"
#include "util/simd_utils.h"
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
static really_inline
u32 packedExtract32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u32(x, mask);
#else
u32 result = 0, num = 1;
while (mask != 0) {
u32 bit = findAndClearLSB_32(&mask);
if (x & (1U << bit)) {
assert(num != 0); // more than 32 bits!
result |= num;
}
num <<= 1;
}
return result;
#endif
}
static really_inline
u32 packedExtract64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u64(x, mask);
#else
u32 result = 0, num = 1;
while (mask != 0) {
u32 bit = findAndClearLSB_64(&mask);
if (x & (1ULL << bit)) {
assert(num != 0); // more than 32 bits!
result |= num;
}
num <<= 1;
}
return result;
#endif
}
#undef HAVE_PEXT
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);

View File

@ -48,19 +48,16 @@
#include "limex_runtime.h"
#define SIZE 128
#define STATE_T m128
#define SIZE 128
#define STATE_T m128
#define ENG_STATE_T m128
#define LOAD_FROM_ENG load_m128
#include "limex_exceptional.h"
#define SIZE 128
#define STATE_T m128
#include "limex_state_impl.h"
#define SIZE 128
#define STATE_T m128
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 128
#define STATE_T m128
#include "limex_runtime_impl.h"

View File

@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 256
#define STATE_T m256
#define SIZE 256
#define STATE_T m256
#define ENG_STATE_T m256
#define LOAD_FROM_ENG load_m256
#include "limex_exceptional.h"
#define SIZE 256
#define STATE_T m256
#include "limex_state_impl.h"
#define SIZE 256
#define STATE_T m256
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 256
#define STATE_T m256
#include "limex_runtime_impl.h"

View File

@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 384
#define STATE_T m384
#define SIZE 384
#define STATE_T m384
#define ENG_STATE_T m384
#define LOAD_FROM_ENG load_m384
#include "limex_exceptional.h"
#define SIZE 384
#define STATE_T m384
#include "limex_state_impl.h"
#define SIZE 384
#define STATE_T m384
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 384
#define STATE_T m384
#include "limex_runtime_impl.h"

View File

@ -45,19 +45,16 @@
// Common code
#include "limex_runtime.h"
#define SIZE 512
#define STATE_T m512
#define SIZE 512
#define STATE_T m512
#define ENG_STATE_T m512
#define LOAD_FROM_ENG load_m512
#include "limex_exceptional.h"
#define SIZE 512
#define STATE_T m512
#include "limex_state_impl.h"
#define SIZE 512
#define STATE_T m512
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 512
#define STATE_T m512
#include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,8 +35,8 @@
#include "util/state_compress.h"
#include <string.h>
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
#endif
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
@ -44,29 +44,33 @@
#define REACHMASK_FN JOIN(moNfaReachMask, SIZE)
#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
#define COMPRESSED_STORE_FN JOIN(storecompressed, SIZE)
#define COMPRESSED_LOAD_FN JOIN(loadcompressed, SIZE)
#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T)
#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T)
#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T)
#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T)
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define ISZERO_STATE JOIN(isZero_, STATE_T)
static really_inline
const STATE_T *REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) {
const STATE_T *reach
= (const STATE_T *)((const char *)limex + sizeof(*limex));
assert(ISALIGNED_N(reach, alignof(STATE_T)));
return &reach[limex->reachMap[key]];
const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) {
const ENG_STATE_T *reach
= (const ENG_STATE_T *)((const char *)limex + sizeof(*limex));
assert(ISALIGNED_N(reach, alignof(ENG_STATE_T)));
return reach;
}
static really_inline
STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) {
const ENG_STATE_T *reach = get_reach_table(limex);
return LOAD_FROM_ENG(&reach[limex->reachMap[key]]);
}
static really_inline
void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
u8 key) {
assert(ISALIGNED_N(src, alignof(STATE_T)));
STATE_T a_src = LOAD_STATE(src);
STATE_T a_src = *src;
DEBUG_PRINTF("compress state: %p -> %p\n", src, dest);
@ -77,31 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
} else {
DEBUG_PRINTF("compress state, key=%hhx\n", key);
const STATE_T *reachmask = REACHMASK_FN(limex, key);
STATE_T reachmask = REACHMASK_FN(limex, key);
// Masked compression means that we mask off the initDs states and
// provide a shortcut for the all-zeroes case. Note that these must be
// switched on in the EXPAND call below.
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T s = AND_STATE(LOAD_STATE(&limex->compressMask), a_src);
STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src);
if (ISZERO_STATE(s)) {
DEBUG_PRINTF("after compression mask, all states are zero\n");
memset(dest, 0, limex->stateSize);
return;
}
STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask),
LOAD_STATE(reachmask));
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize);
} else {
COMPRESSED_STORE_FN(dest, src, reachmask, limex->stateSize);
COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize);
}
}
}
static really_inline
void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
u8 key) {
void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) {
assert(ISALIGNED_N(dest, alignof(STATE_T)));
DEBUG_PRINTF("expand state: %p -> %p\n", src, dest);
@ -111,16 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
*dest = PARTIAL_LOAD_FN(src, limex->stateSize);
} else {
DEBUG_PRINTF("expand state, key=%hhx\n", key);
const STATE_T *reachmask = REACHMASK_FN(limex, key);
STATE_T reachmask = REACHMASK_FN(limex, key);
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask),
LOAD_STATE(reachmask));
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize);
STORE_STATE(dest, OR_STATE(LOAD_STATE(&limex->initDS),
LOAD_STATE(dest)));
*dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest);
} else {
COMPRESSED_LOAD_FN(dest, src, reachmask, limex->stateSize);
COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize);
}
}
}
@ -134,11 +136,6 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src,
#undef COMPRESSED_LOAD_FN
#undef PARTIAL_STORE_FN
#undef PARTIAL_LOAD_FN
#undef LOAD_STATE
#undef STORE_STATE
#undef OR_STATE
#undef AND_STATE
#undef ISZERO_STATE
#undef SIZE
#undef STATE_T

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -42,10 +42,10 @@
static really_inline
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
u16 s, u64a loc, char eod, u16 *const cached_accept_state,
u32 *const cached_accept_id) {
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
(u16)(s & STATE_MASK), loc, eod);
u32 s, u64a loc, char eod, u32 *cached_accept_state,
u32 *cached_accept_id) {
DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
s & STATE_MASK, loc, eod);
if (!eod && s == *cached_accept_state) {
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
@ -89,27 +89,108 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
}
static really_inline
char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf,
const u8 *run_mcclellan_accel(const struct mcclellan *m,
const struct mstate_aux *aux, u32 s,
const u8 **min_accel_offset,
const u8 *c, const u8 *c_end) {
DEBUG_PRINTF("skipping\n");
u32 accel_offset = aux[s].accel_offset;
assert(aux[s].accel_offset);
assert(accel_offset >= m->aux_offset);
assert(!m->sherman_offset || accel_offset < m->sherman_offset);
const union AccelAux *aaux = (const void *)((const char *)m + accel_offset);
const u8 *c2 = run_accel(aaux, c, c_end);
if (c2 < *min_accel_offset + BAD_ACCEL_DIST) {
*min_accel_offset = c2 + BIG_ACCEL_PENALTY;
} else {
*min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
}
if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) {
*min_accel_offset = c_end;
}
DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
c2 - c, *min_accel_offset - c2, c_end - c2);
return c2;
}
static really_inline
u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
u32 s, char do_accel, enum MatchMode mode) {
const u8 *c = *c_inout;
const u16 *succ_table
= (const u16 *)((const char *)m + sizeof(struct mcclellan));
assert(ISALIGNED_N(succ_table, 2));
u32 sherman_base = m->sherman_limit;
const char *sherman_base_offset
= (const char *)m - sizeof(struct NFA) + m->sherman_offset;
u32 as = m->alphaShift;
s &= STATE_MASK;
while (c < end && s) {
u8 cprime = m->remap[*c];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c,
ourisprint(*c) ? *c : '?', cprime, s);
if (s < sherman_base) {
DEBUG_PRINTF("doing normal\n");
assert(s < m->state_count);
s = succ_table[(s << as) + cprime];
} else {
const char *sherman_state
= findShermanState(m, sherman_base_offset, sherman_base, s);
DEBUG_PRINTF("doing sherman (%u)\n", s);
s = doSherman16(sherman_state, cprime, succ_table, as);
}
DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
c++;
if (do_accel && (s & ACCEL_FLAG)) {
break;
}
if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
break;
}
s &= STATE_MASK;
}
*c_inout = c;
return s;
}
static really_inline
char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **c_final, enum MatchMode mode) {
assert(ISALIGNED_N(state, 2));
if (!len) {
if (mode == STOP_AT_MATCH) {
*c_final = buf;
}
return MO_ALIVE;
}
u16 s = *state;
const u8 *c = buf, *c_end = buf + len;
const u16 *succ_table = (const u16 *)((const char *)m
+ sizeof(struct mcclellan));
assert(ISALIGNED_N(succ_table, 2));
const u16 sherman_base = m->sherman_limit;
const char *sherman_base_offset
= (const char *)m - sizeof(struct NFA) + m->sherman_offset;
const u32 as = m->alphaShift;
u32 s = *state;
const u8 *c = buf;
const u8 *c_end = buf + len;
const struct mstate_aux *aux
= (const struct mstate_aux *)((const char *)m + m->aux_offset
- sizeof(struct NFA));
s &= STATE_MASK;
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
DEBUG_PRINTF("s: %hu, len %zu\n", s, len);
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
const u8 *min_accel_offset = c;
if (!m->has_accel || len < ACCEL_MIN_LEN) {
@ -120,26 +201,19 @@ char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf,
goto with_accel;
without_accel:
while (c < min_accel_offset && s) {
u8 cprime = m->remap[*(c++)];
DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
if (s < sherman_base) {
DEBUG_PRINTF("doing normal\n");
assert(s < m->state_count);
s = succ_table[((u32)s << as) + cprime];
} else {
const char *sherman_state
= findShermanState(m, sherman_base_offset, sherman_base, s);
DEBUG_PRINTF("doing sherman (%hu)\n", s);
s = doSherman16(sherman_state, cprime, succ_table, as);
do {
assert(c < min_accel_offset);
if (!s) {
goto exit;
}
DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
s = doNormal16(m, &c, min_accel_offset, s, 0, mode);
if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@ -147,39 +221,51 @@ without_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state,
&cached_accept_id) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
}
}
s &= STATE_MASK;
assert(c <= min_accel_offset);
} while (c < min_accel_offset);
s &= STATE_MASK;
if (c == c_end) {
goto exit;
} else {
goto with_accel;
}
with_accel:
while (c < c_end && s) {
u8 cprime = m->remap[*(c++)];
DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
if (s < sherman_base) {
DEBUG_PRINTF("doing normal\n");
assert(s < m->state_count);
s = succ_table[((u32)s << as) + cprime];
} else {
const char *sherman_state
= findShermanState(m, sherman_base_offset, sherman_base, s);
DEBUG_PRINTF("doing sherman (%hu)\n", s);
s = doSherman16(sherman_state, cprime, succ_table, as);
do {
assert(c < c_end);
if (!s) {
goto exit;
}
DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
if (s & ACCEL_FLAG) {
DEBUG_PRINTF("skipping\n");
s &= STATE_MASK;
c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end);
if (c == c_end) {
goto exit;
} else {
goto without_accel;
}
}
s = doNormal16(m, &c, c_end, s, 1, mode);
if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
if (mode == STOP_AT_MATCH) {
*state = s & STATE_MASK;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
@ -187,56 +273,31 @@ with_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
return MO_DEAD; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state,
&cached_accept_id) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
}
} else if (s & ACCEL_FLAG) {
DEBUG_PRINTF("skipping\n");
const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK);
u32 accel_offset = this_aux->accel_offset;
assert(accel_offset >= m->aux_offset);
assert(accel_offset < m->sherman_offset);
const union AccelAux *aaux
= (const void *)((const char *)m + accel_offset);
const u8 *c2 = run_accel(aaux, c, c_end);
if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
min_accel_offset = c2 + BIG_ACCEL_PENALTY;
} else {
min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
}
if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
min_accel_offset = c_end;
}
DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
c2 - c, min_accel_offset - c2, c_end - c2);
c = c2;
s &= STATE_MASK;
goto without_accel;
}
s &= STATE_MASK;
}
assert(c <= c_end);
} while (c < c_end);
exit:
s &= STATE_MASK;
if (mode == STOP_AT_MATCH) {
*c_final = c_end;
}
*state = s;
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf,
char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
@ -244,7 +305,7 @@ char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf,
}
static never_inline
char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf,
char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
@ -252,15 +313,15 @@ char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf,
}
static never_inline
char mcclellanExec16_i_nm(const struct mcclellan *m, u16 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, NO_MATCHES);
}
static really_inline
char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf,
char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point,
enum MatchMode mode) {
@ -271,35 +332,69 @@ char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf,
return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt,
single, final_point);
} else {
assert (mode == NO_MATCHES);
assert(mode == NO_MATCHES);
return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt,
single, final_point);
}
}
static really_inline
char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **c_final, enum MatchMode mode) {
u8 s = *state;
const u8 *c = buf, *c_end = buf + len;
u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
u32 s, char do_accel, enum MatchMode mode) {
const u8 *c = *c_inout;
u32 accel_limit = m->accel_limit_8;
u32 accept_limit = m->accept_limit_8;
const u32 as = m->alphaShift;
const u8 *succ_table = (const u8 *)((const char *)m
+ sizeof(struct mcclellan));
const u32 as = m->alphaShift;
const struct mstate_aux *aux;
while (c < end && s) {
u8 cprime = m->remap[*c];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c,
ourisprint(*c) ? *c : '?', cprime);
s = succ_table[(s << as) + cprime];
aux = (const struct mstate_aux *)((const char *)m + m->aux_offset
DEBUG_PRINTF("s: %u\n", s);
c++;
if (do_accel) {
if (s >= accel_limit) {
break;
}
} else {
if (mode != NO_MATCHES && s >= accept_limit) {
break;
}
}
}
*c_inout = c;
return s;
}
static really_inline
char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **c_final, enum MatchMode mode) {
if (!len) {
if (mode == STOP_AT_MATCH) {
*c_final = buf;
}
return MO_ALIVE;
}
u32 s = *state;
const u8 *c = buf;
const u8 *c_end = buf + len;
const struct mstate_aux *aux
= (const struct mstate_aux *)((const char *)m + m->aux_offset
- sizeof(struct NFA));
u16 accel_limit = m->accel_limit_8;
u16 accept_limit = m->accept_limit_8;
u32 accept_limit = m->accept_limit_8;
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit);
DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
DEBUG_PRINTF("s: %hhu, len %zu\n", s, len);
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
const u8 *min_accel_offset = c;
if (!m->has_accel || len < ACCEL_MIN_LEN) {
@ -310,124 +405,119 @@ char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf,
goto with_accel;
without_accel:
while (c < min_accel_offset && s) {
u8 cprime = m->remap[*(c++)];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
s = succ_table[((u32)s << as) + cprime];
DEBUG_PRINTF("s: %hhu\n", s);
do {
assert(c < min_accel_offset);
if (!s) {
goto exit;
}
s = doNormal8(m, &c, min_accel_offset, s, 0, mode);
if (mode != NO_MATCHES && s >= accept_limit) {
if (mode == STOP_AT_MATCH) {
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state,
&cached_accept_id)
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
assert(c <= min_accel_offset);
} while (c < min_accel_offset);
if (c == c_end) {
goto exit;
}
with_accel:
while (c < c_end && s) {
u8 cprime = m->remap[*(c++)];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
s = succ_table[((u32)s << as) + cprime];
DEBUG_PRINTF("s: %hhu\n", s);
do {
u32 accel_limit = m->accel_limit_8;
assert(c < c_end);
if (s >= accel_limit) { /* accept_limit >= accel_limit */
if (mode != NO_MATCHES && s >= accept_limit) {
if (mode == STOP_AT_MATCH) {
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_CONTINUE_MATCHING;
}
if (!s) {
goto exit;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state,
&cached_accept_id)
== MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (aux[s].accel_offset) {
DEBUG_PRINTF("skipping\n");
const union AccelAux *aaux = (const void *)((const char *)m
+ aux[s].accel_offset);
const u8 *c2 = run_accel(aaux, c, c_end);
if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
min_accel_offset = c2 + BIG_ACCEL_PENALTY;
} else {
min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
}
if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
min_accel_offset = c_end;
}
DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
c2 - c, min_accel_offset - c2, c_end - c2);
c = c2;
if (s >= accel_limit && aux[s].accel_offset) {
c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end);
if (c == c_end) {
goto exit;
} else {
goto without_accel;
}
}
}
s = doNormal8(m, &c, c_end, s, 1, mode);
if (mode != NO_MATCHES && s >= accept_limit) {
if (mode == STOP_AT_MATCH) {
DEBUG_PRINTF("match - pausing\n");
*state = s;
*c_final = c - 1;
return MO_MATCHES_PENDING;
}
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_DEAD;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
}
}
assert(c <= c_end);
} while (c < c_end);
exit:
*state = s;
if (mode == STOP_AT_MATCH) {
*c_final = c_end;
}
return MO_CONTINUE_MATCHING;
return MO_ALIVE;
}
static never_inline
char mcclellanExec8_i_cb(const struct mcclellan *m, u8 *state, const u8 *buf,
char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, CALLBACK_OUTPUT);
final_point, CALLBACK_OUTPUT);
}
static never_inline
char mcclellanExec8_i_sam(const struct mcclellan *m, u8 *state, const u8 *buf,
char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, STOP_AT_MATCH);
final_point, STOP_AT_MATCH);
}
static never_inline
char mcclellanExec8_i_nm(const struct mcclellan *m, u8 *state, const u8 *buf,
char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point) {
return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, NO_MATCHES);
final_point, NO_MATCHES);
}
static really_inline
char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf,
char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf,
size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
char single, const u8 **final_point,
enum MatchMode mode) {
@ -445,7 +535,7 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf,
}
static really_inline
char mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset,
char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset,
NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
const struct mstate_aux *aux = get_aux(m, s);
@ -466,7 +556,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
s64a sp;
assert(ISALIGNED_N(q->state, 2));
u16 s = *(u16 *)q->state;
u32 s = *(u16 *)q->state;
if (q->report_current) {
assert(s);
@ -478,7 +568,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
&cached_accept_state, &cached_accept_id);
@ -487,7 +577,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -496,12 +586,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *cur_buf = sp < 0 ? hend : buffer;
char report = 1;
if (mode == CALLBACK_OUTPUT) {
/* we are starting inside the history buffer: matches are suppressed */
report = !(sp < 0);
}
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@ -528,19 +612,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
/* do main buffer region */
const u8 *final_look;
if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single, &final_look,
report ? mode : NO_MATCHES)
== MO_HALT_MATCHING) {
assert(report);
char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_DEAD) {
*(u16 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
DEBUG_PRINTF("state %hu final_look %zd\n", s,
final_look - cur_buf);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@ -549,6 +634,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@ -563,7 +649,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
if (sp == 0) {
cur_buf = buffer;
report = 1;
}
if (sp != ep) {
@ -582,7 +667,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u16 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@ -591,18 +676,18 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
}
}
static really_inline really_flatten
char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset,
const u8 *buffer, size_t length,
NfaCallback cb, void *context, char single) {
static really_inline
char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context,
char single) {
assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = getImplNfa(n);
u16 s = m->start_anchored;
u32 s = m->start_anchored;
if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return s ? MO_ALIVE : MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@ -611,19 +696,19 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return !!s;
return MO_ALIVE;
}
static really_inline
char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, NfaCallback cb, void *context,
struct mq *q, char single, s64a end,
enum MatchMode mode) {
const u8 *hend, NfaCallback cb, void *context,
struct mq *q, char single, s64a end,
enum MatchMode mode) {
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = getImplNfa(n);
s64a sp;
u8 s = *(u8 *)q->state;
u32 s = *(u8 *)q->state;
if (q->report_current) {
assert(s);
@ -635,7 +720,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
&cached_accept_state, &cached_accept_id);
@ -644,7 +729,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
return MO_DEAD;
}
}
@ -653,12 +738,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *cur_buf = sp < 0 ? hend : buffer;
char report = 1;
if (mode == CALLBACK_OUTPUT) {
/* we are starting inside the history buffer: matches are suppressed */
report = !(sp < 0);
}
if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
q->cur--;
@ -686,17 +765,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
}
const u8 *final_look;
if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp,
cb, context, single, &final_look,
report ? mode : NO_MATCHES)
== MO_HALT_MATCHING) {
char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_HALT_MATCHING) {
*(u8 *)q->state = 0;
return 0;
return MO_DEAD;
}
if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
/* found a match */
DEBUG_PRINTF("found a match\n");
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
assert(q->cur);
assert(final_look != cur_buf + local_ep);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = final_look - cur_buf + 1; /* due to
@ -705,6 +787,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
return MO_MATCHES_PENDING;
}
assert(rv == MO_ALIVE);
assert(q->cur);
if (mode != NO_MATCHES && q->items[q->cur].location > end) {
DEBUG_PRINTF("this is as far as we go\n");
@ -720,7 +803,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
if (sp == 0) {
cur_buf = buffer;
report = 1;
}
if (sp != ep) {
@ -739,7 +821,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
case MQE_END:
*(u8 *)q->state = s;
q->cur++;
return s ? MO_ALIVE : 0;
return s ? MO_ALIVE : MO_DEAD;
default:
assert(!"invalid queue event");
}
@ -748,18 +830,18 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
}
}
static really_inline really_flatten
static really_inline
char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context,
char single) {
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = getImplNfa(n);
u8 s = (u8)m->start_anchored;
u32 s = m->start_anchored;
if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
NULL, CALLBACK_OUTPUT)
== MO_HALT_MATCHING) {
return 0;
== MO_DEAD) {
return MO_DEAD;
}
const struct mstate_aux *aux = get_aux(m, s);
@ -768,7 +850,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
}
return s;
return s ? MO_ALIVE : MO_DEAD;
}
char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
@ -827,7 +909,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = getImplNfa(n);
NfaCallback cb = q->cb;
void *ctxt = q->context;
u8 s = *(u8 *)q->state;
u32 s = *(u8 *)q->state;
u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
u64a offset = q_cur_offset(q);
assert(q_cur_type(q) == MQE_START);
@ -839,7 +921,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
&cached_accept_id);
@ -853,12 +935,12 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = getImplNfa(n);
NfaCallback cb = q->cb;
void *ctxt = q->context;
u16 s = *(u16 *)q->state;
u32 s = *(u16 *)q->state;
const struct mstate_aux *aux = get_aux(m, s);
u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
u64a offset = q_cur_offset(q);
assert(q_cur_type(q) == MQE_START);
DEBUG_PRINTF("state %hu\n", s);
DEBUG_PRINTF("state %u\n", s);
assert(s);
if (aux->accept) {
@ -867,7 +949,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
u32 cached_accept_state = 0;
doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
&cached_accept_id);
@ -1041,7 +1123,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
u8 s = top ? m->start_anchored : *(u8 *)state;
u32 s = top ? m->start_anchored : *(u8 *)state;
if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec8_i(m, &s, buf + start_off, len - start_off,
@ -1059,14 +1141,14 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
u16 s = top ? m->start_anchored : unaligned_load_u16(state);
u32 s = top ? m->start_anchored : unaligned_load_u16(state);
if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec16_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
} else {
mcclellanExec16_i(m, &s, buf + start_off, len - start_off,
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
}
unaligned_store_u16(state, s);
@ -1087,13 +1169,15 @@ char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
context);
}
char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa,
struct mq *q) {
assert(nfa->scratchStateSize == 1);
*(u8 *)q->state = 0;
return 0;
}
char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa,
struct mq *q) {
assert(nfa->scratchStateSize == 2);
assert(ISALIGNED_N(q->state, 2));
*(u16 *)q->state = 0;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -26,14 +26,6 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(__INTEL_COMPILER) || defined(__clang__) || defined(_WIN32) || defined(__GNUC__) && (__GNUC__ < 4)
#define really_flatten
#else
#define really_flatten __attribute__ ((flatten))
#endif
#define CASE_MASK 0xdf
enum MatchMode {
CALLBACK_OUTPUT,
STOP_AT_MATCH,
@ -41,7 +33,7 @@ enum MatchMode {
};
static really_inline
const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) {
const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) {
const char *nfa = (const char *)m - sizeof(struct NFA);
const struct mstate_aux *aux
= s + (const struct mstate_aux *)(nfa + m->aux_offset);
@ -51,15 +43,15 @@ const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) {
}
static really_inline
u16 mcclellanEnableStarts(const struct mcclellan *m, u16 s) {
u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) {
const struct mstate_aux *aux = get_aux(m, s);
DEBUG_PRINTF("enabling starts %hu->%hu\n", s, aux->top);
DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top);
return aux->top;
}
static really_inline
u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
u32 as) {
assert(ISALIGNED_N(sherman_state, 16));
@ -78,15 +70,15 @@ u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
if (z) {
u32 i = ctz32(z & ~0xf) - 4;
u16 s_out = unaligned_load_u16((const u8 *)sherman_state
u32 s_out = unaligned_load_u16((const u8 *)sherman_state
+ SHERMAN_STATES_OFFSET(len)
+ sizeof(u16) * i);
DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu "
"s=%hu\n", i, len, cprime, s_out);
DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i,
len, cprime, s_out);
return s_out;
}
}
u16 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
return succ_table[((u32)daddy << as) + cprime];
u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
return succ_table[(daddy << as) + cprime];
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -71,17 +71,17 @@ struct mcclellan {
u16 start_floating; /**< floating start state */
u32 aux_offset; /**< offset of the aux structures relative to the start of
* the nfa structure */
u32 sherman_offset; /**< offset of to array of sherman state offsets
* the state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures relative
* to the start of the nfa structure */
u32 sherman_offset; /**< offset of array of sherman state offsets the
* state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
u16 accept_limit_8; /**< 8 bit, lowest accept state */
u16 sherman_limit; /**< lowest sherman state */
u8 alphaShift;
u8 flags;
u8 has_accel; /**< 1 iff there are any accel planes */
u8 has_accel; /**< 1 iff there are any accel plans */
u8 remap[256]; /**< remaps characters to a smaller alphabet */
ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of the accel structures from start of NFA */
@ -90,8 +90,8 @@ struct mcclellan {
static really_inline
const char *findShermanState(UNUSED const struct mcclellan *m,
const char *sherman_base_offset, u16 sherman_base,
u16 s) {
const char *sherman_base_offset, u32 sherman_base,
u32 s) {
const char *rv
= sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
assert(rv < (const char *)m + m->length - sizeof(struct NFA));
@ -102,7 +102,7 @@ const char *findShermanState(UNUSED const struct mcclellan *m,
static really_inline
char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base,
u16 s) {
u32 s) {
return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
}

View File

@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
: info.raw.start_floating);
}
/* returns non-zero on error */
/* returns false on error */
static
int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
info.states[0].impl_id = 0; /* dead is always 0 */
vector<dstate_id_t> norm;
@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
if (info.size() > (1 << 16)) {
DEBUG_PRINTF("too many states\n");
*sherman_base = 0;
return 1;
return false;
}
for (u32 i = 1; i < info.size(); i++) {
@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) {
/* Check to see if we haven't over allocated our states */
DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
(dstate_id_t)(next_sherman & STATE_MASK));
return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK);
return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}
static
@ -470,7 +470,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
assert(alphaShift <= 8);
u16 count_real_states;
if (allocateFSN16(info, &count_real_states)) {
if (!allocateFSN16(info, &count_real_states)) {
DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
info.size());
return nullptr;

View File

@ -32,9 +32,7 @@
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
#include <memory>

View File

@ -39,6 +39,7 @@
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/unaligned.h"
#include <cctype>
@ -267,8 +268,8 @@ void dumpDotPreambleDfa(FILE *f) {
fprintf(f, "0 [style=invis];\n");
}
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
static
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
assert(nfa->type == MCCLELLAN_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -287,8 +288,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
fprintf(f, "}\n");
}
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
static
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
assert(nfa->type == MCCLELLAN_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -397,6 +398,7 @@ void dumpTransitions(FILE *f, const NFA *nfa, const mcclellan *m,
}
}
static
void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) {
assert(nfa->type == MCCLELLAN_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -417,6 +419,7 @@ void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f);
}
static
void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) {
assert(nfa->type == MCCLELLAN_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -437,4 +440,24 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f);
}
void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCCLELLAN_NFA_16);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecMcClellan16_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecMcClellan16_dumpDot(nfa, f);
fclose(f);
}
void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) {
assert(nfa->type == MCCLELLAN_NFA_8);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
nfaExecMcClellan8_dumpText(nfa, f);
fclose(f);
f = fopen_or_throw((base + ".dot").c_str(), "w");
nfaExecMcClellan8_dumpDot(nfa, f);
fclose(f);
}
} // namespace ue2

View File

@ -43,14 +43,10 @@ union AccelAux;
namespace ue2 {
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base);
void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base);
/* These functions are shared with the Haig dump code. */
/* These functions are shared with the Gough dump code. */
const mstate_aux *getAux(const NFA *n, dstate_id_t i);
void describeEdge(FILE *f, const u16 *t, u16 i);

1406
src/nfa/mcsheng.c Normal file

File diff suppressed because it is too large Load Diff

84
src/nfa/mcsheng.h Normal file
View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_H
#define MCSHENG_H

#include "callback.h"
#include "ue2common.h"

struct mq;
struct NFA;

/* 8-bit Sheng-McClellan hybrid */

/* NFA engine API entry points for the 8-bit McSheng DFA. */
char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
                             const char *streamState, u64a offset,
                             NfaCallback callback, void *context);
char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q);
char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa,
                                        const struct mq *q, s64a loc);
char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest,
                                 const void *src, u64a offset, u8 key);

/* Reverse scanning and zombie tracking have no implementation for this
 * engine (per the NFA_API_*NO_IMPL stubs). */
#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL

/* 16-bit Sheng-McClellan hybrid */

/* NFA engine API entry points for the 16-bit McSheng DFA. */
char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
                              const char *streamState, u64a offset,
                              NfaCallback callback, void *context);
char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa,
                                         const struct mq *q, s64a loc);
char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest,
                                  const void *src, u64a offset, u8 key);

/* Reverse scanning and zombie tracking have no implementation for this
 * engine (per the NFA_API_*NO_IMPL stubs). */
#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL
#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL

#endif

1070
src/nfa/mcsheng_compile.cpp Normal file

File diff suppressed because it is too large Load Diff

55
src/nfa/mcsheng_compile.h Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENGCOMPILE_H
#define MCSHENGCOMPILE_H

#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/ue2_containers.h"

#include <memory>

struct NFA;

namespace ue2 {

class ReportManager;
struct CompileContext;

/* Build a McSheng (Sheng-McClellan hybrid) engine from the given raw DFA.
 * NOTE(review): presumably returns nullptr when the DFA is unsuitable for
 * this model — confirm against the implementation. */
ue2::aligned_unique_ptr<NFA>
mcshengCompile(raw_dfa &raw, const CompileContext &cc,
               const ReportManager &rm);

/* Query whether a built McSheng engine contains acceleration schemes. */
bool has_accel_mcsheng(const NFA *nfa);

} // namespace ue2

#endif

43
src/nfa/mcsheng_data.c Normal file
View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mcsheng_internal.h"
/* PEXT masks used by the runtime: mask i selects byte i of a u64a together
 * with the low four bits. This table lives in a separate translation unit
 * from mcsheng.c deliberately, so the compiler cannot see (and constant-fold
 * away) these values; loading the masks from memory at runtime is cheap. */
const u64a mcsheng_pext_mask[8] = {
    0, /* dummy */
    0x000000000000ff0f, /* byte 1 + low nibble */
    0x0000000000ff000f, /* byte 2 + low nibble */
    0x00000000ff00000f, /* byte 3 + low nibble */
    0x000000ff0000000f, /* byte 4 + low nibble */
    0x0000ff000000000f, /* byte 5 + low nibble */
    0x00ff00000000000f, /* byte 6 + low nibble */
    0xff0000000000000f, /* byte 7 + low nibble */
};

415
src/nfa/mcsheng_dump.cpp Normal file
View File

@ -0,0 +1,415 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "mcsheng_dump.h"
#include "accel.h"
#include "accel_dump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "mcsheng_internal.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_util.h"
#include "util/unaligned.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
// Look up the aux record for implementation state i. The aux table is laid
// out m->aux_offset bytes past the start of the NFA structure.
static
const mstate_aux *getAux(const NFA *n, dstate_id_t i) {
    const mcsheng *m = (const mcsheng *)getImplNfa(n);
    const char *aux_table = (const char *)n + m->aux_offset;
    const mstate_aux *entry = (const mstate_aux *)aux_table + i;
    assert((const char *)entry < (const char *)n + m->length);
    return entry;
}
/* Reconstruct the full successor vector for state s into t (N_CHARS entries
 * plus the TOP slot). Handles all three state encodings used by this engine:
 * sheng states read successors out of the per-character shuffle masks,
 * 8-bit normal states use a byte successor table, and 16-bit states use a
 * word table plus sherman (compressed state) exception records. */
static
void next_states(const NFA *n, u16 s, u16 *t) {
    const mcsheng *m = (const mcsheng *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;
    assert(s != DEAD_STATE);

    if (s < m->sheng_end) {
        for (u16 c = 0; c < N_CHARS; c++) {
            /* Internal sheng ids are one less than the DFA state id, and the
             * lane value sheng_end - 1 encodes a transition to dead. */
            u8 sheng_s = s - 1;
            auto trans_for_c = (const char *)&m->sheng_masks[c];
            assert(sheng_s < sizeof(m128));
            u8 raw_succ = trans_for_c[sheng_s];
            if (raw_succ == m->sheng_end - 1) {
                t[c] = DEAD_STATE;
            } else if (raw_succ < m->sheng_end) {
                t[c] = raw_succ + 1;
            } else {
                t[c] = raw_succ;
            }
        }
    } else if (n->type == MCSHENG_NFA_8) {
        /* Plain 8-bit region: direct successor table lookup via the
         * remapped alphabet. */
        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }
    } else {
        u16 base_s = s;
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
            = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit);

        /* Sherman states inherit most transitions from their "daddy". */
        if (s >= m->sherman_limit) {
            base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET);
            assert(base_s >= m->sheng_end);
        }

        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcsheng));
        for (u16 c = 0; c < N_CHARS; c++) {
            u32 normal_id = base_s - m->sheng_end;
            t[c] = succ_table[(normal_id << as) + m->remap[c]];
        }

        /* Overlay the sherman state's explicit exception characters. */
        if (s >= m->sherman_limit) {
            UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET);
            assert(type == SHERMAN_STATE);
            u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            const char *chars = state_base + SHERMAN_CHARS_OFFSET;
            const u16 *states = (const u16 *)(state_base
                                              + SHERMAN_STATES_OFFSET(len));

            for (u8 i = 0; i < len; i++) {
                for (u16 c = 0; c < N_CHARS; c++) {
                    if (m->remap[c] == chars[i]) {
                        t[c] = unaligned_load_u16((const u8*)&states[i]);
                    }
                }
            }
        }

        /* 16-bit successors carry flag bits; strip to the raw state id. */
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] &= STATE_MASK;
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}
/* Emit the dot edges leaving state i. Characters sharing a successor are
 * coalesced into one edge labelled with the whole character class. */
static
void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue; /* transition to dead state: not drawn */
        }

        /* Only emit an edge the first time its successor appears. */
        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        /* Gather every character that shares this successor. */
        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ ", i, t[s]);
        /* Edges wholly inside the sheng region are highlighted in red. */
        if (i < m->sheng_end && t[s] < m->sheng_end) {
            fprintf(f, "color = red, fontcolor = red ");
        }
        fprintf(f, "label = \"");
        describeClass(f, reach, 5, CC_OUT_DOT);
        fprintf(f, "\" ];\n");
    }
}
// Colour an accelerable state in the dot output according to the class of
// acceleration scheme attached to it; unaccelerated states get no mark.
static
void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) {
    const auto type = accel->accel_type;
    if (type == ACCEL_NONE) {
        return;
    }
    if (type == ACCEL_VERM || type == ACCEL_VERM_NOCASE
        || type == ACCEL_DVERM || type == ACCEL_DVERM_NOCASE) {
        fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i);
    } else if (type == ACCEL_SHUFTI || type == ACCEL_DSHUFTI
               || type == ACCEL_TRUFFLE) {
        fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i);
    } else {
        // Any other scheme is flagged but not specifically classified.
        fprintf(f, "%u [ color = yellow style=diagonals ];\n", i);
    }
}
/* Emit the dot node for state i plus its decorations: acceleration marker,
 * sheng membership, accept/accept-EOD styling, top edge, start-state
 * arrows and sherman (compressed state) daddy links. */
static
void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) {
    const mstate_aux *aux = getAux(n, i);

    bool isSherman = m->sherman_limit && i >= m->sherman_limit;

    fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
               "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");

    if (aux->accel_offset) {
        dumpAccelDot(f, i, (const union AccelAux *)
                     ((const char *)m + aux->accel_offset));
    }

    /* Non-dead states below sheng_end belong to the sheng region. */
    if (i && i < m->sheng_end) {
        fprintf(f, "%u [color = red, fontcolor = red]; \n", i);
    }

    if (aux->accept_eod) {
        fprintf(f, "%u [ color = darkorchid ];\n", i);
    }

    if (aux->accept) {
        fprintf(f, "%u [ shape = doublecircle ];\n", i);
    }

    /* Draw the TOP transition unless it is a self loop. */
    if (aux->top && aux->top != i) {
        fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
                aux->top);
    }

    if (i == m->start_anchored) {
        fprintf(f, "STARTA -> %u [color = blue ]\n", i);
    }

    if (i == m->start_floating) {
        fprintf(f, "STARTF -> %u [color = red ]\n", i);
    }

    if (isSherman) {
        const char *winfo_base = (const char *)n + m->sherman_offset;
        const char *state_base
            = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit);
        assert(state_base < (const char *)m + m->length - sizeof(NFA));
        UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET);
        assert(type == SHERMAN_STATE);
        fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
        u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
        if (daddy) {
            fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
                    i, daddy);
        }
    }

    /* Group sheng states into their own cluster in the rendering. */
    if (i && i < m->sheng_end) {
        fprintf(f, "subgraph cluster_sheng { %u } \n", i);
    }
}
// Write the common dot preamble, then the DFA-specific invisible start
// markers and the sheng cluster declaration.
static
void dumpDotPreambleDfa(FILE *f) {
    dumpDotPreamble(f);

    // DFA specific additions.
    static const char *const extras[] = {
        "STARTF [style=invis];\n",
        "STARTA [style=invis];\n",
        "0 [style=invis];\n",
        "subgraph cluster_sheng { style = dashed }\n",
    };
    for (const char *line : extras) {
        fputs(line, f);
    }
}
static
void dump_dot_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dump_dot_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
next_states(nfa, i, t);
describeEdge(f, m, t, i);
}
fprintf(f, "}\n");
}
static
void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) {
fprintf(f, "\n");
fprintf(f, "Acceleration\n");
fprintf(f, "------------\n");
for (u16 i = 0; i < m->state_count; i++) {
if (!aux[i].accel_offset) {
continue;
}
auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset);
fprintf(f, "%05hu ", i);
dumpAccelInfo(f, *accel);
}
}
static
void describeAlphabet(FILE *f, const mcsheng *m) {
map<u8, CharReach> rev;
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].clear();
}
for (u16 i = 0; i < N_CHARS; i++) {
rev[m->remap[i]].set(i);
}
map<u8, CharReach>::const_iterator it;
fprintf(f, "\nAlphabet\n");
for (it = rev.begin(); it != rev.end(); ++it) {
fprintf(f, "%3hhu: ", it->first);
describeClass(f, it->second, 10240, CC_OUT_TEXT);
fprintf(f, "\n");
}
fprintf(f, "\n");
}
/* Print the header fields shared by the 8- and 16-bit text dumps. */
static
void dumpCommonHeader(FILE *f, const mcsheng *m) {
    fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
            m->state_count, m->length);
    fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
            m->start_floating);
    /* BUG FIX: '!' binds tighter than '&', so the previous expression
     * '!!(int)m->flags & MCSHENG_FLAG_SINGLE' evaluated as
     * '(!!m->flags) & MCSHENG_FLAG_SINGLE' — reporting "single accept: 1"
     * whenever ANY flag bit was set. Parenthesise the mask first. */
    fprintf(f, "single accept: %d, has_accel: %d\n",
            (int)!!(m->flags & MCSHENG_FLAG_SINGLE), m->has_accel);
    fprintf(f, "sheng_end: %hu\n", m->sheng_end);
    fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit);
}
static
void dump_text_16(const NFA *nfa, FILE *f) {
auto *m = (const mcsheng *)getImplNfa(nfa);
auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 16\n");
dumpCommonHeader(f, m);
fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit,
(int)m->sherman_end);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
static
void dump_text_8(const NFA *nfa, FILE *f) {
auto m = (const mcsheng *)getImplNfa(nfa);
auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset);
fprintf(f, "mcsheng 8\n");
dumpCommonHeader(f, m);
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
m->accept_limit_8);
fprintf(f, "\n");
describeAlphabet(f, m);
dumpAccelMasks(f, m, aux);
fprintf(f, "\n");
dumpTextReverse(nfa, f);
}
// Dump a 16-bit McSheng engine to base + ".txt" (text form) and
// base + ".dot" (Graphviz form).
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
    assert(nfa->type == MCSHENG_NFA_16);

    FILE *txt = fopen_or_throw((base + ".txt").c_str(), "w");
    dump_text_16(nfa, txt);
    fclose(txt);

    FILE *dot = fopen_or_throw((base + ".dot").c_str(), "w");
    dump_dot_16(nfa, dot);
    fclose(dot);
}
// Dump an 8-bit McSheng engine to base + ".txt" (text form) and
// base + ".dot" (Graphviz form).
void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
    assert(nfa->type == MCSHENG_NFA_8);

    FILE *txt = fopen_or_throw((base + ".txt").c_str(), "w");
    dump_text_8(nfa, txt);
    fclose(txt);

    FILE *dot = fopen_or_throw((base + ".dot").c_str(), "w");
    dump_dot_8(nfa, dot);
    fclose(dot);
}
} // namespace ue2

50
src/nfa/mcsheng_dump.h Normal file
View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_DUMP_H
#define MCSHENG_DUMP_H

#ifdef DUMP_SUPPORT

#include "rdfa.h"

#include <cstdio>
#include <string>

struct NFA;

namespace ue2 {

/* Write debugging dumps of a McSheng engine to files named base + ".txt"
 * (text form) and base + ".dot" (Graphviz form). */
void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base);
void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base);

} // namespace ue2

#endif // DUMP_SUPPORT
#endif // MCSHENG_DUMP_H

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MCSHENG_INTERNAL_H
#define MCSHENG_INTERNAL_H

#include "nfa_internal.h"
#include "ue2common.h"
#include "util/simd_types.h"

/* 16-bit successor encoding: the top two bits carry flags, the low 14 bits
 * are the state id. */
#define ACCEPT_FLAG 0x8000
#define ACCEL_FLAG 0x4000
#define STATE_MASK 0x3fff

/* Layout of a sherman (compressed) state record. */
#define SHERMAN_STATE 1

#define SHERMAN_TYPE_OFFSET 0
#define SHERMAN_FIXED_SIZE 32

#define SHERMAN_LEN_OFFSET 1
#define SHERMAN_DADDY_OFFSET 2
#define SHERMAN_CHARS_OFFSET 4
#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))

struct report_list {
    u32 count;     /* number of entries in report[] */
    ReportID report[];
};

/* Per-state auxiliary data, stored in a table at mcsheng::aux_offset. */
struct mstate_aux {
    u32 accept;
    u32 accept_eod;
    u16 top;           /* successor state on a TOP event */
    u32 accel_offset;  /* relative to start of struct mcsheng; 0 if no accel */
};

#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */

struct mcsheng {
    u16 state_count; /**< total number of states */
    u32 length; /**< length of dfa in bytes */
    u16 start_anchored; /**< anchored start state */
    u16 start_floating; /**< floating start state */
    u32 aux_offset; /**< offset of the aux structures relative to the start of
                     * the nfa structure */
    u32 sherman_offset; /**< offset of array of sherman state offsets the
                         * state_info structures relative to the start of the
                         * nfa structure */
    u32 sherman_end; /**< offset of the end of the state_info structures
                      * relative to the start of the nfa structure */
    u16 sheng_end; /**< first non-sheng state */
    u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of
                            * internal sheng ids */
    u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
    u16 accept_limit_8; /**< 8 bit, lowest accept state */
    u16 sherman_limit; /**< lowest sherman state */
    u8 alphaShift; /**< shift used to index the successor table by
                    * (state << alphaShift) + remapped character */
    u8 flags; /**< MCSHENG_FLAG_* values */
    u8 has_accel; /**< 1 iff there are any accel plans */
    u8 remap[256]; /**< remaps characters to a smaller alphabet */
    ReportID arb_report; /**< one of the accepts that this dfa may raise */
    u32 accel_offset; /**< offset of the accel structures from start of NFA */
    m128 sheng_masks[N_CHARS]; /**< per-character shuffle masks for the sheng
                                * region */
};

/* PEXT masks for the runtime: mask i selects byte i (1..7) of a u64a
 * together with the low nibble. Defined in mcsheng_data.c to keep the
 * constants out of the compiler's sight. */
extern const u64a mcsheng_pext_mask[8];

#endif

View File

@ -825,21 +825,21 @@ void mpvStoreState(const struct NFA *n, char *state,
}
}
char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q,
UNUSED s64a loc) {
char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q,
UNUSED s64a loc) {
void *dest = q->streamState;
const void *src = q->state;
mpvStoreState(nfa, dest, src);
return 0;
}
char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
UNUSED u64a offset, UNUSED u8 key) {
char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
UNUSED u64a offset, UNUSED u8 key) {
mpvLoadState(dest, nfa, src);
return 0;
}
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) {
char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mpv *m = getImplNfa(n);
u64a offset = q_cur_offset(q);
struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
@ -855,7 +855,7 @@ char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) {
return 0;
}
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) {
char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) {
struct mpv_decomp_state *out = (void *)q->state;
const struct mpv *m = getImplNfa(n);
assert(sizeof(*out) <= n->scratchStateSize);
@ -880,8 +880,8 @@ char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) {
return 0;
}
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
void *state, UNUSED u8 key) {
char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset,
void *state, UNUSED u8 key) {
const struct mpv *m = getImplNfa(n);
memset(state, 0, m->active_offset); /* active_offset marks end of comp
* counters */
@ -896,7 +896,7 @@ char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
}
static really_inline
char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
size_t length = q->length;
@ -1021,18 +1021,18 @@ char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) {
return alive;
}
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) {
DEBUG_PRINTF("_Q %lld\n", end);
return nfaExecMpv0_Q_i(n, q, end);
return nfaExecMpv_Q_i(n, q, end);
}
s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
#ifdef DEBUG
debugQueue(q);
#endif
assert(nfa->type == MPV_NFA_0);
assert(nfa->type == MPV_NFA);
assert(q && q->context && q->state);
assert(end >= 0);
assert(q->cur < q->end);
@ -1058,7 +1058,7 @@ s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
/* TODO: restore max offset stuff, if/when _interesting_ max offset stuff
* is filled in */
char rv = nfaExecMpv0_Q_i(nfa, q, end);
char rv = nfaExecMpv_Q_i(nfa, q, end);
assert(!q->report_current);
DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);

View File

@ -34,27 +34,27 @@
struct mq;
struct NFA;
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
u64a offset, u8 key);
char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
u64a offset, u8 key);
#define nfaExecMpv0_testEOD NFA_API_NO_IMPL
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL
#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
#define nfaExecMpv0_QR NFA_API_NO_IMPL
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL
#define nfaExecMpv_testEOD NFA_API_NO_IMPL
#define nfaExecMpv_inAccept NFA_API_NO_IMPL
#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL
#define nfaExecMpv_QR NFA_API_NO_IMPL
#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL
#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL
/**
* return 0 if the mpv dies, otherwise returns the location of the next possible
* match (given the currently known events). */
s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end);
s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end);
#endif

View File

@ -36,6 +36,7 @@
#include "ue2common.h"
#include "util/compare.h"
#include "util/dump_mask.h"
#include "util/dump_util.h"
#include <cstdlib>
#include <cstdio>
@ -46,11 +47,11 @@
#error No dump support!
#endif
namespace ue2 {
/* Note: No dot files for MPV */
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
UNUSED const std::string &base) {
}
using namespace std;
namespace ue2 {
static really_inline
u32 largest_puff_repeat(const mpv *m, const mpv_kilopuff *kp) {
@ -128,9 +129,11 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) {
fprintf(f, "\n");
}
void nfaExecMpv0_dumpText(const NFA *nfa, FILE *f) {
void nfaExecMpv_dump(const NFA *nfa, const string &base) {
const mpv *m = (const mpv *)getImplNfa(nfa);
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
fprintf(f, "Puff the Magic Engines\n");
fprintf(f, "\n");
fprintf(f, "%u puffettes in %u kilopuffs\n", m->puffette_count,
@ -151,6 +154,7 @@ void nfaExecMpv0_dumpText(const NFA *nfa, FILE *f) {
}
dumpTextReverse(nfa, f);
fclose(f);
}
} // namespace ue2

View File

@ -31,16 +31,13 @@
#if defined(DUMP_SUPPORT)
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMpv_dump(const struct NFA *nfa, const std::string &base);
} // namespace ue2

View File

@ -34,7 +34,7 @@
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/alloc.h"
#include "util/multibit_internal.h"
#include "util/multibit_build.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/verify_types.h"
@ -175,12 +175,13 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
size_t set = reach.find_first();
assert(set != CharReach::npos);
kp->u.verm.c = (char)set;
} else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo,
&kp->u.shuf.mask_hi) != -1) {
} else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo,
(u8 *)&kp->u.shuf.mask_hi) != -1) {
kp->type = MPV_SHUFTI;
} else {
kp->type = MPV_TRUFFLE;
truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2);
truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1,
(u8 *)&kp->u.truffle.mask2);
}
kp->count = verify_u32(puffs.size());
@ -207,7 +208,7 @@ void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
nfa->length = len;
nfa->nPositions = max_counter - 1;
nfa->type = MPV_NFA_0;
nfa->type = MPV_NFA;
nfa->streamStateSize = streamStateSize;
assert(16 >= sizeof(mpv_decomp_kilo));
nfa->scratchStateSize = scratchStateSize;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,7 +36,7 @@
#define MULTISHUFTI_H
#include "ue2common.h"
#include "util/simd_utils.h"
#include "util/simd_types.h"
#ifdef __cplusplus
extern "C"

View File

@ -41,39 +41,43 @@
#include "lbr.h"
#include "limex.h"
#include "mcclellan.h"
#include "mcsheng.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
case dc_ltype##_NFA_##dc_subtype: \
return nfaExec##dc_ftype##dc_subtype##dc_func_call; \
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \
case dc_ltype: \
return nfaExec##dc_ftype##dc_func_call; \
break
// general framework calls
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 16, dbnt_func); \
DISPATCH_CASE(MPV, Mpv, 0, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Dot, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Verm, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, NVerm, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \
assert(0); \
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \
DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \
DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \
DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \
DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \
DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \
DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \
DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \
DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \
DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \
DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \
DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \
DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \
DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \
DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \
DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \
DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \
DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \
DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \
default: \
assert(0); \
}
char nfaCheckFinalState(const struct NFA *nfa, const char *state,

View File

@ -30,6 +30,7 @@
#include "limex_internal.h"
#include "mcclellancompile.h"
#include "mcsheng_compile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
@ -170,17 +171,16 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
#define DO_IF_DUMP_SUPPORT(a)
#endif
#define MAKE_LIMEX_TRAITS(mlt_size) \
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
static UNUSED const char *name; \
static const NFACategory category = NFA_LIMEX; \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
MAX(mlt_align, alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \
}; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
@ -194,16 +194,17 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
} \
static string call(const void *p) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \
} \
};)
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)
MAKE_LIMEX_TRAITS(32, alignof(u32))
MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */
MAKE_LIMEX_TRAITS(128, alignof(m128))
MAKE_LIMEX_TRAITS(256, alignof(m256))
MAKE_LIMEX_TRAITS(384, alignof(m384))
MAKE_LIMEX_TRAITS(512, alignof(m512))
template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name;
@ -269,7 +270,7 @@ const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = d
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
#endif
template<> struct NFATraits<MPV_NFA_0> {
template<> struct NFATraits<MPV_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -278,14 +279,14 @@ template<> struct NFATraits<MPV_NFA_0> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
const char *NFATraits<MPV_NFA>::name = "Mega-Puff-Vac";
#endif
template<> struct NFATraits<CASTLE_NFA_0> {
template<> struct NFATraits<CASTLE_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -294,14 +295,14 @@ template<> struct NFATraits<CASTLE_NFA_0> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
const char *NFATraits<CASTLE_NFA>::name = "Castle";
#endif
template<> struct NFATraits<LBR_NFA_Dot> {
template<> struct NFATraits<LBR_NFA_DOT> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -310,14 +311,14 @@ template<> struct NFATraits<LBR_NFA_Dot> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)";
#endif
template<> struct NFATraits<LBR_NFA_Verm> {
template<> struct NFATraits<LBR_NFA_VERM> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -326,14 +327,14 @@ template<> struct NFATraits<LBR_NFA_Verm> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
const char *NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)";
#endif
template<> struct NFATraits<LBR_NFA_NVerm> {
template<> struct NFATraits<LBR_NFA_NVERM> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -342,14 +343,14 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)";
#endif
template<> struct NFATraits<LBR_NFA_Shuf> {
template<> struct NFATraits<LBR_NFA_SHUF> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -358,14 +359,14 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)";
#endif
template<> struct NFATraits<LBR_NFA_Truf> {
template<> struct NFATraits<LBR_NFA_TRUF> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
@ -374,14 +375,14 @@ template<> struct NFATraits<LBR_NFA_Truf> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)";
#endif
template<> struct NFATraits<SHENG_NFA_0> {
template<> struct NFATraits<SHENG_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
@ -390,14 +391,14 @@ template<> struct NFATraits<SHENG_NFA_0> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_accel = has_accel_sheng;
const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
const char *NFATraits<SHENG_NFA>::name = "Sheng";
#endif
template<> struct NFATraits<TAMARAMA_NFA_0> {
template<> struct NFATraits<TAMARAMA_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 32;
@ -406,11 +407,43 @@ template<> struct NFATraits<TAMARAMA_NFA_0> {
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama";
#endif
template<> struct NFATraits<MCSHENG_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8";
#endif
template<> struct NFATraits<MCSHENG_NFA_16> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16";
#endif
} // namespace

Some files were not shown because too many files have changed in this diff Show More