diff --git a/CHANGELOG.md b/CHANGELOG.md index 729e58ef..03eba64d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,40 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. -## [4.3.2] 2016-11-15 +## [4.4.0] 2017-01-20 +- Introduce the "fat runtime" build. This will build several variants of the + Hyperscan scanning engine specialised for different processor feature sets, + and use the appropriate one for the host at runtime. This uses the "ifunc" + indirect function attribute provided by GCC and is currently available on + Linux only, where it is the default for release builds. +- New API function: add the `hs_valid_platform()` function. This function tests + whether the host provides the SSSE3 instruction set required by Hyperscan. +- Introduce a new standard benchmarking tool, "hsbench". This provides an easy + way to measure Hyperscan's performance for a particular set of patterns and + corpus of data to be scanned. +- Introduce a 64-bit GPR LimEx NFA model, which uses 64-bit GPRs on 64-bit + hosts and SSE registers on 32-bit hosts. +- Introduce a new DFA model ("McSheng") which is a hybrid of the existing + McClellan and Sheng models. This improves scanning performance for some + cases. +- Introduce lookaround specialisations to improve scanning performance. +- Improve the handling of long literals by moving confirmation to the Rose + interpreter and simplifying the hash table used to track them in streaming + mode. +- Improve compile time optimisation for removing redundant paths from + expression graphs. +- Build: improve support for building with MSVC toolchain. +- Reduce the size of small write DFAs used for small scans in block mode. +- Introduce a custom graph type (`ue2_graph`) used in place of the Boost Graph + Library's `adjacency_list` type. Improves compile time performance and type + safety. +- Improve scanning performance of the McClellan DFA. +- Bugfix for a very unusual SOM case where the incorrect start offset was + reported for a match. +- Bugfix for issue #37, removing execute permissions from some source files. +- Bugfix for issue #41, handle Windows line endings in pattern files. +## [4.3.2] 2016-11-15 - Bugfix for issue #39. This small change is a workaround for an issue in Boost 1.62. The fix has been submitted to Boost for inclusion in a future release. @@ -11,7 +43,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. ## [4.3.1] 2016-08-29 - Bugfix for issue #30. In recent versions of Clang, a write to a variable was being elided, resulting in corrupted stream state after calling - hs_reset_stream(). + `hs_reset_stream()`. 
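The `hs_valid_platform()` function added in 4.4.0 above can serve as a guard before any compile or scan work. A minimal sketch (illustrative only; assumes the public header is installed as `hs/hs.h`):

```c
#include <stdio.h>
#include <hs/hs.h>

int main(void) {
    /* hs_valid_platform() returns HS_SUCCESS when the host CPU provides the
     * SSSE3 instructions Hyperscan requires, and HS_ARCH_ERROR otherwise. */
    if (hs_valid_platform() != HS_SUCCESS) {
        fprintf(stderr, "this host does not support Hyperscan (SSSE3 required)\n");
        return 1;
    }
    printf("host supported; safe to compile and scan\n");
    return 0;
}
```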
## [4.3.0] 2016-08-24 - Introduce a new analysis pass ("Violet") used for decomposition of patterns diff --git a/CMakeLists.txt b/CMakeLists.txt index 842834a1..3a7d40ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,11 @@ cmake_minimum_required (VERSION 2.8.11) - -# don't use the built-in default configs -set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE) - project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 3) -set (HS_PATCH_VERSION 2) +set (HS_MINOR_VERSION 4) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) -# since we are doing this manually, we only have three types -set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo" - CACHE STRING "" FORCE) - set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) @@ -70,7 +62,14 @@ include_directories(SYSTEM include) set(BOOST_USE_STATIC_LIBS OFF) set(BOOST_USE_MULTITHREADED OFF) set(BOOST_USE_STATIC_RUNTIME OFF) -set(BOOST_MINVERSION 1.57.0) +if (CMAKE_SYSTEM_NAME MATCHES "Darwin" + OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" + AND CMAKE_C_COMPILER_ID MATCHES "Clang")) + # we need a more recent boost for libc++ used by clang on OSX and FreeBSD + set(BOOST_MINVERSION 1.61.0) +else () + set(BOOST_MINVERSION 1.57.0) +endif () set(BOOST_NO_BOOST_CMAKE ON) # first check for Boost installed on the system @@ -85,6 +84,7 @@ if(NOT Boost_FOUND) endif() endif() +include (${CMAKE_MODULE_PATH}/boost.cmake) # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6) find_package(PythonInterp) @@ -151,27 +151,21 @@ if(MSVC OR MSVC_IDE) if (MSVC_VERSION LESS 1700) message(FATAL_ERROR "The project requires C++11 features.") else() - # set base flags - set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3") - set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") - set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") - - set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc") - set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") - set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") - if (WINDOWS_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() #TODO: don't hardcode arch - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") endif() + string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") + string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") - + if (DISABLE_ASSERTS) + set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}") + endif () endif() else() @@ -192,6 +186,12 @@ else() 
unset(_GXX_OUTPUT) endif() + # remove CMake's idea of optimisation + foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) + string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}") + string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") + endforeach () + if(OPTIMISE) set(OPT_C_FLAG "-O3") set(OPT_CXX_FLAG "-O2") @@ -200,32 +200,28 @@ else() set(OPT_CXX_FLAG "-O0") endif(OPTIMISE) - # set up base flags for build types - set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}") - set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}") + # set compiler flags - more are tested and added later + set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") + set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") - set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}") - set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}") + if (NOT RELEASE_BUILD) + # -Werror is most useful during development, don't potentially break + # release builds + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + endif() if (DISABLE_ASSERTS) - # usually true for release builds, false for debug - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -DNDEBUG") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG") endif() - - # set compiler flags - more are tested and added later - set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") - set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") - if (NOT CMAKE_C_FLAGS MATCHES .*march.*) - message(STATUS "Building for current host CPU") - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native") + set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") endif() + if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -march=native -mtune=native") + set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") endif() if(CMAKE_COMPILER_IS_GNUCC) @@ -242,12 +238,17 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") endif() + if (RELEASE_BUILD) + # we don't need the noise of ABI warnings in a release build + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") + endif () + endif() CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H) CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H) -CHECK_INCLUDE_FILES(tmmintrin.h HAVE_TMMINTRIN_H) CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H) CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H) @@ -267,9 +268,36 @@ if (RELEASE_BUILD) endif() endif() -# ensure we are building for the right target arch +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + # This is a Linux-only feature for now - requires platform support + # elsewhere + message(STATUS "generator is ${CMAKE_GENERATOR}") + if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND + CMAKE_C_COMPILER_VERSION VERSION_LESS "3.9") + message (STATUS "Clang v3.9 or higher required for fat runtime, cannot build fat runtime") + set (FAT_RUNTIME_REQUISITES FALSE) + elseif (NOT 
(CMAKE_GENERATOR MATCHES "Unix Makefiles" OR + (CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja"))) + message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher") + set (FAT_RUNTIME_REQUISITES FALSE) + else() + include (${CMAKE_MODULE_PATH}/attrib.cmake) + if (NOT HAS_C_ATTR_IFUNC) + message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime") + set (FAT_RUNTIME_REQUISITES FALSE) + else () + set (FAT_RUNTIME_REQUISITES TRUE) + endif() + endif() + CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF) +endif () + include (${CMAKE_MODULE_PATH}/arch.cmake) +if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3) + message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") +endif () + # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -375,6 +403,16 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") endif() endif() +if (NOT FAT_RUNTIME) +message(STATUS "Building for current host CPU") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") +else() +message(STATUS "Building runtime for multiple microarchitectures") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + add_subdirectory(util) add_subdirectory(unit) add_subdirectory(doc/dev-reference) @@ -401,8 +439,13 @@ if (NOT WIN32) endif() # only set these after all tests are done +if (NOT FAT_RUNTIME) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +else() +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +endif() if(NOT WIN32) @@ -424,12 +467,21 @@ SET(hs_HEADERS ) install(FILES ${hs_HEADERS} DESTINATION include/hs) +set (hs_exec_common_SRCS + src/alloc.c + src/scratch.c + src/util/cpuid_flags.c + src/util/cpuid_flags.h + src/util/multibit.c + ) + set (hs_exec_SRCS ${hs_HEADERS} src/hs_version.h src/ue2common.h - src/alloc.c src/allocator.h + src/crc32.c + src/crc32.h src/report.h src/runtime.c src/fdr/fdr.c @@ -437,7 +489,6 @@ set (hs_exec_SRCS src/fdr/fdr_internal.h src/fdr/fdr_confirm.h src/fdr/fdr_confirm_runtime.h - src/fdr/fdr_streaming_runtime.h src/fdr/flood_runtime.h src/fdr/fdr_loadval.h src/fdr/teddy.c @@ -461,15 +512,12 @@ set (hs_exec_SRCS src/nfa/lbr.h src/nfa/lbr_common_impl.h src/nfa/lbr_internal.h - src/nfa/mcclellan.c - src/nfa/mcclellan.h - src/nfa/mcclellan_common_impl.h - src/nfa/mcclellan_internal.h src/nfa/limex_accel.c src/nfa/limex_accel.h src/nfa/limex_exceptional.h src/nfa/limex_native.c src/nfa/limex_ring.h + src/nfa/limex_64.c src/nfa/limex_simd128.c src/nfa/limex_simd256.c src/nfa/limex_simd384.c @@ -482,6 +530,14 @@ set (hs_exec_SRCS src/nfa/limex_runtime_impl.h src/nfa/limex_shuffle.h src/nfa/limex_state_impl.h + src/nfa/mcclellan.c + src/nfa/mcclellan.h + src/nfa/mcclellan_common_impl.h + src/nfa/mcclellan_internal.h + src/nfa/mcsheng.c + src/nfa/mcsheng_data.c + src/nfa/mcsheng.h + src/nfa/mcsheng_internal.h src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h @@ -542,6 +598,8 @@ set (hs_exec_SRCS 
src/rose/init.h src/rose/init.c src/rose/stream.c + src/rose/stream_long_lit.h + src/rose/stream_long_lit_hash.h src/rose/match.h src/rose/match.c src/rose/miracle.h @@ -554,15 +612,16 @@ set (hs_exec_SRCS src/rose/rose_types.h src/rose/rose_common.h src/rose/validate_mask.h + src/rose/validate_shufti.h src/util/bitutils.h + src/util/copybytes.h src/util/exhaust.h src/util/fatbit.h - src/util/fatbit.c src/util/join.h src/util/masked_move.h src/util/multibit.h - src/util/multibit_internal.h src/util/multibit.c + src/util/multibit_internal.h src/util/pack_bits.h src/util/popcount.h src/util/pqueue.h @@ -574,21 +633,14 @@ set (hs_exec_SRCS src/util/state_compress.c src/util/unaligned.h src/util/uniform_ops.h - src/scratch.h - src/scratch.c - src/crc32.c - src/crc32.h src/database.c src/database.h ) -if (HAVE_AVX2) - set (hs_exec_SRCS - ${hs_exec_SRCS} - src/fdr/teddy_avx2.c - src/util/masked_move.c - ) -endif () +set (hs_exec_avx2_SRCS + src/fdr/teddy_avx2.c + src/util/masked_move.c +) SET (hs_SRCS @@ -621,8 +673,6 @@ SET (hs_SRCS src/fdr/fdr_engine_description.cpp src/fdr/fdr_engine_description.h src/fdr/fdr_internal.h - src/fdr/fdr_streaming_compile.cpp - src/fdr/fdr_streaming_internal.h src/fdr/flood_compile.cpp src/fdr/teddy_compile.cpp src/fdr/teddy_compile.h @@ -660,6 +710,8 @@ SET (hs_SRCS src/nfa/mcclellancompile.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h + src/nfa/mcsheng_compile.cpp + src/nfa/mcsheng_compile.h src/nfa/limex_compile.cpp src/nfa/limex_compile.h src/nfa/limex_accel.h @@ -677,6 +729,8 @@ SET (hs_SRCS src/nfa/nfa_internal.h src/nfa/nfa_kind.h src/nfa/rdfa.h + src/nfa/rdfa_graph.cpp + src/nfa/rdfa_graph.h src/nfa/rdfa_merge.cpp src/nfa/rdfa_merge.h src/nfa/repeat_internal.h @@ -721,7 +775,6 @@ SET (hs_SRCS src/nfagraph/ng_extparam.h src/nfagraph/ng_fixed_width.cpp src/nfagraph/ng_fixed_width.h - src/nfagraph/ng_graph.h src/nfagraph/ng_haig.cpp src/nfagraph/ng_haig.h src/nfagraph/ng_holder.cpp @@ -875,6 +928,7 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_engine_blob.h src/rose/rose_build_exclusive.cpp src/rose/rose_build_exclusive.h src/rose/rose_build_groups.cpp @@ -882,6 +936,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_long_lit.cpp + src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp src/rose/rose_build_lookaround.h src/rose/rose_build_matchers.cpp @@ -889,6 +945,8 @@ SET (hs_SRCS src/rose/rose_build_merge.cpp src/rose/rose_build_merge.h src/rose/rose_build_misc.cpp + src/rose/rose_build_program.cpp + src/rose/rose_build_program.h src/rose/rose_build_role_aliasing.cpp src/rose/rose_build_scatter.cpp src/rose/rose_build_scatter.h @@ -915,14 +973,15 @@ SET (hs_SRCS src/util/compile_error.cpp src/util/compile_error.h src/util/container.h - src/util/cpuid_flags.c - src/util/cpuid_flags.h src/util/depth.cpp src/util/depth.h src/util/determinise.h src/util/dump_mask.cpp src/util/dump_mask.h + src/util/fatbit_build.cpp + src/util/fatbit_build.h src/util/graph.h + src/util/hash.h src/util/multibit_build.cpp src/util/multibit_build.h src/util/order_check.h @@ -937,6 +996,7 @@ SET (hs_SRCS src/util/target_info.cpp src/util/target_info.h src/util/ue2_containers.h + src/util/ue2_graph.h src/util/ue2string.cpp src/util/ue2string.h src/util/unaligned.h @@ -966,6 +1026,8 @@ set(hs_dump_SRCS src/nfa/limex_dump.cpp src/nfa/mcclellandump.cpp src/nfa/mcclellandump.h + 
src/nfa/mcsheng_dump.cpp + src/nfa/mcsheng_dump.h src/nfa/mpv_dump.cpp src/nfa/nfa_dump_api.h src/nfa/nfa_dump_dispatch.cpp @@ -990,6 +1052,8 @@ set(hs_dump_SRCS src/rose/rose_dump.h src/util/dump_charclass.cpp src/util/dump_charclass.h + src/util/dump_util.cpp + src/util/dump_util.h ) if (DUMP_SUPPORT) @@ -1002,27 +1066,106 @@ endif() set (LIB_VERSION ${HS_VERSION}) set (LIB_SOVERSION ${HS_MAJOR_VERSION}) -add_library(hs_exec OBJECT ${hs_exec_SRCS}) +if (NOT FAT_RUNTIME) + + set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_common_SRCS}) + + if (HAVE_AVX2) + set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + endif() + + add_library(hs_exec OBJECT ${hs_exec_SRCS}) + + add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + endif() + +else (FAT_RUNTIME) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_source_files_properties(src/dispatcher.c PROPERTIES + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_shared_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_shared_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + add_library(hs_exec_common_shared OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + set_target_properties(hs_exec_common_shared PROPERTIES + OUTPUT_NAME hs_exec_common + POSITION_INDEPENDENT_CODE TRUE) + endif() # SHARED -if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) -add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) -set_target_properties(hs_exec_shared PROPERTIES - POSITION_INDEPENDENT_CODE TRUE) -endif() # hs_version.c is added explicitly to avoid some build systems that refuse to # create a lib without any src (I'm looking at you Xcode) -add_library(hs_runtime STATIC src/hs_version.c $<TARGET_OBJECTS:hs_exec>) + add_library(hs_runtime STATIC src/hs_version.c + $<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2> + $<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>) +endif (NOT 
FAT_RUNTIME) -set_target_properties(hs_runtime PROPERTIES - LINKER_LANGUAGE C) + +set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs_runtime DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_shared>) + if (NOT FAT_RUNTIME) + add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c +$<TARGET_OBJECTS:hs_exec_shared>) + else() + add_library(hs_runtime_shared SHARED src/hs_version.c + src/hs_valid_platform.c + $<TARGET_OBJECTS:hs_exec_common_shared> + $<TARGET_OBJECTS:hs_exec_shared_core2> + $<TARGET_OBJECTS:hs_exec_shared_corei7> + $<TARGET_OBJECTS:hs_exec_shared_avx2>) + endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} SOVERSION ${LIB_SOVERSION} @@ -1035,8 +1178,14 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) LIBRARY DESTINATION lib) endif() -# we want the static lib for testing -add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>) +if (NOT FAT_RUNTIME) + add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>) +else() + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common> $<TARGET_OBJECTS:hs_exec_core2> + $<TARGET_OBJECTS:hs_exec_corei7> $<TARGET_OBJECTS:hs_exec_avx2>) +endif() add_dependencies(hs ragel_Parser) @@ -1045,7 +1194,17 @@ install(TARGETS hs DESTINATION lib) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - add_library(hs_shared SHARED ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>) + if (NOT FAT_RUNTIME) + add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>) + else() + add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared> + $<TARGET_OBJECTS:hs_exec_shared_core2> + $<TARGET_OBJECTS:hs_exec_shared_corei7> + $<TARGET_OBJECTS:hs_exec_shared_avx2>) + + endif() add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES OUTPUT_NAME hs diff --git a/cmake/arch.cmake b/cmake/arch.cmake index c00401dd..e98fbf22 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,7 +11,8 @@ else () endif () -set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") + # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> int main() { @@ -19,10 +20,6 @@ int main() { (void)_mm_shuffle_epi8(a, a); }" HAVE_SSSE3) -if (NOT HAVE_SSSE3) - message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") -endif () - # now look for AVX2 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> #if !defined(__AVX2__) @@ -34,9 +31,5 @@ int main(){ (void)_mm256_xor_si256(z, z); }" HAVE_AVX2) -if (NOT HAVE_AVX2) - message(STATUS "Building without AVX2 support") -endif () - unset (CMAKE_REQUIRED_FLAGS) unset (INTRIN_INC_H) diff --git a/cmake/attrib.cmake b/cmake/attrib.cmake new file mode 100644 index 00000000..5600ce6b --- /dev/null +++ b/cmake/attrib.cmake @@ -0,0 +1,13 @@ +# tests for compiler properties + +# set -Werror so we can't ignore unused attribute warnings +set (CMAKE_REQUIRED_FLAGS "-Werror") + +CHECK_C_SOURCE_COMPILES(" + int foo(int) __attribute__ ((ifunc(\"foo_i\"))); + int f1(int i) { return i; } + void (*foo_i()) { return f1; } + int main(void) { return 0; } + " HAS_C_ATTR_IFUNC) + +unset(CMAKE_REQUIRED_FLAGS) diff --git a/cmake/boost.cmake b/cmake/boost.cmake new file mode 100644 index 00000000..3d513deb --- /dev/null +++ b/cmake/boost.cmake @@ -0,0 +1,41 @@ +# Boost 1.62 has a bug that we've patched around, check if it is required +if (Boost_VERSION EQUAL 106200) + set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include") + set (BOOST_REV_TEST " +#include <boost/graph/adjacency_list.hpp> +#include <boost/graph/reverse_graph.hpp> +#include <boost/graph/graph_concepts.hpp> +#include <boost/concept/assert.hpp> + +int main(int,char*[]) +{ + using namespace boost; + // Check const reverse_graph + { + typedef adjacency_list< 
vecS, vecS, bidirectionalS, + property<vertex_color_t, int>, + property<edge_weight_t, int>, + property<graph_name_t, std::string> + > AdjList; + typedef reverse_graph<AdjList> Graph; + BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> )); + } + return 0; +} +") + + CHECK_CXX_SOURCE_COMPILES("${BOOST_REV_TEST}" BOOST_REVGRAPH_OK) + + if (NOT BOOST_REVGRAPH_OK) + message(STATUS "trying patched") + CHECK_CXX_SOURCE_COMPILES(" +#include <boost-patched/graph/reverse_graph.hpp> +${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH) + endif() + + if (NOT BOOST_REVGRAPH_OK AND NOT BOOST_REVGRAPH_PATCH) + message(FATAL_ERROR "Something is wrong with this copy of boost::reverse_graph") + endif() + + unset (CMAKE_REQUIRED_INCLUDES) +endif () # Boost 1.62.0 diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh new file mode 100755 index 00000000..5baf209b --- /dev/null +++ b/cmake/build_wrapper.sh @@ -0,0 +1,27 @@ +#!/bin/sh -e +# This is used for renaming symbols for the fat runtime, don't call directly +# TODO: make this a lot less fragile! +PREFIX=$1 +KEEPSYMS_IN=$2 +shift 2 +BUILD=$@ +OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') +SYMSFILE=/tmp/${PREFIX}_rename.syms.$$ +KEEPSYMS=/tmp/keep.syms.$$ +# grab the command without the target obj or src file flags +# we don't just call gcc directly as there may be flags modifying the arch +CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') +# find me a libc +LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6) +cp ${KEEPSYMS_IN} ${KEEPSYMS} +# get all symbols from libc and turn them into patterns +nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} +# build the object +${BUILD} +# rename the symbols in the object +nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} +if test -s ${SYMSFILE} +then + objcopy --redefine-syms=${SYMSFILE} ${OUT} +fi +rm -f ${SYMSFILE} ${KEEPSYMS} diff --git a/cmake/config.h.in b/cmake/config.h.in index 75c27b3e..c7b577c2 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,6 +15,9 @@ /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT +/* Define if building "fat" runtime. */ +#cmakedefine FAT_RUNTIME + /* Define to 1 if `backtrace' works. */ #cmakedefine HAVE_BACKTRACE @@ -67,9 +70,6 @@ /* Define if the sqlite3_open_v2 call is available */ #cmakedefine HAVE_SQLITE3_OPEN_V2 -/* Define to 1 if you have the <tmmintrin.h> header file. */ -#cmakedefine HAVE_TMMINTRIN_H - /* Define to 1 if you have the <unistd.h> header file. */ #cmakedefine HAVE_UNISTD_H @@ -89,3 +89,5 @@ /* define if this is a release build. 
*/ #cmakedefine RELEASE_BUILD +/* define if reverse_graph requires patch for boost 1.62.0 */ +#cmakedefine BOOST_REVGRAPH_PATCH diff --git a/cmake/keep.syms.in b/cmake/keep.syms.in new file mode 100644 index 00000000..ab6f82a5 --- /dev/null +++ b/cmake/keep.syms.in @@ -0,0 +1,11 @@ +# names to exclude +hs_misc_alloc +hs_misc_free +hs_free_scratch +hs_stream_alloc +hs_stream_free +hs_scratch_alloc +hs_scratch_free +hs_database_alloc +hs_database_free +^_ diff --git a/cmake/sqlite3.cmake b/cmake/sqlite3.cmake new file mode 100644 index 00000000..c07f1161 --- /dev/null +++ b/cmake/sqlite3.cmake @@ -0,0 +1,53 @@ +# +# a lot of noise to find sqlite +# + +option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF) + +if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC) +find_package(PkgConfig QUIET) + +# first check for sqlite on the system +pkg_check_modules(SQLITE3 sqlite3) +endif() + +if (NOT SQLITE3_FOUND) + message(STATUS "looking for sqlite3 in source tree") + # look in the source tree + if (EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND + EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c") + message(STATUS " found sqlite3 in source tree") + set(SQLITE3_FOUND TRUE) + set(SQLITE3_BUILD_SOURCE TRUE) + set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3") + set(SQLITE3_LDFLAGS sqlite3_static) + else() + message(FATAL_ERROR " no sqlite3 in source tree") + endif() +endif() + +# now do version checks +if (SQLITE3_FOUND) + list(INSERT CMAKE_REQUIRED_INCLUDES 0 "${SQLITE3_INCLUDE_DIRS}") + CHECK_C_SOURCE_COMPILES("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK) + if (NOT SQLITE_VERSION_OK) + message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version") + endif() +if (NOT SQLITE3_BUILD_SOURCE) + set(_SAVED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS}) + CHECK_SYMBOL_EXISTS(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2) + list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES "${SQLITE3_INCLUDE_DIRS}") + list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${SQLITE3_LDFLAGS}) +else() + if (NOT TARGET sqlite3_static) + # build sqlite as a static lib to compile into our test programs + add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c") + if (NOT WIN32) + set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION") + endif() + endif() +endif() +endif() + +# that's enough about sqlite diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst index 826349a7..1794f3e9 100644 --- a/doc/dev-reference/getting_started.rst +++ b/doc/dev-reference/getting_started.rst @@ -169,6 +169,9 @@ Common options for CMake include: +------------------------+----------------------------------------------------+ | DEBUG_OUTPUT | Enable very verbose debug output. Default off. | +------------------------+----------------------------------------------------+ +| FAT_RUNTIME | Build the :ref:`fat runtime <fat_runtime>`. Default | +| | true on Linux, not available elsewhere. | ++------------------------+----------------------------------------------------+ For example, to generate a ``Debug`` build: :: @@ -199,11 +202,11 @@ The other types of builds are: Target Architecture ------------------- -By default, Hyperscan will be compiled to target the instruction set of the -processor of the machine that being used for compilation. 
This is done via -the use of ``-march=native``. The result of this means that a library built on -one machine may not work on a different machine if they differ in supported -instruction subsets. +Unless using the :ref:`fat runtime <fat_runtime>`, by default Hyperscan will be +compiled to target the instruction set of the processor of the machine being +used for compilation. This is done via the use of ``-march=native``. This +means that a library built on one machine may not work on a different machine +if they differ in supported instruction subsets. To override the use of ``-march=native``, set appropriate flags for the compiler in ``CFLAGS`` and ``CXXFLAGS`` environment variables before invoking @@ -215,3 +218,57 @@ example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: :: For more information, refer to :ref:`instr_specialization`. +.. _fat_runtime: + +Fat Runtime +----------- + +A feature introduced in Hyperscan v4.4 is the ability for the Hyperscan +library to dispatch the most appropriate runtime code for the host processor. +This feature is called the "fat runtime", as a single Hyperscan library +contains multiple copies of the runtime code for different instruction sets. + +.. note:: + + The fat runtime feature is only available on Linux. Release builds of + Hyperscan will default to having the fat runtime enabled where supported. + +When building the library with the fat runtime, the Hyperscan runtime code +will be compiled multiple times for these different instruction sets, and +these compiled objects are combined into one library. There are no changes to +how user applications are built against this library. + +When applications are executed, the correct version of the runtime is selected +for the machine that it is running on. This is done using a ``CPUID`` check +for the presence of the instruction set, and then an indirect function is +resolved so that the right version of each API function is used. There is no +impact on function call performance, as this check and resolution is performed +by the ELF loader once when the binary is loaded. + +If the Hyperscan library is used on x86 systems without ``SSSE3``, the runtime +API functions will resolve to functions that return :c:member:`HS_ARCH_ERROR` +instead of potentially executing illegal instructions. The API function +:c:func:`hs_valid_platform` can be used by application writers to determine if +the current platform is supported by Hyperscan. + +As of this release, the variants of the runtime that are built, and the CPU +capability that is required, are the following: + ++----------+-------------------------------+---------------------+ +| Variant | CPU Feature Flag(s) Required | gcc arch flag | ++==========+===============================+=====================+ +| Core 2 | ``SSSE3`` | ``-march=core2`` | ++----------+-------------------------------+---------------------+ +| Core i7 | ``SSE4_2`` and ``POPCNT`` | ``-march=corei7`` | ++----------+-------------------------------+---------------------+ +| AVX 2 | ``AVX2`` | ``-march=core-avx2`` | ++----------+-------------------------------+---------------------+ + +As this requires compiler, libc, and binutils support, at this time the fat +runtime will only be enabled for Linux builds where the compiler supports the +`indirect function "ifunc" function attribute +<https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_. + +This attribute should be available on all supported versions of GCC, and +recent versions of Clang and ICC. There is currently no operating system +support for this feature on non-Linux systems.
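The dispatch pattern described above can be illustrated with a minimal,
self-contained sketch of the ifunc mechanism (names and feature checks here
are illustrative assumptions, not Hyperscan's actual sources)::

    #include <stdio.h>

    static int scan_avx2(void) { return 2; }  /* stand-in for the AVX2 build */
    static int scan_core2(void) { return 1; } /* stand-in for the SSSE3 build */

    /* the resolver runs once, at load time, and picks an implementation */
    static int (*resolve_scan(void))(void) {
        __builtin_cpu_init(); /* required before __builtin_cpu_supports() */
        return __builtin_cpu_supports("avx2") ? scan_avx2 : scan_core2;
    }

    /* every call to scan() binds to the resolved variant via the ELF loader */
    int scan(void) __attribute__((ifunc("resolve_scan")));

    int main(void) {
        printf("selected variant: %d\n", scan());
        return 0;
    }

In the fat runtime the same idea is applied to each exported API function,
with the CPU feature checks selecting among the core2/corei7/avx2 builds
listed above.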
diff --git a/examples/patbench.cc b/examples/patbench.cc index 9c2b41fa..f82f47a7 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -833,6 +833,8 @@ static unsigned parseFlags(const string &flagsStr) { flags |= HS_FLAG_UTF8; break; case 'W': flags |= HS_FLAG_UCP; break; + case '\r': // stray carriage-return + break; default: cerr << "Unsupported flag \'" << c << "\'" << endl; exit(-1); diff --git a/examples/pcapscan.cc b/examples/pcapscan.cc index 032b19cd..12b94438 100644 --- a/examples/pcapscan.cc +++ b/examples/pcapscan.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -621,6 +621,8 @@ static unsigned parseFlags(const string &flagsStr) { flags |= HS_FLAG_UTF8; break; case 'W': flags |= HS_FLAG_UCP; break; + case '\r': // stray carriage-return + break; default: cerr << "Unsupported flag \'" << c << "\'" << endl; exit(-1); diff --git a/include/boost-patched/graph/reverse_graph.hpp b/include/boost-patched/graph/reverse_graph.hpp index 07a11f9b..8f98a1d5 100644 --- a/include/boost-patched/graph/reverse_graph.hpp +++ b/include/boost-patched/graph/reverse_graph.hpp @@ -5,7 +5,7 @@ #include <boost/version.hpp> -#if (BOOST_VERSION == 106200) +#if defined(BOOST_REVGRAPH_PATCH) // Boost 1.62.0 does not implement degree() in reverse_graph which is required // by BidirectionalGraph, so add it. diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index 0365e268..be836b06 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,11 +117,11 @@ typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t; static void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, u32 &assert_edge_count) { - DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index); + DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); const u32 flags = g[t].assert_flags; - DEBUG_PRINTF("consider assert vertex %u with flags %u\n", - g[t].index, flags); + DEBUG_PRINTF("consider assert vertex %zu with flags %u\n", g[t].index, + flags); // Wire up all the predecessors to all the successors. 
@@ -142,7 +142,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, for (const auto &outEdge : out_edges_range(t, g)) { NFAVertex v = target(outEdge, g); - DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index, + DEBUG_PRINTF("consider path [%zu,%zu,%zu]\n", g[u].index, g[t].index, g[v].index); if (v == t) { @@ -173,9 +173,8 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, auto cache_key = make_pair(u, v); auto ecit = edge_cache.find(cache_key); if (ecit == edge_cache.end()) { - DEBUG_PRINTF("adding edge %u %u\n", g[u].index, - g[v].index); - NFAEdge e = add_edge(u, v, g).first; + DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index); + NFAEdge e = add_edge(u, v, g); edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { @@ -184,7 +183,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, } } else { NFAEdge e = ecit->second; - DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index, + DEBUG_PRINTF("updating edge %zu %zu [a %zu]\n", g[u].index, g[v].index, g[t].index); // Edge already exists. u32 &e_flags = g[e].assert_flags; @@ -211,8 +210,7 @@ void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { Report r = rm.getBasicInternalReport(g, adj); g[v].reports.insert(rm.getInternalId(r)); - DEBUG_PRINTF("set report id for vertex %u, adj %d\n", - g[v].index, adj); + DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static @@ -222,8 +220,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { continue; } - DEBUG_PRINTF("mls %u %08x\n", g[v].index, - g[v].assert_flags); + DEBUG_PRINTF("mls %zu %08x\n", g[v].index, g[v].assert_flags); /* we have found a multi-line start (maybe more than one) */ @@ -299,8 +296,8 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("resolved %zu assert vertices\n", num); pruneUseless(g); pruneEmptyVertices(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g)); diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index d56aff88..4a4afc64 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -29,8 +29,10 @@ /** \file * \brief Compiler front-end interface. */ +#include "allocator.h" #include "asserts.h" #include "compiler.h" +#include "crc32.h" #include "database.h" #include "grey.h" #include "hs_internal.h" @@ -321,6 +323,45 @@ platform_t target_to_platform(const target_t &target_info) { return p; } +/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated + * \ref hs_database, ensuring that it is padded correctly to give cacheline + * alignment. 
*/ +static +hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { + size_t db_len = sizeof(struct hs_database) + len; + DEBUG_PRINTF("db size %zu\n", db_len); + DEBUG_PRINTF("db platform %llx\n", platform); + + struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); + if (hs_check_alloc(db) != HS_SUCCESS) { + hs_database_free(db); + return nullptr; + } + + // So that none of our database is uninitialized + memset(db, 0, db_len); + + // we need to align things manually + size_t shift = (uintptr_t)db->bytes & 0x3f; + DEBUG_PRINTF("shift is %zu\n", shift); + + db->bytecode = offsetof(struct hs_database, bytes) - shift; + char *bytecode = (char *)db + db->bytecode; + assert(ISALIGNED_CL(bytecode)); + + db->magic = HS_DB_MAGIC; + db->version = HS_DB_VERSION; + db->length = len; + db->platform = platform; + + // Copy bytecode + memcpy(bytecode, in_bytecode, len); + + db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); + return db; +} + + struct hs_database *build(NG &ng, unsigned int *length) { assert(length); diff --git a/src/database.c b/src/database.c index a4e10c22..61eb021f 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -348,43 +348,6 @@ hs_error_t dbIsValid(const hs_database_t *db) { return HS_SUCCESS; } -/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated - * \ref hs_database, ensuring that it is padded correctly to give cacheline - * alignment. */ -hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { - size_t db_len = sizeof(struct hs_database) + len; - DEBUG_PRINTF("db size %zu\n", db_len); - DEBUG_PRINTF("db platform %llx\n", platform); - - struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); - if (hs_check_alloc(db) != HS_SUCCESS) { - hs_database_free(db); - return NULL; - } - - // So that none of our database is uninitialized - memset(db, 0, db_len); - - // we need to align things manually - size_t shift = (uintptr_t)db->bytes & 0x3f; - DEBUG_PRINTF("shift is %zu\n", shift); - - db->bytecode = offsetof(struct hs_database, bytes) - shift; - char *bytecode = (char *)db + db->bytecode; - assert(ISALIGNED_CL(bytecode)); - - db->magic = HS_DB_MAGIC; - db->version = HS_DB_VERSION; - db->length = len; - db->platform = platform; - - // Copy bytecode - memcpy(bytecode, in_bytecode, len); - - db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); - return db; -} - #if defined(_WIN32) #define SNPRINTF_COMPAT _snprintf #else diff --git a/src/database.h b/src/database.h index 5488c93d..399513fc 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -110,7 +110,6 @@ hs_error_t validDatabase(const hs_database_t *db) { } hs_error_t dbIsValid(const struct hs_database *db); -struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/dispatcher.c b/src/dispatcher.c new file mode 100644 index 00000000..fb2f4f02 --- /dev/null +++ b/src/dispatcher.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2016, Intel 
Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "hs_common.h" +#include "hs_runtime.h" +#include "ue2common.h" +#include "util/cpuid_flags.h" +#include "util/join.h" + +#define CREATE_DISPATCH(RTYPE, NAME, ...) \ + /* create defns */ \ + RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ + RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ + \ + /* error func */ \ + static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) { \ + return (RTYPE)HS_ARCH_ERROR; \ + } \ + \ + /* resolver */ \ + static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx2()) { \ + return JOIN(avx2_, NAME); \ + } \ + if (check_sse42() && check_popcnt()) { \ + return JOIN(corei7_, NAME); \ + } \ + if (check_ssse3()) { \ + return JOIN(core2_, NAME); \ + } \ + /* anything else is fail */ \ + return JOIN(error_, NAME); \ + } \ + \ + /* function */ \ + HS_PUBLIC_API \ + RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME))) + +CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data, + unsigned length, unsigned flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *userCtx); + +CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database, + size_t *stream_size); + +CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db, + size_t *size); +CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db); +CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db, + unsigned int flags, hs_stream_t **stream); + +CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id, + hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt); + +CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db, + const char *const *data, const unsigned int *length, + unsigned int 
count, unsigned int flags, hs_scratch_t *scratch, + match_event_handler onevent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info); + +CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id, + const hs_stream_t *from_id); + +CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id, + unsigned int flags, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id, + const hs_stream_t *from_id, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + +CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db, + char **bytes, size_t *length); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes, + const size_t length, hs_database_t **db); + +CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes, + const size_t length, hs_database_t *db); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, + size_t length, char **info); + +CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, + const size_t length, size_t *deserialized_size); + +/** INTERNALS **/ + +CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 4230c2b1..23416c70 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -31,7 +31,6 @@ #include "fdr_confirm_runtime.h" #include "fdr_internal.h" #include "fdr_loadval.h" -#include "fdr_streaming_runtime.h" #include "flood_runtime.h" #include "teddy.h" #include "teddy_internal.h" @@ -809,8 +808,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, len, hbuf, 0, - hbuf, // nocase - 0, start, cb, ctxt, @@ -828,14 +825,12 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state) { + hwlm_group_t groups) { struct FDR_Runtime_Args a = { buf, len, hbuf, hlen, - hbuf, // nocase - start same as caseful, override later if needed - hlen, // nocase start, cb, ctxt, @@ -844,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, * the history buffer (they may be garbage). */ hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0 }; - fdrUnpackState(fdr, &a, stream_state); hwlm_error_t ret; if (unlikely(a.start_offset >= a.len)) { @@ -854,6 +848,5 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, ret = funcs[fdr->engineID](fdr, &a, groups); } - fdrPackState(fdr, &a, stream_state); return ret; } diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index e0aa594f..e2b80056 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -43,10 +43,6 @@ extern "C" { struct FDR; -/** \brief Returns non-zero if the contents of the stream state indicate that - * there is active FDR history beyond the regularly used history. */ -u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state); - /** * \brief Block-mode scan. * @@ -74,12 +70,11 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, * \param cb Callback to call when a match is found. * \param ctxt Caller-provided context pointer supplied to callback on match. * \param groups Initial groups mask. - * \param stream_state Persistent stream state for use by FDR. 
*/ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, - hwlm_group_t groups, u8 *stream_state); + hwlm_group_t groups); #ifdef __cplusplus } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 89a0ff72..937513a8 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -39,6 +39,7 @@ #include "teddy_engine_description.h" #include "grey.h" #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" @@ -495,14 +496,34 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) { } // namespace +static +size_t maxMaskLen(const vector<hwlmLiteral> &lits) { + size_t rv = 0; + for (const auto &lit : lits) { + rv = max(rv, lit.msk.size()); + } + return rv; +} + +static +void setHistoryRequired(hwlmStreamingControl &stream_ctl, + const vector<hwlmLiteral> &lits) { + size_t max_mask_len = maxMaskLen(lits); + + // we want enough history to manage the longest literal and the longest + // mask. + stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1; +} + static aligned_unique_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small, const target_t &target, const Grey &grey, u32 hint, hwlmStreamingControl *stream_control) { pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0); + if (stream_control) { - link = fdrBuildTableStreaming(lits, *stream_control); + setHistoryRequired(*stream_control, lits); } DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2"); diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 865218b4..6ce85afd 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,19 +52,18 @@ typedef enum LitInfoFlags { /** * \brief Structure describing a literal, linked to by FDRConfirm. * - * This structure is followed in memory by a variable-sized string prefix at - * LitInfo::s, for strings that are longer than CONF_TYPE. + * This structure is followed in memory by a variable-sized string prefix, for + * strings that are longer than CONF_TYPE. */ struct LitInfo { CONF_TYPE v; CONF_TYPE msk; hwlm_group_t groups; - u32 size; u32 id; // literal ID as passed in + u8 size; u8 flags; /* LitInfoFlags */ u8 next; u8 extended_size; - u8 s[1]; // literal prefix, which continues "beyond" this struct. 
}; #define FDRC_FLAG_NO_CONFIRM 1 diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 23437fe2..e77c46d1 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -107,7 +107,7 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo, info.extended_size = verify_u8(lit.msk.size()); } info.flags = flags; - info.size = verify_u32(lit.s.size()); + info.size = verify_u8(lit.s.size()); info.groups = lit.groups; // these are built up assuming a LE machine @@ -333,13 +333,13 @@ getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt, const string &t = lits[litIdx].s; if (t.size() > sizeof(CONF_TYPE)) { size_t prefix_len = t.size() - sizeof(CONF_TYPE); - memcpy(&finalLI.s[0], t.c_str(), prefix_len); - ptr = &finalLI.s[0] + prefix_len; + memcpy(ptr, t.c_str(), prefix_len); + ptr += prefix_len; } ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); if (next(i) == e) { - finalLI.next = 0x0; + finalLI.next = 0; } else { // our next field represents an adjustment on top of // current address + the actual size of the literal diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 9b1df593..87ade9fe 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -74,10 +74,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a if (loc < buf) { u32 full_overhang = buf - loc; - const u8 *history = caseless ? a->buf_history_nocase - : a->buf_history; - size_t len_history = caseless ? a->len_history_nocase - : a->len_history; + const u8 *history = a->buf_history; + size_t len_history = a->len_history; // can't do a vectored confirm either if we don't have // the bytes @@ -88,7 +86,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a // as for the regular case, no need to do a full confirm if // we're a short literal if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s1 = li->s; + const u8 *s1 = (const u8 *)li + sizeof(*li); const u8 *s2 = s1 + full_overhang; const u8 *loc1 = history + len_history - full_overhang; const u8 *loc2 = buf; @@ -108,7 +106,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a // if string < conf_type we don't need regular string cmp if (unlikely(li->size > sizeof(CONF_TYPE))) { - if (cmpForward(loc, li->s, li->size - sizeof(CONF_TYPE), + const u8 *s = (const u8 *)li + sizeof(*li); + if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE), caseless)) { goto out; } @@ -123,8 +122,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; if (loc2 < buf) { u32 full_overhang = buf - loc2; - size_t len_history = caseless ? 
a->len_history_nocase - : a->len_history; + size_t len_history = a->len_history; if (full_overhang > len_history) { goto out; } diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 6272b69e..3bf82837 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -100,8 +100,6 @@ struct FDR_Runtime_Args { size_t len; const u8 *buf_history; size_t len_history; - const u8 *buf_history_nocase; - size_t len_history_nocase; size_t start_offset; HWLMCallback cb; void *ctxt; diff --git a/src/fdr/fdr_streaming_compile.cpp b/src/fdr/fdr_streaming_compile.cpp deleted file mode 100644 index b2e1656c..00000000 --- a/src/fdr/fdr_streaming_compile.cpp +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "fdr_internal.h" -#include "fdr_streaming_internal.h" -#include "fdr_compile_internal.h" -#include "hwlm/hwlm_build.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/target_info.h" -#include "util/verify_types.h" - -#include -#include -#include -#include -#include -#include - -#include - -using namespace std; -using boost::dynamic_bitset; - -namespace ue2 { - -namespace { -struct LongLitOrder { - bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const { - if (i1.nocase != i2.nocase) { - return i1.nocase < i2.nocase; - } else { - return i1.s < i2.s; - } - } -}; -} - -static -bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) { - return l1.s == l2.s && l1.nocase == l2.nocase; -} - -static -u32 roundUpToPowerOfTwo(u32 x) { - x -= 1; - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - -/** - * \brief Creates a long literals vector containing all literals of length > max_len. - * - * The last char of each literal is trimmed as we're not interested in full - * matches, only partial matches. - * - * Literals are sorted (by caseful/caseless, then lexicographical order) and - * made unique. - * - * The ID of each literal is set to its position in the vector. 
- * - * \return False if there aren't any long literals. - */ -static -bool setupLongLits(const vector &lits, - vector &long_lits, size_t max_len) { - long_lits.reserve(lits.size()); - for (const auto &lit : lits) { - if (lit.s.length() > max_len) { - hwlmLiteral tmp = lit; // copy - tmp.s.pop_back(); - tmp.id = 0; // recalc later - tmp.groups = 0; // filled in later by hash bucket(s) - long_lits.push_back(move(tmp)); - } - } - - if (long_lits.empty()) { - return false; - } - - // sort long_literals by caseful/caseless and in lexicographical order, - // remove duplicates - stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder()); - auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual); - long_lits.erase(new_end, long_lits.end()); - - // fill in ids; not currently used - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - i->id = distance(long_lits.begin(), i); - } - return true; -} - -// boundaries are the 'start' boundaries for each 'mode' -// so boundary[CASEFUL] is the index one above the largest caseful index -// positions[CASEFUL] is the # of positions in caseful strings (stream) -// hashedPositions[CASEFUL] is the # of positions in caseful strings -// (not returned - a temporary) -// hashEntries[CASEFUL] is the # of positions hashed for caseful strings -// (rounded up to the nearest power of two) -static -void analyzeLits(const vector &long_lits, size_t max_len, - u32 *boundaries, u32 *positions, u32 *hashEntries) { - u32 hashedPositions[MAX_MODES]; - - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - boundaries[m] = verify_u32(long_lits.size()); - positions[m] = 0; - hashedPositions[m] = 0; - } - - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - if (i->nocase) { - boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i)); - break; - } - } - - for (const auto &lit : long_lits) { - Modes m = lit.nocase ? CASELESS : CASEFUL; - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - hashedPositions[m]++; - } - positions[m] += lit.s.size(); - } - - for (u32 m = CASEFUL; m < MAX_MODES; m++) { - hashEntries[m] = hashedPositions[m] - ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m])) - : 0; - } - -#ifdef DEBUG_COMPILE - printf("analyzeLits:\n"); - for (Modes m = CASEFUL; m < MAX_MODES; m++) { - printf("mode %s boundary %d positions %d hashedPositions %d " - "hashEntries %d\n", - (m == CASEFUL) ? 
"caseful" : "caseless", boundaries[m], - positions[m], hashedPositions[m], hashEntries[m]); - } - printf("\n"); -#endif -} - -static -u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) { - return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m); -} - -// sort by 'distance from start' -namespace { -struct OffsetIDFromEndOrder { - const vector &lits; // not currently used - explicit OffsetIDFromEndOrder(const vector &lits_in) - : lits(lits_in) {} - bool operator()(const pair &i1, const pair &i2) const { - if (i1.second != i2.second) { - // longest is 'first', so > not < - return i1.second > i2.second; - } - return i1.first < i2.first; - } -}; -} - -static -void fillHashes(const vector &long_lits, size_t max_len, - FDRSHashEntry *tab, size_t numEntries, Modes mode, - map &litToOffsetVal) { - const u32 nbits = lg2(numEntries); - map > > bucketToLitOffPairs; - map bucketToBitfield; - - for (const auto &lit : long_lits) { - if ((mode == CASELESS) != lit.nocase) { - continue; - } - for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { - u32 h = hashLit(lit, j, max_len, mode); - u32 h_ent = h & ((1U << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].emplace_back(lit.id, j); - bucketToBitfield[h_ent] |= (1ULL << h_low); - } - } - - // this used to be a set, but a bitset is much much faster given that - // we're using it only for membership testing. - dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default. - - // sweep out bitfield entries and save the results swapped accordingly - // also, anything with bitfield entries is put in filledBuckets - for (const auto &m : bucketToBitfield) { - const u32 &bucket = m.first; - const u64a &contents = m.second; - tab[bucket].bitfield = contents; - filledBuckets.set(bucket); - } - - // store out all our chains based on free values in our hash table. - // find nearest free locations that are empty (there will always be more - // entries than strings, at present) - for (auto &m : bucketToLitOffPairs) { - u32 bucket = m.first; - deque> &d = m.second; - - // sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first... - stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits)); - - while (1) { - // first time through is always at bucket, then we fill in links - filledBuckets.set(bucket); - FDRSHashEntry *ent = &tab[bucket]; - u32 lit_id = d.front().first; - u32 offset = d.front().second; - - ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len); - ent->link = (u32)LINK_INVALID; - - d.pop_front(); - if (d.empty()) { - break; - } - // now, if there is another value - // find a bucket for it and put in 'bucket' and repeat - // all we really need to do is find something not in filledBuckets, - // ideally something close to bucket - // we search backward and forward from bucket, trying to stay as - // close as possible. - UNUSED bool found = false; - int bucket_candidate = 0; - for (u32 k = 1; k < numEntries * 2; k++) { - bucket_candidate = bucket + (((k & 1) == 0) - ? 
(-(int)k / 2) : (k / 2)); - if (bucket_candidate < 0 || - (size_t)bucket_candidate >= numEntries) { - continue; - } - if (!filledBuckets.test(bucket_candidate)) { - found = true; - break; - } - } - - assert(found); - bucket = bucket_candidate; - ent->link = bucket; - } - } -} - -static -size_t maxMaskLen(const vector &lits) { - size_t rv = 0; - for (const auto &lit : lits) { - rv = max(rv, lit.msk.size()); - } - return rv; -} - -pair, size_t> -fdrBuildTableStreaming(const vector &lits, - hwlmStreamingControl &stream_control) { - // refuse to compile if we are forced to have smaller than minimum - // history required for long-literal support, full stop - // otherwise, choose the maximum of the preferred history quantity - // (currently a fairly extravagant 32) or the already used history - // quantity - subject to the limitation of stream_control.history_max - - const size_t MIN_HISTORY_REQUIRED = 32; - - if (MIN_HISTORY_REQUIRED > stream_control.history_max) { - throw std::logic_error("Cannot set history to minimum history required"); - } - - size_t max_len = - MIN(stream_control.history_max, - MAX(MIN_HISTORY_REQUIRED, stream_control.history_min)); - assert(max_len >= MIN_HISTORY_REQUIRED); - size_t max_mask_len = maxMaskLen(lits); - - vector long_lits; - if (!setupLongLits(lits, long_lits, max_len) || false) { - // "Don't need to do anything" path, not really a fail - DEBUG_PRINTF("Streaming literal path produces no table\n"); - - // we want enough history to manage the longest literal and the longest - // mask. - stream_control.literal_history_required = - max(maxLen(lits), max_mask_len) - 1; - stream_control.literal_stream_state_required = 0; - return {nullptr, size_t{0}}; - } - - // Ensure that we have enough room for the longest mask. - if (max_mask_len) { - max_len = max(max_len, max_mask_len - 1); - } - - u32 boundary[MAX_MODES]; - u32 positions[MAX_MODES]; - u32 hashEntries[MAX_MODES]; - - analyzeLits(long_lits, max_len, boundary, positions, hashEntries); - - // first assess the size and find our caseless threshold - size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader)); - - size_t litTabOffset = headerSize; - - size_t litTabNumEntries = long_lits.size() + 1; - size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral)); - - size_t wholeLitTabOffset = litTabOffset + litTabSize; - size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] + - positions[CASELESS]); - - size_t htOffset[MAX_MODES]; - size_t htSize[MAX_MODES]; - - htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize; - htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry); - htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL]; - htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry); - - size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]); - - // need to add +2 to both of these to allow space for the actual largest - // value as well as handling the fact that we add one to the space when - // storing out a position to allow zero to mean "no stream state value" - u8 streamBits[MAX_MODES]; - streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2)); - streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2)); - u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8; - - auto secondaryTable = aligned_zmalloc_unique(tabSize); - assert(secondaryTable); // otherwise would have thrown std::bad_alloc - - // then fill it in - u8 * ptr = secondaryTable.get(); - FDRSTableHeader * header = (FDRSTableHeader *)ptr; - // fill 
in header - header->pseudoEngineID = (u32)0xffffffff; - header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - header->boundary[m] = boundary[m]; - header->hashOffset[m] = verify_u32(htOffset[m]); - header->hashNBits[m] = lg2(hashEntries[m]); - header->streamStateBits[m] = streamBits[m]; - } - assert(tot_state_bytes < sizeof(u64a)); - header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - - ptr += headerSize; - - // now fill in the rest - - FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr; - ptr += litTabSize; - - map litToOffsetVal; - for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { - u32 entry = verify_u32(i - long_lits.begin()); - u32 offset = verify_u32(ptr - secondaryTable.get()); - - // point the table entry to the string location - litTabPtr[entry].offset = offset; - - litToOffsetVal[entry] = offset; - - // copy the string into the string location - memcpy(ptr, i->s.c_str(), i->s.size()); - - ptr += i->s.size(); // and the string location - } - - // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get()); - - // fill hash tables - ptr = secondaryTable.get() + htOffset[CASEFUL]; - for (u32 m = CASEFUL; m < MAX_MODES; ++m) { - fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m], - (Modes)m, litToOffsetVal); - ptr += htSize[m]; - } - - // tell the world what we did - stream_control.literal_history_required = max_len; - stream_control.literal_stream_state_required = tot_state_bytes; - return {move(secondaryTable), tabSize}; -} - -} // namespace ue2 diff --git a/src/fdr/fdr_streaming_internal.h b/src/fdr/fdr_streaming_internal.h deleted file mode 100644 index 11b07b56..00000000 --- a/src/fdr/fdr_streaming_internal.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_INTERNAL_H -#define FDR_STREAMING_INTERNAL_H - -#include "ue2common.h" -#include "fdr_internal.h" -#include "util/unaligned.h" - -// tertiary table: -// a header (FDRSTableHeader) -// long_lits.size()+1 entries holding an offset to the string in the -// 'whole literal table' (FDRSLiteral structure) -// the whole literal table - every string packed in (freeform) -// hash table (caseful) (FDRSHashEntry) -// hash table (caseless) (FDRSHashEntry) - -enum Modes { - CASEFUL = 0, - CASELESS = 1, - MAX_MODES = 2 -}; - -// We have one of these structures hanging off the 'link' of our secondary -// FDR table that handles streaming strings -struct FDRSTableHeader { - u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR - - // string id one beyond the maximum entry for this type of literal - // boundary[CASEFUL] is the end of the caseful literals - // boundary[CASELESS] is the end of the caseless literals and one beyond - // the largest literal id (the size of the littab) - u32 boundary[MAX_MODES]; - - // offsets are 0 if no such table exists - // offset from the base of the tertiary structure to the hash table - u32 hashOffset[MAX_MODES]; - u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table - - u8 streamStateBits[MAX_MODES]; - u8 streamStateBytes; // total size of packed stream state in bytes - u8 N; // prefix lengths - u16 pad; -}; - -// One of these structures per literal entry in our secondary FDR table. -struct FDRSLiteral { - u32 offset; - // potentially - another u32 to point to the 'next lesser included literal' - // which would be a literal that overlaps this one in such a way that a - // failure to match _this_ literal can leave us in a state that we might - // still match that literal. Offset information might also be called for, - // in which case we might be wanting to use a FDRSLiteralOffset -}; - -typedef u32 FDRSLiteralOffset; - -#define LINK_INVALID 0xffffffff - -// One of these structures per hash table entry in our secondary FDR table -struct FDRSHashEntry { - u64a bitfield; - FDRSLiteralOffset state; - u32 link; -}; - -static really_inline -u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return m == CASEFUL ? 
0 : h->boundary[m-1]; -} - -static really_inline -u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { - return h->boundary[m]; -} - -static really_inline -const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) { - return (const struct FDRSLiteral *) (((const u8 *)h) + - ROUNDUP_16(sizeof(struct FDRSTableHeader))); -} - -static really_inline -u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) { - return getLitTab(h)[get_start_lit_idx(h, m)].offset; -} - -static really_inline -u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v - getBaseOffsetOfLits(h, m) + 1; -} - -static really_inline -u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { - return v + getBaseOffsetOfLits(h, m) - 1; -} - -static really_inline -u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) { - return (ent->bitfield >> bit) & 0x1; -} - -static really_inline -u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) { - const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; - const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; - assert(len >= 32); - - u64a v1 = unaligned_load_u64a(ptr); - u64a v2 = unaligned_load_u64a(ptr + 8); - u64a v3 = unaligned_load_u64a(ptr + 16); - if (mode == CASELESS) { - v1 &= CASEMASK; - v2 &= CASEMASK; - v3 &= CASEMASK; - } - v1 *= MULTIPLIER; - v2 *= (MULTIPLIER*MULTIPLIER); - v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER); - v1 >>= 32; - v2 >>= 32; - v3 >>= 32; - return v1 ^ v2 ^ v3; -} - -#endif diff --git a/src/fdr/fdr_streaming_runtime.h b/src/fdr/fdr_streaming_runtime.h deleted file mode 100644 index 8e264c76..00000000 --- a/src/fdr/fdr_streaming_runtime.h +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef FDR_STREAMING_RUNTIME_H -#define FDR_STREAMING_RUNTIME_H - -#include "fdr_streaming_internal.h" -#include "util/partial_store.h" - -#include - -static really_inline -const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) { - const u8 * linkPtr = ((const u8 *)fdr) + fdr->link; - // test if it's not really a engineID, but a 'pseudo engine id' - assert(*(const u32 *)linkPtr == 0xffffffff); - assert(linkPtr); - return (const struct FDRSTableHeader *)linkPtr; -} - -// Reads from stream state and unpacks values into stream state table. -static really_inline -void getStreamStates(const struct FDRSTableHeader * streamingTable, - const u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 ssb_mask = (1U << ssb) - 1; - u32 streamVal = partial_load_u32(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> ssb); - return; - } -#endif - - u64a ssb_mask = (1ULL << ssb) - 1; - u64a streamVal = partial_load_u64a(stream_state, ss_bytes); - table[CASEFUL] = (u32)(streamVal & ssb_mask); - table[CASELESS] = (u32)(streamVal >> (u64a)ssb); -} - -#ifndef NDEBUG -// Defensive checking (used in assert) that these table values don't overflow -// outside the range available. -static really_inline UNUSED -u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) { - u32 ssb_mask = (1ULL << (ssb)) - 1; - if (table[CASEFUL] & ~ssb_mask) { - return 1; - } - u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; - if (table[CASELESS] & ~ssb_nc_mask) { - return 1; - } - return 0; -} -#endif - -// Reads from stream state table and packs values into stream state. -static really_inline -void setStreamStates(const struct FDRSTableHeader * streamingTable, - u8 * stream_state, u32 * table) { - assert(streamingTable); - assert(stream_state); - assert(table); - - u8 ss_bytes = streamingTable->streamStateBytes; - u8 ssb = streamingTable->streamStateBits[CASEFUL]; - UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS]; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - assert(!streamingTableOverflow(table, ssb, ssb_nc)); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 stagingStreamState = table[CASEFUL]; - stagingStreamState |= (table[CASELESS] << ssb); - - partial_store_u32(stream_state, stagingStreamState, ss_bytes); - return; - } -#endif - - u64a stagingStreamState = (u64a)table[CASEFUL]; - stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb); - partial_store_u64a(stream_state, stagingStreamState, ss_bytes); -} - -u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) { - if (!stream_state) { - return 0; - } - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - u8 ss_bytes = streamingTable->streamStateBytes; - - // We just care if there are any bits set, and the test below is faster - // than a partial_load_u64a (especially on 32-bit hosts). 
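The get/setStreamStates pair above packs two position values into streamStateBytes of stream state: the caseful value in the low streamStateBits[CASEFUL] bits and the caseless value in the bits above it. A schematic of the scheme, plus a worked sizing example matching the "+ 2" and rounding logic in fdrBuildTableStreaming; helper names here are illustrative, u32/u64a come from ue2common.h, and roundUpToPowerOfTwo is the helper shown earlier in this file:

    /* Pack and unpack two sub-byte-aligned fields. */
    static u64a pack2(u64a caseful, u64a caseless, u32 ssb) {
        return caseful | (caseless << ssb);
    }

    static void unpack2(u64a v, u32 ssb, u64a *caseful, u64a *caseless) {
        u64a mask = (1ULL << ssb) - 1;
        *caseful = v & mask;
        *caseless = v >> ssb;
    }

    /* Sizing: 1000 caseful and 300 caseless positions need
     * lg2(roundUpToPowerOfTwo(1002)) = 10 bits and
     * lg2(roundUpToPowerOfTwo(302)) = 9 bits, hence (10 + 9 + 7) / 8 = 3
     * bytes of packed stream state per stream. */
    static u32 stream_state_bytes(u32 pos_caseful, u32 pos_caseless) {
        u32 b1 = lg2(roundUpToPowerOfTwo(pos_caseful + 2));
        u32 b2 = lg2(roundUpToPowerOfTwo(pos_caseless + 2));
        return (b1 + b2 + 7) / 8;
    }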
- for (u32 i = 0; i < ss_bytes; i++) { - if (*stream_state) { - return 1; - } - ++stream_state; - } - return 0; -} - -// binary search for the literal index that contains the current state -static really_inline -u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable, - u32 stateValue, enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 lo = get_start_lit_idx(streamingTable, m); - u32 hi = get_end_lit_idx(streamingTable, m); - - // Now move stateValue back by one so that we're looking for the - // litTab entry that includes it the string, not the one 'one past' it - stateValue -= 1; - assert(lo != hi); - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - - // binary search to find the entry e such that: - // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral - while (lo + 1 < hi) { - u32 mid = (lo + hi) / 2; - if (litTab[mid].offset <= stateValue) { - lo = mid; - } else { //(litTab[mid].offset > stateValue) { - hi = mid; - } - } - assert(litTab[lo].offset <= stateValue); - assert(litTab[hi].offset > stateValue); - return lo; -} - -static really_inline -void fdrUnpackStateMode(struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const struct FDRSLiteral * litTab, - const u32 *state_table, - const enum Modes m) { - if (!state_table[m]) { - return; - } - - u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]); - u32 idx = findLitTabEntry(streamingTable, stateValue, m); - size_t found_offset = litTab[idx].offset; - const u8 * found_buf = found_offset + (const u8 *)streamingTable; - size_t found_sz = stateValue - found_offset; - if (m == CASEFUL) { - a->buf_history = found_buf; - a->len_history = found_sz; - } else { - a->buf_history_nocase = found_buf; - a->len_history_nocase = found_sz; - } -} - -static really_inline -void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a, - const u8 * stream_state) { - // nothing to do if there's no stream state for the case - if (!stream_state) { - return; - } - - const struct FDRSTableHeader * streamingTable = getSHDR(fdr); - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - - u32 state_table[MAX_MODES]; - getStreamStates(streamingTable, stream_state, state_table); - - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL); - fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS); -} - -static really_inline -u32 do_single_confirm(const struct FDRSTableHeader *streamingTable, - const struct FDR_Runtime_Args *a, u32 hashState, - enum Modes m) { - const struct FDRSLiteral * litTab = getLitTab(streamingTable); - u32 idx = findLitTabEntry(streamingTable, hashState, m); - size_t found_offset = litTab[idx].offset; - const u8 * s1 = found_offset + (const u8 *)streamingTable; - assert(hashState > found_offset); - size_t l1 = hashState - found_offset; - const u8 * buf = a->buf; - size_t len = a->len; - const char nocase = m != CASEFUL; - - if (l1 > len) { - const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history; - size_t hist_len = nocase ? 
a->len_history_nocase : a->len_history; - - if (l1 > len+hist_len) { - return 0; // Break out - not enough total history - } - - size_t overhang = l1 - len; - assert(overhang <= hist_len); - - if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) { - return 0; - } - s1 += overhang; - l1 -= overhang; - } - // if we got here, we don't need history or we compared ok out of history - assert(l1 <= len); - - if (cmpForward(buf + len - l1, s1, l1, nocase)) { - return 0; - } - return hashState; // our new state -} - -static really_inline -void fdrFindStreamingHash(const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - u8 hash_len, u32 *hashes) { - u8 tempbuf[128]; - const u8 *base; - if (hash_len > a->len) { - assert(hash_len <= 128); - size_t overhang = hash_len - a->len; - assert(overhang <= a->len_history); - memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang); - memcpy(tempbuf + overhang, a->buf, a->len); - base = tempbuf; - } else { - assert(hash_len <= a->len); - base = a->buf + a->len - hash_len; - } - - if (streamingTable->hashNBits[CASEFUL]) { - hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL); - } - if (streamingTable->hashNBits[CASELESS]) { - hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS); - } -} - -static really_inline -const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, - u32 h, const enum Modes m) { - u32 nbits = streamingTable->hashNBits[m]; - if (!nbits) { - return NULL; - } - - u32 h_ent = h & ((1 << nbits) - 1); - u32 h_low = (h >> nbits) & 63; - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - const struct FDRSHashEntry *ent = tab + h_ent; - - if (!has_bit(ent, h_low)) { - return NULL; - } - - return ent; -} - -static really_inline -void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a, - const struct FDRSTableHeader *streamingTable, - const struct FDRSHashEntry *ent, const enum Modes m) { - assert(ent); - assert(streamingTable->hashNBits[m]); - - const struct FDRSHashEntry *tab = - (const struct FDRSHashEntry *)((const u8 *)streamingTable - + streamingTable->hashOffset[m]); - - while (1) { - u32 tmp = 0; - if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) { - state_table[m] = packStateVal(streamingTable, m, tmp); - break; - } - if (ent->link == LINK_INVALID) { - break; - } - ent = tab + ent->link; - } -} - -static really_inline -void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a, - u8 *stream_state) { - // nothing to do if there's no stream state for the case - if (!stream_state) { - return; - } - - // get pointers to the streamer FDR and the tertiary structure - const struct FDRSTableHeader *streamingTable = getSHDR(fdr); - - assert(streamingTable->N); - - u32 state_table[MAX_MODES] = {0, 0}; - - // if we don't have enough history, we don't need to do anything - if (streamingTable->N <= a->len + a->len_history) { - u32 hashes[MAX_MODES] = {0, 0}; - - fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes); - - const struct FDRSHashEntry *ent_ful = getEnt(streamingTable, - hashes[CASEFUL], CASEFUL); - const struct FDRSHashEntry *ent_less = getEnt(streamingTable, - hashes[CASELESS], CASELESS); - - if (ent_ful) { - fdrPackStateMode(state_table, a, streamingTable, ent_ful, - CASEFUL); - } - - if (ent_less) { - fdrPackStateMode(state_table, a, streamingTable, ent_less, - CASELESS); - } - } - - 
setStreamStates(streamingTable, stream_state, state_table); -} - -#endif diff --git a/src/grey.cpp b/src/grey.cpp index bad56b56..340a34bf 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -51,6 +51,7 @@ Grey::Grey(void) : allowLbr(true), allowMcClellan(true), allowSheng(true), + allowMcSheng(true), allowPuff(true), allowLiteral(true), allowRose(true), @@ -217,6 +218,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowLbr); G_UPDATE(allowMcClellan); G_UPDATE(allowSheng); + G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); G_UPDATE(allowRose); diff --git a/src/grey.h b/src/grey.h index 90f5f826..4882af7d 100644 --- a/src/grey.h +++ b/src/grey.h @@ -51,6 +51,7 @@ struct Grey { bool allowLbr; bool allowMcClellan; bool allowSheng; + bool allowMcSheng; bool allowPuff; bool allowLiteral; bool allowRose; diff --git a/src/hs.cpp b/src/hs.cpp index 07f6d2c1..f64e867a 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -192,6 +192,14 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *db = nullptr; + *comp_error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!checkMode(mode, comp_error)) { *db = nullptr; assert(*comp_error); // set by checkMode. @@ -319,6 +327,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, return HS_COMPILER_ERROR; } +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + if (!info) { *error = generateCompileError("Invalid parameter: info is NULL", -1); return HS_COMPILER_ERROR; @@ -426,6 +441,11 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) { extern "C" HS_PUBLIC_API hs_error_t hs_free_compile_error(hs_compile_error_t *error) { +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + return HS_ARCH_ERROR; + } +#endif freeCompileError(error); return HS_SUCCESS; } diff --git a/src/hs_common.h b/src/hs_common.h index 4bf31146..b25b1842 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -435,6 +435,23 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); */ const char *hs_version(void); +/** + * Utility function to test the current system architecture. + * + * Hyperscan requires the Supplemental Streaming SIMD Extensions 3 instruction + * set. This function can be called on any x86 platform to determine if the + * system provides the required instruction set. + * + * This function does not test for more advanced features if Hyperscan has + * been built for a more specific architecture, for example the AVX2 + * instruction set. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not + * support Hyperscan. + */ +hs_error_t hs_valid_platform(void); + /** * @defgroup HS_ERROR hs_error_t values * @@ -519,6 +536,17 @@ const char *hs_version(void); */ #define HS_SCRATCH_IN_USE (-10) +/** + * Unsupported CPU architecture. + * + * This error is returned when Hyperscan is able to detect that the current + * system does not support the required instruction set. + * + * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 + * (SSSE3). 
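hs_valid_platform() (implemented in the new hs_valid_platform.c below) is the intended early check for this error code. A minimal usage sketch; the message text is illustrative:

    #include <stdio.h>
    #include "hs_common.h"

    static int ensure_platform(void) {
        if (hs_valid_platform() != HS_SUCCESS) {
            fprintf(stderr, "host lacks SSSE3: Hyperscan cannot run\n");
            return -1;
        }
        return 0;
    }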
+ */ +#define HS_ARCH_ERROR (-11) + /** @} */ #ifdef __cplusplus diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c new file mode 100644 index 00000000..939cde1f --- /dev/null +++ b/src/hs_valid_platform.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "hs_common.h" +#include "util/cpuid_flags.h" + +HS_PUBLIC_API +hs_error_t hs_valid_platform(void) { + /* Hyperscan requires SSSE3, anything else is a bonus */ + if (check_ssse3()) { + return HS_SUCCESS; + } else { + return HS_ARCH_ERROR; + } +} diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 2e16f1ac..3c7615a7 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -200,8 +200,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, - void *ctxt, hwlm_group_t groups, - u8 *stream_state) { + void *ctxt, hwlm_group_t groups) { const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; @@ -234,13 +233,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); aa = &t->accel1; } - // if no active stream state, use acceleration - if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) { - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, - start, cb, ctxt, groups, stream_state); + start, cb, ctxt, groups); } } diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 009550e9..a17575df 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -132,8 +132,7 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback callback, - void *context, hwlm_group_t groups, - u8 *stream_state); + void *context, hwlm_group_t groups); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index b1814245..fa6335c9 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -461,7 +461,8 @@ void findForwardAccelScheme(const vector &lits, } const CharReach &cr = reach[min_offset]; - if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) { + if (-1 != + shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->shufti.accel_type = ACCEL_SHUFTI; @@ -469,7 +470,7 @@ void findForwardAccelScheme(const vector &lits, return; } - truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2); + truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", describeClass(cr).c_str(), cr.count(), min_offset); aux->truffle.accel_type = ACCEL_TRUFFLE; @@ -523,7 +524,7 @@ bool isNoodleable(const vector &lits, } if (stream_control) { // nullptr if in block mode - if (lits.front().s.length() + 1 > stream_control->history_max) { + if (lits.front().s.length() > stream_control->history_max + 1) { DEBUG_PRINTF("length of %zu too long for history max %zu\n", lits.front().s.length(), stream_control->history_max); @@ -552,6 +553,12 @@ aligned_unique_ptr hwlmBuild(const 
vector &lits, if (stream_control) { assert(stream_control->history_min <= stream_control->history_max); + + // We should not have been passed any literals that are too long to + // match with a maximally-sized history buffer. + assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { + return lit.s.length() <= stream_control->history_max + 1; + })); } // Check that we haven't exceeded the maximum number of literals. @@ -602,7 +609,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, stream_control->literal_history_required = lit.s.length() - 1; assert(stream_control->literal_history_required <= stream_control->history_max); - stream_control->literal_stream_state_required = 0; } eng = move(noodle); } else { diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index b5bdb0ea..fbf359e6 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,10 +63,6 @@ struct hwlmStreamingControl { /** \brief OUT parameter: History required by the literal matcher to * correctly match all literals. */ size_t literal_history_required; - - /** OUT parameter: Stream state required by literal matcher in bytes. Can - * be zero, and generally will be small (0-8 bytes). */ - size_t literal_stream_state_required; }; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index 9e365a0c..b0968d79 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -86,6 +86,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, const vector &msk_in, const vector &cmp_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), groups(groups_in), msk(msk_in), cmp(cmp_in) { + assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 7e63a6f3..b7af99d3 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -41,6 +41,9 @@ namespace ue2 { +/** \brief Max length of the literal passed to HWLM. */ +#define HWLM_LITERAL_MAX_LEN 255 + /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. 
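The relaxed noodle length check and the new hwlmBuild assert both follow from the same observation: a match reported at the end of the current block always has at least one byte in that block, so a literal of length L needs at most L - 1 bytes of history. The acceptance test, restated as a sketch with an illustrative helper name:

    /* A literal of length lit_len can be matched with only lit_len - 1
     * bytes of history, since its final byte is in the current block. */
    static int fits_history(size_t lit_len, size_t history_max) {
        return lit_len <= history_max + 1;
    }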
*/ #define HWLM_MASKLEN 8 diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp old mode 100755 new mode 100644 index ba21adc7..d257b530 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -56,15 +56,6 @@ struct path { }; }; -static UNUSED -string describeClasses(const vector &v) { - std::ostringstream oss; - for (const auto &cr : v) { - describeClass(oss, cr); - } - return oss.str(); -} - static void dump_paths(const vector &paths) { for (UNUSED const auto &p : paths) { @@ -482,9 +473,10 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } if (double_byte_ok(info) && - shuftiBuildDoubleMasks(info.double_cr, info.double_byte, - &accel->dshufti.lo1, &accel->dshufti.hi1, - &accel->dshufti.lo2, &accel->dshufti.hi2)) { + shuftiBuildDoubleMasks( + info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1, + (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2, + (u8 *)&accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; accel->dshufti.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); @@ -520,14 +512,16 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo, + (u8 *)&accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } assert(!info.cr.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1, + (u8 *)&accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h old mode 100755 new mode 100644 diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 6e2b8f41..e99e71a5 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -41,7 +41,7 @@ #include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_mask.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #include #include @@ -147,16 +147,20 @@ const char *accelName(u8 accel_type) { } static -void dumpShuftiCharReach(FILE *f, const m128 &lo, const m128 &hi) { +void dumpShuftiCharReach(FILE *f, const u8 *lo, const u8 *hi) { CharReach cr = shufti2cr(lo, hi); fprintf(f, "count %zu class %s\n", cr.count(), describeClass(cr).c_str()); } static -vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { - const u8 *lo = (const u8 *)&lo_in; - const u8 *hi = (const u8 *)&hi_in; +vector dshufti2cr_array(const u8 *lo_in, const u8 *hi_in) { + u8 lo[16]; + u8 hi[16]; + for (u32 i = 0; i < 16; i++) { + lo[i] = ~lo_in[i]; + hi[i] = ~hi_in[i]; + } vector crs(8); for (u32 i = 0; i < 256; i++) { u32 combined = lo[(u8)i & 0xf] & hi[(u8)i >> 4]; @@ -169,10 +173,10 @@ vector shufti2cr_array(const m128 lo_in, const m128 hi_in) { } static -void dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, - const m128 &lo2, const m128 &hi2) { - vector cr1 = shufti2cr_array(not128(lo1), not128(hi1)); - vector cr2 = shufti2cr_array(not128(lo2), not128(hi2)); +void dumpDShuftiCharReach(FILE *f, const u8 *lo1, const u8 *hi1, + const u8 *lo2, const u8 *hi2) { + vector cr1 = dshufti2cr_array(lo1, hi1); + vector cr2 = dshufti2cr_array(lo2, hi2); map > cr1_group; assert(cr1.size() == 8 && cr2.size() == 8); for (u32 i = 0; i < 8; i++) { @@ -208,26 +212,22 @@ void 
dumpDShuftiCharReach(FILE *f, const m128 &lo1, const m128 &hi1, } static -void dumpShuftiMasks(FILE *f, const m128 &lo, const m128 &hi) { - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&lo, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&hi, 128).c_str()); +void dumpShuftiMasks(FILE *f, const u8 *lo, const u8 *hi) { + fprintf(f, "lo %s\n", dumpMask(lo, 128).c_str()); + fprintf(f, "hi %s\n", dumpMask(hi, 128).c_str()); } static -void dumpTruffleCharReach(FILE *f, const m128 &hiset, const m128 &hiclear) { +void dumpTruffleCharReach(FILE *f, const u8 *hiset, const u8 *hiclear) { CharReach cr = truffle2cr(hiset, hiclear); fprintf(f, "count %zu class %s\n", cr.count(), describeClass(cr).c_str()); } static -void dumpTruffleMasks(FILE *f, const m128 &hiset, const m128 &hiclear) { - fprintf(f, "lo %s\n", - dumpMask((const u8 *)&hiset, 128).c_str()); - fprintf(f, "hi %s\n", - dumpMask((const u8 *)&hiclear, 128).c_str()); +void dumpTruffleMasks(FILE *f, const u8 *hiset, const u8 *hiclear) { + fprintf(f, "lo %s\n", dumpMask(hiset, 128).c_str()); + fprintf(f, "hi %s\n", dumpMask(hiclear, 128).c_str()); } @@ -256,23 +256,31 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { break; case ACCEL_SHUFTI: { fprintf(f, "\n"); - dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi); - dumpShuftiCharReach(f, accel.shufti.lo, accel.shufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.shufti.lo, + (const u8 *)&accel.shufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.shufti.lo, + (const u8 *)&accel.shufti.hi); break; } case ACCEL_DSHUFTI: fprintf(f, "\n"); fprintf(f, "mask 1\n"); - dumpShuftiMasks(f, accel.dshufti.lo1, accel.dshufti.hi1); + dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo1, + (const u8 *)&accel.dshufti.hi1); fprintf(f, "mask 2\n"); - dumpShuftiMasks(f, accel.dshufti.lo2, accel.dshufti.hi2); - dumpDShuftiCharReach(f, accel.dshufti.lo1, accel.dshufti.hi1, - accel.dshufti.lo2, accel.dshufti.hi2); + dumpShuftiMasks(f, (const u8 *)&accel.dshufti.lo2, + (const u8 *)&accel.dshufti.hi2); + dumpDShuftiCharReach(f, (const u8 *)&accel.dshufti.lo1, + (const u8 *)&accel.dshufti.hi1, + (const u8 *)&accel.dshufti.lo2, + (const u8 *)&accel.dshufti.hi2); break; case ACCEL_TRUFFLE: { fprintf(f, "\n"); - dumpTruffleMasks(f, accel.truffle.mask1, accel.truffle.mask2); - dumpTruffleCharReach(f, accel.truffle.mask1, accel.truffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.truffle.mask1, + (const u8 *)&accel.truffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.truffle.mask1, + (const u8 *)&accel.truffle.mask2); break; } case ACCEL_MLVERM: @@ -297,28 +305,36 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { case ACCEL_MSSHUFTI: case ACCEL_MSGSHUFTI: fprintf(f, " len:%u\n", accel.mshufti.len); - dumpShuftiMasks(f, accel.mshufti.lo, accel.mshufti.hi); - dumpShuftiCharReach(f, accel.mshufti.lo, accel.mshufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo, + (const u8 *)&accel.mshufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo, + (const u8 *)&accel.mshufti.hi); break; case ACCEL_MDSSHUFTI: case ACCEL_MDSGSHUFTI: fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); - dumpShuftiMasks(f, accel.mdshufti.lo, accel.mdshufti.hi); - dumpShuftiCharReach(f, accel.mdshufti.lo, accel.mdshufti.hi); + dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo, + (const u8 *)&accel.mdshufti.hi); + dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo, + (const u8 *)&accel.mdshufti.hi); break; case ACCEL_MLTRUFFLE: case ACCEL_MLGTRUFFLE: case ACCEL_MSTRUFFLE: 
case ACCEL_MSGTRUFFLE: fprintf(f, " len:%u\n", accel.mtruffle.len); - dumpTruffleMasks(f, accel.mtruffle.mask1, accel.mtruffle.mask2); - dumpTruffleCharReach(f, accel.mtruffle.mask1, accel.mtruffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1, + (const u8 *)&accel.mtruffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1, + (const u8 *)&accel.mtruffle.mask2); break; case ACCEL_MDSTRUFFLE: case ACCEL_MDSGTRUFFLE: fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); - dumpTruffleMasks(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); - dumpTruffleCharReach(f, accel.mdtruffle.mask1, accel.mdtruffle.mask2); + dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1, + (const u8 *)&accel.mdtruffle.mask2); + dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1, + (const u8 *)&accel.mdtruffle.mask2); break; default: fprintf(f, "\n"); diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 75960dda..32e569ba 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -72,8 +72,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { } DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo, - &aux->shufti.hi)) { + if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, + (u8 *)&aux->shufti.hi)) { aux->accel_type = ACCEL_SHUFTI; aux->shufti.offset = offset; DEBUG_PRINTF("shufti built OK\n"); @@ -86,8 +86,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("building Truffle for %zu chars\n", outs); aux->accel_type = ACCEL_TRUFFLE; aux->truffle.offset = offset; - truffleBuildMasks(info.single_stops, &aux->truffle.mask1, - &aux->truffle.mask2); + truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1, + (u8 *)&aux->truffle.mask2); return; } @@ -212,9 +212,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { " two-byte literals\n", outs1, outs2); aux->accel_type = ACCEL_DSHUFTI; aux->dshufti.offset = offset; - if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2, - &aux->dshufti.lo1, &aux->dshufti.hi1, - &aux->dshufti.lo2, &aux->dshufti.hi2)) { + if (shuftiBuildDoubleMasks( + info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1, + (u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2, + (u8 *)&aux->dshufti.hi2)) { return; } } @@ -372,8 +373,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { switch (info.ma_type) { case MultibyteAccelInfo::MAT_LONG: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MLSHUFTI; @@ -381,8 +382,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_LONGGRAB: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MLGSHUFTI; @@ -390,8 +391,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_SHIFT: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MSSHUFTI; @@ -399,8 +400,8 @@ void buildAccelMulti(const AccelInfo &info, 
AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (shuftiBuildMasks(stops, &aux->mshufti.lo, - &aux->mshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, + (u8 *)&aux->mshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MSGSHUFTI; @@ -408,8 +409,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mshufti.len = info.ma_len1; return; case MultibyteAccelInfo::MAT_DSHIFT: - if (shuftiBuildMasks(stops, &aux->mdshufti.lo, - &aux->mdshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, + (u8 *)&aux->mdshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MDSSHUFTI; @@ -418,8 +419,8 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->mdshufti.len2 = info.ma_len2; return; case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (shuftiBuildMasks(stops, &aux->mdshufti.lo, - &aux->mdshufti.hi) == -1) { + if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, + (u8 *)&aux->mdshufti.hi) == -1) { break; } aux->accel_type = ACCEL_MDSGSHUFTI; @@ -441,45 +442,45 @@ void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_MLTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_LONGGRAB: aux->accel_type = ACCEL_MLGTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_SHIFT: aux->accel_type = ACCEL_MSTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_SHIFTGRAB: aux->accel_type = ACCEL_MSGTRUFFLE; aux->mtruffle.offset = offset; aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mtruffle.mask2); break; case MultibyteAccelInfo::MAT_DSHIFT: aux->accel_type = ACCEL_MDSTRUFFLE; aux->mdtruffle.offset = offset; aux->mdtruffle.len1 = info.ma_len1; aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mdtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mdtruffle.mask2); break; case MultibyteAccelInfo::MAT_DSHIFTGRAB: aux->accel_type = ACCEL_MDSGTRUFFLE; aux->mdtruffle.offset = offset; aux->mdtruffle.len1 = info.ma_len1; aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, &aux->mtruffle.mask1, - &aux->mdtruffle.mask2); + truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, + (u8 *)&aux->mdtruffle.mask2); break; default: // shouldn't happen diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 6a72ae31..7c158b31 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -745,10 +745,10 @@ void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { } static really_inline -char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, - enum MatchMode mode) { +char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, + enum MatchMode mode) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + 
assert(n->type == CASTLE_NFA); DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState); @@ -856,14 +856,14 @@ char nfaExecCastle0_Q_i(const struct NFA *n, struct mq *q, s64a end, return mmbit_any_precise(active, c->numRepeats); } -char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle0_Q_i(n, q, end, CALLBACK_OUTPUT); + return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); } -char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle0_Q_i(n, q, end, STOP_AT_MATCH); + return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); } static @@ -896,9 +896,9 @@ s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { return sp - 1; /* the repeats are never killed */ } -char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); if (q->cur == q->end) { @@ -959,9 +959,9 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report) { return 1; } -char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -969,19 +969,19 @@ char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); return castleInAccept(c, q, report, q_cur_offset(q)); } -char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1019,9 +1019,9 @@ char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) { } -char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { +char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1038,10 +1038,10 @@ char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { return 0; } -char nfaExecCastle0_initCompressedState(const struct NFA *n, UNUSED u64a offset, - void *state, UNUSED u8 key) { +char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, + void *state, UNUSED u8 key) { assert(n && state); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -1070,10 +1070,10 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, repeatPack(packed, info, rctrl, offset); } -char nfaExecCastle0_queueCompressState(const struct NFA *n, const struct mq *q, - s64a loc) { +char 
nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { assert(n && q); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, loc=%lld\n", loc); const struct Castle *c = getImplNfa(n); @@ -1118,11 +1118,10 @@ void subCastleExpandState(const struct Castle *c, const u32 subIdx, packed + info->packedCtrlSize, offset)); } -char nfaExecCastle0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, - UNUSED u8 key) { +char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, + u64a offset, UNUSED u8 key) { assert(n && dest && src); - assert(n->type == CASTLE_NFA_0); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset); const struct Castle *c = getImplNfa(n); diff --git a/src/nfa/castle.h b/src/nfa/castle.h index 84d79097..cc7496ca 100644 --- a/src/nfa/castle.h +++ b/src/nfa/castle.h @@ -38,24 +38,24 @@ extern "C" { struct mq; struct NFA; -char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecCastle0_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); +char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecCastle_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecCastle_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); -#define nfaExecCastle0_testEOD NFA_API_NO_IMPL -#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL -#define nfaExecCastle0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecCastle_testEOD NFA_API_NO_IMPL +#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL +#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL #ifdef __cplusplus } diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index fd1521a5..1514ca8c 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -40,18 +40,18 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "util/charreach.h" +#include "util/dump_util.h" #include "util/dump_charclass.h" #ifndef DUMP_SUPPORT #error No dump support! #endif -namespace ue2 { +/* Note: No dot files for castle */ -void nfaExecCastle0_dumpDot(const struct NFA *, FILE *, - UNUSED const std::string &base) { - // No GraphViz output for Castles. 
-} +using namespace std; + +namespace ue2 { static void dumpTextSubCastle(const SubCastle &sub, FILE *f) { @@ -68,9 +68,11 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) { fprintf(f, "\n"); } -void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) { +void nfaExecCastle_dump(const struct NFA *nfa, const string &base) { const Castle *c = (const Castle *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + fprintf(f, "Castle multi-tenant repeat engine\n"); fprintf(f, "\n"); fprintf(f, "Number of repeat tenants: %u\n", c->numRepeats); @@ -86,13 +88,15 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) { fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c); break; case CASTLE_SHUFTI: { - const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi); + const CharReach cr = shufti2cr((const u8 *)&c->u.shuf.mask_lo, + (const u8 *)&c->u.shuf.mask_hi); fprintf(f, "shufti, scanning for %s (%zu chars)\n", describeClass(cr).c_str(), cr.count()); break; } case CASTLE_TRUFFLE: { - const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2); + const CharReach cr = truffle2cr((const u8 *)&c->u.truffle.mask1, + (const u8 *)&c->u.truffle.mask2); fprintf(f, "truffle, scanning for %s (%zu chars)\n", describeClass(cr).c_str(), cr.count()); break; @@ -113,6 +117,7 @@ void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) { fprintf(f, "Sub %u:\n", i); dumpTextSubCastle(sub[i], f); } + fclose(f); } } // namespace ue2 diff --git a/src/nfa/castle_dump.h b/src/nfa/castle_dump.h index 94dadec0..06e7e36e 100644 --- a/src/nfa/castle_dump.h +++ b/src/nfa/castle_dump.h @@ -31,16 +31,13 @@ #if defined(DUMP_SUPPORT) -#include #include struct NFA; namespace ue2 { -void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file); +void nfaExecCastle_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 4bddf767..3b40ab9a 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -49,7 +49,6 @@ #include "util/graph.h" #include "util/make_unique.h" #include "util/multibit_build.h" -#include "util/multibit_internal.h" #include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/verify_types.h" @@ -58,6 +57,7 @@ #include #include +#include #include using namespace std; @@ -100,13 +100,15 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { return; } - if (shuftiBuildMasks(negated, &c->u.shuf.mask_lo, &c->u.shuf.mask_hi) != -1) { + if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, + (u8 *)&c->u.shuf.mask_hi) != -1) { c->type = CASTLE_SHUFTI; return; } c->type = CASTLE_TRUFFLE; - truffleBuildMasks(negated, &c->u.truffle.mask1, &c->u.truffle.mask2); + truffleBuildMasks(negated, (u8 *)&c->u.truffle.mask1, + (u8 *)&c->u.truffle.mask2); } static @@ -576,7 +578,7 @@ buildCastle(const CastleProto &proto, total_size += byte_length(stale_iter); // stale sparse iter aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); - nfa->type = verify_u8(CASTLE_NFA_0); + nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); nfa->streamStateSize = streamStateSize; @@ -903,8 +905,8 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { u32 min_bound = pr.bounds.min; // always finite if (min_bound == 0) { // Vacuous case, we can only do this once.
assert(!edge(g.start, g.accept, g).second); - NFAEdge e = add_edge(g.start, g.accept, g).first; - g[e].top = top; + NFAEdge e = add_edge(g.start, g.accept, g); + g[e].tops.insert(top); g[u].reports.insert(pr.reports.begin(), pr.reports.end()); min_bound = 1; } @@ -912,9 +914,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { for (u32 i = 0; i < min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = pr.reach; - NFAEdge e = add_edge(u, v, g).first; + NFAEdge e = add_edge(u, v, g); if (u == g.start) { - g[e].top = top; + g[e].tops.insert(top); } u = v; } @@ -931,9 +933,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { if (head != u) { add_edge(head, v, g); } - NFAEdge e = add_edge(u, v, g).first; + NFAEdge e = add_edge(u, v, g); if (u == g.start) { - g[e].top = top; + g[e].tops.insert(top); } u = v; } @@ -978,15 +980,10 @@ unique_ptr makeHolder(const CastleProto &proto, auto g = ue2::make_unique(proto.kind); for (const auto &m : proto.repeats) { - if (m.first >= NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("top %u too big for an NFA\n", m.first); - return nullptr; - } - addToHolder(*g, m.first, m.second); } - //dumpGraph("castle_holder.dot", g->g); + //dumpGraph("castle_holder.dot", *g); // Sanity checks. assert(allMatchStatesHaveReports(*g)); diff --git a/src/nfa/dfa_build_strat.cpp b/src/nfa/dfa_build_strat.cpp old mode 100755 new mode 100644 diff --git a/src/nfa/gough.c b/src/nfa/gough.c index 520aca93..44acd4c2 100644 --- a/src/nfa/gough.c +++ b/src/nfa/gough.c @@ -655,12 +655,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { /* this is as far as we go */ q->cur--; @@ -691,8 +685,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *final_look; if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, - report ? mode : NO_MATCHES) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return 0; @@ -724,7 +717,6 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { @@ -789,12 +781,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { /* this is as far as we go */ @@ -822,10 +808,8 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, - report ? 
mode : NO_MATCHES) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { - assert(report); *(u16 *)q->state = 0; return 0; } @@ -856,7 +840,6 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { diff --git a/src/nfa/goughcompile_dump.cpp b/src/nfa/goughcompile_dump.cpp index dd76b9ec..cb361cdb 100644 --- a/src/nfa/goughcompile_dump.cpp +++ b/src/nfa/goughcompile_dump.cpp @@ -275,7 +275,7 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) { } void dump(const GoughGraph &g, const string &base, const Grey &grey) { - if (!grey.dumpFlags) { + if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) { return; } @@ -311,9 +311,9 @@ void dump_block(FILE *f, const gough_edge_id &e, } } -void dump_blocks(const map > &blocks, +void dump_blocks(const map> &blocks, const string &base, const Grey &grey) { - if (!grey.dumpFlags) { + if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) { return; } diff --git a/src/nfa/goughdump.cpp b/src/nfa/goughdump.cpp index 4e6e5425..1b37a0b1 100644 --- a/src/nfa/goughdump.cpp +++ b/src/nfa/goughdump.cpp @@ -37,6 +37,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/unaligned.h" #include @@ -259,8 +260,8 @@ void dumpTransitions(const NFA *nfa, FILE *f, fprintf(f, "\n"); } -void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -279,6 +280,7 @@ void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, fprintf(f, "}\n"); } +static void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_8); @@ -303,8 +305,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } -void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -323,6 +325,7 @@ void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, fprintf(f, "}\n"); } +static void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -348,4 +351,24 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +void nfaExecGough16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == GOUGH_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecGough16_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecGough16_dumpDot(nfa, f); + fclose(f); +} + +void nfaExecGough8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == GOUGH_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecGough8_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecGough8_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/goughdump.h b/src/nfa/goughdump.h index b96938e4..2d204d5a 100644 --- a/src/nfa/goughdump.h +++ b/src/nfa/goughdump.h @@ -39,12 +39,8 @@ struct NFA; namespace ue2 { -void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecGough16_dumpDot(const NFA 
*nfa, FILE *file, - const std::string &base); -void nfaExecGough8_dumpText(const NFA *nfa, FILE *file); -void nfaExecGough16_dumpText(const NFA *nfa, FILE *file); +void nfaExecGough8_dump(const NFA *nfa, const std::string &base); +void nfaExecGough16_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index 07e59239..3075be33 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -307,7 +307,7 @@ char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! return 0; } @@ -316,7 +316,7 @@ static really_inline char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -340,7 +340,7 @@ static really_inline char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -365,7 +365,7 @@ char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -389,7 +389,7 @@ char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { @@ -413,7 +413,7 @@ char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! 
return 0; } @@ -422,7 +422,7 @@ static really_inline char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -446,7 +446,7 @@ static really_inline char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -471,7 +471,7 @@ char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -495,7 +495,7 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 3412ddf5..0948e122 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -42,38 +42,17 @@ #include "trufflecompile.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #ifndef DUMP_SUPPORT #error No dump support! #endif +/* Note: No dot files for LBR */ +using namespace std; + namespace ue2 { -void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - -void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, - UNUSED const std::string &base) { - // No impl -} - static void lbrDumpCommon(const lbr_common *lc, FILE *f) { const RepeatInfo *info @@ -88,60 +67,80 @@ void lbrDumpCommon(const lbr_common *lc, FILE *f) { fprintf(f, "min period: %u\n", info->minPeriod); } -void nfaExecLbrDot_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrDot_dump(const NFA *nfa, const string &base) { assert(nfa); - assert(nfa->type == LBR_NFA_Dot); + assert(nfa->type == LBR_NFA_DOT); const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); lbrDumpCommon(&ld->common, f); fprintf(f, "DOT model\n"); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrVerm_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) { assert(nfa); - assert(nfa->type == LBR_NFA_Verm); + assert(nfa->type == LBR_NFA_VERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + lbrDumpCommon(&lv->common, f); fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrNVerm_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) { assert(nfa); - assert(nfa->type == LBR_NFA_NVerm); + assert(nfa->type == LBR_NFA_NVERM); const lbr_verm *lv = (const lbr_verm 
*)getImplNfa(nfa); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + lbrDumpCommon(&lv->common, f); fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrShuf_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { assert(nfa); - assert(nfa->type == LBR_NFA_Shuf); + assert(nfa->type == LBR_NFA_SHUF); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); - CharReach cr = shufti2cr(ls->mask_lo, ls->mask_hi); + CharReach cr = shufti2cr((const u8 *)&ls->mask_lo, + (const u8 *)&ls->mask_hi); fprintf(f, "SHUF model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } -void nfaExecLbrTruf_dumpText(const NFA *nfa, FILE *f) { +void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) { assert(nfa); - assert(nfa->type == LBR_NFA_Truf); + assert(nfa->type == LBR_NFA_TRUF); + + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa); lbrDumpCommon(&lt->common, f); - CharReach cr = truffle2cr(lt->mask1, lt->mask2); + CharReach cr = truffle2cr((const u8 *)&lt->mask1, + (const u8 *)&lt->mask2); fprintf(f, "TRUFFLE model, scanning for: %s (%zu chars)\n", describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); + fclose(f); } } // namespace ue2 diff --git a/src/nfa/lbr_dump.h b/src/nfa/lbr_dump.h index 06ed51e2..ea4e3f38 100644 --- a/src/nfa/lbr_dump.h +++ b/src/nfa/lbr_dump.h @@ -31,28 +31,17 @@ #ifdef DUMP_SUPPORT -#include #include struct NFA; namespace ue2 { -void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file); -void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file); -void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file); -void nfaExecLbrTruf_dumpText(const struct NFA *nfa, FILE *file); -void nfaExecLbrShuf_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecLbrDot_dump(const struct NFA *nfa, const std::string &base); +void nfaExecLbrVerm_dump(const struct NFA *nfa, const std::string &base); +void nfaExecLbrNVerm_dump(const struct NFA *nfa, const std::string &base); +void nfaExecLbrShuf_dump(const struct NFA *nfa, const std::string &base); +void nfaExecLbrTruf_dump(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/limex.h b/src/nfa/limex.h index ad53503c..0223604d 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -41,9 +41,7 @@ extern "C" #define GENERATE_NFA_DUMP_DECL(gf_name) \ } /* extern "C" */ \ namespace ue2 { \ - void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \ - const std::string &base); \ - void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \ + void gf_name##_dump(const struct NFA *nfa, const std::string &base); \ } /* namespace ue2 */ \ extern "C" { @@ -77,6 +75,7 @@ extern "C" GENERATE_NFA_DUMP_DECL(gf_name)
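With the dump interface collapsed to a single entry point per engine, `GENERATE_NFA_DUMP_DECL` now emits one `gf_name##_dump(nfa, base)` declaration in place of the old `_dumpDot`/`_dumpText` pair. A minimal sketch of the expansion, using a demo macro name so it does not collide with the real header:

```cpp
#include <string>

struct NFA;

// Demo version of the revised macro: one unified dump declaration per
// engine instead of separate dot/text entry points.
#define GENERATE_NFA_DUMP_DECL_DEMO(gf_name) \
    void gf_name##_dump(const struct NFA *nfa, const std::string &base);

GENERATE_NFA_DUMP_DECL_DEMO(nfaExecLimEx64)
// expands to:
//   void nfaExecLimEx64_dump(const struct NFA *nfa, const std::string &base);

int main() { return 0; } // declaration-only demo; nothing to run
```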
GENERATE_NFA_DECL(nfaExecLimEx32) +GENERATE_NFA_DECL(nfaExecLimEx64) GENERATE_NFA_DECL(nfaExecLimEx128) GENERATE_NFA_DECL(nfaExecLimEx256) GENERATE_NFA_DECL(nfaExecLimEx384) diff --git a/src/nfa/limex_64.c b/src/nfa/limex_64.c new file mode 100644 index 00000000..e8f0880b --- /dev/null +++ b/src/nfa/limex_64.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 64-bit runtime implementations. + */ + +/* Limex64 is unusual as, on 32 bit platforms, it uses an m128 at runtime for + * state calculations.
+ */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + +#define SIZE 64 +#define ENG_STATE_T u64a + +#ifdef ARCH_64_BIT +#define STATE_T u64a +#define LOAD_FROM_ENG load_u64a +#else +#define STATE_T m128 +#define LOAD_FROM_ENG load_m128_from_u64a +#endif + +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 28f37083..c74c7079 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -78,10 +78,26 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = packedExtract32(s, accel); + u32 idx = pext32(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = pext64(s, accel); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = pext64(movq(s), movq(accel)); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#endif + size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { diff --git a/src/nfa/limex_accel.h b/src/nfa/limex_accel.h index 173df759..e5c94e82 100644 --- a/src/nfa/limex_accel.h +++ b/src/nfa/limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "util/simd_utils.h" // for m128 etc union AccelAux; +struct LimExNFA64; struct LimExNFA128; struct LimExNFA256; struct LimExNFA384; @@ -49,6 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#endif + size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 9523b073..e441945d 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -31,14 +31,14 @@ /* impl of limex functions which depend only on state size */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(INLINE_ATTR) -# error Must define SIZE and STATE_T and INLINE_ATTR in includer. 
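The `SIZE`/`STATE_T`/`ENG_STATE_T` split above is the heart of the 64-bit model: the compiled bytecode always stores engine state as a `u64a`, while the working type is a `u64a` GPR on 64-bit hosts and an `m128` on 32-bit hosts, with `LOAD_FROM_ENG` bridging the two. A portable sketch of the idea, using plain integers and a two-word struct as a stand-in for the real `m128`:

```cpp
#include <stdint.h>
#include <stdio.h>

// Engine state as laid out in the compiled bytecode: always 64-bit.
typedef uint64_t eng_state_t;

#if UINTPTR_MAX > 0xffffffffu
// 64-bit host: work directly in a general-purpose register.
typedef uint64_t state_t;
static state_t load_from_eng(const eng_state_t *p) { return *p; }
#else
// 32-bit host: the real code uses an m128; a two-word struct stands in here.
struct state_t { uint64_t lo, hi; };
static state_t load_from_eng(const eng_state_t *p) {
    state_t s = { *p, 0 };
    return s;
}
#endif

int main() {
    eng_state_t in_bytecode = 0x8001u; // states 0 and 15 on
    state_t live = load_from_eng(&in_bytecode); // widen to the working type
    (void)live;
    printf("engine state loaded into working type\n");
    return 0;
}
```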
+#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ + || !defined(INLINE_ATTR) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. #endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) #define TESTEOD_FN JOIN(moNfaTestEod, SIZE) -#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE) #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) #define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) @@ -46,12 +46,11 @@ #define INITIAL_FN JOIN(moNfaInitial, SIZE) #define TOP_FN JOIN(moNfaTop, SIZE) #define TOPN_FN JOIN(moNfaTopN, SIZE) +#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE) #define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) #define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) #define ONES_STATE JOIN(ones_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) @@ -62,6 +61,20 @@ #define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) #define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) +#if defined(ARCH_64_BIT) && (SIZE >= 64) +#define CHUNK_T u64a +#define FIND_AND_CLEAR_FN findAndClearLSB_64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 +#else +#define CHUNK_T u32 +#define FIND_AND_CLEAR_FN findAndClearLSB_32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 +#endif + +#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T)) + static really_inline void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const union RepeatControl *repeat_ctrl, @@ -83,7 +96,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(accstate, cyclicState)) { + if (!TESTBIT_STATE(*accstate, cyclicState)) { continue; } @@ -100,70 +113,85 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, } } -static never_inline -char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, - const struct NFAAccept *acceptTable, u32 acceptCount, - u64a offset, NfaCallback callback, void *context) { +static really_inline +char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, + STATE_T *squash, const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, u64a offset, + NfaCallback callback, void *context) { assert(s); assert(limex); assert(callback); - assert(acceptCount); - // We have squash masks we might have to apply after firing reports. - STATE_T squash = ONES_STATE; - const STATE_T *squashMasks = (const STATE_T *) - ((const char *)limex + limex->squashOffset); + const STATE_T accept_mask = *acceptMask; + STATE_T accepts = AND_STATE(*s, accept_mask); - for (u32 i = 0; i < acceptCount; i++) { - const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { - DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", - a->state, a->externalId, offset); - int rv = callback(0, offset, a->externalId, context); + // Caller must ensure that we have at least one accept state on. + assert(ISNONZERO_STATE(accepts)); + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. 
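The loop that follows turns each on bit of a chunk into an index into the packed accept table: an entry's index is the number of accept-mask bits strictly below that bit (its rank within the chunk) plus `base_index`, the cumulative popcount of earlier chunks. A minimal sketch of the rank computation, assuming a GCC-style popcount builtin:

```cpp
#include <stdint.h>
#include <stdio.h>

// Index of 'bit' within the packed table implied by 'mask': the count of
// mask bits strictly below it. Assumes 'bit' is set in 'mask' and bit < 64.
static unsigned rank_in_mask64(uint64_t mask, unsigned bit) {
    return (unsigned)__builtin_popcountll(mask & ((1ULL << bit) - 1));
}

int main() {
    uint64_t accept_mask = 0x29; // accept states at bits 0, 3 and 5
    // Their packed accept-table entries sit at indices 0, 1 and 2:
    printf("%u %u %u\n", rank_in_mask64(accept_mask, 0),
           rank_in_mask64(accept_mask, 3), rank_in_mask64(accept_mask, 5));
    return 0;
}
```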
+ for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { + CHUNK_T chunk = chunks[i]; + while (chunk != 0) { + u32 bit = FIND_AND_CLEAR_FN(&chunk); + u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); + u32 idx = local_idx + base_index; + const struct NFAAccept *a = &acceptTable[idx]; + DEBUG_PRINTF("state %u: firing report list=%u, offset=%llu\n", + bit + i * (u32)sizeof(chunk) * 8, a->reports, offset); + int rv = limexRunAccept((const char *)limex, a, callback, context, + offset); if (unlikely(rv == MO_HALT_MATCHING)) { return 1; } - if (a->squash != MO_INVALID_IDX) { - assert(a->squash < limex->squashCount); - const STATE_T *sq = &squashMasks[a->squash]; - DEBUG_PRINTF("squash mask %u @ %p\n", a->squash, sq); - squash = AND_STATE(squash, LOAD_STATE(sq)); + if (squash != NULL && a->squash != MO_INVALID_IDX) { + DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash); + const ENG_STATE_T *sq = + (const ENG_STATE_T *)((const char *)limex + a->squash); + *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq)); } } + base_index += POPCOUNT_FN(mask_chunks[i]); } - STORE_STATE(s, AND_STATE(LOAD_STATE(s), squash)); return 0; } static never_inline -char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, - const struct NFAAccept *acceptTable, - u32 acceptCount, u64a offset, - NfaCallback callback, void *context) { - assert(s); - assert(callback); - assert(acceptCount); +char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, + const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, u64a offset, + NfaCallback callback, void *context) { + // We have squash masks we might have to apply after firing reports. + STATE_T squash = ONES_STATE; + char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, + acceptTable, offset, callback, context); - for (u32 i = 0; i < acceptCount; i++) { - const struct NFAAccept *a = &acceptTable[i]; - if (TESTBIT_STATE(s, a->state)) { - DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", - a->state, a->externalId, offset); - int rv = callback(0, offset, a->externalId, context); - if (unlikely(rv == MO_HALT_MATCHING)) { - return 1; - } - } - } - return 0; + *s = AND_STATE(*s, squash); + return rv; } -// Run EOD accepts. +static never_inline +char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s, + const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, + u64a offset, NfaCallback callback, + void *context) { + STATE_T *squash = NULL; + return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, acceptMask, acceptTable, + offset, callback, context); +} + +// Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this +// LimEx contains no repeat structures.
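As written above, `PROCESS_ACCEPTS_FN` must apply the accumulated squash mask to the state after all reports have fired and only then return, while the no-squash variant simply passes a NULL squash pointer. A toy illustration of that ordering, with hypothetical names:

```cpp
#include <stdint.h>
#include <stdio.h>

// Toy model of the squash ordering: fire a report for every on accept
// state, AND each state's squash mask into a survivors mask, and write the
// squashed state back only after all reports have gone out.
static void fire_and_squash(uint64_t accepts, const uint64_t *squash_masks,
                            uint64_t *state) {
    uint64_t squash = ~0ULL; // initially everything survives
    while (accepts) {
        unsigned bit = (unsigned)__builtin_ctzll(accepts);
        accepts &= accepts - 1; // clear lowest set bit
        printf("report fired for state %u\n", bit);
        squash &= squash_masks[bit];
    }
    *state &= squash; // the step an early return would wrongly skip
}

int main() {
    // State 0 is an accept whose report squashes state 1.
    const uint64_t squash_masks[2] = { ~2ULL, ~0ULL };
    uint64_t state = 0x3; // states 0 and 1 on
    fire_and_squash(state & 0x1, squash_masks, &state);
    printf("state after squash: 0x%llx\n", (unsigned long long)state);
    return 0;
}
```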
static really_inline char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, const union RepeatControl *repeat_ctrl, - const char *repeat_state, u64a offset, char do_br, + const char *repeat_state, u64a offset, NfaCallback callback, void *context) { assert(limex && s); @@ -172,47 +200,16 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, return MO_CONTINUE_MATCHING; } - const STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); + const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); - if (do_br) { - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, - offset + 1 /* EOD 'symbol' */, &foundAccepts); - } else { - assert(!limex->repeatCount); - } + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, + offset + 1 /* EOD 'symbol' */, &foundAccepts); if (unlikely(ISNONZERO_STATE(foundAccepts))) { const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); - if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable, - limex->acceptEodCount, offset, callback, - context)) { - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -char TESTEOD_REV_FN(const IMPL_NFA_T *limex, const STATE_T *s, u64a offset, - NfaCallback callback, void *context) { - assert(limex && s); - - // There may not be any EOD accepts in this NFA. - if (!limex->acceptEodCount) { - return MO_CONTINUE_MATCHING; - } - - STATE_T acceptEodMask = LOAD_STATE(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(LOAD_STATE(s), acceptEodMask); - - assert(!limex->repeatCount); - - if (unlikely(ISNONZERO_STATE(foundAccepts))) { - const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); - if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptEodTable, - limex->acceptEodCount, offset, callback, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask, + acceptEodTable, offset, callback, context)) { return MO_HALT_MATCHING; } @@ -228,8 +225,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { assert(q->state); assert(q_cur_type(q) == MQE_START); - STATE_T s = LOAD_STATE(q->state); - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T s = *(STATE_T *)q->state; + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { @@ -238,8 +235,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { const struct NFAAccept *acceptTable = getAcceptTable(limex); u64a offset = q_cur_offset(q); - if (PROCESS_ACCEPTS_NOSQUASH_FN(&foundAccepts, acceptTable, - limex->acceptCount, offset, q->cb, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask, + acceptTable, offset, q->cb, q->context)) { return MO_HALT_MATCHING; } @@ -250,7 +247,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { static really_inline STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { - return LOAD_STATE(onlyDs ? &impl->initDS : &impl->init); + return LOAD_FROM_ENG(onlyDs ? 
&impl->initDS : &impl->init); } static really_inline @@ -261,9 +258,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { static really_inline STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { assert(n < limex->topCount); - const STATE_T *topsptr = - (const STATE_T *)((const char *)limex + limex->topOffset); - STATE_T top = LOAD_STATE(&topsptr[n]); + const ENG_STATE_T *topsptr = + (const ENG_STATE_T *)((const char *)limex + limex->topOffset); + STATE_T top = LOAD_FROM_ENG(&topsptr[n]); return OR_STATE(top, state); } @@ -279,8 +276,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, DEBUG_PRINTF("expire estate at offset %llu\n", offset); - const STATE_T cyclics = - AND_STATE(LOAD_STATE(&ctx->s), LOAD_STATE(&limex->repeatCyclicMask)); + const STATE_T cyclics + = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclic states are on\n"); return; @@ -290,7 +287,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(&cyclics, cyclicState)) { + if (!TESTBIT_STATE(cyclics, cyclicState)) { continue; } @@ -310,14 +307,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, last_top, repeat->repeatMax); u64a adj = 0; /* if the cycle's tugs are active at repeat max, it is still alive */ - if (TESTBIT_STATE((const STATE_T *)&limex->accept, cyclicState) || - TESTBIT_STATE((const STATE_T *)&limex->acceptAtEOD, cyclicState)) { + if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || + TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); adj = 1; } else { - const STATE_T *tug_mask = - (const STATE_T *)((const char *)info + info->tugMaskOffset); - if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_STATE(tug_mask)))) { + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { DEBUG_PRINTF("tug possible - may still be inspected\n"); adj = 1; } @@ -339,37 +336,45 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset, ReportID report) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); - STATE_T accstate = AND_STATE(state, acceptMask); + const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept); + STATE_T accepts = AND_STATE(state, accept_mask); // Are we in an accept state? - if (ISZERO_STATE(accstate)) { + if (ISZERO_STATE(accepts)) { DEBUG_PRINTF("no accept states are on\n"); return 0; } - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate); + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts); DEBUG_PRINTF("looking for report %u\n", report); -#ifdef DEBUG - DEBUG_PRINTF("accept states that are on: "); - for (u32 i = 0; i < sizeof(STATE_T) * 8; i++) { - if (TESTBIT_STATE(&accstate, i)) printf("%u ", i); - } - printf("\n"); -#endif - - // Does one of our states match the given report ID? 
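The replacement below no longer scans a flat array of (state, externalId) pairs; each packed accept record either inlines a single report ID or points at a sentinel-terminated report list, which `limexAcceptHasReport` walks. A hedged sketch of that record layout, with invented names and `0xffffffff` standing in for the real invalid-report sentinel:

```cpp
#include <stdint.h>
#include <stdio.h>

#define INVALID_REPORT 0xffffffffu // hypothetical sentinel value

struct accept_rec {
    uint32_t single_report; // 1: 'reports' is the ID itself
    uint32_t reports;       // else: start index of a sentinel-terminated list
};

static bool accept_has_report(const accept_rec *a, const uint32_t *pool,
                              uint32_t report) {
    if (a->single_report) {
        return a->reports == report;
    }
    for (const uint32_t *r = pool + a->reports; *r != INVALID_REPORT; r++) {
        if (*r == report) {
            return true;
        }
    }
    return false;
}

int main() {
    const uint32_t pool[] = { 7, 9, INVALID_REPORT };
    accept_rec single = { 1, 42 };
    accept_rec multi = { 0, 0 }; // list begins at pool[0]
    printf("%d %d %d\n", accept_has_report(&single, pool, 42),
           accept_has_report(&multi, pool, 9),
           accept_has_report(&multi, pool, 8)); // prints: 1 1 0
    return 0;
}
```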
const struct NFAAccept *acceptTable = getAcceptTable(limex); - for (u32 i = 0; i < limex->acceptCount; i++) { - const struct NFAAccept *a = &acceptTable[i]; - DEBUG_PRINTF("checking idx=%u, externalId=%u\n", a->state, - a->externalId); - if (a->externalId == report && TESTBIT_STATE(&accstate, a->state)) { - DEBUG_PRINTF("report is on!\n"); - return 1; + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. + for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { + CHUNK_T chunk = chunks[i]; + while (chunk != 0) { + u32 bit = FIND_AND_CLEAR_FN(&chunk); + u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); + u32 idx = local_idx + base_index; + assert(idx < limex->acceptCount); + const struct NFAAccept *a = &acceptTable[idx]; + DEBUG_PRINTF("state %u is on, report list at %u\n", + bit + i * (u32)sizeof(chunk) * 8, a->reports); + + if (limexAcceptHasReport((const char *)limex, a, report)) { + DEBUG_PRINTF("report %u is on\n", report); + return 1; + } } + base_index += POPCOUNT_FN(mask_chunks[i]); } return 0; @@ -381,7 +386,7 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset) { assert(limex); - const STATE_T acceptMask = LOAD_STATE(&limex->accept); + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T accstate = AND_STATE(state, acceptMask); // Are we in an accept state? @@ -396,7 +401,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, } #undef TESTEOD_FN -#undef TESTEOD_REV_FN #undef REPORTCURRENT_FN #undef EXPIRE_ESTATE_FN #undef LIMEX_INACCEPT_FN @@ -407,8 +411,6 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef CONTEXT_T #undef IMPL_NFA_T #undef ONES_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef OR_STATE #undef ANDNOT_STATE @@ -416,11 +418,14 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef TESTBIT_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE +#undef PROCESS_ACCEPTS_IMPL_FN #undef PROCESS_ACCEPTS_FN #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef SQUASH_UNTUG_BR_FN #undef GET_NFA_REPEAT_INFO_FN -#undef SIZE -#undef STATE_T -#undef INLINE_ATTR +#undef CHUNK_T +#undef FIND_AND_CLEAR_FN +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN +#undef NUM_STATE_CHUNKS diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 77754e0b..ba4d0f0d 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -37,10 +37,10 @@ #include "limex_internal.h" #include "limex_limits.h" #include "nfa_build_util.h" +#include "nfagraph/ng_dominators.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex_accel.h" #include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_squash.h" #include "nfagraph/ng_util.h" #include "ue2common.h" @@ -64,12 +64,21 @@ #include #include #include + #include +#include using namespace std; +using boost::adaptors::map_values; namespace ue2 { +/** + * \brief Special state index value meaning that the vertex will not + * participate in an (NFA/DFA/etc) implementation. 
+ */ +static constexpr u32 NO_STATE = ~0; + namespace { struct precalcAccel { @@ -87,7 +96,7 @@ struct precalcAccel { struct limex_accel_info { ue2::unordered_set accelerable; map precalc; - ue2::unordered_map > friends; + ue2::unordered_map> friends; ue2::unordered_map accel_map; }; @@ -130,7 +139,7 @@ struct build_info { const vector &ri, const map &rsmi, const map &smi, - const map &ti, const set &zi, + const map> &ti, const set &zi, bool dai, bool sci, const CompileContext &cci, u32 nsi) : h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi), @@ -156,7 +165,7 @@ struct build_info { map reportSquashMap; map squashMap; - const map &tops; + const map> &tops; ue2::unordered_set tugs; map br_cyclic; const set &zombies; @@ -485,7 +494,7 @@ void nfaFindAccelSchemes(const NGHolder &g, // We want to skip any vertices that don't lead to at least one other // (self-loops don't count) vertex. if (!has_proper_successor(v, g)) { - DEBUG_PRINTF("skipping vertex %u\n", g[v].index); + DEBUG_PRINTF("skipping vertex %zu\n", g[v].index); continue; } @@ -493,7 +502,7 @@ void nfaFindAccelSchemes(const NGHolder &g, AccelScheme as; if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { - DEBUG_PRINTF("graph vertex %u is accelerable with offset %u.\n", + DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n", g[v].index, as.offset); (*out)[v] = as; } @@ -505,7 +514,7 @@ struct fas_visitor : public boost::default_bfs_visitor { ue2::unordered_map *out_in) : accel_map(am_in), out(out_in) {} - void discover_vertex(NFAVertex v, const NFAGraph &) { + void discover_vertex(NFAVertex v, const NGHolder &) { if (accel_map.find(v) != accel_map.end()) { (*out)[v] = accel_map.find(v)->second; } @@ -518,36 +527,40 @@ struct fas_visitor : public boost::default_bfs_visitor { }; static -void filterAccelStates(NGHolder &g, const map &tops, +void filterAccelStates(NGHolder &g, const map> &tops, ue2::unordered_map *accel_map) { /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything * else should be ditched. We use a simple BFS to choose accel states near * the start. */ - // Temporarily wire start to each top for the BFS. - vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + for (const auto &vv : tops | map_values) { + for (NFAVertex v : vv) { + if (!edge(g.start, v, g).second) { + tempEdges.push_back(add_edge(g.start, v, g).first); + } + } + } // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { - auto e = add_edge(g.start, g.startDs, g).first; - topEdges.push_back(e); // Remove edge later. + NFAEdge e = add_edge(g.start, g.startDs, g); + tempEdges.push_back(e); // Remove edge later. 
} ue2::unordered_map out; try { vector colour(num_vertices(g)); - breadth_first_search( - g.g, g.start, + boost::breadth_first_search(g, g.start, visitor(fas_visitor(*accel_map, &out)) - .color_map(make_iterator_property_map( - colour.begin(), get(&NFAGraphVertexProps::index, g.g)))); + .color_map(make_iterator_property_map(colour.begin(), + get(vertex_index, g)))); } catch (fas_visitor *) { ; /* found max accel_states */ } - remove_edges(topEdges, g); + remove_edges(tempEdges, g); assert(out.size() <= NFA_MAX_ACCEL_STATES); accel_map->swap(out); @@ -614,7 +627,7 @@ void fillAccelInfo(build_info &bi) { /* for each subset of the accel keys need to find an accel scheme */ assert(astates.size() < 32); - sort(astates.begin(), astates.end(), make_index_ordering(g)); + sort(astates.begin(), astates.end()); for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { DEBUG_PRINTF("saving info for accel %u\n", i); @@ -701,9 +714,157 @@ void fillAccelInfo(build_info &bi) { /** The AccelAux structure has large alignment specified, and this makes some * compilers do odd things unless we specify a custom allocator. */ -typedef vector > +typedef vector> AccelAuxVector; +#define IMPOSSIBLE_ACCEL_MASK (~0U) + +static +u32 getEffectiveAccelStates(const build_info &args, + u32 active_accel_mask, + const vector &accelStates) { + /* accelStates is indexed by the acceleration bit index and contains a + * reference to the original vertex & state_id */ + + /* Cases to consider: + * + * 1: Accel states a and b are on and b can squash a + * --> we can ignore a. This will result in a no longer being accurately + * modelled - we may miss escapes turning it off and we may also miss + * its successors being activated. + * + * 2: Accel state b is on but accel state a is off and a is .* and must be + * seen before b is reached (and would not be covered by (1)) + * --> if a is squashable (or may die unexpectedly) we should continue + * as is + * --> if a is not squashable we can treat this as a+b or as a no accel, + * impossible case + * --> this case could be extended to handle non dot reaches by + * effectively creating something similar to squash masks for the + * reverse graph + * + * + * Other cases: + * + * 3: Accel states a and b are on but have incompatible reaches + * --> we should treat this as an impossible case. Actually, this case + * is unlikely to arise as we pick states with wide reaches to + * accelerate so an empty intersection is unlikely. + * + * Note: we need to be careful when dealing with accel states corresponding + * to bounded repeat cyclics - they may 'turn off' based on a max bound and + * so we may still require on earlier states to be accurately modelled. 
+ */ + const NGHolder &h = args.h; + auto dom_map = findDominators(h); + + /* map from accel_id to mask of accel_ids that it is dominated by */ + vector dominated_by(accelStates.size()); + + map accel_id_map; + for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) { + NFAVertex v = accelStates[accel_id].v; + accel_id_map[v] = accel_id; + } + + /* Note: we want a slightly less strict defn of dominate as skip edges + * prevent .* 'truly' dominating */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + assert(accel_id < accelStates.size()); + NFAVertex v = accelStates[accel_id].v; + while (dom_map[v]) { + v = dom_map[v]; + if (contains(accel_id_map, v)) { + dominated_by[accel_id] |= 1U << accel_id_map[v]; + } + /* TODO: could also look at inv_adj vertices to handle fan-in */ + for (NFAVertex a : adjacent_vertices_range(v, h)) { + if (a == v || !contains(accel_id_map, a) + || a == accelStates[accel_id].v /* not likely */) { + continue; + } + if (!is_subset_of(h[v].reports, h[a].reports)) { + continue; + } + auto v_succ = succs(v, h); + auto a_succ = succs(a, h); + if (is_subset_of(v_succ, a_succ)) { + dominated_by[accel_id] |= 1U << accel_id_map[a]; + } + } + } + } + + u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + NFAVertex v = accelStates[accel_id].v; + u32 state_id = accelStates[accel_id].state; + assert(contains(args.accel.accelerable, v)); + if (!h[v].char_reach.all()) { + may_turn_off |= 1U << accel_id; + continue; + } + if (contains(args.br_cyclic, v) + && args.br_cyclic.at(v).repeatMax != depth::infinity()) { + may_turn_off |= 1U << accel_id; + continue; + } + for (const auto &s_mask : args.squashMap | map_values) { + if (!s_mask.test(state_id)) { + may_turn_off |= 1U << accel_id; + break; + } + } + for (const auto &s_mask : args.reportSquashMap | map_values) { + if (!s_mask.test(state_id)) { + may_turn_off |= 1U << accel_id; + break; + } + } + } + + /* Case 1: */ + u32 ignored = 0; + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id_b = findAndClearLSB_32(&local_accel_mask); + NFAVertex v = accelStates[accel_id_b].v; + if (!contains(args.squashMap, v)) { + continue; + } + assert(!contains(args.br_cyclic, v) + || args.br_cyclic.at(v).repeatMax == depth::infinity()); + NFAStateSet squashed = args.squashMap.at(v); + squashed.flip(); /* default sense for mask of survivors */ + + for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) { + u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2); + if (squashed.test(accelStates[accel_id_a].state)) { + ignored |= 1U << accel_id_a; + } + } + } + + /* Case 2: */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + + u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off; + if ((stuck_dominators & active_accel_mask) != stuck_dominators) { + DEBUG_PRINTF("only %08x on, but we require %08x\n", + active_accel_mask, stuck_dominators); + return IMPOSSIBLE_ACCEL_MASK; + } + } + + if (ignored) { + DEBUG_PRINTF("in %08x, ignoring %08x\n", active_accel_mask, ignored); + } + + return active_accel_mask & ~ignored; +} + static void buildAccel(const build_info &args, NFAStateSet &accelMask, NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, @@ -735,11 +896,22 @@ void buildAccel(const 
build_info &args, NFAStateSet &accelMask, // Set up a unioned AccelBuild for every possible combination of the set // bits in accelStates. vector accelOuts(accelCount); + vector effective_accel_set; + effective_accel_set.push_back(0); /* empty is effectively empty */ + for (u32 i = 1; i < accelCount; i++) { - for (u32 j = 0, j_end = accelStates.size(); j < j_end; j++) { - if (i & (1U << j)) { - combineAccel(accelStates[j], accelOuts[i]); - } + u32 effective_i = getEffectiveAccelStates(args, i, accelStates); + effective_accel_set.push_back(effective_i); + + if (effective_i == IMPOSSIBLE_ACCEL_MASK) { + DEBUG_PRINTF("this combination of accel states is not possible\n"); + accelOuts[i].stop1 = CharReach::dot(); + continue; + } + + while (effective_i) { + u32 base_accel_state = findAndClearLSB_32(&effective_i); + combineAccel(accelStates[base_accel_state], accelOuts[i]); } minimiseAccel(accelOuts[i]); } @@ -759,29 +931,32 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, for (u32 i = 1; i < accelCount; i++) { memset(&aux, 0, sizeof(aux)); - NFAStateSet states(args.num_states); - for (u32 j = 0; j < accelStates.size(); j++) { - if (i & (1U << j)) { - states.set(accelStates[j].state); - } - } + NFAStateSet effective_states(args.num_states); + u32 effective_i = effective_accel_set[i]; AccelInfo ainfo; ainfo.double_offset = accelOuts[i].offset; ainfo.double_stop1 = accelOuts[i].stop1; ainfo.double_stop2 = accelOuts[i].stop2; - if (contains(accel.precalc, states)) { - const precalcAccel &precalc = accel.precalc.at(states); - if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { - ainfo.ma_len1 = precalc.ma_info.len1; - ainfo.ma_len2 = precalc.ma_info.len2; - ainfo.multiaccel_offset = precalc.ma_info.offset; - ainfo.multiaccel_stops = precalc.ma_info.cr; - ainfo.ma_type = precalc.ma_info.type; - } else { - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; + if (effective_i != IMPOSSIBLE_ACCEL_MASK) { + while (effective_i) { + u32 base_accel_id = findAndClearLSB_32(&effective_i); + effective_states.set(accelStates[base_accel_id].state); + } + + if (contains(accel.precalc, effective_states)) { + const auto &precalc = accel.precalc.at(effective_states); + if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { + ainfo.ma_len1 = precalc.ma_info.len1; + ainfo.ma_len2 = precalc.ma_info.len2; + ainfo.multiaccel_offset = precalc.ma_info.offset; + ainfo.multiaccel_stops = precalc.ma_info.cr; + ainfo.ma_type = precalc.ma_info.type; + } else { + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; + } } } @@ -824,14 +999,105 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, } static -void buildAccepts(const build_info &args, NFAStateSet &acceptMask, - NFAStateSet &acceptEodMask, vector &accepts, - vector &acceptsEod, vector &squash) { +u32 addSquashMask(const build_info &args, const NFAVertex &v, + vector &squash) { + auto sit = args.reportSquashMap.find(v); + if (sit == args.reportSquashMap.end()) { + return MO_INVALID_IDX; + } + + // This state has a squash mask. Paw through the existing vector to + // see if we've already seen it, otherwise add a new one. 
+ auto it = find(squash.begin(), squash.end(), sit->second); + if (it != squash.end()) { + return verify_u32(distance(squash.begin(), it)); + } + u32 idx = verify_u32(squash.size()); + squash.push_back(sit->second); + return idx; +} + +static +u32 addReports(const flat_set &r, vector &reports, + unordered_map, u32> &reportListCache) { + assert(!r.empty()); + + vector my_reports(begin(r), end(r)); + my_reports.push_back(MO_INVALID_IDX); // sentinel + + auto cache_it = reportListCache.find(my_reports); + if (cache_it != end(reportListCache)) { + u32 offset = cache_it->second; + DEBUG_PRINTF("reusing cached report list at %u\n", offset); + return offset; + } + + auto it = search(begin(reports), end(reports), begin(my_reports), + end(my_reports)); + if (it != end(reports)) { + u32 offset = verify_u32(distance(begin(reports), it)); + DEBUG_PRINTF("reusing found report list at %u\n", offset); + return offset; + } + + u32 offset = verify_u32(reports.size()); + insert(&reports, reports.end(), my_reports); + reportListCache.emplace(move(my_reports), offset); + return offset; +} + +static +void buildAcceptsList(const build_info &args, + unordered_map, u32> &reports_cache, + vector &verts, vector &accepts, + vector &reports, vector &squash) { + if (verts.empty()) { + return; + } + + DEBUG_PRINTF("building accept lists for %zu states\n", verts.size()); + + auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) { + u32 a_state = args.state_ids.at(a); + u32 b_state = args.state_ids.at(b); + assert(a_state != b_state || a == b); + return a_state < b_state; + }; + + sort(begin(verts), end(verts), cmp_state_id); + + const NGHolder &h = args.h; + for (const auto &v : verts) { + DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v), + as_string_list(h[v].reports).c_str()); + NFAAccept a; + memset(&a, 0, sizeof(a)); + assert(!h[v].reports.empty()); + if (h[v].reports.size() == 1) { + a.single_report = 1; + a.reports = *h[v].reports.begin(); + } else { + a.single_report = 0; + a.reports = addReports(h[v].reports, reports, reports_cache); + } + a.squash = addSquashMask(args, v, squash); + accepts.push_back(move(a)); + } +} + +static +void buildAccepts(const build_info &args, + unordered_map, u32> &reports_cache, + NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, + vector &accepts, vector &acceptsEod, + vector &reports, vector &squash) { const NGHolder &h = args.h; acceptMask.resize(args.num_states); acceptEodMask.resize(args.num_states); + vector verts_accept, verts_accept_eod; + for (auto v : vertices_range(h)) { u32 state_id = args.state_ids.at(v); @@ -839,41 +1105,20 @@ void buildAccepts(const build_info &args, NFAStateSet &acceptMask, continue; } - u32 squashMaskOffset = MO_INVALID_IDX; - auto sit = args.reportSquashMap.find(v); - if (sit != args.reportSquashMap.end()) { - // This state has a squash mask. Paw through the existing vector to - // see if we've already seen it, otherwise add a new one. - auto it = find(squash.begin(), squash.end(), sit->second); - if (it != squash.end()) { - squashMaskOffset = verify_u32(distance(squash.begin(), it)); - } else { - squashMaskOffset = verify_u32(squash.size()); - squash.push_back(sit->second); - } - } - - // Add an accept (or acceptEod) per report ID. 
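`addReports` above pools report lists in one flat vector: each list is appended with a sentinel terminator, and both a cache and a direct search over the existing pool let identical lists share an offset. A compact sketch of the pooling idea (names are illustrative):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

static const uint32_t SENTINEL = 0xffffffffu;

// Store 'list' once in 'pool' and return its offset; identical lists that
// are already present (sentinel included) are reused rather than duplicated.
static uint32_t add_report_list(std::vector<uint32_t> &pool,
                                std::vector<uint32_t> list) {
    list.push_back(SENTINEL); // terminate, so lookups match whole lists
    auto it = std::search(pool.begin(), pool.end(), list.begin(), list.end());
    if (it != pool.end()) {
        return (uint32_t)std::distance(pool.begin(), it); // reuse
    }
    uint32_t offset = (uint32_t)pool.size();
    pool.insert(pool.end(), list.begin(), list.end());
    return offset;
}

int main() {
    std::vector<uint32_t> pool;
    uint32_t a = add_report_list(pool, {1, 2});
    uint32_t b = add_report_list(pool, {1, 2}); // deduplicated
    std::printf("%u %u (pool size %zu)\n", a, b, pool.size()); // 0 0 (3)
    return 0;
}
```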
-
-        vector<NFAAccept> *accepts_out;
         if (edge(v, h.accept, h).second) {
             acceptMask.set(state_id);
-            accepts_out = &accepts;
+            verts_accept.push_back(v);
         } else {
             assert(edge(v, h.acceptEod, h).second);
             acceptEodMask.set(state_id);
-            accepts_out = &acceptsEod;
-        }
-
-        for (auto report : h[v].reports) {
-            accepts_out->push_back(NFAAccept());
-            NFAAccept &a = accepts_out->back();
-            a.state = state_id;
-            a.externalId = report;
-            a.squash = squashMaskOffset;
-            DEBUG_PRINTF("Accept: state=%u, externalId=%u\n", state_id, report);
+            verts_accept_eod.push_back(v);
         }
     }
+
+    buildAcceptsList(args, reports_cache, verts_accept, accepts, reports,
+                     squash);
+    buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports,
+                     squash);
 }
 
 static
@@ -884,19 +1129,20 @@ void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) {
     u32 numMasks = args.tops.rbegin()->first + 1; // max mask index
     DEBUG_PRINTF("we have %u top masks\n", numMasks);
 
-    assert(numMasks <= NFA_MAX_TOP_MASKS);
 
     topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes
 
     for (const auto &m : args.tops) {
         u32 mask_idx = m.first;
-        u32 state_id = args.state_ids.at(m.second);
-        DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx);
+        for (NFAVertex v : m.second) {
+            u32 state_id = args.state_ids.at(v);
+            DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx);
 
-        assert(mask_idx < numMasks);
-        assert(state_id != NO_STATE);
+            assert(mask_idx < numMasks);
+            assert(state_id != NO_STATE);
 
-        topMasks[mask_idx].set(state_id);
+            topMasks[mask_idx].set(state_id);
+        }
     }
 }
 
@@ -1146,36 +1392,12 @@ struct ExceptionProto {
     }
 };
 
-static
-u32 getReportListIndex(const flat_set<ReportID> &reports,
-                       vector<ReportID> &exceptionReports,
-                       map<vector<ReportID>, u32> &reportListCache) {
-    if (reports.empty()) {
-        return MO_INVALID_IDX;
-    }
-
-    const vector<ReportID> r(reports.begin(), reports.end());
-
-    auto it = reportListCache.find(r);
-    if (it != reportListCache.end()) {
-        u32 idx = it->second;
-        assert(idx < exceptionReports.size());
-        assert(equal(r.begin(), r.end(), exceptionReports.begin() + idx));
-        return idx;
-    }
-
-    u32 idx = verify_u32(exceptionReports.size());
-    reportListCache[r] = idx;
-    exceptionReports.insert(exceptionReports.end(), r.begin(), r.end());
-    exceptionReports.push_back(MO_INVALID_IDX); // terminator
-    return idx;
-}
-
 static
 u32 buildExceptionMap(const build_info &args,
+                      unordered_map<vector<ReportID>, u32> &reports_cache,
                       const ue2::unordered_set<NFAVertex> &exceptional,
-                      map<ExceptionProto, vector<u32> > &exceptionMap,
-                      vector<ReportID> &exceptionReports) {
+                      map<ExceptionProto, vector<u32>> &exceptionMap,
+                      vector<ReportID> &reportList) {
     const NGHolder &h = args.h;
     const u32 num_states = args.num_states;
     u32 exceptionCount = 0;
@@ -1193,10 +1415,6 @@ u32 buildExceptionMap(const build_info &args,
         }
     }
 
-    // We track report lists that have already been written into the global
-    // list in case we can reuse them.
-    map<vector<ReportID>, u32> reportListCache;
-
     for (auto v : vertices_range(h)) {
         const u32 i = args.state_ids.at(v);
 
@@ -1215,8 +1433,12 @@ u32 buildExceptionMap(const build_info &args,
             DEBUG_PRINTF("state %u is exceptional due to accept "
                          "(%zu reports)\n", i, reports.size());
 
-            e.reports_index =
-                getReportListIndex(reports, exceptionReports, reportListCache);
+            if (reports.empty()) {
+                e.reports_index = MO_INVALID_IDX;
+            } else {
+                e.reports_index =
+                    addReports(reports, reportList, reports_cache);
+            }
 
             // We may be applying a report squash too.
auto mi = args.reportSquashMap.find(v); @@ -1438,7 +1660,8 @@ struct Factory { sizeof(limex->init), stateSize, repeatscratchStateSize, repeatStreamState); - size_t scratchStateSize = sizeof(limex->init); + size_t scratchStateSize = NFATraits::scratch_state_size; + if (repeatscratchStateSize) { scratchStateSize = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); @@ -1641,9 +1864,10 @@ struct Factory { } static - void writeExceptions(const map > &exceptionMap, - const vector &repeatOffsets, - implNFA_t *limex, const u32 exceptionsOffset) { + void writeExceptions(const map> &exceptionMap, + const vector &repeatOffsets, implNFA_t *limex, + const u32 exceptionsOffset, + const u32 reportListOffset) { DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); @@ -1670,7 +1894,12 @@ struct Factory { exception_t &e = etable[ecount]; maskSetBits(e.squash, proto.squash_states); maskSetBits(e.successors, proto.succ_states); - e.reports = proto.reports_index; + if (proto.reports_index == MO_INVALID_IDX) { + e.reports = MO_INVALID_IDX; + } else { + e.reports = reportListOffset + + proto.reports_index * sizeof(ReportID); + } e.hasSquash = verify_u8(proto.squash); e.trigger = verify_u8(proto.trigger); u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX @@ -1789,7 +2018,9 @@ struct Factory { const vector &acceptsEod, const vector &squash, implNFA_t *limex, const u32 acceptsOffset, const u32 acceptsEodOffset, - const u32 squashOffset) { + const u32 squashOffset, const u32 reportListOffset) { + char *limex_base = (char *)limex; + DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n", acceptsOffset, acceptsEodOffset, squashOffset); @@ -1797,27 +2028,39 @@ struct Factory { maskSetBits(limex->accept, acceptMask); maskSetBits(limex->acceptAtEOD, acceptEodMask); + // Transforms the indices (report list, squash mask) into offsets + // relative to the base of the limex. + auto transform_offset_fn = [&](NFAAccept a) { + if (!a.single_report) { + a.reports = reportListOffset + a.reports * sizeof(ReportID); + } + a.squash = squashOffset + a.squash * sizeof(tableRow_t); + return a; + }; + // Write accept table. limex->acceptOffset = acceptsOffset; limex->acceptCount = verify_u32(accepts.size()); DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size()); - NFAAccept *acceptsTable = (NFAAccept *)((char *)limex + acceptsOffset); + NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); assert(ISALIGNED(acceptsTable)); - copy(accepts.begin(), accepts.end(), acceptsTable); + transform(accepts.begin(), accepts.end(), acceptsTable, + transform_offset_fn); // Write eod accept table. limex->acceptEodOffset = acceptsEodOffset; limex->acceptEodCount = verify_u32(acceptsEod.size()); DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size()); - NFAAccept *acceptsEodTable = (NFAAccept *)((char *)limex + acceptsEodOffset); + NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); assert(ISALIGNED(acceptsEodTable)); - copy(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable); + transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, + transform_offset_fn); // Write squash mask table. 
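An aside on the arithmetic in `transform_offset_fn` above: indices become byte offsets relative to the start of the engine structure. With a 4-byte `ReportID`, a hypothetical `reportListOffset` of 1024 and a list index of 3, the stored value is 1024 + 3 * 4 = 1036, which the runtime resolves with plain pointer arithmetic, along the lines of:

    // sketch: resolving an offset-encoded report list at scan time
    const ReportID *reports =
        (const ReportID *)((const char *)limex + a.reports);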
limex->squashCount = verify_u32(squash.size()); limex->squashOffset = squashOffset; DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size()); - tableRow_t *mask = (tableRow_t *)((char *)limex + squashOffset); + tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); assert(ISALIGNED(mask)); for (size_t i = 0, end = squash.size(); i < end; i++) { maskSetBits(mask[i], squash[i]); @@ -1854,15 +2097,12 @@ struct Factory { } static - void writeExceptionReports(const vector &reports, - implNFA_t *limex, - const u32 exceptionReportsOffset) { - DEBUG_PRINTF("exceptionReportsOffset=%u\n", exceptionReportsOffset); - - limex->exReportOffset = exceptionReportsOffset; - assert(ISALIGNED_N((char *)limex + exceptionReportsOffset, + void writeReportList(const vector &reports, implNFA_t *limex, + const u32 reportListOffset) { + DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset); + assert(ISALIGNED_N((char *)limex + reportListOffset, alignof(ReportID))); - copy_bytes((char *)limex + exceptionReportsOffset, reports); + copy_bytes((char *)limex + reportListOffset, reports); } static @@ -1881,16 +2121,21 @@ struct Factory { repeatSize += repeats[i].second; } + // We track report lists that have already been written into the global + // list in case we can reuse them. + unordered_map, u32> reports_cache; + ue2::unordered_set exceptional; u32 shiftCount = findBestNumOfVarShifts(args); assert(shiftCount); u32 maxShift = findMaxVarShift(args, shiftCount); findExceptionalTransitions(args, exceptional, maxShift); - map > exceptionMap; - vector exceptionReports; - u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap, - exceptionReports); + map> exceptionMap; + vector reportList; + + u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, + exceptionMap, reportList); assert(exceptionCount <= args.num_states); @@ -1907,8 +2152,8 @@ struct Factory { NFAStateSet acceptMask, acceptEodMask; vector accepts, acceptsEod; vector squash; - buildAccepts(args, acceptMask, acceptEodMask, accepts, acceptsEod, - squash); + buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, + acceptsEod, reportList, squash); // Build all our accel info. NFAStateSet accelMask, accelFriendsMask; @@ -1949,8 +2194,8 @@ struct Factory { const u32 exceptionsOffset = offset; offset += sizeof(exception_t) * exceptionCount; - const u32 exceptionReportsOffset = offset; - offset += sizeof(ReportID) * exceptionReports.size(); + const u32 reportListOffset = offset; + offset += sizeof(ReportID) * reportList.size(); const u32 repeatOffsetsOffset = offset; offset += sizeof(u32) * args.repeats.size(); @@ -1977,7 +2222,8 @@ struct Factory { limex, accelTableOffset, accelAuxOffset); writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, - limex, acceptsOffset, acceptsEodOffset, squashOffset); + limex, acceptsOffset, acceptsEodOffset, squashOffset, + reportListOffset); limex->shiftCount = shiftCount; writeShiftMasks(args, limex); @@ -1985,14 +2231,15 @@ struct Factory { // Determine the state required for our state vector. findStateSize(args, limex); - writeExceptionReports(exceptionReports, limex, exceptionReportsOffset); + writeReportList(reportList, limex, reportListOffset); // Repeat structures and offset table. 
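Exceptions point into the same shared report list, but keep `MO_INVALID_IDX` as a "no reports" marker, so a consumer must distinguish the two cases before doing pointer arithmetic. The dump code later in this diff does exactly this; a sketch of the pattern:

    if (e->reports == MO_INVALID_IDX) {
        // exception fires no reports
    } else {
        const ReportID *r =
            (const ReportID *)((const char *)limex + e->reports);
        for (; *r != MO_INVALID_IDX; r++) {
            // fire report *r
        }
    }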
vector repeatOffsets; writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, repeatsOffset); - writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset); + writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset, + reportListOffset); writeLimexMasks(args, limex); @@ -2021,13 +2268,6 @@ struct Factory { sz = 32; } - // Special case: with SIMD available, we definitely prefer using - // 128-bit NFAs over 64-bit ones given the paucity of registers - // available. - if (sz == 64) { - sz = 128; - } - if (args.cc.grey.nfaForceSize) { sz = args.cc.grey.nfaForceSize; } @@ -2067,9 +2307,12 @@ struct scoreNfa { typedef u_##mlt_size tableRow_t; \ typedef NFAException##mlt_size exception_t; \ static const size_t maxStates = mlt_size; \ + static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ + : sizeof(tableRow_t); \ }; MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(64) MAKE_LIMEX_TRAITS(128) MAKE_LIMEX_TRAITS(256) MAKE_LIMEX_TRAITS(384) @@ -2080,19 +2323,18 @@ MAKE_LIMEX_TRAITS(512) #ifndef NDEBUG // Some sanity tests, called by an assertion in generate(). static UNUSED -bool isSane(const NGHolder &h, const map &tops, +bool isSane(const NGHolder &h, const map> &tops, const ue2::unordered_map &state_ids, u32 num_states) { ue2::unordered_set seen; ue2::unordered_set top_starts; - for (const auto &m : tops) { - top_starts.insert(m.second); + for (const auto &vv : tops | map_values) { + insert(&top_starts, vv); } for (auto v : vertices_range(h)) { if (!contains(state_ids, v)) { - DEBUG_PRINTF("no entry for vertex %u in state map\n", - h[v].index); + DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index); return false; } const u32 i = state_ids.at(v); @@ -2100,8 +2342,7 @@ bool isSane(const NGHolder &h, const map &tops, continue; } - DEBUG_PRINTF("checking vertex %u (state %u)\n", h[v].index, - i); + DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i); if (i >= num_states || contains(seen, i)) { DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states); @@ -2111,7 +2352,7 @@ bool isSane(const NGHolder &h, const map &tops, // All our states should be reachable and have a state assigned. if (h[v].char_reach.none()) { - DEBUG_PRINTF("vertex %u has empty reachability\n", h[v].index); + DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index); return false; } @@ -2119,7 +2360,7 @@ bool isSane(const NGHolder &h, const map &tops, // must have at least one predecessor that is not itself. 
if (v != h.start && v != h.startDs && !contains(top_starts, v) && !proper_in_degree(v, h)) { - DEBUG_PRINTF("vertex %u has no pred\n", h[v].index); + DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index); return false; } } @@ -2150,7 +2391,7 @@ aligned_unique_ptr generate(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, bool do_accel, bool stateCompression, @@ -2222,7 +2463,7 @@ u32 countAccelStates(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, const CompileContext &cc) { const u32 num_states = max_state(states) + 1; diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 62a07e10..21cb7608 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -71,7 +71,7 @@ aligned_unique_ptr generate(NGHolder &g, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, bool do_accel, bool stateCompression, @@ -89,7 +89,7 @@ u32 countAccelStates(NGHolder &h, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, const CompileContext &cc); diff --git a/src/nfa/limex_context.h b/src/nfa/limex_context.h index 74f22c32..60d20879 100644 --- a/src/nfa/limex_context.h +++ b/src/nfa/limex_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,16 @@ // Runtime context structures. +/* Note: The size of the context structures may vary from platform to platform + * (notably, for the Limex64 structure). As a result, information based on the + * size and other detail of these structures should not be written into the + * bytecode -- really, the details of the structure should not be accessed by + * the ue2 compile side at all. + */ +#ifdef __cplusplus +#error ue2 runtime only file +#endif + /* cached_estate/esucc etc... 
* * If the exception state matches the cached_estate we will apply @@ -66,6 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ }; GEN_CONTEXT_STRUCT(32, u32) +#ifdef ARCH_64_BIT +GEN_CONTEXT_STRUCT(64, u64a) +#else +GEN_CONTEXT_STRUCT(64, m128) +#endif GEN_CONTEXT_STRUCT(128, m128) GEN_CONTEXT_STRUCT(256, m256) GEN_CONTEXT_STRUCT(384, m384) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index c52adc46..852639ea 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -35,9 +35,10 @@ #include "limex_internal.h" #include "nfa_dump_internal.h" #include "ue2common.h" +#include "util/charreach.h" #include "util/dump_charclass.h" #include "util/dump_mask.h" -#include "util/charreach.h" +#include "util/dump_util.h" #include #include @@ -70,6 +71,10 @@ template<> struct limex_traits { static const u32 size = 128; typedef NFAException128 exception_type; }; +template<> struct limex_traits { + static const u32 size = 64; + typedef NFAException64 exception_type; +}; template<> struct limex_traits { static const u32 size = 32; typedef NFAException32 exception_type; @@ -82,7 +87,7 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) { template static -u32 rank_in_mask(mask_t mask, u32 bit) { +u32 rank_in_mask(const mask_t &mask, u32 bit) { assert(bit < 8 * sizeof(mask)); u32 chunks[sizeof(mask)/sizeof(u32)]; @@ -176,26 +181,40 @@ void dumpAccel(const limex_type *limex, FILE *f) { } } +static +void dumpAcceptList(const char *limex_base, const struct NFAAccept *accepts, + u32 acceptCount, FILE *f) { + for (u32 i = 0; i < acceptCount; i++) { + const NFAAccept &a = accepts[i]; + if (a.single_report) { + fprintf(f, " idx %u fires single report %u\n", i, a.reports); + continue; + } + fprintf(f, " idx %u fires report list %u:", i, a.reports); + const ReportID *report = (const ReportID *)(limex_base + a.reports); + for (; *report != MO_INVALID_IDX; report++) { + fprintf(f, " %u", *report); + } + fprintf(f, "\n"); + } +} + template static void dumpAccepts(const limex_type *limex, FILE *f) { - u32 acceptCount = limex->acceptCount; - u32 acceptEodCount = limex->acceptEodCount; + const char *limex_base = (const char *)limex; + + const u32 acceptCount = limex->acceptCount; + const u32 acceptEodCount = limex->acceptEodCount; fprintf(f, "\n%u accepts.\n", acceptCount); - const struct NFAAccept *accepts - = (const struct NFAAccept *)((const char *)limex + limex->acceptOffset); - for (u32 i = 0; i < acceptCount; i++) { - fprintf(f, " state %u fires report %u\n", accepts[i].state, - accepts[i].externalId); - } + const auto *accepts = + (const struct NFAAccept *)(limex_base + limex->acceptOffset); + dumpAcceptList(limex_base, accepts, acceptCount, f); fprintf(f, "\n%u accepts at EOD.\n", acceptEodCount); - accepts = (const struct NFAAccept *)((const char *)limex - + limex->acceptEodOffset); - for (u32 i = 0; i < acceptEodCount; i++) { - fprintf(f, " state %u fires report %u\n", accepts[i].state, - accepts[i].externalId); - } + const auto *accepts_eod = + (const struct NFAAccept *)(limex_base + limex->acceptEodOffset); + dumpAcceptList(limex_base, accepts_eod, acceptEodCount, f); fprintf(f, "\n"); } @@ -222,20 +241,15 @@ getExceptionTable(const limex_type *limex) { ((const char *)limex + limex->exceptionOffset); } -template -static -const ReportID *getReportList(const limex_type *limex) { - return (const ReportID *)((const char *)limex + limex->exReportOffset); -} - template static void dumpLimexExceptions(const limex_type *limex, FILE *f) { const typename 
limex_traits::exception_type *e = getExceptionTable(limex); - const ReportID *reports = getReportList(limex); const u32 size = limex_traits::size; + const char *limex_base = (const char *)limex; + fprintf(f, "\n"); for (u32 i = 0; i < limex->exceptionCount; i++) { fprintf(f, "exception %u: hasSquash=%u, reports offset=%u\n", @@ -251,7 +265,7 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) { if (e[i].reports == MO_INVALID_IDX) { fprintf(f, " \n"); } else { - const ReportID *r = reports + e[i].reports; + const ReportID *r = (const ReportID *)(limex_base + e[i].reports); while (*r != MO_INVALID_IDX) { fprintf(f, " %u", *r++); } @@ -459,36 +473,32 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { } } -#define DUMP_TEXT_FN(ddf_n) \ - void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \ - dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \ - } - -#define DUMP_DOT_FN(ddf_n) \ - void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \ - UNUSED const string &base) { \ - const LimExNFA##ddf_n *limex = \ - (const LimExNFA##ddf_n *)getImplNfa(nfa); \ +#define LIMEX_DUMP_FN(size) \ + void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \ + auto limex = (const LimExNFA##size *)getImplNfa(nfa); \ \ + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \ + dumpLimexText(limex, f); \ + fclose(f); \ + \ + f = fopen_or_throw((base + ".dot").c_str(), "w"); \ dumpDotPreamble(f); \ u32 state_count = nfa->nPositions; \ dumpVertexDotInfo(limex, state_count, f, \ - limex_labeller(limex)); \ + limex_labeller(limex)); \ for (u32 i = 0; i < state_count; i++) { \ dumpLimDotInfo(limex, i, f); \ dumpExDotInfo(limex, i, f); \ } \ dumpDotTrailer(f); \ + fclose(f); \ } -#define LIMEX_DUMP_FNS(size) \ - DUMP_TEXT_FN(size) \ - DUMP_DOT_FN(size) - -LIMEX_DUMP_FNS(32) -LIMEX_DUMP_FNS(128) -LIMEX_DUMP_FNS(256) -LIMEX_DUMP_FNS(384) -LIMEX_DUMP_FNS(512) +LIMEX_DUMP_FN(32) +LIMEX_DUMP_FN(64) +LIMEX_DUMP_FN(128) +LIMEX_DUMP_FN(256) +LIMEX_DUMP_FN(384) +LIMEX_DUMP_FN(512) } // namespace ue2 diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index 175ca393..e770c327 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -32,8 +32,8 @@ * X-macro generic impl, included into the various LimEx model implementations. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
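For reference, the includer pattern this `#error` guards: each engine translation unit defines the parameters once and then pulls in the whole chain of shared implementation headers, as the limex_simd128.c hunk later in this diff shows:

    #define SIZE 128
    #define STATE_T m128
    #define ENG_STATE_T m128
    #define LOAD_FROM_ENG load_m128

    #include "limex_exceptional.h"
    #include "limex_state_impl.h"
    #define INLINE_ATTR really_inline
    #include "limex_common_impl.h"
    #include "limex_runtime_impl.h"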
#endif #include "config.h" @@ -44,8 +44,6 @@ #define PE_FN JOIN(processExceptional, SIZE) #define RUN_EXCEPTION_FN JOIN(runException, SIZE) #define ZERO_STATE JOIN(zero_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) #define OR_STATE JOIN(or_, STATE_T) @@ -59,7 +57,7 @@ #define ESTATE_ARG STATE_T estate #else #define ESTATE_ARG const STATE_T *estatep -#define estate LOAD_STATE(estatep) +#define estate (*estatep) #endif #ifdef STATE_ON_STACK @@ -97,7 +95,6 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, STATE_T *local_succ, #endif const struct IMPL_NFA_T *limex, - const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, struct proto_cache *new_cache, @@ -133,7 +130,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, char *repeat_state = ctx->repeat_state + info->stateOffset; if (e->trigger == LIMEX_TRIGGER_POS) { - char cyclic_on = TESTBIT_STATE(STATE_ARG_P, info->cyclicState); + char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, cyclic_on); *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; @@ -149,8 +146,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; DEBUG_PRINTF("stale history, squashing cyclic state\n"); assert(e->hasSquash == LIMEX_SQUASH_TUG); - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); return 1; // continue } else if (rv == TRIGGER_SUCCESS_CACHE) { new_cache->br = 1; @@ -164,7 +160,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Some exceptions fire accepts. if (e->reports != MO_INVALID_IDX) { if (flags & CALLBACK_OUTPUT) { - const ReportID *reports = exReports + e->reports; + const ReportID *reports = + (const ReportID *)((const char *)limex + e->reports); if (unlikely(limexRunReports(reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { @@ -188,18 +185,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Most exceptions have a set of successors to switch on. `local_succ' is // ORed into `succ' at the end of the caller's loop. #ifndef BIG_MODEL - *local_succ = OR_STATE(*local_succ, LOAD_STATE(&e->successors)); + *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); #else - STORE_STATE(&ctx->local_succ, OR_STATE(LOAD_STATE(&ctx->local_succ), - LOAD_STATE(&e->successors))); + ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); #endif // Some exceptions squash states behind them. Note that we squash states in // 'succ', not local_succ. 
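As a concrete illustration of what "squashing" means in the code below: the squash mask is ANDed into the successor set, clearing the states the exception wants dead. A toy 8-state example with hypothetical values:

    uint8_t succ   = 0x16; // 0b00010110: states 1, 2 and 4 on
    uint8_t squash = 0xef; // 0b11101111: mask that kills state 4
    succ &= squash;        // 0b00000110: states 1 and 2 survive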
- if (e->hasSquash == LIMEX_SQUASH_CYCLIC || - e->hasSquash == LIMEX_SQUASH_REPORT) { - STORE_STATE(succ, AND_STATE(LOAD_STATE(succ), - LOAD_STATE(&e->squash))); + if (e->hasSquash == LIMEX_SQUASH_CYCLIC + || e->hasSquash == LIMEX_SQUASH_REPORT) { + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); if (*cacheable == CACHE_RESULT) { *cacheable = DO_NOT_CACHE_RESULT; } @@ -215,13 +210,12 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, - char in_rev, char flags) { + u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro - if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { + if (EQ_STATE(estate, ctx->cached_estate)) { DEBUG_PRINTF("using cached succ from previous state\n"); - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), LOAD_STATE(&ctx->cached_esucc))); + *succ = OR_STATE(*succ, ctx->cached_esucc); if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, @@ -236,7 +230,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL STATE_T local_succ = ZERO_STATE; #else - STORE_STATE(&ctx->local_succ, ZERO_STATE); + ctx->local_succ = ZERO_STATE; #endif // A copy of the estate as an array of GPR-sized chunks. @@ -254,7 +248,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; base_index[0] = 0; - for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) { + for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); } @@ -276,31 +270,31 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL &local_succ, #endif - limex, exReports, offset, ctx, &new_cache, - &cacheable, in_rev, flags)) { + limex, offset, ctx, &new_cache, &cacheable, + in_rev, flags)) { return PE_RV_HALT; } } while (word); } while (diffmask); #ifndef BIG_MODEL - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), local_succ)); + *succ = OR_STATE(*succ, local_succ); #else - STORE_STATE(succ, OR_STATE(LOAD_STATE(succ), ctx->local_succ)); + *succ = OR_STATE(*succ, ctx->local_succ); #endif if (cacheable == CACHE_RESULT) { - STORE_STATE(&ctx->cached_estate, estate); + ctx->cached_estate = estate; #ifndef BIG_MODEL ctx->cached_esucc = local_succ; #else - STORE_STATE(&ctx->cached_esucc, LOAD_STATE(&ctx->local_succ)); + ctx->cached_esucc = ctx->local_succ; #endif ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { if (ctx->cached_br) { - STORE_STATE(&ctx->cached_estate, ZERO_STATE); + ctx->cached_estate = ZERO_STATE; } } @@ -314,8 +308,6 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef EQ_STATE #undef OR_STATE #undef TESTBIT_STATE -#undef LOAD_STATE -#undef STORE_STATE #undef PE_FN #undef RUN_EXCEPTION_FN #undef CONTEXT_T @@ -333,11 +325,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, #undef STATE_ARG_NAME #undef STATE_ARG_P +#undef IMPL_NFA_T + #undef CHUNK_T #undef FIND_AND_CLEAR_FN -#undef IMPL_NFA_T -#undef GET_NFA_REPEAT_INFO_FN - -// Parameters. 
-#undef SIZE -#undef STATE_T +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index c37f5f40..723803c1 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -132,7 +132,6 @@ struct LimExNFA##size { \ u32 acceptEodOffset; /* rel. to start of LimExNFA */ \ u32 exceptionCount; \ u32 exceptionOffset; /* rel. to start of LimExNFA */ \ - u32 exReportOffset; /* rel. to start of LimExNFA */ \ u32 repeatCount; \ u32 repeatOffset; \ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ @@ -160,6 +159,7 @@ struct LimExNFA##size { \ }; CREATE_NFA_LIMEX(32) +CREATE_NFA_LIMEX(64) CREATE_NFA_LIMEX(128) CREATE_NFA_LIMEX(256) CREATE_NFA_LIMEX(384) @@ -183,9 +183,16 @@ struct NFARepeatInfo { }; struct NFAAccept { - u32 state; //!< state ID of triggering state - ReportID externalId; //!< report ID to raise - u32 squash; //!< offset into masks, or MO_INVALID_IDX + u8 single_report; //!< If true, 'reports' is report id. + + /** + * \brief If single report is true, this is the report id to fire. + * Otherwise, it is the offset (relative to the start of the LimExNFA + * structure) of a list of reports, terminated with MO_INVALID_IDX. + */ + u32 reports; + + u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX. }; #endif diff --git a/src/nfa/limex_limits.h b/src/nfa/limex_limits.h index 9b35b115..f4df54a4 100644 --- a/src/nfa/limex_limits.h +++ b/src/nfa/limex_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,5 @@ #define NFA_MAX_STATES 512 /**< max states in an NFA */ #define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */ -#define NFA_MAX_TOP_MASKS 32 /**< max number of MQE_TOP_N event types */ #endif diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 8a0a8acd..f6f5809c 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -49,12 +49,13 @@ #include "limex_runtime.h" // Other implementation code from X-Macro impl. 
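Given the new `NFAAccept` encoding above, a consumer branches on `single_report`: either the `reports` field *is* the report id, or it is a byte offset from the engine base to a sentinel-terminated list. A sketch of the resolution (`handleReport` is a hypothetical callback; the real plumbing is `limexRunAccept`, added to limex_runtime.h later in this diff):

    static inline void fireAccept(const char *limex_base,
                                  const struct NFAAccept *a) {
        if (a->single_report) {
            handleReport(a->reports); // field holds the id itself
            return;
        }
        const ReportID *r = (const ReportID *)(limex_base + a->reports);
        for (; *r != MO_INVALID_IDX; r++) {
            handleReport(*r); // offset to a sentinel-terminated list
        }
    }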
-#define SIZE 32 -#define STATE_T u32 +#define SIZE 32 +#define STATE_T u32 +#define ENG_STATE_T u32 +#define LOAD_FROM_ENG load_u32 + #include "limex_state_impl.h" -#define SIZE 32 -#define STATE_T u32 #define INLINE_ATTR really_inline #include "limex_common_impl.h" @@ -64,8 +65,6 @@ // Process exceptional states -#define SIZE 32 -#define STATE_T u32 #define STATE_ON_STACK #define ESTATE_ON_STACK #define RUN_EXCEPTION_FN_ONLY @@ -74,8 +73,7 @@ static really_inline int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, const struct LimExNFA32 *limex, - const struct NFAException32 *exceptions, - const ReportID *exReports, u64a offset, + const struct NFAException32 *exceptions, u64a offset, struct NFAContext32 *ctx, char in_rev, char flags) { assert(estate != 0); // guaranteed by calling macro @@ -105,8 +103,8 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, u32 bit = findAndClearLSB_32(&estate); u32 idx = rank_in_mask32(limex->exceptionMask, bit); const struct NFAException32 *e = &exceptions[idx]; - if (!runException32(e, s, succ, &local_succ, limex, exReports, offset, - ctx, &new_cache, &cacheable, in_rev, flags)) { + if (!runException32(e, s, succ, &local_succ, limex, offset, ctx, + &new_cache, &cacheable, in_rev, flags)) { return PE_RV_HALT; } } while (estate != 0); @@ -128,7 +126,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, } // 32-bit models. - -#define SIZE 32 -#define STATE_T u32 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index e0c182fc..6109d382 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -30,8 +30,8 @@ \brief Limex Execution Engine Or: How I Learned To Stop Worrying And Love The Preprocessor - This file includes utility functions which do not depend on the state size or - shift masks directly. + This file includes utility functions which do not depend on the size of the + state or shift masks directly. */ #ifndef LIMEX_RUNTIME_H @@ -72,41 +72,6 @@ struct proto_cache { const ReportID *reports; }; -// Shift macros for Limited NFAs. Defined in terms of uniform ops. -// LimExNFAxxx ptr in 'limex' and the current state in 's' -#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \ - (JOIN(lshift_, nels_type)( \ - JOIN(and_, nels_type)(s, \ - JOIN(load_, nels_type)(&limex->shift[nels_i])), \ - limex->shiftAmount[nels_i])) - -// Calculate the (limited model) successors for a number of variable shifts. -// Assumes current state in 's' and successors in 'succ'. 
- -#define NFA_EXEC_GET_LIM_SUCC(gls_type) \ - do { \ - succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \ - switch (limex->shiftCount) { \ - case 8: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \ - case 7: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \ - case 6: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \ - case 5: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \ - case 4: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \ - case 3: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \ - case 2: \ - succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \ - case 1: \ - case 0: \ - ; \ - } \ - } while (0) - #define PE_RV_HALT 1 #ifdef STATE_ON_STACK @@ -138,14 +103,42 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, return MO_CONTINUE_MATCHING; // continue } +static really_inline +int limexRunAccept(const char *limex_base, const struct NFAAccept *accept, + NfaCallback callback, void *context, u64a offset) { + if (accept->single_report) { + const ReportID report = accept->reports; + DEBUG_PRINTF("firing single report for id %u at offset %llu\n", report, + offset); + return callback(0, offset, report, context); + } + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + return limexRunReports(reports, callback, context, offset); +} + +static really_inline +int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, + ReportID report) { + if (accept->single_report) { + return accept->reports == report; + } + + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + assert(*reports != MO_INVALID_IDX); + do { + if (*reports == report) { + return 1; + } + reports++; + } while (*reports != MO_INVALID_IDX); + + return 0; +} + /** \brief Return a (correctly typed) pointer to the exception table. */ #define getExceptionTable(exc_type, lim) \ ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset)) -/** \brief Return a pointer to the exceptional reports list. */ -#define getExReports(lim) \ - ((const ReportID *)((const char *)(lim) + (lim)->exReportOffset)) - /** \brief Return a pointer to the ordinary accepts table. */ #define getAcceptTable(lim) \ ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset)) @@ -170,6 +163,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, } MAKE_GET_NFA_REPEAT_INFO(32) +MAKE_GET_NFA_REPEAT_INFO(64) MAKE_GET_NFA_REPEAT_INFO(128) MAKE_GET_NFA_REPEAT_INFO(256) MAKE_GET_NFA_REPEAT_INFO(384) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 881e41fd..45ceb2b5 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -29,7 +29,6 @@ #include "util/join.h" #include - /** \file * \brief Limex Execution Engine Or: * How I Learned To Stop Worrying And Love The Preprocessor @@ -37,8 +36,9 @@ * Version 2.0: now with X-Macros, so you get line numbers in your debugger. */ -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. + +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
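A quick reminder of how the uniform-op names resolve: `JOIN` (from util/join.h) is the usual two-level token paste, so with `STATE_T` defined as `m128`, `AND_STATE` becomes `and_m128`, `LSHIFT_STATE` becomes `lshift_m128`, and so on, while `LOAD_FROM_ENG` is supplied directly by the includer (e.g. `load_m128`). The two-level form is what lets `STATE_T` expand before concatenation (sketch, assuming the conventional definition):

    #define JOIN_(a, b) a##b
    #define JOIN(a, b) JOIN_(a, b)   // expands args, then pastes
    // JOIN(and_, STATE_T) with STATE_T == m128  ->  and_m128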
#endif #define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) @@ -46,7 +46,6 @@ #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) #define TESTEOD_FN JOIN(moNfaTestEod, SIZE) -#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE) #define INITIAL_FN JOIN(moNfaInitial, SIZE) #define TOP_FN JOIN(moNfaTop, SIZE) #define TOPN_FN JOIN(moNfaTopN, SIZE) @@ -67,11 +66,10 @@ #define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent) #define CONTEXT_T JOIN(NFAContext, SIZE) #define EXCEPTION_T JOIN(struct NFAException, SIZE) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) +#define LSHIFT_STATE JOIN(lshift_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) #define CLEARBIT_STATE JOIN(clearbit_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T) @@ -96,17 +94,16 @@ #define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask #define EXCEPTION_MASK exceptionMask #else -#define ACCEL_MASK LOAD_STATE(&limex->accel) -#define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends) -#define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask) +#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) +#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) +#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) #endif // Run exception processing, if necessary. Returns 0 if scanning should // continue, 1 if an accept was fired and the user instructed us to halt. static really_inline char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, STATE_T s, - const STATE_T emask, size_t i, u64a offset, + STATE_T s, const STATE_T emask, size_t i, u64a offset, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, const char flags, const char in_rev, const char first_match) { @@ -117,13 +114,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, } if (first_match && i) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { DEBUG_PRINTF("first match at %zu\n", i); DEBUG_PRINTF("for nfa %p\n", limex); assert(final_loc); - STORE_STATE(&ctx->s, s); + ctx->s = s; *final_loc = i; return 1; // Halt matching. } @@ -133,7 +130,7 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( - pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports, + pass_state, pass_estate, diffmask, succ, limex, exceptions, callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. @@ -161,22 +158,55 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, return j; } +// Shift macros for Limited NFAs. Defined in terms of uniform ops. +// LimExNFAxxx ptr in 'limex' and the current state in 's' +#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ + LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \ + limex_m->shiftAmount[shift_idx]) + +// Calculate the (limited model) successors for a number of variable shifts. +// Assumes current state in 'curr_m' and places the successors in 'succ_m'. 
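The `NFA_EXEC_GET_LIM_SUCC` macro that follows unrolls, via deliberate case fall-through, what is conceptually this loop (a plain sketch for the 32-bit model; the real macro works on any STATE_T via the uniform ops):

    static inline u32 limSucc32(u32 s, const struct LimExNFA32 *limex) {
        u32 succ = 0;
        for (u32 i = 0; i < limex->shiftCount; i++) {
            succ |= (s & limex->shift[i]) << limex->shiftAmount[i];
        }
        return succ;
    }

The unrolled switch exists so that engines with only one or two shifts pay for exactly as many shift/OR pairs as they use.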
+#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \ + do { \ + succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \ + switch (limex_m->shiftCount) { \ + case 8: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \ + case 7: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \ + case 6: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \ + case 5: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \ + case 4: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \ + case 3: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \ + case 2: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \ + case 1: \ + case 0: \ + ; \ + } \ + } while (0) + + static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, const char first_match) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T accelMask = LOAD_STATE(&limex->accel); - const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends); - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); + const STATE_T accel_and_friendsMask + = LOAD_FROM_ENG(&limex->accel_and_friends); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -195,21 +225,20 @@ without_accel: DUMP_INPUT(i); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, - i, offset, &succ, final_loc, ctx, flags, 0, - first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } with_accel: @@ -252,33 +281,30 @@ with_accel: u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, - i, offset, &succ, final_loc, ctx, flags, 0, - first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } - s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = 
getAcceptTable(limex); - const u32 acceptCount = limex->acceptCount; - STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { - STORE_STATE(&ctx->s, s); + ctx->s = s; assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; - } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, acceptTable, - acceptCount, offset + length, + } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask, + acceptTable, offset + length, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } @@ -294,13 +320,12 @@ with_accel: static never_inline char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { - const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - const ReportID *exReports = getExReports(limex); - STATE_T s = LOAD_STATE(&ctx->s); + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -311,34 +336,33 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, DUMP_INPUT(i-1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - STORE_STATE(&ctx->s, s); + ctx->s = s; return MO_CONTINUE_MATCHING; } u8 c = input[i-1]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 1, 0)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; } - s = AND_STATE(succ, reach[limex->reachMap[c]]); + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - STORE_STATE(&ctx->s, s); + ctx->s = s; - STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; assert(flags & CALLBACK_OUTPUT); if (acceptCount) { STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { - if (PROCESS_ACCEPTS_NOSQUASH_FN(&ctx->s, acceptTable, acceptCount, - offset, ctx->callback, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask, + acceptTable, offset, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } @@ -354,9 +378,9 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, return; } - STATE_T s = LOAD_STATE(src); + STATE_T s = *(STATE_T *)src; - if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) { + if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { DEBUG_PRINTF("no cyclics are on\n"); return; } @@ -369,7 +393,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&s, info->cyclicState)) { + if (!TESTBIT_STATE(s, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -388,7 +412,7 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, offset); } - STORE_STATE(src, s); + *(STATE_T *)src = s; } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct 
NFA *n, @@ -411,7 +435,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, // Note: state has already been expanded into 'dest'. const STATE_T cyclics = - AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask)); + AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclics are on\n"); return; @@ -425,7 +449,7 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - if (!TESTBIT_STATE(&cyclics, info->cyclicState)) { + if (!TESTBIT_STATE(cyclics, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } @@ -447,9 +471,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, return 0; } -char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, - struct mq *q) { - STORE_STATE(q->state, ZERO_STATE); +char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { + *(STATE_T *)q->state = ZERO_STATE; // Zero every bounded repeat control block in state. const IMPL_NFA_T *limex = getImplNfa(n); @@ -529,7 +552,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, u32 e = q->items[q->cur].type; switch (e) { DEFINE_CASE(MQE_TOP) - STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s))); + ctx->s = TOP_FN(limex, !!sp, ctx->s); break; DEFINE_CASE(MQE_START) break; @@ -539,8 +562,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, assert(e >= MQE_TOP_FIRST); assert(e < MQE_INVALID); DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); - STORE_STATE(&ctx->s, - TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST)); + ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); } #undef DEFINE_CASE } @@ -570,12 +592,12 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -599,7 +621,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { assert(ep - offset <= q->length); if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { - STORE_STATE(q->state, ZERO_STATE); + *(STATE_T *)q->state = ZERO_STATE; return 0; } @@ -616,7 +638,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -628,7 +650,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -637,7 +659,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } /* used by suffix execution in Rose */ @@ -665,11 +687,11 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA 
*n, struct mq *q, s64a end) { ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -699,7 +721,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -721,7 +743,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -737,7 +759,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } @@ -749,7 +771,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -758,7 +780,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } // Used for execution Rose prefix/infixes. @@ -777,11 +799,11 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = NULL; ctx.context = NULL; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - STORE_STATE(&ctx.s, LOAD_STATE(q->state)); + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -793,7 +815,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; - STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp)); + ctx.s = INITIAL_FN(limex, !!sp); } } assert(ep >= sp); @@ -832,14 +854,14 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, DEBUG_PRINTF("END, nfa is %s\n", ISNONZERO_STATE(ctx.s) ? 
"still alive" : "dead"); - STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + *(STATE_T *)q->state = ctx.s; - if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl, + if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } - return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); + return ISNONZERO_STATE(ctx.s); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, @@ -852,8 +874,8 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, const union RepeatControl *repeat_ctrl = getRepeatControlBaseConst(state, sizeof(STATE_T)); const char *repeat_state = streamState + limex->stateSize; - return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, 1, - callback, context); + return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback, + context); } char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { @@ -875,11 +897,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx.repeat_state = NULL; ctx.callback = cb; ctx.context = context; - STORE_STATE(&ctx.cached_estate, ZERO_STATE); + ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); - STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored + ctx.s = INITIAL_FN(limex, 0); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. if (buflen) { @@ -896,8 +918,11 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } - if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) { - TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context); + if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) { + const union RepeatControl *repeat_ctrl = NULL; + const char *repeat_state = NULL; + TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb, + context); } // NOTE: return value is unused. 
@@ -913,7 +938,7 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -928,7 +953,7 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = LOAD_STATE(q->state); + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, @@ -941,8 +966,8 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( s64a loc) { assert(nfa->flags & NFA_ZOMBIE); const IMPL_NFA_T *limex = getImplNfa(nfa); - STATE_T state = LOAD_STATE(q->state); - STATE_T zmask = LOAD_STATE(&limex->zombieMask); + STATE_T state = *(STATE_T *)q->state; + STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); if (limex->repeatCount) { u64a offset = q->offset + loc + 1; @@ -960,7 +985,6 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( } #undef TESTEOD_FN -#undef TESTEOD_REV_FN #undef INITIAL_FN #undef TOP_FN #undef TOPN_FN @@ -981,11 +1005,10 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef STREAMSILENT_FN #undef CONTEXT_T #undef EXCEPTION_T -#undef LOAD_STATE -#undef STORE_STATE #undef AND_STATE #undef ANDNOT_STATE #undef OR_STATE +#undef LSHIFT_STATE #undef TESTBIT_STATE #undef CLEARBIT_STATE #undef ZERO_STATE @@ -999,8 +1022,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef ACCEL_MASK #undef ACCEL_AND_FRIENDS_MASK #undef EXCEPTION_MASK - -// Parameters. -#undef SIZE -#undef STATE_T #undef LIMEX_API_ROOT diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index e45e4331..5ca8fce0 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -41,52 +41,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) -#define HAVE_PEXT -#endif - -static really_inline -u32 packedExtract32(u32 x, u32 mask) { -#if defined(HAVE_PEXT) - // Intel BMI2 can do this operation in one instruction. - return _pext_u32(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_32(&mask); - if (x & (1U << bit)) { - assert(num != 0); // more than 32 bits! - result |= num; - } - num <<= 1; - } - return result; -#endif -} - -static really_inline -u32 packedExtract64(u64a x, u64a mask) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) - // Intel BMI2 can do this operation in one instruction. - return _pext_u64(x, mask); -#else - - u32 result = 0, num = 1; - while (mask != 0) { - u32 bit = findAndClearLSB_64(&mask); - if (x & (1ULL << bit)) { - assert(num != 0); // more than 32 bits! 
- result |= num; - } - num <<= 1; - } - return result; -#endif -} - -#undef HAVE_PEXT - static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { m128 shuffled = pshufb(s, permute); diff --git a/src/nfa/limex_simd128.c b/src/nfa/limex_simd128.c index f0fb1dd4..c5f2b33e 100644 --- a/src/nfa/limex_simd128.c +++ b/src/nfa/limex_simd128.c @@ -48,19 +48,16 @@ #include "limex_runtime.h" -#define SIZE 128 -#define STATE_T m128 +#define SIZE 128 +#define STATE_T m128 +#define ENG_STATE_T m128 +#define LOAD_FROM_ENG load_m128 + #include "limex_exceptional.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_state_impl.h" -#define SIZE 128 -#define STATE_T m128 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 128 -#define STATE_T m128 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd256.c b/src/nfa/limex_simd256.c index 57648b69..cc232908 100644 --- a/src/nfa/limex_simd256.c +++ b/src/nfa/limex_simd256.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 256 -#define STATE_T m256 +#define SIZE 256 +#define STATE_T m256 +#define ENG_STATE_T m256 +#define LOAD_FROM_ENG load_m256 + #include "limex_exceptional.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_state_impl.h" -#define SIZE 256 -#define STATE_T m256 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 256 -#define STATE_T m256 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd384.c b/src/nfa/limex_simd384.c index 84061f61..7e596e48 100644 --- a/src/nfa/limex_simd384.c +++ b/src/nfa/limex_simd384.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 384 -#define STATE_T m384 +#define SIZE 384 +#define STATE_T m384 +#define ENG_STATE_T m384 +#define LOAD_FROM_ENG load_m384 + #include "limex_exceptional.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_state_impl.h" -#define SIZE 384 -#define STATE_T m384 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 384 -#define STATE_T m384 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd512.c b/src/nfa/limex_simd512.c index a6646d83..f779f335 100644 --- a/src/nfa/limex_simd512.c +++ b/src/nfa/limex_simd512.c @@ -45,19 +45,16 @@ // Common code #include "limex_runtime.h" -#define SIZE 512 -#define STATE_T m512 +#define SIZE 512 +#define STATE_T m512 +#define ENG_STATE_T m512 +#define LOAD_FROM_ENG load_m512 + #include "limex_exceptional.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_state_impl.h" -#define SIZE 512 -#define STATE_T m512 #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 512 -#define STATE_T m512 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_state_impl.h b/src/nfa/limex_state_impl.h index d6e89904..81153f71 100644 --- a/src/nfa/limex_state_impl.h +++ b/src/nfa/limex_state_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,8 @@ #include "util/state_compress.h" #include -#if !defined(SIZE) || !defined(STATE_T) -# error Must define SIZE and STATE_T in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
#endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -44,29 +44,33 @@ #define REACHMASK_FN JOIN(moNfaReachMask, SIZE) #define COMPRESS_FN JOIN(moNfaCompressState, SIZE) #define EXPAND_FN JOIN(moNfaExpandState, SIZE) -#define COMPRESSED_STORE_FN JOIN(storecompressed, SIZE) -#define COMPRESSED_LOAD_FN JOIN(loadcompressed, SIZE) +#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) +#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) #define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) #define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) -#define LOAD_STATE JOIN(load_, STATE_T) -#define STORE_STATE JOIN(store_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define AND_STATE JOIN(and_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T) static really_inline -const STATE_T *REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { - const STATE_T *reach - = (const STATE_T *)((const char *)limex + sizeof(*limex)); - assert(ISALIGNED_N(reach, alignof(STATE_T))); - return &reach[limex->reachMap[key]]; +const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { + const ENG_STATE_T *reach + = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); + assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); + return reach; +} + +static really_inline +STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { + const ENG_STATE_T *reach = get_reach_table(limex); + return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); } static really_inline void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, u8 key) { assert(ISALIGNED_N(src, alignof(STATE_T))); - STATE_T a_src = LOAD_STATE(src); + STATE_T a_src = *src; DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); @@ -77,31 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, } else { DEBUG_PRINTF("compress state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); // Masked compression means that we mask off the initDs states and // provide a shortcut for the all-zeroes case. Note that these must be // switched on in the EXPAND call below. 
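        /* [Editor's note] Concretely: only the state bits selected by
         * (compressMask & reachmask) are written out. States outside the
         * reach of the key byte cannot be live, and the always-on initDS
         * states are excluded by compressMask, so neither carries any
         * information. EXPAND_FN below reloads through the same mask and
         * then ORs initDS back in -- the "switched on" step this comment
         * refers to. */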
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T s = AND_STATE(LOAD_STATE(&limex->compressMask), a_src); + STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); if (ISZERO_STATE(s)) { DEBUG_PRINTF("after compression mask, all states are zero\n"); memset(dest, 0, limex->stateSize); return; } - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); } else { - COMPRESSED_STORE_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); } } } static really_inline -void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, - u8 key) { +void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { assert(ISALIGNED_N(dest, alignof(STATE_T))); DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); @@ -111,16 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, *dest = PARTIAL_LOAD_FN(src, limex->stateSize); } else { DEBUG_PRINTF("expand state, key=%hhx\n", key); - const STATE_T *reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T mask = AND_STATE(LOAD_STATE(&limex->compressMask), - LOAD_STATE(reachmask)); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); - STORE_STATE(dest, OR_STATE(LOAD_STATE(&limex->initDS), - LOAD_STATE(dest))); + *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); } else { - COMPRESSED_LOAD_FN(dest, src, reachmask, limex->stateSize); + COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); } } } @@ -134,11 +136,6 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, #undef COMPRESSED_LOAD_FN #undef PARTIAL_STORE_FN #undef PARTIAL_LOAD_FN -#undef LOAD_STATE -#undef STORE_STATE #undef OR_STATE #undef AND_STATE #undef ISZERO_STATE - -#undef SIZE -#undef STATE_T diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 88da27c0..ceedb9db 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,10 +42,10 @@ static really_inline char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, - u16 s, u64a loc, char eod, u16 *const cached_accept_state, - u32 *const cached_accept_id) { - DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", - (u16)(s & STATE_MASK), loc, eod); + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); if (!eod && s == *cached_accept_state) { if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { @@ -89,27 +89,108 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, } static really_inline -char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf, +const u8 *run_mcclellan_accel(const struct mcclellan *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + 
assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? *c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **c_final, enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } - u16 s = *state; - const u8 *c = buf, *c_end = buf + len; - const u16 *succ_table = (const u16 *)((const char *)m - + sizeof(struct mcclellan)); - assert(ISALIGNED_N(succ_table, 2)); - const u16 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - const u32 as = m->alphaShift; + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); s &= STATE_MASK; u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; - DEBUG_PRINTF("s: %hu, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -120,26 +201,19 @@ char mcclellanExec16_i(const struct mcclellan *m, u16 *state, const u8 *buf, goto with_accel; without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[((u32)s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%hu)\n", s); - s = doSherman16(sherman_state, cprime, 
succ_table, as); + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; } - DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); + + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -147,39 +221,51 @@ without_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; } } - s &= STATE_MASK; + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + s &= STATE_MASK; + + if (c == c_end) { + goto exit; + } else { + goto with_accel; } with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[((u32)s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%hu)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); + do { + assert(c < c_end); + if (!s) { + goto exit; } - DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); + + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; @@ -187,56 +273,31 @@ with_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; } - } else if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK); - u32 accel_offset = this_aux->accel_offset; - - assert(accel_offset >= m->aux_offset); - assert(accel_offset < m->sherman_offset); - - const union AccelAux *aaux - = (const void *)((const char *)m + accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; - s &= STATE_MASK; - goto without_accel; } - s &= STATE_MASK; - } + assert(c <= c_end); + } while (c < c_end); + +exit: + s 
&= STATE_MASK; if (mode == STOP_AT_MATCH) { *c_final = c_end; } *state = s; - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, @@ -244,7 +305,7 @@ char mcclellanExec16_i_cb(const struct mcclellan *m, u16 *state, const u8 *buf, } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, @@ -252,15 +313,15 @@ char mcclellanExec16_i_sam(const struct mcclellan *m, u16 *state, const u8 *buf, } static never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u16 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { +char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf, +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { @@ -271,35 +332,69 @@ char mcclellanExec16_i_ni(const struct mcclellan *m, u16 *state, const u8 *buf, return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, single, final_point); } else { - assert (mode == NO_MATCHES); + assert(mode == NO_MATCHES); return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, final_point); } } static really_inline -char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - u8 s = *state; - const u8 *c = buf, *c_end = buf + len; +u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; const u8 *succ_table = (const u8 *)((const char *)m + sizeof(struct mcclellan)); - const u32 as = m->alphaShift; - const struct mstate_aux *aux; + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? 
*c : '?', cprime); + s = succ_table[(s << as) + cprime]; - aux = (const struct mstate_aux *)((const char *)m + m->aux_offset + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset - sizeof(struct NFA)); - - u16 accel_limit = m->accel_limit_8; - u16 accept_limit = m->accept_limit_8; + u32 accept_limit = m->accept_limit_8; u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; - DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit); + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); - DEBUG_PRINTF("s: %hhu, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -310,124 +405,119 @@ char mcclellanExec8_i(const struct mcclellan *m, u8 *state, const u8 *buf, goto with_accel; without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? *(c-1) : '?', cprime); - s = succ_table[((u32)s << as) + cprime]; - DEBUG_PRINTF("s: %hhu\n", s); + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } + + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); if (mode != NO_MATCHES && s >= accept_limit) { if (mode == STOP_AT_MATCH) { DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_CONTINUE_MATCHING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) + &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } + + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; } with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? 
*(c-1) : '?', cprime); - s = succ_table[((u32)s << as) + cprime]; - DEBUG_PRINTF("s: %hhu\n", s); + do { + u32 accel_limit = m->accel_limit_8; + assert(c < c_end); - if (s >= accel_limit) { /* accept_limit >= accel_limit */ - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } + if (!s) { + goto exit; + } - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (aux[s].accel_offset) { - DEBUG_PRINTF("skipping\n"); - - const union AccelAux *aaux = (const void *)((const char *)m - + aux[s].accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { goto without_accel; } } - } + s = doNormal8(m, &c, c_end, s, 1, mode); + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: *state = s; if (mode == STOP_AT_MATCH) { *c_final = c_end; } - return MO_CONTINUE_MATCHING; + return MO_ALIVE; } static never_inline -char mcclellanExec8_i_cb(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); + final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec8_i_sam(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); + final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec8_i_nm(const struct mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); + final_point, NO_MATCHES); } static really_inline -char mcclellanExec8_i_ni(const struct 
mcclellan *m, u8 *state, const u8 *buf, +char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { @@ -445,7 +535,7 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u8 *state, const u8 *buf, } static really_inline -char mcclellanCheckEOD(const struct NFA *nfa, u16 s, u64a offset, +char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); @@ -466,7 +556,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, s64a sp; assert(ISALIGNED_N(q->state, 2)); - u16 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; if (q->report_current) { assert(s); @@ -478,7 +568,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -487,7 +577,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -496,12 +586,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -528,19 +612,20 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - if (mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, &final_look, - report ? mode : NO_MATCHES) - == MO_HALT_MATCHING) { - assert(report); + char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { *(u16 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); - DEBUG_PRINTF("state %hu final_look %zd\n", s, - final_look - cur_buf); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -549,6 +634,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -563,7 +649,6 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { @@ -582,7 +667,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u16 *)q->state = s; q->cur++; - return s ? MO_ALIVE : 0; + return s ? 
MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -591,18 +676,18 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline really_flatten -char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, - const u8 *buffer, size_t length, - NfaCallback cb, void *context, char single) { +static really_inline +char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context, + char single) { assert(n->type == MCCLELLAN_NFA_16); const struct mcclellan *m = getImplNfa(n); - u16 s = m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return s ? MO_ALIVE : MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -611,19 +696,19 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return !!s; + return MO_ALIVE; } static really_inline char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); s64a sp; - u8 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; if (q->report_current) { assert(s); @@ -635,7 +720,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -644,7 +729,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + return MO_DEAD; } } @@ -653,12 +738,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *cur_buf = sp < 0 ? hend : buffer; - char report = 1; - if (mode == CALLBACK_OUTPUT) { - /* we are starting inside the history buffer: matches are suppressed */ - report = !(sp < 0); - } - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { DEBUG_PRINTF("this is as far as we go\n"); q->cur--; @@ -686,17 +765,20 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } const u8 *final_look; - if (mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, - cb, context, single, &final_look, - report ? 
mode : NO_MATCHES) - == MO_HALT_MATCHING) { + char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; - return 0; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { - /* found a match */ - DEBUG_PRINTF("found a match\n"); + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -705,6 +787,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -720,7 +803,6 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, if (sp == 0) { cur_buf = buffer; - report = 1; } if (sp != ep) { @@ -739,7 +821,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u8 *)q->state = s; q->cur++; - return s ? MO_ALIVE : 0; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -748,18 +830,18 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline really_flatten +static really_inline char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context, char single) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); - u8 s = (u8)m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_HALT_MATCHING) { - return 0; + == MO_DEAD) { + return MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -768,7 +850,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return s; + return s ? 
MO_ALIVE : MO_DEAD; } char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, @@ -827,7 +909,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u8 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); @@ -839,7 +921,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -853,12 +935,12 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u16 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %hu\n", s); + DEBUG_PRINTF("state %u\n", s); assert(s); if (aux->accept) { @@ -867,7 +949,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u16 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -1041,7 +1123,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - u8 s = top ? m->start_anchored : *(u8 *)state; + u32 s = top ? m->start_anchored : *(u8 *)state; if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec8_i(m, &s, buf + start_off, len - start_off, @@ -1059,14 +1141,14 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - u16 s = top ? m->start_anchored : unaligned_load_u16(state); + u32 s = top ? 
m->start_anchored : unaligned_load_u16(state); if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec16_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { mcclellanExec16_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } unaligned_store_u16(state, s); @@ -1087,13 +1169,15 @@ char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, context); } -char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { +char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { assert(nfa->scratchStateSize == 1); *(u8 *)q->state = 0; return 0; } -char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { +char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { assert(nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index e3bcf43e..be130715 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,14 +26,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#if defined(__INTEL_COMPILER) || defined(__clang__) || defined(_WIN32) || defined(__GNUC__) && (__GNUC__ < 4) -#define really_flatten -#else -#define really_flatten __attribute__ ((flatten)) -#endif - -#define CASE_MASK 0xdf - enum MatchMode { CALLBACK_OUTPUT, STOP_AT_MATCH, @@ -41,7 +33,7 @@ enum MatchMode { }; static really_inline -const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) { +const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { const char *nfa = (const char *)m - sizeof(struct NFA); const struct mstate_aux *aux = s + (const struct mstate_aux *)(nfa + m->aux_offset); @@ -51,15 +43,15 @@ const struct mstate_aux *get_aux(const struct mcclellan *m, u16 s) { } static really_inline -u16 mcclellanEnableStarts(const struct mcclellan *m, u16 s) { +u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { const struct mstate_aux *aux = get_aux(m, s); - DEBUG_PRINTF("enabling starts %hu->%hu\n", s, aux->top); + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); return aux->top; } static really_inline -u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 as) { assert(ISALIGNED_N(sherman_state, 16)); @@ -78,15 +70,15 @@ u16 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, if (z) { u32 i = ctz32(z & ~0xf) - 4; - u16 s_out = unaligned_load_u16((const u8 *)sherman_state + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + SHERMAN_STATES_OFFSET(len) + sizeof(u16) * i); - DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu " - "s=%hu\n", i, len, cprime, s_out); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); return s_out; } } - u16 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); - return succ_table[((u32)daddy << as) + cprime]; + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + 
cprime]; } diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index aad296c4..549bccf5 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,17 +71,17 @@ struct mcclellan { u16 start_floating; /**< floating start state */ u32 aux_offset; /**< offset of the aux structures relative to the start of * the nfa structure */ - u32 sherman_offset; /**< offset of to array of sherman state offsets - * the state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures relative - * to the start of the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ u8 alphaShift; u8 flags; - u8 has_accel; /**< 1 iff there are any accel planes */ + u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of the accel structures from start of NFA */ @@ -90,8 +90,8 @@ struct mcclellan { static really_inline const char *findShermanState(UNUSED const struct mcclellan *m, - const char *sherman_base_offset, u16 sherman_base, - u16 s) { + const char *sherman_base_offset, u32 sherman_base, + u32 s) { const char *rv = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); assert(rv < (const char *)m + m->length - sizeof(struct NFA)); @@ -102,7 +102,7 @@ const char *findShermanState(UNUSED const struct mcclellan *m, static really_inline char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, - u16 s) { + u32 s) { return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 09006d5b..7a73c9d4 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -415,9 +415,9 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, : info.raw.start_floating); } -/* returns non-zero on error */ +/* returns false on error */ static -int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; @@ -426,7 +426,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); *sherman_base = 0; - return 1; + return false; } for (u32 i = 1; i < info.size(); i++) { @@ -452,7 +452,7 @@ int allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { /* Check to see if we haven't over allocated our states */ DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) != ((next_sherman - 1) & STATE_MASK); + return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); } static @@ -470,7 +470,7 @@ 
aligned_unique_ptr mcclellanCompile16(dfa_info &info, assert(alphaShift <= 8); u16 count_real_states; - if (allocateFSN16(info, &count_real_states)) { + if (!allocateFSN16(info, &count_real_states)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index e6f548a7..8d8dfb19 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -32,9 +32,7 @@ #include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/accel_scheme.h" #include "util/alloc.h" -#include "util/charreach.h" #include "util/ue2_containers.h" #include diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index dcbb0915..9e04ad63 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -39,6 +39,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/unaligned.h" #include @@ -267,8 +268,8 @@ void dumpDotPreambleDfa(FILE *f) { fprintf(f, "0 [style=invis];\n"); } -void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -287,8 +288,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, fprintf(f, "}\n"); } -void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f, - UNUSED const string &base) { +static +void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -397,6 +398,7 @@ void dumpTransitions(FILE *f, const NFA *nfa, const mcclellan *m, } } +static void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -417,6 +419,7 @@ void nfaExecMcClellan16_dumpText(const NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +static void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -437,4 +440,24 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } +void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCCLELLAN_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecMcClellan16_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecMcClellan16_dumpDot(nfa, f); + fclose(f); +} + +void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCCLELLAN_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + nfaExecMcClellan8_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecMcClellan8_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/mcclellandump.h b/src/nfa/mcclellandump.h index efa61544..5b63a206 100644 --- a/src/nfa/mcclellandump.h +++ b/src/nfa/mcclellandump.h @@ -43,14 +43,10 @@ union AccelAux; namespace ue2 { -void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file); -void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file); +void 
nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base); +void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base); -/* These functions are shared with the Haig dump code. */ +/* These functions are shared with the Gough dump code. */ const mstate_aux *getAux(const NFA *n, dstate_id_t i); void describeEdge(FILE *f, const u16 *t, u16 i); diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c new file mode 100644 index 00000000..98db3f0a --- /dev/null +++ b/src/nfa/mcsheng.c @@ -0,0 +1,1406 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcsheng.h" + +#include "accel.h" +#include "mcsheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "ue2common.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) { + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline +u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) { + const struct mstate_aux *aux = get_aux(m, s); + + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + return aux->top; +} + +static really_inline +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, + u32 as) { + assert(ISALIGNED_N(sherman_state, 16)); + + u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); + + if (len) { + m128 ss_char = load128(sherman_state); + m128 cur_char = set16x8(cprime); + + u32 z = movemask128(eq128(ss_char, cur_char)); + + /* remove header cruft: type 1, len 1, daddy 2*/ + z &= ~0xf; + z &= (1U << (len + 4)) - 1; + + if (z) { + u32 i = ctz32(z & ~0xf) - 4; + + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + + SHERMAN_STATES_OFFSET(len) + + sizeof(u16) * i); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); + return s_out; + } + } + + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; +} + +static really_inline +char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m, + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); + + if (!eod && s == *cached_accept_state) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? 
aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +#define SHENG_CHUNK 8 + +static really_inline +u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, + const u8 *hard_c_end, u32 s_in, char do_accel) { + assert(s_in < m->sheng_end); + assert(s_in); /* should not already be dead */ + assert(soft_c_end <= hard_c_end); + DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); + m128 s = set16x8(s_in - 1); + const u8 *c = *c_inout; + const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; + if (!do_accel) { + c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); + } + const m128 *masks = m->sheng_masks; + u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ + u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; + + /* When we use movd to get a u32 containing our state, it will have 4 lanes + * all duplicating the state. We can create versions of our limits with 4 + * copies to directly compare against, this prevents us generating code to + * extract a single copy of the state from the u32 for checking. 
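 * [Editor's note] Worked through: with the state byte s duplicated in the
 * four low lanes, movd() returns s * 0x01010101. Both s and the limit fit
 * in a byte, so s * 0x01010101 >= limit * 0x01010101 holds exactly when
 * s >= limit (no u32 overflow: 0xff * 0x01010101 == 0xffffffff), making the
 * x4-scaled comparison below equivalent to the plain one.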
*/ + u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + u32 sheng_limit_x4 = sheng_limit * 0x01010101; + m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); + m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); + DEBUG_PRINTF("end %hu, accel %hhu --> limit %hhu\n", sheng_limit, + m->sheng_accel_limit, sheng_stop_limit); +#endif + +#define SHENG_SINGLE_ITER do { \ + m128 shuffle_mask = masks[*(c++)]; \ + s = pshufb(shuffle_mask, s); \ + u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ + DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr); \ + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ + } while (0) + + u8 s_gpr; + while (c < c_end) { +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) + /* This version uses pext for efficently bitbashing out scaled + * versions of the bytes to process from a u64a */ + + u64a data_bytes = unaligned_load_u64a(c); + u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ + data_bytes &= ~0xffULL; /* clear low bits for scale space */ + m128 shuffle_mask0 = load128((const char *)masks + cc0); + s = pshufb(shuffle_mask0, s); + m128 s_max = s; + m128 s_max0 = s_max; + DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); + +#define SHENG_SINGLE_UNROLL_ITER(iter) \ + assert(iter); \ + u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ + assert(cc##iter == (u64a)c[iter] << 4); \ + m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ + s = pshufb(shuffle_mask##iter, s); \ + if (do_accel && iter == 7) { \ + /* in the final iteration we also have to check against accel */ \ + m128 s_temp = sadd_u8_m128(s, accel_delta); \ + s_max = max_u8_m128(s_max, s_temp); \ + } else { \ + s_max = max_u8_m128(s_max, s); \ + } \ + m128 s_max##iter = s_max; \ + DEBUG_PRINTF("c %02llx --> s %hhu max %hhu\n", cc##iter >> 4, \ + movd(s), movd(s_max)); + + SHENG_SINGLE_UNROLL_ITER(1); + + SHENG_SINGLE_UNROLL_ITER(2); + SHENG_SINGLE_UNROLL_ITER(3); + + SHENG_SINGLE_UNROLL_ITER(4); + SHENG_SINGLE_UNROLL_ITER(5); + + SHENG_SINGLE_UNROLL_ITER(6); + SHENG_SINGLE_UNROLL_ITER(7); + + if (movd(s_max7) >= sheng_limit_x4) { + DEBUG_PRINTF("exit found\n"); + + /* Explicitly check the last byte as it is more likely as it also + * checks for acceleration. 
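 * [Editor's note] s_max6 is the running maximum over the first seven bytes
 * of the chunk; if it is still below the limit, the stop condition can only
 * have been triggered by the eighth byte, so the current state is already
 * the stop state and the whole chunk may be consumed at once.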
*/ + if (movd(s_max6) < sheng_limit_x4) { + c += SHENG_CHUNK; + s_gpr = movq(s); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } + + /* use shift-xor to create a register containing all of the max + * values */ + m128 blended = rshift64_m128(s_max0, 56); + blended = xor128(blended, rshift64_m128(s_max1, 48)); + blended = xor128(blended, rshift64_m128(s_max2, 40)); + blended = xor128(blended, rshift64_m128(s_max3, 32)); + blended = xor128(blended, rshift64_m128(s_max4, 24)); + blended = xor128(blended, rshift64_m128(s_max5, 16)); + blended = xor128(blended, rshift64_m128(s_max6, 8)); + blended = xor128(blended, s); + blended = xor128(blended, rshift64_m128(blended, 8)); + DEBUG_PRINTF("blended %016llx\n", movq(blended)); + + m128 final = min_u8_m128(blended, simd_stop_limit); + m128 cmp = sub_u8_m128(final, simd_stop_limit); + u64a stops = ~movemask128(cmp); + assert(stops); + u32 earliest = ctz32(stops); + DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); + assert(earliest < 8); + c += earliest + 1; + s_gpr = movq(blended) >> (earliest * 8); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } else { + c += SHENG_CHUNK; + } +#else + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; +#endif + } + + assert(c_end - c < SHENG_CHUNK); + if (c < soft_c_end) { + assert(soft_c_end - c < SHENG_CHUNK); + switch (soft_c_end - c) { + case 7: + SHENG_SINGLE_ITER; + case 6: + SHENG_SINGLE_ITER; + case 5: + SHENG_SINGLE_ITER; + case 4: + SHENG_SINGLE_ITER; + case 3: + SHENG_SINGLE_ITER; + case 2: + SHENG_SINGLE_ITER; + case 1: + SHENG_SINGLE_ITER; + } + } + + assert(c >= soft_c_end); + + s_gpr = movd(s); +exit: + assert(c <= hard_c_end); + DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); + assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); + /* undo state adjustment to match mcclellan view */ + if (s_gpr == sheng_limit) { + s_gpr = 0; + } else if (s_gpr < sheng_limit) { + s_gpr++; + } + + *c_inout = c; + return s_gpr; +} + +static really_inline +const char *findShermanState(UNUSED const struct mcsheng *m, + const char *sherman_base_offset, u32 sherman_base, + u32 s) { + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +const u8 *run_mcsheng_accel(const struct mcsheng *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c 
= *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcsheng)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sheng_end = m->sheng_end; + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + s &= STATE_MASK; + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? *c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + int do_accept; + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + assert(c < c_end); + int do_accept; + + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + 
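            /* [Editor's note] run_mcsheng_accel() has already skipped ahead
             * and pushed min_accel_offset forward (further after an
             * unproductive skip). If the skip consumed the rest of the
             * buffer we are done; otherwise we fall back to the
             * without_accel loop and do not retry acceleration until
             * min_accel_offset is reached again, so repeated poor skips
             * cannot dominate the scan. */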
goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_ALIVE; +} + +static never_inline +char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert (mode == NO_MATCHES); + return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } +} + +static really_inline +u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 sheng_end = m->sheng_end; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcsheng)); + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. 
Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + assert(s >= sheng_end); + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? *c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + u32 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + } else { + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); + assert(c <= min_accel_offset); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + u32 accel_limit = m->accel_limit_8; + + assert(c < c_end); + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + } else { + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal8(m, &c, c_end, s, 1, mode); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: 
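+    /* Out of input or asked to stop: hand the current state back to the
+     * caller. The 8-bit engine orders its state ids (sheng states, then
+     * normal, then accelerable, then accepting - see allocateImplId8), so
+     * the "s >= accept_limit" tests above are simple range checks. */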
+ *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_ALIVE; +} + +static never_inline +char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} + +static really_inline +char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset, + NfaCallback cb, void *ctxt) { + const struct mcsheng *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); +} + +static really_inline +char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u32 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? 
hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; + char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { + *(u16 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +static really_inline +char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + u32 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? 
"END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; + char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { + *(u8 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u8 *)q->state; + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux(m, s); + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + 
assert(q_cur_type(q) == MQE_START); + DEBUG_PRINTF("state %u\n", s); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +static +char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, + ReportID report) { + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} + +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + if (rv && nfaExecMcSheng8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = 
q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + + if (rv && nfaExecMcSheng16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u16 s = offset ? m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); +} + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + assert(ISALIGNED_N(state, 2)); + return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); +} + +char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 2); + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + return 0; +} + +char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + return 0; +} diff --git a/src/nfa/mcsheng.h b/src/nfa/mcsheng.h new file mode 100644 index 00000000..19fd6961 --- /dev/null +++ b/src/nfa/mcsheng.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list 
of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_H +#define MCSHENG_H + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +/* 8-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/* 16-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + 
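+/* McSheng implements no reverse block scan and no zombie support; the
+ * entries below stub those slots out in the NFA API dispatch table. */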
+#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp new file mode 100644 index 00000000..7b4e58ab --- /dev/null +++ b/src/nfa/mcsheng_compile.cpp @@ -0,0 +1,1070 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "mcsheng_compile.h"
+
+#include "accel.h"
+#include "accelcompile.h"
+#include "grey.h"
+#include "mcclellancompile.h"
+#include "mcclellancompile_util.h"
+#include "mcsheng_internal.h"
+#include "nfa_internal.h"
+#include "rdfa_graph.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2_containers.h"
+#include "util/unaligned.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <set>
+#include <deque>
+#include <vector>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+namespace /* anon */ {
+
+#define MIN_SHENG_SIZE 6
+#define INVALID_SHENG_ID 255
+
+struct dstate_extra {
+    u16 daddytaken = 0;
+    bool shermanState = false;
+    bool sheng_succ = false;
+    u8 sheng_id = INVALID_SHENG_ID;
+};
+
+struct dfa_info {
+    accel_dfa_build_strat &strat;
+    raw_dfa &raw;
+    vector<dstate> &states;
+    vector<dstate_extra> extra;
+    const u16 alpha_size; /* including special symbols */
+    const array<u16, ALPHABET_SIZE> &alpha_remap;
+    vector<CharReach> rev_alpha;
+    const u16 impl_alpha_size;
+
+    u8 getAlphaShift() const;
+
+    explicit dfa_info(accel_dfa_build_strat &s)
+        : strat(s),
+          raw(s.get_raw()),
+          states(raw.states),
+          extra(raw.states.size()),
+          alpha_size(raw.alpha_size),
+          alpha_remap(raw.alpha_remap),
+          impl_alpha_size(raw.getImplAlphaSize()) {
+        rev_alpha.resize(impl_alpha_size);
+        for (u32 i = 0; i < N_CHARS; i++) {
+            rev_alpha[alpha_remap[i]].set(i);
+        }
+    }
+
+    dstate_id_t implId(dstate_id_t raw_id) const {
+        return states[raw_id].impl_id;
+    }
+
+    bool is_sherman(dstate_id_t raw_id) const {
+        return extra[raw_id].shermanState;
+    }
+
+    bool is_sheng(dstate_id_t raw_id) const {
+        return extra[raw_id].sheng_id != INVALID_SHENG_ID;
+    }
+
+    bool is_sheng_succ(dstate_id_t raw_id) const {
+        return extra[raw_id].sheng_succ;
+    }
+
+    /* states which use the normal transition/successor table */
+    bool is_normal(dstate_id_t raw_id) const {
+        return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id);
+    }
+    size_t size(void) const { return states.size(); }
+};
+
+u8 dfa_info::getAlphaShift() const {
+    if (impl_alpha_size < 2) {
+        return 1;
+    } else {
+        /* log2 round up */
+        return 32 - clz32(impl_alpha_size - 1);
+    }
+}
+
+} // namespace
+
+static
+mstate_aux *getAux(NFA *n, dstate_id_t i) {
+    mcsheng *m = (mcsheng *)getMutableImplNfa(n);
+    mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
+
+    mstate_aux *aux = aux_base + i;
+    assert((const char *)aux < (const char *)n + m->length);
+    return aux;
+}
+
+static
+void createShuffleMasks(mcsheng *m, const dfa_info &info,
+                        dstate_id_t sheng_end,
+                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
+    DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
+    assert(sheng_end > DEAD_STATE + 1);
+    assert(sheng_end <= sizeof(m128) + 1);
+    vector<array<u8, sizeof(m128)>> masks;
+    masks.resize(info.alpha_size);
+    /* -1 to avoid wasting a slot as we do not include dead state */
+    vector<dstate_id_t> raw_ids;
+    raw_ids.resize(sheng_end - 1);
+    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
+        assert(info.implId(s)); /* should not map to DEAD_STATE */
+        if (info.is_sheng(s)) {
+            raw_ids[info.extra[s].sheng_id] = s;
+        }
+    }
+    for (u32 i = 0; i < info.alpha_size; i++)
{ + if (i == info.alpha_remap[TOP]) { + continue; + } + auto &mask = masks[i]; + assert(sizeof(mask) == sizeof(m128)); + mask.fill(0); + + for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { + dstate_id_t raw_id = raw_ids[sheng_id]; + dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); + if (next_id == DEAD_STATE) { + next_id = sheng_end - 1; + } else if (next_id < sheng_end) { + next_id--; + } + DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); + mask[sheng_id] = verify_u8(next_id); + } + } + for (u32 i = 0; i < N_CHARS; i++) { + assert(info.alpha_remap[i] != info.alpha_remap[TOP]); + memcpy((u8 *)&m->sheng_masks[i], + (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); + } + m->sheng_end = sheng_end; + m->sheng_accel_limit = sheng_end - 1; + + for (dstate_id_t s : raw_ids) { + if (contains(accel_escape_info, s)) { + LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); + } + } +} + +static +void populateBasicInfo(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCSHENG_NFA_8; + } else { + nfa->type = MCSHENG_NFA_16; + } + + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 1 : 0; + + if (single) { + m->flags |= MCSHENG_FLAG_SINGLE; + } +} + +static +size_t calcShermanRegionSize(const dfa_info &info) { + size_t rv = 0; + + for (size_t i = 0; i < info.size(); i++) { + if (info.is_sherman(i)) { + rv += SHERMAN_FIXED_SIZE; + } + } + + return ROUNDUP_16(rv); +} + +static +void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, + const vector &reports, const vector &reports_eod, + const vector &reportOffsets) { + const dstate &raw_state = info.states[i]; + aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; + aux->accept_eod = raw_state.reports_eod.empty() ? 0 + : reportOffsets[reports_eod[i]]; + aux->top = info.implId(i ? 
raw_state.next[info.alpha_remap[TOP]] + : info.raw.start_floating); +} + +/* returns false on error */ +static +bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end, + dstate_id_t *sherman_base) { + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector norm; + vector sherm; + vector norm_sheng_succ; + vector sherm_sheng_succ; + + if (info.size() > (1 << 16)) { + DEBUG_PRINTF("too many states\n"); + *sherman_base = 0; + return false; + } + + for (u32 i = 1; i < info.size(); i++) { + if (info.is_sheng(i)) { + continue; /* sheng impl ids have already been allocated */ + } if (info.is_sherman(i)) { + if (info.is_sheng_succ(i)) { + sherm_sheng_succ.push_back(i); + } else { + sherm.push_back(i); + } + } else { + if (info.is_sheng_succ(i)) { + norm_sheng_succ.push_back(i); + } else { + norm.push_back(i); + } + } + } + + dstate_id_t next_norm = sheng_end; + for (dstate_id_t s : norm_sheng_succ) { + info.states[s].impl_id = next_norm++; + } + if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) { + /* we need to give sheng_succs ids which fit into a u8 -- demote these + * to normal states */ + for (dstate_id_t s : sherm_sheng_succ) { + info.states[s].impl_id = next_norm++; + info.extra[s].shermanState = false; + } + sherm_sheng_succ.clear(); + } + for (dstate_id_t s : norm) { + info.states[s].impl_id = next_norm++; + } + + *sherman_base = next_norm; + dstate_id_t next_sherman = next_norm; + + for (dstate_id_t s : sherm_sheng_succ) { + info.states[s].impl_id = next_sherman++; + } + + for (dstate_id_t s : sherm) { + info.states[s].impl_id = next_sherman++; + } + + /* Check to see if we haven't over allocated our states */ + DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, + (dstate_id_t)(next_sherman & STATE_MASK)); + return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); +} + +typedef RdfaGraph::vertex_descriptor RdfaVertex; + +static +bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info, + const flat_set &sheng_states) { + u32 exit_count = 0; + + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + for (u32 i = 0; i != info.alpha_size; i++) { + if (i == info.alpha_remap[TOP]) { + continue; + } + dstate_id_t next = info.states[s].next[i]; + if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) { + continue; + } + exit_count++; + info.extra[next].sheng_succ = true; + } + } + + if (exit_count + sheng_states.size() < UINT8_MAX) { + return true; + } else { + DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count); + return false; + } +} + +static +CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { + CharReach rv; + for (u32 i = 0; i < info.impl_alpha_size; i++) { + if (info.raw.states[u].next[i] == v) { + assert(info.rev_alpha[i].any()); + rv |= info.rev_alpha[i]; + } + } + assert(rv.any()); + return rv; +} + +#define MAX_SHENG_STATES 16 +#define MAX_SHENG_LEAKINESS 0.05 + +/** + * Returns the proportion of strings of length 'depth' which will leave the + * sheng region when starting at state 'u'. 
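+ *
+ * For example (hypothetical numbers): if half of the byte values leave the
+ * region immediately from 'u', they contribute 0.5 to the result; an
+ * in-region successor reached on a quarter of the byte values contributes
+ * 0.25 multiplied by that successor's own leakiness at depth - 1.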
+ */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set &sheng_states, RdfaVertex u, + u32 depth, + unordered_map, double> &cache) { + double rv = 0; + if (contains(cache, make_pair(u, depth))) { + return cache[make_pair(u, depth)]; + } + for (RdfaVertex v : adjacent_vertices_range(u, g)) { + if (g[v].index == DEAD_STATE) { + continue; + } + double width = get_edge_reach(g[u].index, g[v].index, info).count(); + width /= N_CHARS; + + double weight; + if (!contains(sheng_states, v)) { + weight = 1; + } else if (depth > 1) { + weight = leakiness(g, info, sheng_states, v, depth - 1, cache); + } else { + continue; /* weight = 0 */ + } + rv += width * weight; + } + + cache[make_pair(u, depth)] = rv; + DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv); + return rv; +} + +/** + * Returns the proportion of 8 byte strings which will leave the sheng region + * when starting at state 'u'. + */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set &sheng_states, RdfaVertex u) { + unordered_map, double> cache; + double rv = leakiness(g, info, sheng_states, u, 8, cache); + return rv; +} + +static +dstate_id_t find_sheng_states(dfa_info &info, + map &accel_escape_info) { + RdfaGraph g(info.raw); + auto cyclics = find_vertices_in_cycles(g); + + auto base_cyclic = RdfaGraph::null_vertex(); + for (const auto &v : cyclics) { + if (g[v].index == DEAD_STATE) { + continue; + } + DEBUG_PRINTF("considering cyclic %zu\n", g[v].index); + /* get an estimate of stickness of the cyclic: assume any edges from + * states with larger state ids are back edges */ + CharReach est_back_reach; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + if (g[u].index < g[v].index) { + continue; + } + est_back_reach |= get_edge_reach(g[u].index, g[v].index, info); + } + + if (est_back_reach.count() < 30) { + continue; + } + base_cyclic = v; + break; + } + if (!base_cyclic) { + return DEAD_STATE; + } + + flat_set sheng_states; + deque to_consider = { base_cyclic }; + flat_set considered = { DEAD_STATE }; + bool seen_back_edge = false; + while (!to_consider.empty() + && sheng_states.size() < MAX_SHENG_STATES) { + auto v = to_consider.front(); + to_consider.pop_front(); + if (!considered.insert(g[v].index).second) { + continue; + } + + assert(!contains(sheng_states, v)); + + if (generates_callbacks(info.raw.kind) + && !info.states[g[v].index].reports.empty()) { + /* cannot raise callbacks from sheng region */ + continue; + } + + sheng_states.insert(v); + for (const auto &t : adjacent_vertices_range(v, g)) { + if (!contains(considered, g[t].index)) { + to_consider.push_back(t); + } + if (t == base_cyclic) { + seen_back_edge = true; + } + } + } + + /* allocate normal ids */ + dstate_id_t sheng_end = DEAD_STATE + 1; + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (!contains(accel_escape_info, s)) { + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + /* allocate accel ids */ + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (contains(accel_escape_info, s)) { + assert(!info.states[s].impl_id); + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + if (sheng_states.size() < MIN_SHENG_SIZE) { + DEBUG_PRINTF("sheng region too small\n"); + return DEAD_STATE; + } + + if (!seen_back_edge) { + DEBUG_PRINTF("did not include cyclic\n"); + return DEAD_STATE; + } + + double leak = leakiness(g, info, sheng_states, base_cyclic); + if (leak > 
MAX_SHENG_LEAKINESS) {
+        DEBUG_PRINTF("too leaky (%g)\n", leak);
+        return DEAD_STATE;
+    }
+
+    if (!mark_sheng_succs(g, info, sheng_states)) {
+        return DEAD_STATE;
+    }
+
+    /* TODO: ensure sufficiently 'sticky' */
+    /* TODO: check not all states accel */
+    DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
+    return sheng_end;
+}
+
+static
+void fill_in_aux_info(NFA *nfa, const dfa_info &info,
+                      const map<dstate_id_t, AccelScheme> &accel_escape_info,
+                      u32 accel_offset, UNUSED u32 accel_end_offset,
+                      const vector<u32> &reports,
+                      const vector<u32> &reports_eod,
+                      u32 report_base_offset,
+                      const raw_report_info &ri) {
+    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
+
+    vector<u32> reportOffsets;
+
+    ri.fillReportLists(nfa, report_base_offset, reportOffsets);
+
+    for (u32 i = 0; i < info.size(); i++) {
+        u16 impl_id = info.implId(i);
+        mstate_aux *this_aux = getAux(nfa, impl_id);
+
+        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
+        if (contains(accel_escape_info, i)) {
+            this_aux->accel_offset = accel_offset;
+            accel_offset += info.strat.accelSize();
+            assert(accel_offset <= accel_end_offset);
+            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+            info.strat.buildAccel(i, accel_escape_info.at(i),
+                                  (void *)((char *)m + this_aux->accel_offset));
+        }
+    }
+}
+
+static
+u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
+    mstate_aux *aux = getAux(nfa, target_impl_id);
+    u16 flags = 0;
+
+    if (aux->accept) {
+        flags |= ACCEPT_FLAG;
+    }
+
+    if (aux->accel_offset) {
+        flags |= ACCEL_FLAG;
+    }
+
+    return flags;
+}
+
+static
+void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
+                           dstate_id_t sheng_end,
+                           UNUSED dstate_id_t sherman_base) {
+    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));
+
+    u8 alphaShift = info.getAlphaShift();
+    assert(alphaShift <= 8);
+
+    for (size_t i = 0; i < info.size(); i++) {
+        if (!info.is_normal(i)) {
+            assert(info.implId(i) < sheng_end || info.is_sherman(i));
+            continue;
+        }
+
+        assert(info.implId(i) < sherman_base);
+        u16 normal_id = verify_u16(info.implId(i) - sheng_end);
+
+        for (size_t s = 0; s < info.impl_alpha_size; s++) {
+            dstate_id_t raw_succ = info.states[i].next[s];
+            u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];
+
+            entry = info.implId(raw_succ);
+            entry |= get_edge_flags(nfa, entry);
+        }
+    }
+}
+
+#define MAX_SHERMAN_LIST_LEN 8
+
+static
+void addIfEarlier(set<dstate_id_t> &dest, dstate_id_t candidate,
+                  dstate_id_t max) {
+    if (candidate < max) {
+        dest.insert(candidate);
+    }
+}
+
+static
+void addSuccessors(set<dstate_id_t> &dest, const dstate &source,
+                   u16 alphasize, dstate_id_t curr_id) {
+    for (symbol_t s = 0; s < alphasize; s++) {
+        addIfEarlier(dest, source.next[s], curr_id);
+    }
+}
+
+#define MAX_SHERMAN_SELF_LOOP 20
+
+static
+void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
+                       bool any_cyclic_near_anchored_state, const Grey &grey) {
+    if (!grey.allowShermanStates) {
+        return;
+    }
+
+    const u16 width = sizeof(u16);
+    const u16 alphasize = info.impl_alpha_size;
+
+    if (info.raw.start_anchored != DEAD_STATE
+        && any_cyclic_near_anchored_state
+        && curr_id < alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed;
+         * depends on the fact that state numbers are currently assigned in
+         * bfs order */
+        DEBUG_PRINTF("%hu is banned\n", curr_id);
+        return;
+    }
+
+    if (info.raw.start_floating != DEAD_STATE
+        && curr_id >= info.raw.start_floating
+        && curr_id < info.raw.start_floating + alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed;
+         * depends on the fact that state numbers are currently assigned in
+         * bfs order */
+        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
+        return;
+    }
+
+    const u16 full_state_size = width * alphasize;
+    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
+                                 (full_state_size - 2)/(width + 1));
+    u16 best_score = 0;
+    dstate_id_t best_daddy = 0;
+    dstate &currState = info.states[curr_id];
+
+    set<dstate_id_t> hinted; /* set of states to search for a better daddy */
+    addIfEarlier(hinted, 0, curr_id);
+    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
+    addIfEarlier(hinted, info.raw.start_floating, curr_id);
+
+    dstate_id_t mydaddy = currState.daddy;
+    if (mydaddy) {
+        addIfEarlier(hinted, mydaddy, curr_id);
+        addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id);
+        dstate_id_t mygranddaddy = info.states[mydaddy].daddy;
+        if (mygranddaddy) {
+            addIfEarlier(hinted, mygranddaddy, curr_id);
+            addSuccessors(hinted, info.states[mygranddaddy], alphasize,
+                          curr_id);
+        }
+    }
+
+    for (const dstate_id_t &donor : hinted) {
+        assert(donor < curr_id);
+        u32 score = 0;
+
+        if (!info.is_normal(donor)) {
+            continue;
+        }
+
+        const dstate &donorState = info.states[donor];
+        for (symbol_t s = 0; s < alphasize; s++) {
+            if (currState.next[s] == donorState.next[s]) {
+                score++;
+            }
+        }
+
+        /* prefer lower ids to provide some stability amongst potential
+         * siblings */
+        if (score > best_score || (score == best_score && donor < best_daddy)) {
+            best_daddy = donor;
+            best_score = score;
+
+            if (score == alphasize) {
+                break;
+            }
+        }
+    }
+
+    currState.daddy = best_daddy;
+    info.extra[curr_id].daddytaken = best_score;
+    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
+                 best_score, alphasize);
+
+    if (best_daddy == DEAD_STATE) {
+        return; /* No good daddy */
+    }
+
+    if (best_score + max_list_len < alphasize) {
+        return; /* too many transitions differ from the daddy to fit in a
+                 * sherman list
*/ + } + + assert(info.is_normal(currState.daddy)); + + u32 self_loop_width = 0; + const dstate curr_raw = info.states[curr_id]; + for (unsigned i = 0; i < N_CHARS; i++) { + if (curr_raw.next[info.alpha_remap[i]] == curr_id) { + self_loop_width++; + } + } + + if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { + DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, + self_loop_width); + return; + } + + if (info.is_sheng(curr_id)) { + return; + } + + DEBUG_PRINTF("%hu is sherman\n", curr_id); + info.extra[curr_id].shermanState = true; +} + +static +bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { + symbol_t alphasize = raw.getImplAlphaSize(); + for (symbol_t s = 0; s < alphasize; s++) { + dstate_id_t succ_id = raw.states[root].next[s]; + if (succ_id == DEAD_STATE) { + continue; + } + + const dstate &succ = raw.states[succ_id]; + for (symbol_t t = 0; t < alphasize; t++) { + if (succ.next[t] == root || succ.next[t] == succ_id) { + return true; + } + } + } + return false; +} + +static +void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = (char *)nfa; + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + char *sherman_table = nfa_base + m->sherman_offset; + + assert(ISALIGNED_16(sherman_table)); + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_sherman(i)) { + continue; + } + u16 fs = verify_u16(info.implId(i)); + DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); + + assert(fs >= sherman_limit); + + char *curr_sherman_entry + = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; + assert(curr_sherman_entry <= nfa_base + m->length); + + u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); + assert(len <= 9); + dstate_id_t d = info.states[i].daddy; + + *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; + *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; + *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); + u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + *(chars++) = (u8)s; + } + } + + u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, + info.implId(d), + info.implId(info.states[i].next[s])); + u16 entry_val = info.implId(info.states[i].next[s]); + entry_val |= get_edge_flags(nfa, entry_val); + unaligned_store_u16((u8 *)states++, entry_val); + } + } + } +} + +static +aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info, + const Grey &grey) { + DEBUG_PRINTF("building mcsheng 16\n"); + + vector reports; /* index in ri for the appropriate report list */ + vector reports_eod; /* as above */ + ReportID arb; + u8 single; + + assert(info.getAlphaShift() <= 8); + + u16 total_daddy = 0; + for (u32 i = 0; i < info.size(); i++) { + find_better_daddy(info, i, + is_cyclic_near(info.raw, info.raw.start_anchored), + grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); + + u16 sherman_limit; + if (!allocateImplId16(info, sheng_end, &sherman_limit)) { + DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", + info.size()); + return nullptr; + } + u16 
count_real_states = sherman_limit - sheng_end; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) + * count_real_states; + + size_t aux_size = sizeof(mstate_aux) * info.size(); + + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); + size_t sherman_size = calcShermanRegionSize(info); + + size_t total_size = sherman_offset + sherman_size; + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + + populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + /* copy in the mc header information */ + m->sherman_offset = sherman_offset; + m->sherman_end = total_size; + m->sherman_limit = sherman_limit; + + DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, + count_real_states, info.size()); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + sherman_offset - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit); + + fill_in_sherman(nfa.get(), info, sherman_limit); + + return nfa; +} + +static +void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end) { + u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + assert(!info.is_sherman(i)); + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end); + continue; + } + u8 normal_id = verify_u8(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + succ_table[((size_t)normal_id << alphaShift) + s] + = info.implId(raw_succ); + } + } +} + +static +void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info, + u16 *accel_limit, u16 *accept_limit) { + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector norm; + vector accel; + vector accept; + + assert(info.size() <= (1 << 8)); + + for (u32 i = 1; i < info.size(); i++) { + if (info.is_sheng(i)) { + continue; /* already allocated */ + } else if (!info.states[i].reports.empty()) { + accept.push_back(i); + } else if (contains(accel_escape_info, i)) { + accel.push_back(i); + } else { + norm.push_back(i); + } + } + + u32 j = sheng_end; + for (const dstate_id_t &s : norm) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accel_limit = j; + for (const dstate_id_t &s : accel) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accept_limit = j; + for (const dstate_id_t &s : accept) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } +} + +static +aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, + const map &accel_escape_info) { + DEBUG_PRINTF("building mcsheng 8\n"); + + 
vector reports; + vector reports_eod; + ReportID arb; + u8 single; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t normal_count = info.size() - sheng_end; + + size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; + size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t total_size = accel_offset + accel_size; + + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset); + DEBUG_PRINTF("total_size %zu\n", total_size); + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + + allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); + + populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + total_size - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_8(nfa.get(), info, sheng_end); + + DEBUG_PRINTF("rl size %zu\n", ri->size()); + + return nfa; +} + +aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm) { + if (!cc.grey.allowMcSheng) { + return nullptr; + } + + mcclellan_build_strat mbs(raw, rm); + dfa_info info(mbs); + bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + + bool has_eod_reports = raw.hasEodReports(); + + map accel_escape_info + = info.strat.getAccelInfo(cc.grey); + + dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info); + if (sheng_end <= DEAD_STATE + 1) { + return nullptr; + } + + aligned_unique_ptr nfa; + if (!using8bit) { + nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); + } else { + nfa = mcshengCompile8(info, sheng_end, accel_escape_info); + } + + if (!nfa) { + return nfa; + } + + if (has_eod_reports) { + nfa->flags |= NFA_ACCEPTS_EOD; + } + + DEBUG_PRINTF("compile done\n"); + return nfa; +} + +bool has_accel_mcsheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ +} + +} // namespace ue2 diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h new file mode 100644 index 00000000..d1ae1e32 --- /dev/null +++ b/src/nfa/mcsheng_compile.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENGCOMPILE_H +#define MCSHENGCOMPILE_H + +#include "accel_dfa_build_strat.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/ue2_containers.h" + +#include + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; + +ue2::aligned_unique_ptr +mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm); + +bool has_accel_mcsheng(const NFA *nfa); + +} // namespace ue2 + +#endif diff --git a/src/nfa/mcsheng_data.c b/src/nfa/mcsheng_data.c new file mode 100644 index 00000000..eaf3cbbb --- /dev/null +++ b/src/nfa/mcsheng_data.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcsheng_internal.h" + +/* This table is in a separate translation unit from mcsheng.c as we want to + * prevent the compiler from seeing these constants. We have load resources + * to spare at runtime, so loading the masks there is not a problem. */ +const u64a mcsheng_pext_mask[8] = { + 0, /* dummy */ + 0x000000000000ff0f, + 0x0000000000ff000f, + 0x00000000ff00000f, + 0x000000ff0000000f, + 0x0000ff000000000f, + 0x00ff00000000000f, + 0xff0000000000000f, +}; diff --git a/src/nfa/mcsheng_dump.cpp b/src/nfa/mcsheng_dump.cpp new file mode 100644 index 00000000..f5c058af --- /dev/null +++ b/src/nfa/mcsheng_dump.cpp @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "mcsheng_dump.h" + +#include "accel.h" +#include "accel_dump.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" +#include "mcsheng_internal.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/charreach.h" +#include "util/dump_charclass.h" +#include "util/dump_util.h" +#include "util/unaligned.h" + +#include +#include +#include +#include +#include + +#ifndef DUMP_SUPPORT +#error No dump support!
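The mcsheng_pext_mask table above feeds a PEXT (parallel bit extract) at scan time: each mask pairs the low four bits of a u64a of transition data (enough for a sheng state id) with a copy of one of bytes 1..7. A minimal sketch of that bit-gather, using a portable stand-in for PEXT (mask value and sample word are illustrative, not taken from the patch):

    #include <cstdint>
    #include <cstdio>

    // Portable equivalent of PEXT: gather the bits of v selected by mask
    // into the low bits of the result, keeping their order.
    static uint64_t pext64(uint64_t v, uint64_t mask) {
        uint64_t out = 0;
        for (uint64_t bit = 1; mask; mask &= mask - 1, bit <<= 1) {
            if (v & (mask & -mask)) { // lowest set bit of mask
                out |= bit;
            }
        }
        return out;
    }

    int main() {
        // mcsheng_pext_mask[2] pairs the low nibble with byte 2.
        uint64_t word = 0x00000000005a0009ULL; // nibble 0x9, byte 2 = 0x5a
        printf("0x%llx\n", (unsigned long long)
               pext64(word, 0x0000000000ff000fULL)); // prints 0x5a9
        return 0;
    }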
+#endif + +using namespace std; + +namespace ue2 { + +static +const mstate_aux *getAux(const NFA *n, dstate_id_t i) { + auto *m = (const mcsheng *)getImplNfa(n); + auto *aux_base = (const mstate_aux *)((const char *)n + m->aux_offset); + + const mstate_aux *aux = aux_base + i; + + assert((const char *)aux < (const char *)n + m->length); + return aux; +} + +static +void next_states(const NFA *n, u16 s, u16 *t) { + const mcsheng *m = (const mcsheng *)getImplNfa(n); + const mstate_aux *aux = getAux(n, s); + const u32 as = m->alphaShift; + assert(s != DEAD_STATE); + + if (s < m->sheng_end) { + for (u16 c = 0; c < N_CHARS; c++) { + u8 sheng_s = s - 1; + auto trans_for_c = (const char *)&m->sheng_masks[c]; + assert(sheng_s < sizeof(m128)); + u8 raw_succ = trans_for_c[sheng_s]; + if (raw_succ == m->sheng_end - 1) { + t[c] = DEAD_STATE; + } else if (raw_succ < m->sheng_end) { + t[c] = raw_succ + 1; + } else { + t[c] = raw_succ; + } + } + } else if (n->type == MCSHENG_NFA_8) { + const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcsheng)); + for (u16 c = 0; c < N_CHARS; c++) { + u32 normal_id = s - m->sheng_end; + t[c] = succ_table[(normal_id << as) + m->remap[c]]; + } + } else { + u16 base_s = s; + const char *winfo_base = (const char *)n + m->sherman_offset; + const char *state_base + = winfo_base + SHERMAN_FIXED_SIZE * (s - m->sherman_limit); + + if (s >= m->sherman_limit) { + base_s = unaligned_load_u16(state_base + SHERMAN_DADDY_OFFSET); + assert(base_s >= m->sheng_end); + } + + const u16 *succ_table = (const u16 *)((const char *)m + + sizeof(mcsheng)); + for (u16 c = 0; c < N_CHARS; c++) { + u32 normal_id = base_s - m->sheng_end; + t[c] = succ_table[(normal_id << as) + m->remap[c]]; + } + + if (s >= m->sherman_limit) { + UNUSED char type = *(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base); + const char *chars = state_base + SHERMAN_CHARS_OFFSET; + const u16 *states = (const u16 *)(state_base + + SHERMAN_STATES_OFFSET(len)); + + for (u8 i = 0; i < len; i++) { + for (u16 c = 0; c < N_CHARS; c++) { + if (m->remap[c] == chars[i]) { + t[c] = unaligned_load_u16((const u8*)&states[i]); + } + } + } + } + + for (u16 c = 0; c < N_CHARS; c++) { + t[c] &= STATE_MASK; + } + + } + + t[TOP] = aux->top & STATE_MASK; +} + +static +void describeEdge(FILE *f, const mcsheng *m, const u16 *t, u16 i) { + for (u16 s = 0; s < N_CHARS; s++) { + if (!t[s]) { + continue; + } + + u16 ss; + for (ss = 0; ss < s; ss++) { + if (t[s] == t[ss]) { + break; + } + } + + if (ss != s) { + continue; + } + + CharReach reach; + for (ss = s; ss < 256; ss++) { + if (t[s] == t[ss]) { + reach.set(ss); + } + } + + fprintf(f, "%u -> %u [ ", i, t[s]); + if (i < m->sheng_end && t[s] < m->sheng_end) { + fprintf(f, "color = red, fontcolor = red "); + } + fprintf(f, "label = \""); + describeClass(f, reach, 5, CC_OUT_DOT); + + fprintf(f, "\" ];\n"); + } +} + +static +void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel) { + switch(accel->accel_type) { + case ACCEL_NONE: + break; + case ACCEL_VERM: + case ACCEL_VERM_NOCASE: + case ACCEL_DVERM: + case ACCEL_DVERM_NOCASE: + fprintf(f, "%u [ color = forestgreen style=diagonals];\n", i); + break; + case ACCEL_SHUFTI: + case ACCEL_DSHUFTI: + case ACCEL_TRUFFLE: + fprintf(f, "%u [ color = darkgreen style=diagonals ];\n", i); + break; + default: + fprintf(f, "%u [ color = yellow style=diagonals ];\n", i); + break; + } +} + +static +void describeNode(const NFA *n, const mcsheng *m, u16 i, FILE *f) { + const 
mstate_aux *aux = getAux(n, i); + + bool isSherman = m->sherman_limit && i >= m->sherman_limit; + + fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, " + "label = \"%u%s\" ]; \n", i, i, isSherman ? "w":""); + + if (aux->accel_offset) { + dumpAccelDot(f, i, (const union AccelAux *) + ((const char *)m + aux->accel_offset)); + } + + if (i && i < m->sheng_end) { + fprintf(f, "%u [color = red, fontcolor = red]; \n", i); + } + + if (aux->accept_eod) { + fprintf(f, "%u [ color = darkorchid ];\n", i); + } + + if (aux->accept) { + fprintf(f, "%u [ shape = doublecircle ];\n", i); + } + + if (aux->top && aux->top != i) { + fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i, + aux->top); + } + + if (i == m->start_anchored) { + fprintf(f, "STARTA -> %u [color = blue ]\n", i); + } + + if (i == m->start_floating) { + fprintf(f, "STARTF -> %u [color = red ]\n", i); + } + + if (isSherman) { + const char *winfo_base = (const char *)n + m->sherman_offset; + const char *state_base + = winfo_base + SHERMAN_FIXED_SIZE * (i - m->sherman_limit); + assert(state_base < (const char *)m + m->length - sizeof(NFA)); + UNUSED u8 type = *(const u8 *)(state_base + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i); + u16 daddy = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET); + if (daddy) { + fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n", + i, daddy); + } + } + + if (i && i < m->sheng_end) { + fprintf(f, "subgraph cluster_sheng { %u } \n", i); + } + +} + +static +void dumpDotPreambleDfa(FILE *f) { + dumpDotPreamble(f); + + // DFA specific additions. + fprintf(f, "STARTF [style=invis];\n"); + fprintf(f, "STARTA [style=invis];\n"); + fprintf(f, "0 [style=invis];\n"); + fprintf(f, "subgraph cluster_sheng { style = dashed }\n"); +} + +static +void dump_dot_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dump_dot_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < m->state_count; i++) { + describeNode(nfa, m, i, f); + + u16 t[ALPHABET_SIZE]; + + next_states(nfa, i, t); + + describeEdge(f, m, t, i); + } + + fprintf(f, "}\n"); +} + +static +void dumpAccelMasks(FILE *f, const mcsheng *m, const mstate_aux *aux) { + fprintf(f, "\n"); + fprintf(f, "Acceleration\n"); + fprintf(f, "------------\n"); + + for (u16 i = 0; i < m->state_count; i++) { + if (!aux[i].accel_offset) { + continue; + } + + auto accel = (const AccelAux *)((const char *)m + aux[i].accel_offset); + fprintf(f, "%05hu ", i); + dumpAccelInfo(f, *accel); + } +} + +static +void describeAlphabet(FILE *f, const mcsheng *m) { + map rev; + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].clear(); + } + + for (u16 i = 0; i < N_CHARS; i++) { + rev[m->remap[i]].set(i); + } + + map::const_iterator it; + fprintf(f, "\nAlphabet\n"); + for (it = rev.begin(); it != rev.end(); ++it) { + fprintf(f, "%3hhu: ", it->first); + describeClass(f, it->second, 10240, CC_OUT_TEXT); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} + +static +void dumpCommonHeader(FILE *f, const mcsheng *m) { + fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report, + m->state_count, m->length); + fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored, 
+ m->start_floating); + fprintf(f, "single accept: %d, has_accel: %d\n", + !!(int)(m->flags & MCSHENG_FLAG_SINGLE), m->has_accel); + fprintf(f, "sheng_end: %hu\n", m->sheng_end); + fprintf(f, "sheng_accel_limit: %hu\n", m->sheng_accel_limit); +} + +static +void dump_text_16(const NFA *nfa, FILE *f) { + auto *m = (const mcsheng *)getImplNfa(nfa); + auto *aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 16\n"); + dumpCommonHeader(f, m); + fprintf(f, "sherman_limit: %d, sherman_end: %d\n", (int)m->sherman_limit, + (int)m->sherman_end); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +static +void dump_text_8(const NFA *nfa, FILE *f) { + auto m = (const mcsheng *)getImplNfa(nfa); + auto aux = (const mstate_aux *)((const char *)nfa + m->aux_offset); + + fprintf(f, "mcsheng 8\n"); + dumpCommonHeader(f, m); + fprintf(f, "accel_limit: %hu, accept_limit: %hu\n", m->accel_limit_8, + m->accept_limit_8); + fprintf(f, "\n"); + + describeAlphabet(f, m); + dumpAccelMasks(f, m, aux); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); +} + +void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_16); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_16(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_16(nfa, f); + fclose(f); +} + +void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) { + assert(nfa->type == MCSHENG_NFA_8); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + dump_text_8(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + dump_dot_8(nfa, f); + fclose(f); +} + +} // namespace ue2 diff --git a/src/nfa/mcsheng_dump.h b/src/nfa/mcsheng_dump.h new file mode 100644 index 00000000..1b699367 --- /dev/null +++ b/src/nfa/mcsheng_dump.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
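A hypothetical driver for the dump entry points defined above and declared in the header below (DUMP_SUPPORT builds only; the wrapper name and output prefix are invented for illustration):

    #include "mcsheng_dump.h"
    #include "nfa_internal.h"

    #include <string>

    // Writes <base>.txt and <base>.dot for either mcsheng variant.
    void dumpMcSheng(const struct NFA *nfa, const std::string &base) {
        if (nfa->type == MCSHENG_NFA_16) {
            ue2::nfaExecMcSheng16_dump(nfa, base);
        } else {
            ue2::nfaExecMcSheng8_dump(nfa, base);
        }
    }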
+ */ + +#ifndef MCSHENG_DUMP_H +#define MCSHENG_DUMP_H + +#ifdef DUMP_SUPPORT + +#include "rdfa.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +void nfaExecMcSheng8_dump(const struct NFA *nfa, const std::string &base); +void nfaExecMcSheng16_dump(const struct NFA *nfa, const std::string &base); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif // MCSHENG_DUMP_H diff --git a/src/nfa/mcsheng_internal.h b/src/nfa/mcsheng_internal.h new file mode 100644 index 00000000..81a658e0 --- /dev/null +++ b/src/nfa/mcsheng_internal.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
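A minimal decoder sketch for the Sherman (exception) state records laid out by the SHERMAN_* offsets defined just below (the record bytes are hypothetical; unaligned little-endian u16s assumed, as in the dump code above):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        // type 1 (SHERMAN_STATE), len 2, daddy state 5, exception chars
        // 'a' and 'b' going to states 7 and 9 (unaligned u16s).
        const uint8_t rec[32] = {1, 2, 5, 0, 'a', 'b', 7, 0, 9, 0};
        uint8_t len = rec[1];                   // SHERMAN_LEN_OFFSET
        uint16_t daddy;
        std::memcpy(&daddy, rec + 2, 2);        // SHERMAN_DADDY_OFFSET
        const uint8_t *chars = rec + 4;         // SHERMAN_CHARS_OFFSET
        const uint8_t *states = rec + 4 + len;  // SHERMAN_STATES_OFFSET(len)
        for (int i = 0; i < len; i++) {
            uint16_t succ;
            std::memcpy(&succ, states + 2 * i, 2);
            printf("'%c' -> %u (otherwise daddy %u)\n", chars[i],
                   (unsigned)succ, (unsigned)daddy);
        }
        return 0;
    }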
+ */ + +#ifndef MCSHENG_INTERNAL_H +#define MCSHENG_INTERNAL_H + +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/simd_types.h" + +#define ACCEPT_FLAG 0x8000 +#define ACCEL_FLAG 0x4000 +#define STATE_MASK 0x3fff + +#define SHERMAN_STATE 1 + +#define SHERMAN_TYPE_OFFSET 0 +#define SHERMAN_FIXED_SIZE 32 + +#define SHERMAN_LEN_OFFSET 1 +#define SHERMAN_DADDY_OFFSET 2 +#define SHERMAN_CHARS_OFFSET 4 +#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct mstate_aux { + u32 accept; + u32 accept_eod; + u16 top; + u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */ +}; + +#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */ + +struct mcsheng { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ + u32 sherman_offset; /**< offset of the sherman state_info structures + * relative to the start of the nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ + u16 sheng_end; /**< first non-sheng state */ + u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of + * internal sheng ids */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ + u8 alphaShift; + u8 flags; + u8 has_accel; /**< 1 iff there are any accel plans */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ + u32 accel_offset; /**< offset of the accel structures from start of NFA */ + m128 sheng_masks[N_CHARS]; +}; + +/* pext masks for the runtime to extract copies of bytes 1..7 of a u64a + * holding the transition data.
*/ +extern const u64a mcsheng_pext_mask[8]; + +#endif diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index c6c8cb88..552754d6 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -825,21 +825,21 @@ void mpvStoreState(const struct NFA *n, char *state, } } -char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q, - UNUSED s64a loc) { +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + UNUSED s64a loc) { void *dest = q->streamState; const void *src = q->state; mpvStoreState(nfa, dest, src); return 0; } -char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src, - UNUSED u64a offset, UNUSED u8 key) { +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + UNUSED u64a offset, UNUSED u8 key) { mpvLoadState(dest, nfa, src); return 0; } -char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { const struct mpv *m = getImplNfa(n); u64a offset = q_cur_offset(q); struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; @@ -855,7 +855,7 @@ char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) { +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { struct mpv_decomp_state *out = (void *)q->state; const struct mpv *m = getImplNfa(n); assert(sizeof(*out) <= n->scratchStateSize); @@ -880,8 +880,8 @@ char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, - void *state, UNUSED u8 key) { +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, UNUSED u8 key) { const struct mpv *m = getImplNfa(n); memset(state, 0, m->active_offset); /* active_offset marks end of comp * counters */ @@ -896,7 +896,7 @@ char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, } static really_inline -char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; size_t length = q->length; @@ -1021,18 +1021,18 @@ char nfaExecMpv0_Q_i(const struct NFA *n, struct mq *q, s64a end) { return alive; } -char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("_Q %lld\n", end); - return nfaExecMpv0_Q_i(n, q, end); + return nfaExecMpv_Q_i(n, q, end); } -s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { +s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); #ifdef DEBUG debugQueue(q); #endif - assert(nfa->type == MPV_NFA_0); + assert(nfa->type == MPV_NFA); assert(q && q->context && q->state); assert(end >= 0); assert(q->cur < q->end); @@ -1058,7 +1058,7 @@ s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff * is filled in */ - char rv = nfaExecMpv0_Q_i(nfa, q, end); + char rv = nfaExecMpv_Q_i(nfa, q, end); assert(!q->report_current); DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h index a3f90719..3780728d 100644 --- a/src/nfa/mpv.h +++ b/src/nfa/mpv.h @@ -34,27 +34,27 @@ struct mq; struct NFA; -char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a 
end); -char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMpv0_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src, - u64a offset, u8 key); +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + u64a offset, u8 key); -#define nfaExecMpv0_testEOD NFA_API_NO_IMPL -#define nfaExecMpv0_inAccept NFA_API_NO_IMPL -#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL -#define nfaExecMpv0_QR NFA_API_NO_IMPL -#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ -#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL -#define nfaExecMpv0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecMpv_testEOD NFA_API_NO_IMPL +#define nfaExecMpv_inAccept NFA_API_NO_IMPL +#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL +#define nfaExecMpv_QR NFA_API_NO_IMPL +#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ +#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL +#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL /** * return 0 if the mpv dies, otherwise returns the location of the next possible * match (given the currently known events). */ -s64a nfaExecMpv0_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); +s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); #endif diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index da21d7cf..9a8a4067 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -36,6 +36,7 @@ #include "ue2common.h" #include "util/compare.h" #include "util/dump_mask.h" +#include "util/dump_util.h" #include #include @@ -46,11 +47,11 @@ #error No dump support! 
#endif -namespace ue2 { +/* Note: No dot files for MPV */ -void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file, - UNUSED const std::string &base) { -} +using namespace std; + +namespace ue2 { static really_inline u32 largest_puff_repeat(const mpv *m, const mpv_kilopuff *kp) { @@ -128,9 +129,11 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) { fprintf(f, "\n"); } -void nfaExecMpv0_dumpText(const NFA *nfa, FILE *f) { +void nfaExecMpv_dump(const NFA *nfa, const string &base) { const mpv *m = (const mpv *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + fprintf(f, "Puff the Magic Engines\n"); fprintf(f, "\n"); fprintf(f, "%u puffettes in %u kilopuffs\n", m->puffette_count, @@ -151,6 +154,7 @@ void nfaExecMpv0_dumpText(const NFA *nfa, FILE *f) { } dumpTextReverse(nfa, f); + fclose(f); } } // namespace ue2 diff --git a/src/nfa/mpv_dump.h b/src/nfa/mpv_dump.h index 23910dce..e587619e 100644 --- a/src/nfa/mpv_dump.h +++ b/src/nfa/mpv_dump.h @@ -31,16 +31,13 @@ #if defined(DUMP_SUPPORT) -#include #include struct NFA; namespace ue2 { -void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecMpv_dump(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index b024b530..87fb462e 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -34,7 +34,7 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "util/alloc.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/verify_types.h" @@ -175,12 +175,13 @@ void writeKiloPuff(const map>::const_iterator &it, size_t set = reach.find_first(); assert(set != CharReach::npos); kp->u.verm.c = (char)set; - } else if (shuftiBuildMasks(~reach, &kp->u.shuf.mask_lo, - &kp->u.shuf.mask_hi) != -1) { + } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, + (u8 *)&kp->u.shuf.mask_hi) != -1) { kp->type = MPV_SHUFTI; } else { kp->type = MPV_TRUFFLE; - truffleBuildMasks(~reach, &kp->u.truffle.mask1, &kp->u.truffle.mask2); + truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1, + (u8 *)&kp->u.truffle.mask2); } kp->count = verify_u32(puffs.size()); @@ -207,7 +208,7 @@ void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter, nfa->length = len; nfa->nPositions = max_counter - 1; - nfa->type = MPV_NFA_0; + nfa->type = MPV_NFA; nfa->streamStateSize = streamStateSize; assert(16 >= sizeof(mpv_decomp_kilo)); nfa->scratchStateSize = scratchStateSize; diff --git a/src/nfa/multishufti.h b/src/nfa/multishufti.h index bcccf607..af578483 100644 --- a/src/nfa/multishufti.h +++ b/src/nfa/multishufti.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #define MULTISHUFTI_H #include "ue2common.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #ifdef __cplusplus extern "C" diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index c67103b3..f4b7552e 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -41,39 +41,43 @@ #include "lbr.h" #include "limex.h" #include "mcclellan.h" +#include "mcsheng.h" #include "mpv.h" #include "sheng.h" #include "tamarama.h" 
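The hunk below reworks DISPATCH_CASE so the case label and the function-name stem are passed as single tokens instead of being pasted together from family/subtype pairs. A toy model of the new expansion (all names invented for illustration):

    #include <cstdio>

    enum ToyType { TOY_A, TOY_B };

    static int execToyA_run(int x) { return x + 1; }
    static int execToyB_run(int x) { return x * 2; }

    // As in the patch: paste the prefix, the stem and the call in one go, so
    // DISPATCH_CASE(TOY_A, ToyA, _run(x)) becomes
    // "case TOY_A: return execToyA_run(x); break;".
    #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call)                    \
        case dc_ltype:                                                         \
            return exec##dc_ftype##dc_func_call;                               \
            break

    static int dispatch(ToyType t, int x) {
        switch (t) {
            DISPATCH_CASE(TOY_A, ToyA, _run(x));
            DISPATCH_CASE(TOY_B, ToyB, _run(x));
        }
        return -1;
    }

    int main() {
        printf("%d %d\n", dispatch(TOY_A, 3), dispatch(TOY_B, 3)); // 4 6
        return 0;
    }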
-#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ - case dc_ltype##_NFA_##dc_subtype: \ - return nfaExec##dc_ftype##dc_subtype##dc_func_call; \ +#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ + case dc_ltype: \ + return nfaExec##dc_ftype##dc_func_call; \ break // general framework calls -#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ - switch (nfa->type) { \ - DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 16, dbnt_func); \ - DISPATCH_CASE(MPV, Mpv, 0, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Dot, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Verm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, NVerm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ - DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ - DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ - DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ - default: \ - assert(0); \ +#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ + switch (nfa->type) { \ + DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ + DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ + DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ + DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ + default: \ + assert(0); \ } char nfaCheckFinalState(const struct NFA *nfa, const char *state, diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 93376b01..3103cd29 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -30,6 +30,7 @@ #include "limex_internal.h" #include "mcclellancompile.h" +#include "mcsheng_compile.h" #include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" @@ -170,17 +171,16 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; #define DO_IF_DUMP_SUPPORT(a) #endif -#define MAKE_LIMEX_TRAITS(mlt_size) \ +#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ template<> struct NFATraits { \ static UNUSED const char *name; \ static const NFACategory category = NFA_LIMEX; \ typedef LimExNFA##mlt_size implNFA_t; \ - typedef u_##mlt_size tableRow_t; \ static const nfa_dispatch_fn has_accel; \ static const nfa_dispatch_fn has_repeats; \ static const nfa_dispatch_fn has_repeats_other_than_firsts; \ static const u32 stateAlign = \ - MAX(alignof(tableRow_t), alignof(RepeatControl)); \ + 
MAX(mlt_align, alignof(RepeatControl)); \ static const bool fast = mlt_size <= 64; \ }; \ const nfa_dispatch_fn NFATraits::has_accel \ @@ -194,16 +194,17 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; const char *NFATraits::name \ = "LimEx "#mlt_size; \ template<> struct getDescription { \ - static string call(const void *ptr) { \ - return getDescriptionLimEx((const NFA *)ptr); \ - } \ + static string call(const void *p) { \ + return getDescriptionLimEx((const NFA *)p); \ + } \ };) -MAKE_LIMEX_TRAITS(32) -MAKE_LIMEX_TRAITS(128) -MAKE_LIMEX_TRAITS(256) -MAKE_LIMEX_TRAITS(384) -MAKE_LIMEX_TRAITS(512) +MAKE_LIMEX_TRAITS(32, alignof(u32)) +MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ +MAKE_LIMEX_TRAITS(128, alignof(m128)) +MAKE_LIMEX_TRAITS(256, alignof(m256)) +MAKE_LIMEX_TRAITS(384, alignof(m384)) +MAKE_LIMEX_TRAITS(512, alignof(m512)) template<> struct NFATraits { UNUSED static const char *name; @@ -269,7 +270,7 @@ const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = d const char *NFATraits::name = "Goughfish 16"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -278,14 +279,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Mega-Puff-Vac"; +const char *NFATraits::name = "Mega-Puff-Vac"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -294,14 +295,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Castle"; +const char *NFATraits::name = "Castle"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -310,14 +311,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = 
dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Lim Bounded Repeat (D)"; +const char *NFATraits::name = "Lim Bounded Repeat (D)"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -326,14 +327,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Lim Bounded Repeat (V)"; +const char *NFATraits::name = "Lim Bounded Repeat (V)"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -342,14 +343,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Lim Bounded Repeat (NV)"; +const char *NFATraits::name = "Lim Bounded Repeat (NV)"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -358,14 +359,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Lim Bounded Repeat (S)"; +const char *NFATraits::name = "Lim Bounded Repeat (S)"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; @@ -374,14 +375,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn 
NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Lim Bounded Repeat (M)"; +const char *NFATraits::name = "Lim Bounded Repeat (M)"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 1; @@ -390,14 +391,14 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = has_accel_sheng; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Sheng"; +const char *NFATraits::name = "Sheng"; #endif -template<> struct NFATraits { +template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 32; @@ -406,11 +407,43 @@ template<> struct NFATraits { static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits::name = "Tamarama"; +const char *NFATraits::name = "Tamarama"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 8"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Shengy McShengFace 16"; #endif } // namespace diff --git a/src/nfa/nfa_dump_api.h b/src/nfa/nfa_dump_api.h index 1054a204..a0c4a9c9 100644 --- a/src/nfa/nfa_dump_api.h +++ b/src/nfa/nfa_dump_api.h @@ -35,7 +35,6 @@ #if defined(DUMP_SUPPORT) -#include #include struct NFA; @@ -43,13 +42,11 @@ struct NFA; namespace ue2 { /** - * \brief Dump (in Graphviz 'dot' format) a 
representation of the NFA into the - * file pointed to by dotFile. + * \brief Dump files representing the engine. All files dumped should begin with + * path/prefix specified by base. Generally a text file and a graphviz (dot) + * file should be produced. */ -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base); - -/** \brief Dump a textual representation of the NFA. */ -void nfaDumpText(const struct NFA *fact, FILE *textFile); +void nfaGenerateDumpFiles(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 388ac003..5607ed27 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -39,6 +39,7 @@ #include "lbr_dump.h" #include "limex.h" #include "mcclellandump.h" +#include "mcsheng_dump.h" #include "mpv_dump.h" #include "shengdump.h" #include "tamarama_dump.h" @@ -49,45 +50,43 @@ namespace ue2 { -#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ - case dc_ltype##_NFA_##dc_subtype: \ - nfaExec##dc_ftype##dc_subtype##dc_func_call; \ +#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ + case dc_ltype: \ + nfaExec##dc_ftype##dc_func_call; \ break // general framework calls -#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ - DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ - switch (nfa->type) { \ - DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ - DISPATCH_CASE(GOUGH, Gough, 16, dbnt_func); \ - DISPATCH_CASE(MPV, Mpv, 0, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Dot, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Verm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, NVerm, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ - DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ - DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ - DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ - DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ - default: \ - assert(0); \ +#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ + DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ + switch (nfa->type) { \ + DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ + DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ + DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ + DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ + default: \ +
assert(0); \ } -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, - const std::string &base) { - DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base)); -} - -void nfaDumpText(const struct NFA *nfa, FILE *txtFile) { - DISPATCH_BY_NFA_TYPE(_dumpText(nfa, txtFile)); +void nfaGenerateDumpFiles(const struct NFA *nfa, const std::string &base) { + DISPATCH_BY_NFA_TYPE(_dump(nfa, base)); } } // namespace ue2 diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 41fee73e..9d280822 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -52,6 +52,7 @@ extern "C" enum NFAEngineType { LIMEX_NFA_32, + LIMEX_NFA_64, LIMEX_NFA_128, LIMEX_NFA_256, LIMEX_NFA_384, @@ -60,15 +61,17 @@ enum NFAEngineType { MCCLELLAN_NFA_16, /**< magic pseudo nfa */ GOUGH_NFA_8, /**< magic pseudo nfa */ GOUGH_NFA_16, /**< magic pseudo nfa */ - MPV_NFA_0, /**< magic pseudo nfa */ - LBR_NFA_Dot, /**< magic pseudo nfa */ - LBR_NFA_Verm, /**< magic pseudo nfa */ - LBR_NFA_NVerm, /**< magic pseudo nfa */ - LBR_NFA_Shuf, /**< magic pseudo nfa */ - LBR_NFA_Truf, /**< magic pseudo nfa */ - CASTLE_NFA_0, /**< magic pseudo nfa */ - SHENG_NFA_0, /**< magic pseudo nfa */ - TAMARAMA_NFA_0, /**< magic nfa container */ + MPV_NFA, /**< magic pseudo nfa */ + LBR_NFA_DOT, /**< magic pseudo nfa */ + LBR_NFA_VERM, /**< magic pseudo nfa */ + LBR_NFA_NVERM, /**< magic pseudo nfa */ + LBR_NFA_SHUF, /**< magic pseudo nfa */ + LBR_NFA_TRUF, /**< magic pseudo nfa */ + CASTLE_NFA, /**< magic pseudo nfa */ + SHENG_NFA, /**< magic pseudo nfa */ + TAMARAMA_NFA, /**< magic nfa container */ + MCSHENG_NFA_8, /**< magic pseudo nfa */ + MCSHENG_NFA_16, /**< magic pseudo nfa */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -142,6 +145,12 @@ static really_inline int isMcClellanType(u8 t) { return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; } +/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid + * DFA. */ +static really_inline int isShengMcClellanType(u8 t) { + return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16; +} + /** \brief True if the given type (from NFA::type) is a Gough DFA. */ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; @@ -149,7 +158,7 @@ static really_inline int isGoughType(u8 t) { /** \brief True if the given type (from NFA::type) is a Sheng DFA. */ static really_inline int isShengType(u8 t) { - return t == SHENG_NFA_0; + return t == SHENG_NFA; } /** @@ -157,13 +166,23 @@ static really_inline int isShengType(u8 t) { * Sheng DFA. */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t) || isShengType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t) + || isShengMcClellanType(t); +} + +static really_inline int isBigDfaType(u8 t) { + return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; +} + +static really_inline int isSmallDfaType(u8 t) { + return isDfaType(t) && !isBigDfaType(t); } /** \brief True if the given type (from NFA::type) is an NFA. */ static really_inline int isNfaType(u8 t) { switch (t) { case LIMEX_NFA_32: + case LIMEX_NFA_64: case LIMEX_NFA_128: case LIMEX_NFA_256: case LIMEX_NFA_384: @@ -178,14 +197,14 @@ static really_inline int isNfaType(u8 t) { /** \brief True if the given type (from NFA::type) is an LBR. 
*/ static really_inline int isLbrType(u8 t) { - return t == LBR_NFA_Dot || t == LBR_NFA_Verm || t == LBR_NFA_NVerm || - t == LBR_NFA_Shuf || t == LBR_NFA_Truf; + return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || + t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; } /** \brief True if the given type (from NFA::type) is a container engine. */ static really_inline int isContainerType(u8 t) { - return t == TAMARAMA_NFA_0; + return t == TAMARAMA_NFA; } static really_inline @@ -200,14 +219,14 @@ int isMultiTopType(u8 t) { /* Use for functions that return an integer. */ #define NFA_API_NO_IMPL(...) \ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ 0; /* return value, for places that need it */ \ }) /* Use for _zombie_status functions. */ #define NFA_API_ZOMBIE_NO_IMPL(...) \ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ NFA_ZOMBIE_NO; \ }) diff --git a/src/nfa/rdfa_graph.cpp b/src/nfa/rdfa_graph.cpp new file mode 100644 index 00000000..2467748b --- /dev/null +++ b/src/nfa/rdfa_graph.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "rdfa_graph.h" + +#include "rdfa.h" +#include "util/container.h" + +#include + +using namespace std; + +namespace ue2 { + +RdfaGraph::RdfaGraph(const raw_dfa &rdfa) { + RdfaGraph &g = *this; + + vector<RdfaGraph::vertex_descriptor> verts; + verts.reserve(rdfa.states.size()); + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + verts.push_back(add_vertex(g)); + assert(g[verts.back()].index == i); + } + + symbol_t symbol_end = rdfa.alpha_size - 1; + + flat_set<dstate_id_t> local_succs; + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + local_succs.clear(); + for (symbol_t s = 0; s < symbol_end; s++) { + dstate_id_t next = rdfa.states[i].next[s]; + if (contains(local_succs, next)) { + continue; + } + DEBUG_PRINTF("%hu->%hu\n", i, next); + add_edge(verts[i], verts[next], g); + local_succs.insert(next); + } + } +} + +} diff --git a/src/nfa/rdfa_graph.h b/src/nfa/rdfa_graph.h new file mode 100644 index 00000000..6d166c2f --- /dev/null +++ b/src/nfa/rdfa_graph.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
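A self-contained toy of the RdfaGraph constructor above: one edge per distinct successor of each state, mirroring the local_succs dedup (types simplified; the real code uses dstate_id_t, flat_set and ue2_graph):

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
        // next[i][s] = successor of state i on symbol s.
        std::vector<std::vector<int>> next = {
            {0, 0, 0}, // state 0 (dead): every symbol loops, one edge
            {2, 2, 1}, // state 1: two distinct successors, two edges
        };
        std::vector<std::pair<int, int>> edges;
        for (int i = 0; i < (int)next.size(); i++) {
            std::set<int> local_succs; // stands in for flat_set
            for (int s : next[i]) {
                if (local_succs.insert(s).second) {
                    edges.push_back({i, s});
                }
            }
        }
        printf("%zu edges\n", edges.size()); // prints "3 edges"
        return 0;
    }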
+ */ + +#ifndef RDFA_GRAPH_H +#define RDFA_GRAPH_H + +#include "ue2common.h" +#include "util/ue2_graph.h" + +namespace ue2 { + +struct raw_dfa; + +struct RdfaVertexProps { + size_t index = 0; +}; + +struct RdfaEdgeProps { + size_t index = 0; +}; + +struct RdfaGraph : public ue2_graph { + RdfaGraph(const raw_dfa &rdfa); +}; + + +} + +#endif diff --git a/src/nfa/repeatcompile.cpp b/src/nfa/repeatcompile.cpp index 2e1010bb..934dd29e 100644 --- a/src/nfa/repeatcompile.cpp +++ b/src/nfa/repeatcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include "util/charreach.h" #include "util/depth.h" #include "util/dump_charclass.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/verify_types.h" #include diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c index bbbf1f20..837aa7df 100644 --- a/src/nfa/sheng.c +++ b/src/nfa/sheng.c @@ -405,9 +405,7 @@ char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, const u8 * scanned = cur_buf; char rv; - /* if we're in nomatch mode or if we're scanning history buffer */ - if (mode == NO_MATCHES || - (cur_start < 0 && mode == CALLBACK_OUTPUT)) { + if (mode == NO_MATCHES) { runShengNm(sh, q->cb, q->context, q->offset, &cached_accept_state, &cached_accept_id, cur_buf, cur_buf + cur_start, cur_buf + cur_end, can_die, @@ -506,10 +504,10 @@ char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, } } -char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { +char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { DEBUG_PRINTF("smallwrite Sheng\n"); - assert(n->type == SHENG_NFA_0); + assert(n->type == SHENG_NFA); const struct sheng *sh = getImplNfa(n); u8 state = sh->anchored; u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; @@ -545,32 +543,31 @@ char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, return state & SHENG_STATE_DEAD ? 
MO_DEAD : MO_ALIVE; } -char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); return rv; } -char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, end, STOP_AT_MATCH); return rv; } -char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) { +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(q_cur_type(q) == MQE_START); const struct sheng *sh = get_sheng(n); char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); - if (rv && nfaExecSheng0_inAccept(n, report, q)) { + if (rv && nfaExecSheng_inAccept(n, report, q)) { return MO_MATCHES_PENDING; } return rv; } -char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); const struct sheng *sh = get_sheng(n); @@ -586,7 +583,7 @@ char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, return shengHasAccept(sh, aux, report); } -char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); const struct sheng *sh = get_sheng(n); @@ -597,9 +594,9 @@ char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { return !!aux->accept; } -char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback cb, void *ctxt) { +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { assert(nfa); const struct sheng *sh = get_sheng(nfa); @@ -615,7 +612,7 @@ char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); } -char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { const struct sheng *sh = (const struct sheng *)getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; @@ -638,15 +635,15 @@ char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { const struct sheng *sh = get_sheng(nfa); u8 *s = (u8 *)state; *s = offset ? 
sh->floating: sh->anchored; return !(*s & SHENG_STATE_DEAD); } -char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { assert(nfa->scratchStateSize == 1); /* starting in floating state */ @@ -656,8 +653,8 @@ char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { return 0; } -char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { +char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { void *dest = q->streamState; const void *src = q->state; assert(nfa->scratchStateSize == 1); @@ -666,9 +663,9 @@ char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, return 0; } -char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { +char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { assert(nfa->scratchStateSize == 1); assert(nfa->streamStateSize == 1); *(u8 *)dest = *(const u8 *)src; diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h index 46ead180..84a2b6b5 100644 --- a/src/nfa/sheng.h +++ b/src/nfa/sheng.h @@ -35,27 +35,27 @@ struct mq; struct NFA; -#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecSheng_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng_zombie_status NFA_API_ZOMBIE_NO_IMPL -char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q); -char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, u8 key); -char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecSheng_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, +char nfaExecSheng_B(const 
struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); #endif /* SHENG_H_ */ diff --git a/src/nfa/sheng_internal.h b/src/nfa/sheng_internal.h index 046eb759..ff843ebe 100644 --- a/src/nfa/sheng_internal.h +++ b/src/nfa/sheng_internal.h @@ -30,7 +30,7 @@ #define SHENG_INTERNAL_H_ #include "ue2common.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #define SHENG_STATE_ACCEPT 0x10 #define SHENG_STATE_DEAD 0x20 diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 911f6d70..53f2c131 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -48,7 +48,7 @@ #include "util/compile_context.h" #include "util/make_unique.h" #include "util/verify_types.h" -#include "util/simd_utils.h" +#include "util/simd_types.h" #include #include @@ -358,7 +358,7 @@ void populateBasicInfo(struct NFA *n, dfa_info &info, n->scratchStateSize = 1; n->streamStateSize = 1; n->nPositions = info.size(); - n->type = SHENG_NFA_0; + n->type = SHENG_NFA; n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; sheng *s = (sheng *)getMutableImplNfa(n); @@ -442,14 +442,12 @@ void createShuffleMasks(sheng *s, dfa_info &info, #ifdef DEBUG dumpShuffleMask(chr, buf, sizeof(buf)); #endif - m128 mask = loadu128(buf); - s->shuffle_masks[chr] = mask; + memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); } } -bool has_accel_sheng(const NFA *nfa) { - const sheng *s = (const sheng *)getImplNfa(nfa); - return s->flags & SHENG_FLAG_HAS_ACCEL; +bool has_accel_sheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ } aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw, diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp index 037dfb05..ce87beaf 100644 --- a/src/nfa/shengdump.cpp +++ b/src/nfa/shengdump.cpp @@ -38,7 +38,8 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/dump_charclass.h" -#include "util/simd_utils.h" +#include "util/dump_util.h" +#include "util/simd_types.h" #ifndef DUMP_SUPPORT @@ -100,7 +101,7 @@ void dumpMasks(FILE *f, const sheng *s) { for (u32 chr = 0; chr < 256; chr++) { u8 buf[16]; m128 shuffle_mask = s->shuffle_masks[chr]; - store128(buf, shuffle_mask); + memcpy(buf, &shuffle_mask, sizeof(m128)); fprintf(f, "%3u: ", chr); for (u32 pos = 0; pos < 16; pos++) { @@ -115,8 +116,9 @@ } } -void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) { - assert(nfa->type == SHENG_NFA_0); +static +void nfaExecSheng_dumpText(const NFA *nfa, FILE *f) { + assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); fprintf(f, "sheng DFA\n"); @@ -235,7 +237,7 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) { u8 buf[16]; m128 shuffle_mask = s->shuffle_masks[i]; - store128(buf, shuffle_mask); + memcpy(buf, &shuffle_mask, sizeof(m128)); t[i] = buf[state] & SHENG_STATE_MASK; } @@ -243,8 +245,9 @@ void shengGetTransitions(const NFA *n, u16 state, u16 *t) { t[TOP] = aux->top & SHENG_STATE_MASK; } -void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) { - assert(nfa->type == SHENG_NFA_0); +static +void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) { + assert(nfa->type == SHENG_NFA); const sheng *s = (const sheng *)getImplNfa(nfa); dumpDotPreambleDfa(f); @@ -262,4 +265,14 @@ void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) { fprintf(f, "}\n"); } +void nfaExecSheng_dump(const NFA *nfa, const string &base) { + assert(nfa->type == SHENG_NFA); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); +
nfaExecSheng_dumpText(nfa, f); + fclose(f); + f = fopen_or_throw((base + ".dot").c_str(), "w"); + nfaExecSheng_dumpDot(nfa, f); + fclose(f); +} + } // namespace ue2 diff --git a/src/nfa/shengdump.h b/src/nfa/shengdump.h index 5334894f..2bdffeb9 100644 --- a/src/nfa/shengdump.h +++ b/src/nfa/shengdump.h @@ -31,16 +31,13 @@ #ifdef DUMP_SUPPORT -#include <cstdio> #include <string> struct NFA; namespace ue2 { -void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file, - const std::string &base); -void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file); +void nfaExecSheng_dump(const struct NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 903e04da..d68b1b04 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -242,6 +242,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, #endif u32 z = movemask128(eq128(t2, ones)); + DEBUG_PRINTF(" z: 0x%08x\n", z); return firstMatch(buf, z); } @@ -302,6 +303,39 @@ const u8 *firstMatch(const u8 *buf, u32 z) { } } +static really_inline +const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf = vpshufb(mask, c); + m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const m256 low4bits) { + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask = combine2x128(mask_hi, mask_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort(mask, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort(mask, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + static really_inline const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { @@ -315,15 +349,21 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { assert(buf && buf_end); assert(buf < buf_end); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); // Slow path for small cases.
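For reference: the shuftiFwdShort/fwdBlockShort path added above covers 16-32 byte buffers with two overlapping unaligned 16-byte loads instead of a scalar loop; the overlap region is safe to scan twice, since a match in the first load returns before the second load happens. A minimal standalone sketch of the same idea, assuming SSE2 intrinsics and a plain single-byte search in place of the real shufti mask lookup (find_byte_short and the GCC/Clang __builtin_ctz are illustrative assumptions, not library code):

    #include <emmintrin.h>
    #include <cassert>

    // Find the first occurrence of byte c in [buf, buf_end), 16 <= len <= 32.
    static const unsigned char *find_byte_short(unsigned char c,
                                                const unsigned char *buf,
                                                const unsigned char *buf_end) {
        assert(buf_end - buf >= 16 && buf_end - buf <= 32);
        const __m128i needle = _mm_set1_epi8((char)c);

        // First 16 bytes.
        __m128i chars = _mm_loadu_si128((const __m128i *)buf);
        unsigned z = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chars, needle));
        if (z) {
            return buf + __builtin_ctz(z); // lowest set bit = earliest match
        }

        // Last 16 bytes: overlaps the first load whenever len < 32, which is
        // harmless as any match in the overlap has already been reported.
        chars = _mm_loadu_si128((const __m128i *)(buf_end - 16));
        z = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chars, needle));
        if (z) {
            return buf_end - 16 + __builtin_ctz(z);
        }
        return buf_end; // no match
    }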
- if (buf_end - buf < 32) { + if (buf_end - buf < 16) { return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); } - const m256 zeroes = zeroes256(); const m256 low4bits = set32x8(0xf); + + if (buf_end - buf <= 32) { + return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits); + } + + const m256 zeroes = zeroes256(); const m256 wide_mask_lo = set2x128(mask_lo); const m256 wide_mask_hi = set2x128(mask_hi); const u8 *rv; @@ -365,12 +405,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, } static really_inline -const u8 *lastMatch(const u8 *buf, m256 t, m256 compare) { -#ifdef DEBUG - DEBUG_PRINTF("confirming match in:"); dumpMsk256(t); printf("\n"); -#endif - - u32 z = movemask256(eq256(t, compare)); +const u8 *lastMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { u32 pos = clz32(~z); DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); @@ -395,9 +430,45 @@ const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - return lastMatch(buf, t, zeroes); + u32 z = movemask256(eq256(t, zeroes)); + return lastMatch(buf, z); } +static really_inline +const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf = vpshufb(mask, c); + m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); + + return lastMatch(buf, z); +} + +static really_inline +const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const m256 low4bits) { + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask = combine2x128(mask_hi, mask_lo); + + m128 chars = loadu128(buf_end - 16); + const u8 *rv = revBlockShort(mask, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf); + rv = revBlockShort(mask, chars, buf, low4bits); + if (rv) { + return rv; + } + return buf - 1; +} + + /* takes 128 bit masks, but operates on 256 bits of data */ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { @@ -405,13 +476,18 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, assert(buf < buf_end); // Slow path for small cases. 
- if (buf_end - buf < 64) { + if (buf_end - buf < 16) { return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); } - const m256 zeroes = zeroes256(); const m256 low4bits = set32x8(0xf); + + if (buf_end - buf <= 32) { + return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits); + } + + const m256 zeroes = zeroes256(); const m256 wide_mask_lo = set2x128(mask_lo); const m256 wide_mask_hi = set2x128(mask_hi); const u8 *rv; @@ -482,14 +558,56 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, return firstMatch(buf, z); } +static really_inline +const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf1 = vpshufb(mask1, c); + m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); + m256 t0 = or256(c_shuf1, c_shuf2); + m128 t = or128(movdq_hi(t0), cast256to128(t0)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, ones128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, + m128 mask2_hi, const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + const m256 low4bits = set32x8(0xf); + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask1 = combine2x128(mask1_hi, mask1_lo); + const m256 mask2 = combine2x128(mask2_hi, mask2_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort2(mask1, mask2, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort2(mask1, mask2, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + /* takes 128 bit masks, but operates on 256 bits of data */ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const u8 *buf, const u8 *buf_end) { + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + if (buf_end - buf < 32) { - // not worth it - return buf; + return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, + buf_end); } + const m256 ones = ones256(); const m256 low4bits = set32x8(0xf); const m256 wide_mask1_lo = set2x128(mask1_lo); diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 217fcee0..12a94b7b 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -51,7 +51,7 @@ namespace ue2 { * * Note: always able to construct masks for 8 or fewer characters. */ -int shuftiBuildMasks(const CharReach &c, m128 *lo, m128 *hi) { +int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { /* Things could be packed much more optimally, but this should be able to * handle any set of characters entirely in the lower half. 
*/ @@ -134,7 +134,7 @@ void set_buckets_from_mask(u16 nibble_mask, u32 bucket, bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set<pair<u8, u8>> &twochar, - m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2) { + u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), twochar.size()); array<u8, 16> lo1_a; @@ -210,9 +210,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, #ifdef DUMP_SUPPORT -CharReach shufti2cr(const m128 lo_in, const m128 hi_in) { - const u8 *lo = (const u8 *)&lo_in; - const u8 *hi = (const u8 *)&hi_in; +CharReach shufti2cr(const u8 *lo, const u8 *hi) { CharReach cr; for (u32 i = 0; i < 256; i++) { if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h index 59126b0b..a72904e0 100644 --- a/src/nfa/shufticompile.h +++ b/src/nfa/shufticompile.h @@ -48,7 +48,7 @@ namespace ue2 { * * Note: always able to construct masks for 8 or fewer characters. */ -int shuftiBuildMasks(const CharReach &chars, m128 *lo, m128 *hi); +int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi); /** \brief Double-byte variant * */ bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set<pair<u8, u8>> &twochar, - m128 *lo1, m128 *hi1, m128 *lo2, m128 *hi2); + u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2); #ifdef DUMP_SUPPORT @@ -64,7 +64,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, * \brief Dump code: returns a CharReach with the reach that would match this * shufti. */ -CharReach shufti2cr(const m128 lo, const m128 hi); +CharReach shufti2cr(const u8 *lo, const u8 *hi); #endif // DUMP_SUPPORT diff --git a/src/nfa/tamarama.c b/src/nfa/tamarama.c index b5f90e85..43480f06 100644 --- a/src/nfa/tamarama.c +++ b/src/nfa/tamarama.c @@ -265,9 +265,9 @@ void copyBack(const struct Tamarama *t, struct mq *q, struct mq *q1) { #endif } -char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context) { +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -285,8 +285,7 @@ char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, return MO_CONTINUE_MATCHING; } -char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, - ReportID report) { +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report) { DEBUG_PRINTF("exec rose\n"); struct mq q1; q1.cur = q1.end = 0; @@ -304,7 +303,7 @@ char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, return rv; } -char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -317,8 +316,8 @@ char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { return nfaReportCurrentMatches(sub, &q1); } -char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@
-331,7 +330,7 @@ char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, return nfaInAcceptState(sub, report, &q1); } -char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -344,7 +343,7 @@ char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { return nfaInAnyAcceptState(sub, &q1); } -char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q) { DEBUG_PRINTF("init state\n"); const struct Tamarama *t = getImplNfa(n); char *ptr = q->streamState; @@ -354,8 +353,8 @@ char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecTamarama0_queueCompressState(const struct NFA *n, - const struct mq *q, s64a loc) { +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -369,8 +368,8 @@ char nfaExecTamarama0_queueCompressState(const struct NFA *n, return nfaQueueCompressState(sub, &q1, loc); } -char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key) { +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -383,8 +382,8 @@ char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, return nfaExpandState(sub, dest, subStreamState, offset, key); } -enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, - struct mq *q, s64a loc) { +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc) { const struct Tamarama *t = getImplNfa(n); u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); if (activeIdx == t->numSubEngines) { @@ -397,7 +396,7 @@ enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, return nfaGetZombieStatus(sub, &q1, loc); } -char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecTamarama_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("exec\n"); struct mq q1; char rv = MO_ALIVE; @@ -418,8 +417,7 @@ char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { return rv; } -char nfaExecTamarama0_Q2(const struct NFA *n, - struct mq *q, s64a end) { +char nfaExecTamarama_Q2(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("exec to match\n"); struct mq q1; char rv = 0; diff --git a/src/nfa/tamarama.h b/src/nfa/tamarama.h index 7ccfa5a0..3b52d8de 100644 --- a/src/nfa/tamarama.h +++ b/src/nfa/tamarama.h @@ -41,28 +41,27 @@ struct mq; struct NFA; struct hs_scratch; -char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q); -char 
nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecTamarama0_queueCompressState(const struct NFA *n, - const struct mq *q, - s64a loc); -char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key); -enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, - struct mq *q, s64a loc); -char nfaExecTamarama0_Q(const struct NFA *nfa, struct mq *q, s64a end); -char nfaExecTamarama0_Q2(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc); +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key); +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc); +char nfaExecTamarama_Q(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama_Q2(const struct NFA *nfa, struct mq *q, s64a end); // only used by outfix and miracles, no implementation for tamarama -#define nfaExecTamarama0_initCompressedState NFA_API_NO_IMPL -#define nfaExecTamarama0_B_Reverse NFA_API_NO_IMPL +#define nfaExecTamarama_initCompressedState NFA_API_NO_IMPL +#define nfaExecTamarama_B_Reverse NFA_API_NO_IMPL #ifdef __cplusplus } diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp index 181fa9af..88cb33cc 100644 --- a/src/nfa/tamarama_dump.cpp +++ b/src/nfa/tamarama_dump.cpp @@ -38,6 +38,7 @@ #include "nfa_dump_api.h" #include "nfa_dump_internal.h" #include "nfa_internal.h" +#include "util/dump_util.h" #include #include @@ -46,27 +47,14 @@ #error No dump support! 
#endif +using namespace std; + namespace ue2 { -void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f, - const std::string &base) { +void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) { const Tamarama *t = (const Tamarama *)getImplNfa(nfa); - const u32 *subOffset = - (const u32 *)((const char *)t + sizeof(struct Tamarama) + - t->numSubEngines * sizeof(u32)); - for (u32 i = 0; i < t->numSubEngines; i++) { - std::stringstream ssdot; - ssdot << base << "rose_nfa_" << nfa->queueIndex - << "_sub_" << i << ".dot"; - const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); - FILE *f1 = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(sub, f1, base); - fclose(f1); - } -} -void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) { - const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); fprintf(f, "Tamarama container engine\n"); fprintf(f, "\n"); @@ -75,15 +63,17 @@ void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) { fprintf(f, "\n"); dumpTextReverse(nfa, f); fprintf(f, "\n"); + fclose(f); const u32 *subOffset = (const u32 *)((const char *)t + sizeof(struct Tamarama) + t->numSubEngines * sizeof(u32)); for (u32 i = 0; i < t->numSubEngines; i++) { - fprintf(f, "Sub %u:\n", i); const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); - nfaDumpText(sub, f); - fprintf(f, "\n"); + + stringstream sssub; + sssub << base << "_sub_" << i; + nfaGenerateDumpFiles(sub, sssub.str()); } } diff --git a/src/nfa/tamarama_dump.h b/src/nfa/tamarama_dump.h index dc976004..f40b7ecf 100644 --- a/src/nfa/tamarama_dump.h +++ b/src/nfa/tamarama_dump.h @@ -31,16 +31,13 @@ #if defined(DUMP_SUPPORT) -#include <cstdio> #include <string> struct NFA; namespace ue2 { -void nfaExecTamarama0_dumpDot(const NFA *nfa, FILE *file, - const std::string &base); -void nfaExecTamarama0_dumpText(const NFA *nfa, FILE *file); +void nfaExecTamarama_dump(const NFA *nfa, const std::string &base); } // namespace ue2 diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index 73d19595..c28caacb 100644 --- a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -117,7 +117,7 @@ aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue, remapTops(tamaInfo, top_base, out_top_remap); size_t subSize = tamaInfo.subengines.size(); - DEBUG_PRINTF("subSize:%lu\n", subSize); + DEBUG_PRINTF("subSize:%zu\n", subSize); size_t total_size = sizeof(NFA) + // initial NFA structure sizeof(Tamarama) + // Tamarama structure @@ -134,7 +134,7 @@ aligned_unique_ptr<NFA> buildTamarama(const TamaInfo &tamaInfo, const u32 queue, // so add one to subSize here u32 activeIdxSize = calcPackedBytes(subSize + 1); aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size); - nfa->type = verify_u8(TAMARAMA_NFA_0); + nfa->type = verify_u8(TAMARAMA_NFA); nfa->length = verify_u32(total_size); nfa->queueIndex = queue; diff --git a/src/nfa/trufflecompile.cpp b/src/nfa/trufflecompile.cpp index 6bde7abb..9442d046 100644 --- a/src/nfa/trufflecompile.cpp +++ b/src/nfa/trufflecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/simd_types.h" -#include "util/simd_utils.h" + #include "util/dump_mask.h" using namespace std; @@ -53,17 +53,15 @@
namespace ue2 { * bits 456 is the bit that is set at that offset. */ -void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear, - m128 *shuf_mask_lo_highset) { - *shuf_mask_lo_highset = zeroes128(); - *shuf_mask_lo_highclear = zeroes128(); - u8 *lo_highset = (u8 *)shuf_mask_lo_highset; - u8 *lo_highclear = (u8 *)shuf_mask_lo_highclear; +void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear, + u8 *shuf_mask_lo_highset) { + memset(shuf_mask_lo_highset, 0, sizeof(m128)); + memset(shuf_mask_lo_highclear, 0, sizeof(m128)); for (size_t v = cr.find_first(); v != CharReach::npos; v = cr.find_next(v)) { DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear"); - u8 *change_mask = (v & 0x80) ? lo_highset : lo_highclear; + u8 *change_mask = (v & 0x80) ? shuf_mask_lo_highset : shuf_mask_lo_highclear; u8 low_nibble = v & 0xf; u8 bits_456 = (v & 0x70) >> 4; change_mask[low_nibble] |= 1 << bits_456; @@ -73,18 +71,16 @@ void truffleBuildMasks(const CharReach &cr, m128 *shuf_mask_lo_highclear, /* * Reconstruct the charclass that the truffle masks represent */ -CharReach truffle2cr(const m128 highclear, const m128 highset) { - const u8 *lo = (const u8 *)&highclear; - const u8 *hi = (const u8 *)&highset; +CharReach truffle2cr(const u8 *highclear, const u8 *highset) { CharReach cr; for (u8 i = 0; i < 16; i++) { - u32 bits_456 = lo[i]; + u32 bits_456 = highclear[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); cr.set(pos << 4 | i); } - bits_456 = hi[i]; + bits_456 = highset[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); diff --git a/src/nfa/trufflecompile.h b/src/nfa/trufflecompile.h index 19d3eb54..14b314f3 100644 --- a/src/nfa/trufflecompile.h +++ b/src/nfa/trufflecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,8 +34,8 @@ namespace ue2 { -void truffleBuildMasks(const CharReach &cr, m128 *mask1, m128 *mask2); -CharReach truffle2cr(const m128 lo_in, const m128 hi_in); +void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2); +CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in); } diff --git a/src/nfa/vermicelli.h b/src/nfa/vermicelli.h index ba8afcf1..817e681a 100644 --- a/src/nfa/vermicelli.h +++ b/src/nfa/vermicelli.h @@ -74,9 +74,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -129,9 +127,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -172,9 +168,7 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -190,9 +184,19 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, ptr = nocase ? dvermPreconditionNocase(chars1, chars2, buf_end - VERM_BOUNDARY) : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? 
ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + u8 mask = nocase ? CASE_CLEAR : 0xff; + if ((buf_end[-1] & mask) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + + return buf_end; } static really_inline @@ -220,9 +224,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } buf += VERM_BOUNDARY - min; - if (buf >= buf_end) { - return buf_end - 1; - } + assert(buf < buf_end); } // Aligned loops from here on in @@ -235,9 +237,17 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, // Tidy up the mess at the end ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf_end - VERM_BOUNDARY); - /* buf_end - 1 to be conservative in case last byte is a partial match */ - return ptr ? ptr : buf_end - 1; + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + if ((buf_end[-1] & m1) == (u8)c1) { + return buf_end - 1; + } + + return buf_end; } // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index deca3fd5..dff9c7e8 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -203,6 +203,7 @@ static bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; + assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", w.expressionIndex, comp_id, num_vertices(g), num_edges(g)); @@ -421,6 +422,7 @@ bool NG::addGraph(NGWrapper &w) { // Perform a reduction pass to merge sibling character classes together. if (cc.grey.performGraphSimplification) { removeRedundancy(w, som); + prunePathsRedundantWithSuccessorOfCyclics(w, som); } dumpDotWrapper(w, "04_reduced", cc.grey); diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp index ba352e60..ed9c7f48 100644 --- a/src/nfagraph/ng_anchored_dots.cpp +++ b/src/nfagraph/ng_anchored_dots.cpp @@ -202,7 +202,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, } if (!isStartNode(dotV, g.start, g, true)) { - DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index); return; } @@ -249,7 +249,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, remove_edge(g.start, v, g); } - DEBUG_PRINTF("removing vertex %u\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index); clear_vertex(dotV, g); dead.insert(dotV); compAnchoredStarts.erase(dotV); @@ -313,14 +313,15 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } // A self-loop indicates that this is a '.+' or '.*' - DEBUG_PRINTF("self-loop detected on %u\n", g[dotV].index); + DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index); *startEnd = depth::infinity(); remove_edge(dotV, dotV, g); return; } if (!isStartNode(dotV, g.startDs, g, true)) { - DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index); + DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", + g[dotV].index); return; } @@ -362,14 +363,14 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, compUnanchoredStarts.clear(); for (auto t : adjacent_vertices_range(dotV, g)) { if (t != dotV) { - DEBUG_PRINTF("connecting sds -> %u\n", g[t].index); + DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index); add_edge(g.startDs, t, g); add_edge(g.start, t, g); compUnanchoredStarts.insert(t); } } - DEBUG_PRINTF("removing vertex %u\n", g[dotV].index); + DEBUG_PRINTF("removing vertex %zu\n", 
g[dotV].index); dead.insert(dotV); clear_vertex(dotV, g); compUnanchoredStarts.erase(dotV); @@ -416,7 +417,7 @@ bool gatherParticipants(const NGHolder &g, if (isOptionalDot(t, v, g)) { // another dot; bail if we've seen it once already if (dots.find(t) != dots.end()) { - DEBUG_PRINTF("cycle detected at vertex %u\n", g[t].index); + DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index); return false; } dots.insert(t); @@ -432,7 +433,7 @@ bool gatherParticipants(const NGHolder &g, for (auto w : adjacent_vertices_range(v, g)) { succ.insert(w); if (!edge(start, w, g).second) { - DEBUG_PRINTF("failing, vertex %u does not have edge from start\n", + DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n", g[w].index); return false; } @@ -474,7 +475,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, return; } initialDot = v; - DEBUG_PRINTF("initial dot vertex is %u\n", g[v].index); + DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index); } } @@ -507,12 +508,8 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, } assert(startEnd->is_reachable()); - // For determinism, copy and sort our successor vertices. - deque<NFAVertex> s(succ.begin(), succ.end()); - sort(s.begin(), s.end(), make_index_ordering(g)); - // Connect our successor vertices to both start and startDs. - for (auto v : s) { + for (auto v : succ) { add_edge_if_not_present(g.start, v, g); add_edge_if_not_present(g.startDs, v, g); } @@ -637,8 +634,8 @@ void restoreLeadingDots(NGHolder &g, const depth &startBegin, } addDotsBetween(g, root, rhs, startBegin, startEnd); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } // Entry point. diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index e9e39345..c2f0d68f 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -101,7 +101,7 @@ vector<NFAEdge> getAsserts(const NGHolder &g) { static void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { - DEBUG_PRINTF("%u needs splitting\n", g[v].index); + DEBUG_PRINTF("%zu needs splitting\n", g[v].index); to_split->emplace(g[v].index, v); } @@ -194,7 +194,7 @@ void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { Report ir = rm.getBasicInternalReport(g, adj); g[v].reports.insert(rm.getInternalId(ir)); - DEBUG_PRINTF("set report id for vertex %u, adj %d\n", g[v].index, adj); + DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static @@ -224,7 +224,7 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { assert(v != g.start); assert(v != g.accept); assert(v != g.acceptEod); - DEBUG_PRINTF("partitioning vertex %u ucp:%d\n", g[v].index, (int)ucp); + DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp); CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; CharReach cr_nonword = ucp ?
CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; @@ -267,8 +267,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) { bool impassable = true; bool ucp = flags & UCP_ASSERT_FLAGS; - DEBUG_PRINTF("resolving edge %u->%u (flags=0x%x, ucp=%d)\n", g[u].index, - g[v].index, flags, (int)ucp); + DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n", + g[u].index, g[v].index, flags, (int)ucp); while (flags && impassable) { u32 flag = 1U << findAndClearLSB_32(&flags); switch (flag) { @@ -377,17 +377,14 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) { add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - if (!edge(u, g.acceptEod, g).second) { - add_edge(u, g.acceptEod, g[e], g); - } else { - /* there may already be a different edge from start to eod - * if so we need to make it unconditional and alive - */ - NFAEdge start_eod = edge(u, g.acceptEod, g).first; - + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - + } else { + add_edge(u, g.acceptEod, g[e], g); } dead->insert(e); } @@ -433,17 +430,14 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) { add_edge(vv, g.accept, g); g[e].assert_flags = 0; add_edge(u, vv, g[e], g); - if (!edge(u, g.acceptEod, g).second) { - add_edge(u, g.acceptEod, g[e], g); - } else { - /* there may already be a different edge from start to eod - * if so we need to make it unconditional and alive - */ - NFAEdge start_eod = edge(u, g.acceptEod, g).first; - + /* there may already be a different edge from start to eod if so + * we need to make it unconditional and alive + */ + if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { g[start_eod].assert_flags = 0; dead->erase(start_eod); - + } else { + add_edge(u, g.acceptEod, g[e], g); } dead->insert(e); } @@ -482,12 +476,12 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { resolveEdges(rm, g, &dead); remove_edges(dead, g); - g.renumberVertices(); + renumber_vertices(g); pruneUseless(g); pruneEmptyVertices(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); clearReports(g); } @@ -496,10 +490,8 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections.
*/ - bool exists; - NFAEdge orig; - tie(orig, exists) = edge(g.startDs, g.accept, g); - if (g.utf8 && exists) { + NFAEdge orig = edge(g.startDs, g.accept, g); + if (g.utf8 && orig) { DEBUG_PRINTF("rectifying %u\n", g.reportId); Report ir = rm.getBasicInternalReport(g); ReportID rep = rm.getInternalId(ir); @@ -552,7 +544,7 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { add_edge(g.start, v_4, g); add_edge(g.startDs, v_4, g); remove_edge(orig, g); - g.renumberEdges(); + renumber_edges(g); clearReports(g); } } diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 8a92b7ee..4ca0b37e 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -132,7 +132,7 @@ NFAVertex NFABuilderImpl::getVertex(Position pos) const { assert(id2vertex.size() >= pos); const NFAVertex v = id2vertex[pos]; assert(v != NGHolder::null_vertex()); - assert(graph->g[v].index == pos); + assert((*graph)[v].index == pos); return v; } @@ -147,7 +147,7 @@ void NFABuilderImpl::addVertex(Position pos) { id2vertex.resize(pos + 1); } id2vertex[pos] = v; - graph->g[v].index = pos; + (*graph)[v].index = pos; } unique_ptr NFABuilderImpl::getGraph() { @@ -177,26 +177,24 @@ void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) { NFAVertex v = getVertex(pos); - graph->g[v].char_reach |= cr; + (*graph)[v].char_reach |= cr; } void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) { NFAVertex v = getVertex(pos); - graph->g[v].assert_flags |= flag; + (*graph)[v].assert_flags |= flag; } u32 NFABuilderImpl::getAssertFlag(Position pos) { NFAVertex v = getVertex(pos); - return graph->g[v].assert_flags; + return (*graph)[v].assert_flags; } pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) { // assert that the edge doesn't already exist - assert(edge(u, v, graph->g).second == false); + assert(edge(u, v, *graph).second == false); - pair<NFAEdge, bool> e = add_edge(u, v, *graph); - assert(e.second); - return e; + return add_edge(u, v, *graph); } void NFABuilderImpl::addEdge(Position startPos, Position endPos) { @@ -209,16 +207,16 @@ void NFABuilderImpl::addEdge(Position startPos, Position endPos) { if ((u == graph->start || u == graph->startDs) && v == graph->startDs) { /* standard special -> special edges already exist */ - assert(edge(u, v, graph->g).second == true); + assert(edge(u, v, *graph).second == true); return; } - assert(edge(u, v, graph->g).second == false); + assert(edge(u, v, *graph).second == false); addEdge(u, v); } bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const { - return edge(getVertex(startPos), getVertex(endPos), graph->g).second; + return edge(getVertex(startPos), getVertex(endPos), *graph).second; } Position NFABuilderImpl::getStart() const { @@ -252,7 +250,7 @@ Position NFABuilderImpl::makePositions(size_t nPositions) { } void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) { - NFAGraph &g = graph->g; + NGHolder &g = *graph; assert(posOffset > 0); // walk the nodes between first and last and copy their vertex properties diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 658e7001..da6775e4 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -162,7 +162,7 @@ flat_set<NFAVertex> findHeadShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %u\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -184,7 +184,7 @@
flat_set<NFAVertex> findTailShell(const NGHolder &g, } for (UNUSED auto v : shell) { - DEBUG_PRINTF("shell: %u\n", g[v].index); + DEBUG_PRINTF("shell: %zu\n", g[v].index); } return shell; @@ -209,7 +209,8 @@ vector<NFAEdge> findShellEdges(const NGHolder &g, if ((is_special(u, g) || contains(head_shell, u)) && (is_special(v, g) || contains(tail_shell, v))) { - DEBUG_PRINTF("edge (%u,%u) is a shell edge\n", g[u].index, g[v].index); + DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index, + g[v].index); shell_edges.push_back(e); } } @@ -275,9 +276,8 @@ void splitIntoComponents(const NGHolder &g, deque<unique_ptr<NGHolder>> &comps, NFAUndirectedGraph ug; ue2::unordered_map<NFAVertex, NFAUndirectedVertex> old2new; - ue2::unordered_map<u32, NFAVertex> newIdx2old; - createUnGraph(g.g, true, true, ug, old2new, newIdx2old); + createUnGraph(g, true, true, ug, old2new); // Construct reverse mapping. ue2::unordered_map<NFAUndirectedVertex, NFAVertex> new2old; @@ -313,7 +313,7 @@ void splitIntoComponents(const NGHolder &g, deque<unique_ptr<NGHolder>> &comps, assert(contains(new2old, uv)); NFAVertex v = new2old.at(uv); verts[c].push_back(v); - DEBUG_PRINTF("vertex %u is in comp %u\n", g[v].index, c); + DEBUG_PRINTF("vertex %zu is in comp %u\n", g[v].index, c); } ue2::unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder @@ -322,8 +322,9 @@ void splitIntoComponents(const NGHolder &g, deque<unique_ptr<NGHolder>> &comps, vv.insert(vv.end(), begin(head_shell), end(head_shell)); vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - // Sort by vertex index for determinism. - sort(begin(vv), end(vv), VertexIndexOrdering(g)); + /* Sort for determinism. Still required as NFAUndirectedVertex have + * no deterministic ordering (split_components map). */ + sort(begin(vv), end(vv)); auto gc = ue2::make_unique<NGHolder>(); v_map.clear(); @@ -349,9 +350,6 @@ void splitIntoComponents(const NGHolder &g, deque<unique_ptr<NGHolder>> &comps, vv.insert(vv.end(), begin(head_shell), end(head_shell)); vv.insert(vv.end(), begin(tail_shell), end(tail_shell)); - // Sort by vertex index for determinism.
- sort(begin(vv), end(vv), VertexIndexOrdering(g)); - auto gc = ue2::make_unique<NGHolder>(); v_map.clear(); fillHolder(gc.get(), g, vv, &v_map); diff --git a/src/nfagraph/ng_cyclic_redundancy.cpp b/src/nfagraph/ng_cyclic_redundancy.cpp index e2272264..9ae4458c 100644 --- a/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/src/nfagraph/ng_cyclic_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -99,7 +99,7 @@ class SearchVisitor : public boost::default_dfs_visitor { template <class Vertex, class Graph> void discover_vertex(const Vertex &v, const Graph &g) const { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); if (is_special(v, g)) { DEBUG_PRINTF("start or accept\n"); throw SearchFailed(); @@ -141,24 +141,16 @@ bool searchForward(const Graph &g, const CharReach &reach, } static -NFAEdge to_raw(const NFAEdge &e, const NFAGraph &, const NGHolder &) { +NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { return e; } static -NFAEdge to_raw(const reverse_graph<NFAGraph, NFAGraph &>::edge_descriptor &e, - const reverse_graph<NFAGraph, NFAGraph &> &g, - const NGHolder &raw) { - /* clang doesn't seem to like edge_underlying */ - NFAVertex t = source(e, g); - NFAVertex s = target(e, g); - - assert(edge(s, t, raw).second); - - return edge(s, t, raw).first; +NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e, + const reverse_graph<NGHolder, NGHolder &> &g) { + return get(boost::edge_underlying, g, e); } - /* returns true if we did stuff */ template <class Graph> static @@ -185,7 +177,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF("- checking u %u\n", g[u].index); + DEBUG_PRINTF("- checking u %zu\n", g[u].index); // let s be intersection(succ(u), succ(v)) s.clear(); @@ -206,17 +198,18 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, continue; } - DEBUG_PRINTF(" - checking w %u\n", g[w].index); + DEBUG_PRINTF(" - checking w %zu\n", g[w].index); - if (searchForward(g, reach, s, w)) { - DEBUG_PRINTF("removing edge (%u,%u)\n", - g[u].index, g[w].index); - /* we are currently iterating over the in-edges of v, so it - would be unwise to remove edges to v. However, */ - assert(w != v); /* as v is in s */ - remove_edge(to_raw(e_u, g, raw), raw); - did_stuff = true; + if (!searchForward(g, reach, s, w)) { + continue; } + + DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index); + /* we are currently iterating over the in-edges of v, so it + would be unwise to remove edges to v. However, */ + assert(w != v); /* as v is in s */ + remove_edge(to_raw(e_u, g), raw); + did_stuff = true; } } @@ -233,7 +226,7 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { continue; } - DEBUG_PRINTF("examining cyclic vertex %u\n", g[v].index); + DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index); did_stuff |= removeCyclicPathRedundancy(g, v, raw); } @@ -242,7 +235,7 @@ bool removeCyclicPathRedundancy(NGHolder &g) { // Forward pass. - bool f_changed = cyclicPathRedundancyPass(g.g, g); + bool f_changed = cyclicPathRedundancyPass(g, g); if (f_changed) { DEBUG_PRINTF("edges removed by forward pass\n"); pruneUseless(g); } // Reverse pass.
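For context: the forward/reverse structure above works because boost::reverse_graph is a zero-copy adaptor that presents the transpose of a bidirectional graph, so the same templated pass can run in both directions without materialising a second graph. A toy sketch of the adaptor, using a hypothetical three-vertex graph rather than the NGHolder types in this patch:

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/reverse_graph.hpp>
    #include <iostream>

    int main() {
        using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                            boost::bidirectionalS>;
        Graph g(3);
        boost::add_edge(0, 1, g);
        boost::add_edge(1, 2, g);

        // Zero-copy view: edges appear flipped, nothing is rebuilt.
        boost::reverse_graph<Graph, const Graph &> rg(g);
        std::cout << boost::out_degree(2, rg) << "\n"; // prints 1 (in-degree in g)
        return 0;
    }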
DEBUG_PRINTF("REVERSE PASS\n"); - typedef reverse_graph RevGraph; - RevGraph revg(g.g); + typedef reverse_graph RevGraph; + RevGraph revg(g); bool r_changed = cyclicPathRedundancyPass(revg, g); if (r_changed) { DEBUG_PRINTF("edges removed by reverse pass\n"); diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index d7945be9..63e0e46b 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,11 +44,14 @@ #include #include #include +#include using namespace std; using boost::filtered_graph; +using boost::make_filtered_graph; using boost::make_constant_property; using boost::reverse_graph; +using boost::adaptors::reverse; namespace ue2 { @@ -122,25 +125,23 @@ private: template static -void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, +void findLoopReachable(const GraphT &g, + const typename GraphT::vertex_descriptor srcVertex, vector &deadNodes) { typedef typename GraphT::edge_descriptor EdgeT; + typedef typename GraphT::vertex_descriptor VertexT; typedef set EdgeSet; EdgeSet deadEdges; BackEdges be(deadEdges); - auto index_map = get(&NFAGraphVertexProps::index, g); + depth_first_search(g, visitor(be).root_vertex(srcVertex)); + auto af = make_bad_edge_filter(&deadEdges); + auto acyclic_g = make_filtered_graph(g, af); - depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map( - index_map)); - AcyclicFilter af(&deadEdges); - filtered_graph > acyclic_g(g, af); - - vector topoOrder; /* actually reverse topological order */ + vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); - topological_sort(acyclic_g, back_inserter(topoOrder), - vertex_index_map(index_map)); + topological_sort(acyclic_g, back_inserter(topoOrder)); for (const auto &e : deadEdges) { u32 srcIdx = g[source(e, g)].index; @@ -149,8 +150,7 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, } } - for (auto it = topoOrder.rbegin(); it != topoOrder.rend(); ++it) { - NFAVertex v = *it; + for (VertexT v : reverse(topoOrder)) { for (const auto &e : in_edges_range(v, g)) { if (deadNodes[g[source(e, g)].index]) { deadNodes[g[v].index] = true; @@ -162,13 +162,13 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, template static -void calcDepthFromSource(const NGHolder &graph, const GraphT &g, +void calcDepthFromSource(const GraphT &g, typename GraphT::vertex_descriptor srcVertex, - const vector &deadNodes, - vector &dMin, vector &dMax) { + const vector &deadNodes, vector &dMin, + vector &dMax) { typedef typename GraphT::edge_descriptor EdgeT; - const size_t numVerts = num_vertices(graph); + const size_t numVerts = num_vertices(g); NodeFilter nf(&deadNodes, &g); StartFilter sf(&g); @@ -194,22 +194,20 @@ void calcDepthFromSource(const NGHolder &graph, const GraphT &g, using boost::make_iterator_property_map; - auto min_index_map = get(&NFAGraphVertexProps::index, mindist_g); + auto min_index_map = get(vertex_index, mindist_g); breadth_first_search(mindist_g, srcVertex, - boost::vertex_index_map(min_index_map). 
visitor(make_bfs_visitor(record_distances( - make_iterator_property_map( - dMin.begin(), min_index_map), + make_iterator_property_map(dMin.begin(), + min_index_map), boost::on_tree_edge())))); - auto max_index_map = get(&NFAGraphVertexProps::index, maxdist_g); + auto max_index_map = get(vertex_index, maxdist_g); dag_shortest_paths(maxdist_g, srcVertex, - boost::vertex_index_map(max_index_map). - distance_map(make_iterator_property_map(dMax.begin(), - max_index_map)). - weight_map(make_constant_property(-1))); + distance_map(make_iterator_property_map(dMax.begin(), + max_index_map)) + .weight_map(make_constant_property(-1))); for (size_t i = 0; i < numVerts; i++) { if (dMin[i] > DIST_UNREACHABLE) { @@ -254,14 +252,14 @@ DepthMinMax getDepths(u32 idx, const vector<u32> &dMin, template <class Graph, class Output> static -void calcAndStoreDepth(const NGHolder &h, const Graph &g, +void calcAndStoreDepth(const Graph &g, const typename Graph::vertex_descriptor src, const vector<bool> &deadNodes, vector<u32> &dMin /* util */, vector<u32> &dMax /* util */, vector<Output> &depths, DepthMinMax Output::*store) { - calcDepthFromSource(h, g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); for (auto v : vertices_range(g)) { u32 idx = g[v].index; @@ -285,14 +283,14 @@ void calcDepths(const NGHolder &g, std::vector<NFAVertexDepth> &depths) { * reachable from a loop need to be removed */ vector<bool> deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth(g, g.g, g.start, deadNodes, dMin, dMax, - depths, &NFAVertexDepth::fromStart); + calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, + &NFAVertexDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth(g, g.g, g.startDs, deadNodes, dMin, dMax, - depths, &NFAVertexDepth::fromStartDotStar); + calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, + &NFAVertexDepth::fromStartDotStar); } void calcDepths(const NGHolder &g, std::vector<NFAVertexRevDepth> &depths) { @@ -305,8 +303,10 @@ void calcDepths(const NGHolder &g, std::vector<NFAVertexRevDepth> &depths) { vector<u32> dMax; /* reverse the graph before walking it */ - typedef reverse_graph<NFAGraph, const NFAGraph &> RevNFAGraph; - const RevNFAGraph rg(g.g); + typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph; + const RevNFAGraph rg(g); + + assert(num_vertices(g) == num_vertices(rg)); /* * create a filtered graph for max depth calculations: all nodes/edges @@ -317,12 +317,12 @@ void calcDepths(const NGHolder &g, std::vector<NFAVertexRevDepth> &depths) { DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>( - g, rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
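Background on the dMax calculation in calcDepthFromSource above: the longest path in a DAG is the negated shortest path when every edge carries weight -1, which is exactly what the make_constant_property(-1) weight map feeds into dag_shortest_paths. A self-contained sketch of that trick on a hypothetical four-vertex DAG (toy example, not the NFA graph types used here):

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/dag_shortest_paths.hpp>
    #include <boost/graph/property_maps/constant_property_map.hpp>
    #include <boost/property_map/property_map.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                            boost::directedS>;
        using Edge = boost::graph_traits<Graph>::edge_descriptor;
        Graph g(4);
        boost::add_edge(0, 1, g); // long route: 0 -> 1 -> 2 -> 3
        boost::add_edge(1, 2, g);
        boost::add_edge(2, 3, g);
        boost::add_edge(0, 3, g); // short route: 0 -> 3

        std::vector<int> dist(num_vertices(g));
        auto dist_map = boost::make_iterator_property_map(
            dist.begin(), boost::get(boost::vertex_index, g));
        // Every edge weighs -1, so the "shortest" path is the longest one.
        boost::dag_shortest_paths(g, 0,
                                  boost::distance_map(dist_map).weight_map(
                                      boost::make_constant_property<Edge>(-1)));

        std::cout << -dist[3] << "\n"; // max depth of vertex 3: prints 3
        return 0;
    }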
calcAndStoreDepth( - g, rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAcceptEod); } @@ -340,31 +340,31 @@ void calcDepths(const NGHolder &g, vector &depths) { * reachable from a loop need to be removed */ vector deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); DEBUG_PRINTF("doing start\n"); - calcAndStoreDepth( - g, g.g, g.start, deadNodes, dMin, dMax, depths, + calcAndStoreDepth( + g, g.start, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStart); DEBUG_PRINTF("doing startds\n"); - calcAndStoreDepth( - g, g.g, g.startDs, deadNodes, dMin, dMax, depths, + calcAndStoreDepth( + g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::fromStartDotStar); /* Now go backwards */ - typedef reverse_graph RevNFAGraph; - const RevNFAGraph rg(g.g); + typedef reverse_graph RevNFAGraph; + const RevNFAGraph rg(g); deadNodes.assign(numVertices, false); findLoopReachable(rg, g.acceptEod, deadNodes); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( - g, rg, g.accept, deadNodes, dMin, dMax, depths, + rg, g.accept, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAccept); DEBUG_PRINTF("doing accepteod\n"); deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge. calcAndStoreDepth( - g, rg, g.acceptEod, deadNodes, dMin, dMax, depths, + rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAcceptEod); } @@ -374,10 +374,10 @@ void calcDepthsFrom(const NGHolder &g, const NFAVertex src, const size_t numVertices = num_vertices(g); vector deadNodes(numVertices); - findLoopReachable(g.g, g.start, deadNodes); + findLoopReachable(g, g.start, deadNodes); vector dMin, dMax; - calcDepthFromSource(g, g.g, src, deadNodes, dMin, dMax); + calcDepthFromSource(g, src, deadNodes, dMin, dMax); depths.clear(); depths.resize(numVertices); diff --git a/src/nfagraph/ng_dominators.cpp b/src/nfagraph/ng_dominators.cpp index 05650aaf..d01af994 100644 --- a/src/nfagraph/ng_dominators.cpp +++ b/src/nfagraph/ng_dominators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-16, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,37 +48,45 @@ using boost::make_iterator_property_map; namespace ue2 { template -ue2::unordered_map calcDominators(const Graph &g, - NFAVertex source) { +unordered_map calcDominators(const Graph &g, + typename Graph::vertex_descriptor source) { + using Vertex = typename Graph::vertex_descriptor; const size_t num_verts = num_vertices(g); auto index_map = get(&NFAGraphVertexProps::index, g); vector dfnum(num_verts, 0); - vector parents(num_verts, Graph::null_vertex()); + vector parents(num_verts, Graph::null_vertex()); auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map); auto parent_map = make_iterator_property_map(parents.begin(), index_map); - vector vertices_by_dfnum(num_verts, Graph::null_vertex()); + vector vertices_by_dfnum(num_verts, Graph::null_vertex()); // Output map. 
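calcDominators here feeds Hyperscan's patched copy of Lengauer-Tarjan (the boost_ue2 namespace); stock BGL ships the same algorithm, and findPostDominators obtains post-dominators by running it over a boost::reverse_graph from acceptEod. A minimal sketch using the stock call on a plain graph:

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/dominator_tree.hpp>
    #include <boost/property_map/property_map.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                            boost::bidirectionalS>;
        using Vertex = Graph::vertex_descriptor;

        Graph g(4); // diamond: vertex 3 is dominated only by vertex 0
        add_edge(0, 1, g);
        add_edge(0, 2, g);
        add_edge(1, 3, g);
        add_edge(2, 3, g);

        // idom[v] is v's immediate dominator after the call.
        std::vector<Vertex> idom(num_vertices(g),
                                 boost::graph_traits<Graph>::null_vertex());
        auto dom_map = boost::make_iterator_property_map(
            idom.begin(), get(boost::vertex_index, g));

        boost::lengauer_tarjan_dominator_tree(g, vertex(0, g), dom_map);

        for (size_t v = 1; v < num_vertices(g); v++) {
            std::cout << "idom(" << v << ") = " << idom[v] << "\n";
        }
        return 0;
    }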
- unordered_map doms; + unordered_map doms; auto dom_map = make_assoc_property_map(doms); boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, parent_map, vertices_by_dfnum, dom_map); - return doms; + /* Translate back to an NFAVertex map */ + unordered_map doms2; + for (const auto &e : doms) { + NFAVertex f(e.first); + NFAVertex s(e.second); + doms2[f] = s; + } + return doms2; } -ue2::unordered_map findDominators(const NGHolder &g) { +unordered_map findDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(g.g, g.start); + return calcDominators(g, g.start); } -ue2::unordered_map findPostDominators(const NGHolder &g) { +unordered_map findPostDominators(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); - return calcDominators(boost::reverse_graph(g.g), + return calcDominators(boost::reverse_graph(g), g.acceptEod); } diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 57668caf..fc840f25 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -234,9 +234,9 @@ public: void operator()(ostream& os, const EdgeT& e) const { // Edge label. Print priority. os << "[fontsize=9,label=\""; - // If it's an edge from start, print top id. - if (is_any_start(source(e, g), g) && !is_any_start(target(e, g), g)) { - os << "TOP " << g[e].top << "\\n"; + // print tops if any set. + if (!g[e].tops.empty()) { + os << "TOP " << as_string_list(g[e].tops) << "\\n"; } // If it's an assert vertex, then display its info. @@ -285,7 +285,7 @@ void dumpGraphImpl(const char *name, const GraphT &g, } // manual instantiation of templated dumpGraph above. -template void dumpGraphImpl(const char *, const NFAGraph &); +template void dumpGraphImpl(const char *, const NGHolder &); void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, const Grey &grey) { @@ -293,7 +293,7 @@ void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, stringstream ss; ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), nw.g); + dumpGraphImpl(ss.str().c_str(), nw); } } @@ -304,7 +304,7 @@ void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), g.g); + dumpGraphImpl(ss.str().c_str(), g); } } @@ -315,7 +315,7 @@ void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr, ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_" << name << "_" << plan << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), g.g); + dumpGraphImpl(ss.str().c_str(), g); } } @@ -325,7 +325,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, stringstream ss; ss << grey.dumpPath << "Holder_X_" << stageNumber << "-" << stageName << ".dot"; - dumpGraphImpl(ss.str().c_str(), h.g); + dumpGraphImpl(ss.str().c_str(), h); } } @@ -337,7 +337,7 @@ void dumpHolderImpl(const NGHolder &h, stringstream ss; ss << grey.dumpPath << "Holder_X_" << stageNumber << "-" << stageName << ".dot"; - dumpGraphImpl(ss.str().c_str(), h.g, region_map); + dumpGraphImpl(ss.str().c_str(), h, region_map); } } diff --git a/src/nfagraph/ng_edge_redundancy.cpp b/src/nfagraph/ng_edge_redundancy.cpp index 5944cfef..3ce62c41 100644 --- a/src/nfagraph/ng_edge_redundancy.cpp +++ 
b/src/nfagraph/ng_edge_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -297,9 +297,8 @@ bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, return false; } - DEBUG_PRINTF("edge (%u, %u) killed by edge (%u, %u)\n", - g[w].index, g[v].index, - g[fixed_src].index, g[v].index); + DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n", + g[w].index, g[v].index, g[fixed_src].index, g[v].index); return true; } @@ -415,7 +414,7 @@ bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { pred(g, u, &parents_u); done.clear(); - if (hasGreaterOutDegree(1, u, g)) { + if (out_degree(u, g) > 1) { checkLargeOutU(g, u, parents_u, possible_w, done, &dead); } else { checkSmallOutU(g, u, parents_u, done, &dead); @@ -460,7 +459,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { vector dead; for (auto v : adjacent_vertices_range(g.startDs, g)) { - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (is_special(v, g)) { continue; } @@ -470,8 +469,7 @@ bool removeSiblingsOfStartDotStar(NGHolder &g) { if (is_special(u, g)) { continue; } - DEBUG_PRINTF("removing %u->%u\n", g[u].index, - g[v].index); + DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index); dead.push_back(e); } } diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index d0ab7c4a..32a392a6 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -38,17 +38,16 @@ #include "ng_util.h" #include "util/compile_context.h" #include "util/graph_range.h" +#include "util/make_unique.h" #include "util/ue2_containers.h" #include +#include #include #include #include -#include - using namespace std; -using boost::ptr_vector; namespace ue2 { @@ -72,17 +71,17 @@ struct VertexInfoPtrCmp { class VertexInfo { public: VertexInfo(NFAVertex v_in, const NGHolder &g) - : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), edge_top(~0), + : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), equivalence_class(~0), vertex_flags(g[v].assert_flags) {} flat_set pred; //!< predecessors of this vertex flat_set succ; //!< successors of this vertex NFAVertex v; - u32 vert_index; + size_t vert_index; CharReach cr; CharReach pred_cr; CharReach succ_cr; - unsigned edge_top; + flat_set edge_tops; /**< tops on edge from start */ unsigned equivalence_class; unsigned vertex_flags; }; @@ -120,15 +119,15 @@ public: EquivalenceType eq) : /* reports only matter for right-equiv */ rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set()), - vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr), + vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr), adjacent_cr(eq == LEFT_EQUIVALENCE ? 
vi.pred_cr : vi.succ_cr), /* treat non-special vertices the same */ - node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {} + node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {} bool operator==(const ClassInfo &b) const { return node_type == b.node_type && depth.d1 == b.depth.d1 && depth.d2 == b.depth.d2 && cr == b.cr && - adjacent_cr == b.adjacent_cr && edge_top == b.edge_top && + adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops && vertex_flags == b.vertex_flags && rs == b.rs; } @@ -136,7 +135,6 @@ public: size_t val = 0; boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs))); boost::hash_combine(val, c.vertex_flags); - boost::hash_combine(val, c.edge_top); boost::hash_combine(val, c.cr); boost::hash_combine(val, c.adjacent_cr); boost::hash_combine(val, c.node_type); @@ -148,7 +146,7 @@ public: private: flat_set rs; /* for right equiv only */ unsigned vertex_flags; - u32 edge_top; + flat_set edge_tops; CharReach cr; CharReach adjacent_cr; unsigned node_type; @@ -277,47 +275,47 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { // populate VertexInfo table static -ptr_vector getVertexInfos(const NGHolder &g) { +vector> getVertexInfos(const NGHolder &g) { const size_t num_verts = num_vertices(g); - ptr_vector infos; + vector> infos; infos.reserve(num_verts * 2); vector vertex_map; // indexed by vertex_index property vertex_map.resize(num_verts); for (auto v : vertices_range(g)) { - VertexInfo *vi = new VertexInfo(v, g); - - // insert our new shiny VertexInfo into the info map - infos.push_back(vi); - - vertex_map[g[v].index] = vi; + infos.push_back(make_unique(v, g)); + vertex_map[g[v].index] = infos.back().get(); } - // now, go through each vertex and populate its predecessor and successor lists - for (VertexInfo &cur_vi : infos) { - // find predecessors - for (const auto &e : in_edges_range(cur_vi.v, g)) { - NFAVertex u = source(e, g); - VertexInfo *vmi = vertex_map[g[u].index]; + // now, go through each vertex and populate its predecessor and successor + // lists + for (auto &vi : infos) { + assert(vi); + NFAVertex v = vi->v; - cur_vi.pred_cr |= vmi->cr; - cur_vi.pred.insert(vmi); + // find predecessors + for (const auto &e : in_edges_range(v, g)) { + NFAVertex u = source(e, g); + VertexInfo *u_vi = vertex_map[g[u].index]; + + vi->pred_cr |= u_vi->cr; + vi->pred.insert(u_vi); // also set up edge tops if (is_triggered(g) && u == g.start) { - cur_vi.edge_top = g[e].top; + vi->edge_tops = g[e].tops; } } // find successors - for (auto w : adjacent_vertices_range(cur_vi.v, g)) { - VertexInfo *vmi = vertex_map[g[w].index]; - cur_vi.succ_cr |= vmi->cr; - cur_vi.succ.insert(vmi); + for (auto w : adjacent_vertices_range(v, g)) { + VertexInfo *w_vi = vertex_map[g[w].index]; + vi->succ_cr |= w_vi->cr; + vi->succ.insert(w_vi); } - assert(!hasEdgeAsserts(cur_vi.v, g)); + assert(!hasEdgeAsserts(vi->v, g)); } return infos; @@ -325,7 +323,7 @@ ptr_vector getVertexInfos(const NGHolder &g) { // store equivalence class in VertexInfo for each vertex static -vector partitionGraph(ptr_vector &infos, +vector partitionGraph(vector> &infos, WorkQueue &work_queue, const NGHolder &g, EquivalenceType eq) { const size_t num_verts = infos.size(); @@ -350,28 +348,30 @@ vector partitionGraph(ptr_vector &infos, } // partition the graph based on CharReach - for (VertexInfo &vi : infos) { + for (auto &vi : infos) { + assert(vi); + ClassInfo::ClassDepth depth; if (eq == LEFT_EQUIVALENCE) { - depth = depths[vi.vert_index]; + depth = depths[vi->vert_index]; } else { - depth = 
rdepths[vi.vert_index]; + depth = rdepths[vi->vert_index]; } - ClassInfo ci(g, vi, depth, eq); + ClassInfo ci(g, *vi, depth, eq); auto ii = classinfomap.find(ci); if (ii == classinfomap.end()) { // vertex is in a new equivalence class by itself. unsigned eq_class = classes.size(); - vi.equivalence_class = eq_class; - classes.push_back({&vi}); + vi->equivalence_class = eq_class; + classes.push_back({vi.get()}); classinfomap.emplace(move(ci), eq_class); } else { // vertex is added to an existing class. unsigned eq_class = ii->second; - vi.equivalence_class = eq_class; - classes.at(eq_class).insert(&vi); + vi->equivalence_class = eq_class; + classes.at(eq_class).insert(vi.get()); // we now know that this particular class has more than one // vertex, so we add it to the work queue @@ -501,8 +501,9 @@ bool require_separate_eod_vertex(const VertexInfoSet &vert_infos, } static -void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, - VertexInfoSet &cur_class_vertices, set *toRemove) { +void mergeClass(vector> &infos, NGHolder &g, + unsigned eq_class, VertexInfoSet &cur_class_vertices, + set *toRemove) { DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a " "single vertex.\n", cur_class_vertices.size(), eq_class); @@ -530,9 +531,9 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, * props */ g[new_v].reports.clear(); /* populated as we pull in succs */ - VertexInfo *new_vertex_info = new VertexInfo(new_v, g); // store this vertex in our global vertex list - infos.push_back(new_vertex_info); + infos.push_back(make_unique(new_v, g)); + VertexInfo *new_vertex_info = infos.back().get(); NFAVertex new_v_eod = NGHolder::null_vertex(); VertexInfo *new_vertex_info_eod = nullptr; @@ -540,11 +541,11 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, if (require_separate_eod_vertex(cur_class_vertices, g)) { new_v_eod = clone_vertex(g, old_v); g[new_v_eod].reports.clear(); - new_vertex_info_eod = new VertexInfo(new_v_eod, g); - infos.push_back(new_vertex_info_eod); + infos.push_back(make_unique(new_v_eod, g)); + new_vertex_info_eod = infos.back().get(); } - const unsigned edgetop = (*cur_class_vertices.begin())->edge_top; + const auto &edgetops = (*cur_class_vertices.begin())->edge_tops; for (VertexInfo *old_vertex_info : cur_class_vertices) { assert(old_vertex_info->equivalence_class == eq_class); @@ -563,22 +564,24 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, pred_info->succ.erase(old_vertex_info); // if edge doesn't exist, create it - NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g).first; + NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g); - // put edge top, if applicable - if (edgetop != (unsigned) -1) { - g[e].top = edgetop; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[e].tops.empty() || g[e].tops == edgetops); + g[e].tops = edgetops; } pred_info->succ.insert(new_vertex_info); if (new_v_eod) { NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod, - g).first; + g); - // put edge top, if applicable - if (edgetop != (unsigned) -1) { - g[ee].top = edgetop; + // put edge tops, if applicable + if (!edgetops.empty()) { + assert(g[e].tops.empty() || g[e].tops == edgetops); + g[ee].tops = edgetops; } pred_info->succ.insert(new_vertex_info_eod); @@ -626,7 +629,8 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, // report behaviour with a single vertex). 
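partitionGraph above buckets vertices by their ClassInfo: equal signatures share an equivalence class, and classes are numbered densely in discovery order. A reduced sketch of that bucketing step, with a hypothetical two-field Signature standing in for ClassInfo (which also carries reach, tops, reports, depth and node type):

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <unordered_map>
    #include <vector>

    struct Signature {
        uint32_t reach; // stand-in for the real signature fields
        uint32_t flags;
        bool operator==(const Signature &o) const {
            return reach == o.reach && flags == o.flags;
        }
    };

    struct SigHash {
        size_t operator()(const Signature &s) const {
            return std::hash<uint64_t>()((uint64_t(s.reach) << 32) | s.flags);
        }
    };

    int main() {
        std::vector<Signature> verts = {{7, 0}, {9, 1}, {7, 0}, {9, 0}};
        std::unordered_map<Signature, unsigned, SigHash> classmap;
        std::vector<unsigned> eq_class(verts.size());

        for (size_t i = 0; i < verts.size(); i++) {
            // A new signature opens a fresh class; a seen one joins it.
            auto it = classmap.emplace(verts[i], classmap.size()).first;
            eq_class[i] = it->second;
        }
        for (size_t i = 0; i < verts.size(); i++) {
            std::cout << "vertex " << i << " -> class " << eq_class[i] << "\n";
        }
        return 0;
    }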
static bool mergeEquivalentClasses(vector &classes, - ptr_vector &infos, NGHolder &g) { + vector> &infos, + NGHolder &g) { bool merged = false; set toRemove; @@ -656,7 +660,7 @@ bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { // get information on every vertex in the graph // new vertices are allocated here, and stored in infos - ptr_vector infos = getVertexInfos(g); + auto infos = getVertexInfos(g); // partition the graph auto classes = partitionGraph(infos, work_queue, g, eq_type); @@ -674,7 +678,7 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { DEBUG_PRINTF("equivalence processing disabled in grey box\n"); return false; } - g.renumberVertices(); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp index 4ffd89c0..9d904894 100644 --- a/src/nfagraph/ng_execute.cpp +++ b/src/nfagraph/ng_execute.cpp @@ -183,8 +183,6 @@ flat_set execute_graph(const NGHolder &g, return getVertices(work_states, info); } -typedef boost::reverse_graph RevNFAGraph; - namespace { class eg_visitor : public boost::default_dfs_visitor { public: @@ -195,13 +193,14 @@ public: info(info_in), input_g(input_g_in), states(states_in), succs(vertex_count) {} - void finish_vertex(NFAVertex input_v, const RevNFAGraph &) { + void finish_vertex(NFAVertex input_v, + const boost::reverse_graph &) { if (input_v == input_g.accept) { return; } assert(input_v != input_g.acceptEod); - DEBUG_PRINTF("finished p%u\n", input_g[input_v].index); + DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index); /* finish vertex is called on vertex --> implies that all its parents * (in the forward graph) are also finished. Our parents will have @@ -236,7 +235,7 @@ public: /* we need to push into all our (forward) children their successors * from us. 
*/ for (auto v : adjacent_vertices_range(input_v, input_g)) { - DEBUG_PRINTF("pushing our states to pstate %u\n", + DEBUG_PRINTF("pushing our states to pstate %zu\n", input_g[v].index); if (v == input_g.startDs) { /* no need for intra start edges */ @@ -289,7 +288,7 @@ flat_set execute_graph(const NGHolder &running_g, map colours; /* could just a topo order, but really it is time to pull a slightly bigger * gun: DFS */ - RevNFAGraph revg(input_dag.g); + boost::reverse_graph revg(input_dag); map > dfs_states; auto info = makeInfoTable(running_g); @@ -308,7 +307,7 @@ flat_set execute_graph(const NGHolder &running_g, #ifdef DEBUG DEBUG_PRINTF(" output rstates:"); for (const auto &v : states) { - printf(" %u", running_g[v].index); + printf(" %zu", running_g[v].index); } printf("\n"); #endif diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index cfd34ce6..b43c7fd1 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, rd.max = min(rd.max, max_offset); } - DEBUG_PRINTF("vertex %u report %u: %s\n", w[v].index, report_id, + DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id, rd.str().c_str()); info = unionDepthMinMax(info, rd); diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index bc101df2..a504ac50 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -172,8 +172,7 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, new_reports.insert(rm.getInternalId(ir)); } - DEBUG_PRINTF("swapping reports on vertex %u\n", - g[v].index); + DEBUG_PRINTF("swapping reports on vertex %zu\n", g[v].index); reports.swap(new_reports); } } @@ -286,8 +285,8 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, add_edge(u, v, g); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); return true; } @@ -309,7 +308,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } if (v != NGHolder::null_vertex()) { - DEBUG_PRINTF("cyclic is %u\n", g[v].index); + DEBUG_PRINTF("cyclic is %zu\n", g[v].index); assert(!is_special(v, g)); } return v; @@ -380,10 +379,9 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (contains(succ, cyclic)) { if (succ.size() == 1) { v = cyclic; @@ -419,10 +417,9 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // Walk from the cyclic state to an accept and ensure we have a chain of // vertices. 
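Both walks in transformMinLengthToRepeat follow a chain of single-successor vertices, counting width and giving up on any branching. A simplified sketch of that chain walk on a plain BGL graph (chainWidth is an illustrative helper; Hyperscan's succs() is replaced by adjacent_vertices):

    #include <boost/graph/adjacency_list.hpp>
    #include <set>

    using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::bidirectionalS>;
    using Vertex = Graph::vertex_descriptor;

    // Hops from v to cyclic along a strict chain; -1 on any branching
    // ("bad form", as the pass puts it).
    int chainWidth(const Graph &g, Vertex v, Vertex cyclic) {
        int width = 0;
        while (v != cyclic) {
            auto adj = adjacent_vertices(v, g);
            std::set<Vertex> succ(adj.first, adj.second);
            if (succ.size() != 1) {
                return -1;
            }
            v = *succ.begin();
            width++;
        }
        return width;
    }

    int main() {
        Graph g(3);
        add_edge(0, 1, g);
        add_edge(1, 2, g);
        add_edge(2, 2, g); // vertex 2 is the cyclic state
        return chainWidth(g, 0, 2) == 2 ? 0 : 1;
    }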
while (!is_any_accept(v, g)) { - DEBUG_PRINTF("vertex %u\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (succ.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; @@ -437,7 +434,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust); width += offsetAdjust; - DEBUG_PRINTF("width=%u, vertex %u is cyclic\n", width, + DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); if (width >= g.min_length) { @@ -450,7 +447,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { vector preds; vector dead; for (auto u : inv_adjacent_vertices_range(cyclic, g)) { - DEBUG_PRINTF("pred %u\n", g[u].index); + DEBUG_PRINTF("pred %zu\n", g[u].index); if (u == cyclic) { continue; } @@ -486,8 +483,8 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { add_edge(u, cyclic, g); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); clearReports(g); g.min_length = 0; @@ -544,8 +541,7 @@ bool isEdgePrunable(const NGWrapper &g, const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - DEBUG_PRINTF("edge (%u,%u)\n", g[u].index, - g[v].index); + DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index); // Leave our special-to-special edges alone. if (is_special(u, g) && is_special(v, g)) { @@ -718,8 +714,7 @@ static bool isUnanchored(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (!edge(g.startDs, v, g).second) { - DEBUG_PRINTF("fail, %u is anchored vertex\n", - g[v].index); + DEBUG_PRINTF("fail, %zu is anchored vertex\n", g[v].index); return false; } } @@ -864,7 +859,7 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, } } } - //dumpGraph("final.dot", g.g); + //dumpGraph("final.dot", g); if (!hasExtParams(g)) { return; diff --git a/src/nfagraph/ng_fixed_width.cpp b/src/nfagraph/ng_fixed_width.cpp index 46d77913..978dad44 100644 --- a/src/nfagraph/ng_fixed_width.cpp +++ b/src/nfagraph/ng_fixed_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ bool findMask(const NGHolder &g, vector *mask, bool *anchored, NFAVertex v = *succs.begin(); while (true) { - DEBUG_PRINTF("validating vertex %u\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod); diff --git a/src/nfagraph/ng_graph.h b/src/nfagraph/ng_graph.h deleted file mode 100644 index 64b32839..00000000 --- a/src/nfagraph/ng_graph.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Definition of the NFAGraph type used for all NFA graph - * representations. - * - * Note that most of the time we don't work on a bare NFAGraph: instead - * we use an NGHolder, which wraps the graph and defines our special vertices, - * etc. - */ - -#ifndef NG_GRAPH_H -#define NG_GRAPH_H - -#include "util/charreach.h" -#include "util/ue2_containers.h" -#include "ue2common.h" - -#include -#include -#include - -namespace ue2 { - -/** \brief Properties associated with each vertex in an NFAGraph. */ -struct NFAGraphVertexProps { - /** \brief Set of characters on which this vertex is reachable. */ - CharReach char_reach; - - /** \brief Set of reports raised by this vertex. */ - ue2::flat_set reports; - - /** \brief Unique index for this vertex, used for BGL algorithms. */ - u32 index = 0; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -/** \brief Properties associated with each edge in an NFAGraph. */ -struct NFAGraphEdgeProps { - /** \brief Unique index for this edge, used for BGL algorithms. */ - u32 index = 0; - - /** \brief For graphs that will be implemented as multi-top engines, this - * specifies the top event. Only used on edges from the start vertex. */ - u32 top = 0; - - /** \brief Flags associated with assertions. */ - u32 assert_flags = 0; -}; - -// For flexibility: boost::listS, boost::listS for out-edge and vertex lists. -// boost::bidirectionalS for directed graph so that we can get at in-edges. -typedef boost::adjacency_list NFAGraph; - -typedef NFAGraph::vertex_descriptor NFAVertex; -typedef NFAGraph::edge_descriptor NFAEdge; - -/** \brief vertex_index values for special nodes in the NFAGraph. */ -enum SpecialNodes { - /** \brief Anchored start vertex. WARNING: this may be triggered at various - * locations (not just zero) for triggered graphs. */ - NODE_START, - - /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a - * proper self-loop. */ - NODE_START_DOTSTAR, - - /** \brief Accept vertex. All vertices that can match at arbitrary offsets - * must have an edge to this vertex. */ - NODE_ACCEPT, - - /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only - * must have an edge to this vertex. */ - NODE_ACCEPT_EOD, - - /** \brief Sentinel, number of special vertices. 
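For reference, the deleted typedef with its template arguments restored from the comment above it (listS vertex and out-edge storage, bidirectionalS so in-edges are available, bundled properties); this is the type that ue2_graph replaces in 4.4:

    typedef boost::adjacency_list<boost::listS, boost::listS,
                                  boost::bidirectionalS,
                                  NFAGraphVertexProps,
                                  NFAGraphEdgeProps> NFAGraph;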
*/ - N_SPECIALS -}; - -} // namespace ue2 - -#endif diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index e70b7708..e4be14c3 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -35,13 +35,12 @@ #include "nfa/goughcompile.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_som_util.h" #include "ng_squash.h" -#include "ng_util.h" #include "util/bitfield.h" #include "util/container.h" #include "util/determinise.h" +#include "util/graph.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/ue2_containers.h" @@ -118,11 +117,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; protected: - Automaton_Base(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Base(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : graph(graph_in), numStates(num_vertices(graph)), unused(unused_in), + : graph(graph_in), numStates(num_vertices(graph)), + unused(getRedundantStarts(graph_in)), init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), @@ -210,7 +209,7 @@ public: const NGHolder &graph; const u32 numStates; - const flat_set &unused; + const flat_set unused; array alpha; array unalpha; @@ -251,10 +250,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: - Automaton_Big(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Big(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; struct Graph_Traits { @@ -278,11 +276,10 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: - Automaton_Graph(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Graph(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; class Automaton_Haig_Merge { @@ -452,7 +449,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, NFAVertex v = state_mapping[i]; s32 slot_id = g[v].index; - DEBUG_PRINTF("d vertex %u\n", g[v].index); + DEBUG_PRINTF("d vertex %zu\n", g[v].index); vector &out_map = preds[slot_id]; for (auto u : inv_adjacent_vertices_range(v, g)) { out_map.push_back(g[u].index); @@ -493,7 +490,7 @@ void haig_note_starts(const NGHolder &g, map *out) { for (auto v : vertices_range(g)) { if (is_any_start_inc_virtual(v, g)) { - DEBUG_PRINTF("%u creates new som value\n", g[v].index); + DEBUG_PRINTF("%zu creates new som value\n", g[v].index); out->emplace(g[v].index, 0U); continue; } @@ -504,7 +501,7 @@ void haig_note_starts(const NGHolder &g, map *out) { const DepthMinMax &d = depths[g[v].index]; if (d.min == d.max && d.min.is_finite()) { - DEBUG_PRINTF("%u is fixed at %u\n", g[v].index, (u32)d.min); + DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min); out->emplace(g[v].index, d.min); } } @@ -512,15 +509,14 @@ void haig_note_starts(const NGHolder &g, map *out) { template static -bool doHaig(const NGHolder &g, - const flat_set &unused, - som_type som, const vector> &triggers, - bool unordered_som, raw_som_dfa *rdfa) { +bool doHaig(const NGHolder &g, som_type som, + const vector> &triggers, 
bool unordered_som, + raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ typedef typename Auto::StateSet StateSet; vector nfa_state_map; - Auto n(g, unused, som, triggers, unordered_som); + Auto n(g, som, triggers, unordered_som); try { if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -550,9 +546,9 @@ bool doHaig(const NGHolder &g, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, unused, g.accept, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -577,8 +573,6 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); - auto unused = findUnusedStates(g); - u32 numStates = num_vertices(g); if (numStates > HAIG_MAX_NFA_STATE) { DEBUG_PRINTF("giving up... looks too big\n"); @@ -592,12 +586,11 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, - rdfa.get()); + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } if (!rv) { diff --git a/src/nfagraph/ng_holder.cpp b/src/nfagraph/ng_holder.cpp index 53566891..a2fbb288 100644 --- a/src/nfagraph/ng_holder.cpp +++ b/src/nfagraph/ng_holder.cpp @@ -36,123 +36,33 @@ using namespace std; namespace ue2 { // internal use only -static NFAVertex addSpecialVertex(NFAGraph &g, SpecialNodes id) { - NFAVertex v = add_vertex(g); +static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) { + NFAVertex v(add_vertex(g)); g[v].index = id; return v; } -NGHolder::NGHolder(void) - : g(), - // add initial special nodes - start(addSpecialVertex(g, NODE_START)), - startDs(addSpecialVertex(g, NODE_START_DOTSTAR)), - accept(addSpecialVertex(g, NODE_ACCEPT)), - acceptEod(addSpecialVertex(g, NODE_ACCEPT_EOD)), - // misc data - numVertices(N_SPECIALS), - numEdges(0), - isValidNumEdges(true), - isValidNumVertices(true) { - - // wire up some fake edges for the stylized bits of the NFA - add_edge(start, startDs, *this); - add_edge(startDs, startDs, *this); - add_edge(accept, acceptEod, *this); - - g[start].char_reach.setall(); - g[startDs].char_reach.setall(); -} - NGHolder::NGHolder(nfa_kind k) - : kind (k), g(), + : kind (k), // add initial special nodes - start(addSpecialVertex(g, NODE_START)), - startDs(addSpecialVertex(g, NODE_START_DOTSTAR)), - accept(addSpecialVertex(g, NODE_ACCEPT)), - acceptEod(addSpecialVertex(g, NODE_ACCEPT_EOD)), - // misc data - numVertices(N_SPECIALS), - numEdges(0), - isValidNumEdges(true), - isValidNumVertices(true) { + start(addSpecialVertex(*this, NODE_START)), + startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)), + accept(addSpecialVertex(*this, NODE_ACCEPT)), + acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) { // wire up some fake edges for the stylized bits of the NFA add_edge(start, startDs, *this); add_edge(startDs, startDs, *this); add_edge(accept, acceptEod, *this); - g[start].char_reach.setall(); - 
g[startDs].char_reach.setall(); + (*this)[start].char_reach.setall(); + (*this)[startDs].char_reach.setall(); } NGHolder::~NGHolder(void) { DEBUG_PRINTF("destroying holder @ %p\n", this); } -size_t num_edges(NGHolder &h) { - if (!h.isValidNumEdges) { - h.numEdges = num_edges(h.g); - h.isValidNumEdges = true; - } - return h.numEdges; -} - -size_t num_edges(const NGHolder &h) { - if (!h.isValidNumEdges) { - return num_edges(h.g); - } - return h.numEdges; -} - -size_t num_vertices(NGHolder &h) { - if (!h.isValidNumVertices) { - h.numVertices = num_vertices(h.g); - h.isValidNumVertices = true; - } - return h.numVertices; -} - -size_t num_vertices(const NGHolder &h) { - if (!h.isValidNumVertices) { - return num_vertices(h.g); - } - return h.numVertices; -} - -void remove_edge(const NFAEdge &e, NGHolder &h) { - remove_edge(e, h.g); - assert(!h.isValidNumEdges || h.numEdges > 0); - h.numEdges--; -} - -void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h) { - remove_edge(u, v, h.g); - assert(!h.isValidNumEdges || h.numEdges > 0); - h.numEdges--; -} - -void remove_vertex(NFAVertex v, NGHolder &h) { - remove_vertex(v, h.g); - assert(!h.isValidNumVertices || h.numVertices > 0); - h.numVertices--; -} - -void clear_vertex(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_vertex_faster(v, h.g); -} - -void clear_in_edges(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_in_edges(v, h.g); -} - -void clear_out_edges(NFAVertex v, NGHolder &h) { - h.isValidNumEdges = false; - clear_out_edges(v, h.g); -} - void clear_graph(NGHolder &h) { NGHolder::vertex_iterator vi, ve; for (tie(vi, ve) = vertices(h); vi != ve;) { @@ -166,6 +76,8 @@ void clear_graph(NGHolder &h) { } assert(num_vertices(h) == N_SPECIALS); + renumber_vertices(h); /* ensure that we reset our next allocated index */ + renumber_edges(h); // Recreate special stylised edges. 
     add_edge(h.start, h.startDs, h);
@@ -173,57 +85,13 @@
     add_edge(h.accept, h.acceptEod, h);
 }
 
-std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v, NGHolder &h) {
-    assert(edge(u, v, h.g).second == false);
-    pair<NFAEdge, bool> e = add_edge(u, v, h.g);
-    h.g[e.first].index = h.numEdges++;
-    assert(!h.isValidNumEdges || h.numEdges > 0); // no wrapping
-    h.g[e.first].top = 0;
-    return e;
-}
-
-std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v,
-                                  const NFAGraph::edge_property_type &ep,
-                                  NGHolder &h) {
-    assert(edge(u, v, h.g).second == false);
-    pair<NFAEdge, bool> e = add_edge(u, v, ep, h.g);
-    h.g[e.first].index = h.numEdges++;
-    assert(!h.isValidNumEdges || h.numEdges > 0); // no wrapping
-    return e;
-}
-
-NFAVertex add_vertex(NGHolder &h) {
-    NFAVertex v = add_vertex(h.g);
-    h[v].index = h.numVertices++;
-    assert(h.numVertices > 0); // no wrapping
-    return v;
-}
-
-NFAVertex add_vertex(const NFAGraph::vertex_property_type &vp, NGHolder &h) {
-    NFAVertex v = add_vertex(h);
-    u32 i = h.g[v].index; /* preserve index */
-    h.g[v] = vp;
-    h.g[v].index = i;
-    return v;
-}
-
-void NGHolder::renumberEdges() {
-    numEdges = renumberGraphEdges(g);
-    isValidNumEdges = true;
-}
-
-void NGHolder::renumberVertices() {
-    numVertices = renumberGraphVertices(g);
-    isValidNumVertices = true;
-}
-
 NFAVertex NGHolder::getSpecialVertex(u32 id) const {
     switch (id) {
-    case NODE_START: return start;
-    case NODE_START_DOTSTAR: return startDs;
-    case NODE_ACCEPT: return accept;
-    case NODE_ACCEPT_EOD: return acceptEod;
-    default: return nullptr;
+    case NODE_START:         return start;
+    case NODE_START_DOTSTAR: return startDs;
+    case NODE_ACCEPT:        return accept;
+    case NODE_ACCEPT_EOD:    return acceptEod;
+    default:                 return null_vertex();
     }
 }
diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h
index f0a387d0..fbb6ac52 100644
--- a/src/nfagraph/ng_holder.h
+++ b/src/nfagraph/ng_holder.h
@@ -26,19 +26,75 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+/** \file
+ * \brief Definition of the NGHolder type used to represent general NFA
+ * graphs as well as all associated types (vertex and edge properties, etc).
+ *
+ * The NGHolder also contains the special vertices used to represent starts
+ * and accepts.
+ */
+
 #ifndef NG_HOLDER_H
 #define NG_HOLDER_H
 
-#include "ng_graph.h"
 #include "ue2common.h"
 #include "nfa/nfa_kind.h"
-
-#include
-#include
-#include
+#include "util/charreach.h"
+#include "util/ue2_containers.h"
+#include "util/ue2_graph.h"
 
 namespace ue2 {
 
+/** \brief Properties associated with each vertex in an NFAGraph. */
+struct NFAGraphVertexProps {
+    /** \brief Set of characters on which this vertex is reachable. */
+    CharReach char_reach;
+
+    /** \brief Set of reports raised by this vertex. */
+    flat_set<ReportID> reports;
+
+    /** \brief Unique index for this vertex, used for BGL algorithms. */
+    size_t index = 0;
+
+    /** \brief Flags associated with assertions. */
+    u32 assert_flags = 0;
+};
+
+/** \brief Properties associated with each edge in an NFAGraph. */
+struct NFAGraphEdgeProps {
+    /** \brief Unique index for this edge, used for BGL algorithms. */
+    size_t index = 0;
+
+    /** \brief For graphs that will be implemented as multi-top engines, this
+     * specifies the top events. Only used on edges from the start vertex. */
+    ue2::flat_set<u32> tops;
+
+    /** \brief Flags associated with assertions. */
+    u32 assert_flags = 0;
+};
+
+/** \brief vertex_index values for special nodes in the NFAGraph. */
+enum SpecialNodes {
+    /** \brief Anchored start vertex.
WARNING: this may be triggered at various + * locations (not just zero) for triggered graphs. */ + NODE_START, + + /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a + * proper self-loop. */ + NODE_START_DOTSTAR, + + /** \brief Accept vertex. All vertices that can match at arbitrary offsets + * must have an edge to this vertex. */ + NODE_ACCEPT, + + /** \brief Accept-EOD vertex. Vertices that must raise a match at EOD only + * must have an edge to this vertex. */ + NODE_ACCEPT_EOD, + + /** \brief Sentinel, number of special vertices. */ + N_SPECIALS +}; + /** \brief Encapsulates an NFAGraph, stores special vertices and other * metadata. * @@ -49,188 +105,34 @@ namespace ue2 { * - (startDs, startDs) (self-loop) * - (accept, acceptEod) */ -class NGHolder : boost::noncopyable { +class NGHolder : public ue2_graph { public: - NGHolder(void); explicit NGHolder(nfa_kind kind); + NGHolder(void) : NGHolder(NFA_OUTFIX) {}; virtual ~NGHolder(void); - // Pack edge and vertex indices. - // Note: maintaining edge index order can be expensive due to the frequency - // of edge removal/addition, so only renumberEdges() when required by - // operations on edge lists. - void renumberEdges(); - void renumberVertices(); + nfa_kind kind; /* Role that this plays in Rose */ - NFAVertex getSpecialVertex(u32 id) const; + static const size_t N_SPECIAL_VERTICES = N_SPECIALS; +public: + const vertex_descriptor start; //!< Anchored start vertex. + const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex. + const vertex_descriptor accept; //!< Accept vertex. + const vertex_descriptor acceptEod; //!< Accept at EOD vertex. - nfa_kind kind = NFA_OUTFIX; /* Role that this plays in Rose */ - - /** \brief Underlying graph object */ - NFAGraph g; - - const NFAVertex start; //!< Anchored start vertex. - const NFAVertex startDs; //!< Unanchored start-dotstar vertex. - const NFAVertex accept; //!< Accept vertex. - const NFAVertex acceptEod; //!< Accept at EOD vertex. - - using directed_category = NFAGraph::directed_category; - using edge_parallel_category = NFAGraph::edge_parallel_category; - using traversal_category = NFAGraph::traversal_category; - - using vertex_descriptor = NFAGraph::vertex_descriptor; - using edge_descriptor = NFAGraph::edge_descriptor; - using adjacency_iterator = NFAGraph::adjacency_iterator; - using edge_iterator = NFAGraph::edge_iterator; - using in_edge_iterator = NFAGraph::in_edge_iterator; - using inv_adjacency_iterator = NFAGraph::inv_adjacency_iterator; - using out_edge_iterator = NFAGraph::out_edge_iterator; - using vertex_iterator = NFAGraph::vertex_iterator; - using edge_property_type = NFAGraph::edge_property_type; - using vertex_property_type = NFAGraph::vertex_property_type; - - // These free functions, which follow the BGL model, are the interface to - // the graph held by this class. 
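Because NGHolder now derives from a graph type, BGL's free functions and the bundled-property subscript apply to it directly, which is what lets all of the forwarding friends below go. A minimal sketch of the pattern, with a bundled adjacency_list standing in for the Hyperscan-internal ue2_graph (Holder and VProps are illustrative names):

    #include <boost/graph/adjacency_list.hpp>

    struct VProps { size_t index = 0; };

    struct Holder : boost::adjacency_list<boost::vecS, boost::vecS,
                                          boost::bidirectionalS, VProps> {};

    int main() {
        Holder h;
        auto v = add_vertex(h); // BGL free function found via ADL
        h[v].index = 7;         // inherited bundled-property subscript
        return num_vertices(h) == 1 ? 0 : 1;
    }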
- friend size_t num_vertices(NGHolder &h); - friend size_t num_vertices(const NGHolder &h); - friend size_t num_edges(NGHolder &h); - friend size_t num_edges(const NGHolder &h); - friend void remove_vertex(NFAVertex v, NGHolder &h); - friend void clear_vertex(NFAVertex v, NGHolder &h); - friend void clear_in_edges(NFAVertex v, NGHolder &h); - friend void clear_out_edges(NFAVertex v, NGHolder &h); - friend void remove_edge(const NFAEdge &e, NGHolder &h); - friend void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h); - - template - friend void remove_out_edge_if(NFAVertex v, Predicate pred, NGHolder &h) { - boost::remove_out_edge_if(v, pred, h.g); - h.isValidNumEdges = false; - } - - template - friend void remove_in_edge_if(NFAVertex v, Predicate pred, NGHolder &h) { - boost::remove_in_edge_if(v, pred, h.g); - h.isValidNumEdges = false; - } - - template - friend void remove_edge_if(Predicate pred, NGHolder &h) { - boost::remove_edge_if(pred, h.g); - h.isValidNumEdges = false; - } - - friend std::pair add_edge(NFAVertex u, NFAVertex v, - NGHolder &h); - friend std::pair add_edge(NFAVertex u, NFAVertex v, - const edge_property_type &ep, - NGHolder &h); - friend NFAVertex add_vertex(NGHolder &h); - friend NFAVertex add_vertex(const vertex_property_type &vp, NGHolder &h); - - static NFAVertex null_vertex(void) { return NFAGraph::null_vertex(); } - - // Subscript operators for BGL bundled properties. - using graph_bundled = NFAGraph::graph_bundled; - using vertex_bundled = NFAGraph::vertex_bundled; - using edge_bundled = NFAGraph::edge_bundled; - - vertex_bundled &operator[](NFAVertex v) { - return get(boost::vertex_bundle, g)[v]; - } - const vertex_bundled &operator[](NFAVertex v) const { - return get(boost::vertex_bundle, g)[v]; - } - edge_bundled &operator[](const NFAEdge &e) { - return get(boost::edge_bundle, g)[e]; - } - const edge_bundled &operator[](const NFAEdge &e) const { - return get(boost::edge_bundle, g)[e]; - } - -protected: - - /* Since the NFAGraph vertex/edge list selectors are std::lists, computing - * num_vertices and num_edges is O(N). We use these members to store a - * cached copy of the size. - * - * In the future, with C++11's constant-time std::list::size, these may - * become obsolete. */ - - u32 numVertices; - u32 numEdges; - bool isValidNumEdges; - bool isValidNumVertices; + vertex_descriptor getSpecialVertex(u32 id) const; }; +typedef NGHolder::vertex_descriptor NFAVertex; +typedef NGHolder::edge_descriptor NFAEdge; + /** \brief True if the vertex \p v is one of our special vertices. 
*/ template -static really_inline -bool is_special(const NFAVertex v, const GraphT &g) { +bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) { return g[v].index < N_SPECIALS; } -static really_inline -std::pair -adjacent_vertices(NFAVertex v, const NGHolder &h) { - return adjacent_vertices(v, h.g); -} - -static really_inline -std::pair edge(NFAVertex u, NFAVertex v, const NGHolder &h) { - return boost::edge(u, v, h.g); -} - -static really_inline -std::pair -edges(const NGHolder &h) { - return edges(h.g); -} - -static really_inline -size_t in_degree(NFAVertex v, const NGHolder &h) { - return in_degree(v, h.g); -} - -static really_inline -std::pair -in_edges(NFAVertex v, const NGHolder &h) { - return in_edges(v, h.g); -} - -static really_inline -std::pair -inv_adjacent_vertices(NFAVertex v, const NGHolder &h) { - return inv_adjacent_vertices(v, h.g); -} - -static really_inline -size_t out_degree(NFAVertex v, const NGHolder &h) { - return out_degree(v, h.g); -} - -static really_inline -std::pair -out_edges(NFAVertex v, const NGHolder &h) { - return out_edges(v, h.g); -} - -static really_inline -NFAVertex source(const NFAEdge &e, const NGHolder &h) { - return source(e, h.g); -} - -static really_inline -NFAVertex target(const NFAEdge &e, const NGHolder &h) { - return target(e, h.g); -} - -static really_inline -std::pair -vertices(const NGHolder &h) { - return vertices(h.g); -} - /** * \brief Clears all non-special vertices and edges from the graph. * @@ -239,16 +141,6 @@ vertices(const NGHolder &h) { */ void clear_graph(NGHolder &h); -inline -void renumber_edges(NGHolder &h) { - h.renumberEdges(); -} - -inline -void renumber_vertices(NGHolder &h) { - h.renumberVertices(); -} - /* * \brief Clear and remove all of the vertices pointed to by the given iterator * range. @@ -275,8 +167,8 @@ void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - h.renumberEdges(); - h.renumberVertices(); + renumber_edges(h); + renumber_vertices(h); } } @@ -311,10 +203,12 @@ void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) { } if (renumber) { - h.renumberEdges(); + renumber_edges(h); } } +#define DEFAULT_TOP 0U + /** \brief Clear and remove all of the edges pointed to by the edge descriptors * in the given container. * diff --git a/src/nfagraph/ng_is_equal.cpp b/src/nfagraph/ng_is_equal.cpp index cc65fa17..2df79f50 100644 --- a/src/nfagraph/ng_is_equal.cpp +++ b/src/nfagraph/ng_is_equal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,6 +77,26 @@ private: ReportID a_rep; ReportID b_rep; }; + +/** Comparison functor used to sort by vertex_index. 
*/ +template +struct VertexIndexOrdering { + explicit VertexIndexOrdering(const Graph &g_in) : g(g_in) {} + bool operator()(typename Graph::vertex_descriptor a, + typename Graph::vertex_descriptor b) const { + assert(a == b || g[a].index != g[b].index); + return g[a].index < g[b].index; + } +private: + const Graph &g; +}; + +template +static +VertexIndexOrdering make_index_ordering(const Graph &g) { + return VertexIndexOrdering(g); +} + } static @@ -109,7 +129,7 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, for (size_t i = 0; i < vert_a.size(); i++) { NFAVertex va = vert_a[i]; NFAVertex vb = vert_b[i]; - DEBUG_PRINTF("vertex %u\n", a[va].index); + DEBUG_PRINTF("vertex %zu\n", a[va].index); // Vertex index must be the same. if (a[va].index != b[vb].index) { @@ -153,14 +173,14 @@ bool is_equal_i(const NGHolder &a, const NGHolder &b, } /* check top for edges out of start */ - vector> top_a; - vector> top_b; + vector>> top_a; + vector>> top_b; for (const auto &e : out_edges_range(a.start, a)) { - top_a.emplace_back(a[target(e, a)].index, a[e].top); + top_a.emplace_back(a[target(e, a)].index, a[e].tops); } for (const auto &e : out_edges_range(b.start, b)) { - top_b.emplace_back(b[target(e, b)].index, b[e].top); + top_b.emplace_back(b[target(e, b)].index, b[e].tops); } sort(top_a.begin(), top_a.end()); diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d7183817..d832bdaa 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -153,8 +153,7 @@ aligned_unique_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - aligned_unique_ptr nfa - = makeLbrNfa(LBR_NFA_Dot, rtype, repeatMax); + auto nfa = makeLbrNfa(LBR_NFA_DOT, rtype, repeatMax); struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get()); fillNfa(nfa.get(), &ld->common, report, repeatMin, repeatMax, @@ -177,8 +176,7 @@ aligned_unique_ptr buildLbrVerm(const CharReach &cr, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - aligned_unique_ptr nfa - = makeLbrNfa(LBR_NFA_Verm, rtype, repeatMax); + auto nfa = makeLbrNfa(LBR_NFA_VERM, rtype, repeatMax); struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); lv->c = escapes.find_first(); @@ -202,8 +200,7 @@ aligned_unique_ptr buildLbrNVerm(const CharReach &cr, enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - aligned_unique_ptr nfa - = makeLbrNfa(LBR_NFA_NVerm, rtype, repeatMax); + auto nfa = makeLbrNfa(LBR_NFA_NVERM, rtype, repeatMax); struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); lv->c = escapes.find_first(); @@ -221,14 +218,13 @@ aligned_unique_ptr buildLbrShuf(const CharReach &cr, bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - aligned_unique_ptr nfa - = makeLbrNfa(LBR_NFA_Shuf, rtype, repeatMax); + auto nfa = makeLbrNfa(LBR_NFA_SHUF, rtype, repeatMax); struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get()); fillNfa(nfa.get(), &ls->common, report, repeatMin, repeatMax, minPeriod, rtype); - if (shuftiBuildMasks(~cr, &ls->mask_lo, &ls->mask_hi) == -1) { + if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 
*)&ls->mask_hi) == -1) { return nullptr; } @@ -243,14 +239,13 @@ aligned_unique_ptr buildLbrTruf(const CharReach &cr, bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); - aligned_unique_ptr nfa - = makeLbrNfa(LBR_NFA_Truf, rtype, repeatMax); + auto nfa = makeLbrNfa(LBR_NFA_TRUF, rtype, repeatMax); struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get()); fillNfa(nfa.get(), &lc->common, report, repeatMin, repeatMax, minPeriod, rtype); - truffleBuildMasks(~cr, &lc->mask1, &lc->mask2); + truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2); DEBUG_PRINTF("built truffle lbr\n"); return nfa; diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 72efa43a..e92790b9 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -54,10 +54,15 @@ #include "util/ue2_containers.h" #include "util/verify_types.h" +#include #include #include +#include + using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { @@ -73,8 +78,7 @@ bool sanityCheckGraph(const NGHolder &g, // Non-specials should have non-empty reachability. if (!is_special(v, g)) { if (g[v].char_reach.none()) { - DEBUG_PRINTF("vertex %u has empty reach\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index); return false; } } @@ -83,25 +87,23 @@ bool sanityCheckGraph(const NGHolder &g, // other vertices must not have them. if (is_match_vertex(v, g) && v != g.accept) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports\n", g[v].index); + DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index); return false; } } else if (!g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has reports but no accept edge\n", + DEBUG_PRINTF("vertex %zu has reports but no accept edge\n", g[v].index); return false; } // Participant vertices should have distinct state indices. if (!contains(state_ids, v)) { - DEBUG_PRINTF("vertex %u has no state index!\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index); return false; } u32 s = state_ids.at(v); if (s != NO_STATE && !seen_states.insert(s).second) { - DEBUG_PRINTF("vertex %u has dupe state %u\n", - g[v].index, s); + DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s); return false; } } @@ -118,9 +120,11 @@ void findSquashStates(const NGHolder &g, filterSquashers(g, squashMap); /* We also filter out the cyclic states representing bounded repeats, as - * they are not really cyclic. */ + * they are not really cyclic -- they may turn off unexpectedly. 
*/ for (const auto &br : repeats) { - squashMap.erase(br.cyclic); + if (br.repeatMax.is_finite()) { + squashMap.erase(br.cyclic); + } } } @@ -144,76 +148,319 @@ void dropRedundantStartEdges(NGHolder &g) { } static -void makeTopStates(NGHolder &g, map &tops, - const map &top_reach) { - map> top_succs; - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - u32 t = g[e].top; - top_succs[t].push_back(v); - } - - for (const auto &top : top_succs) { - u32 t = top.first; - - CharReach top_cr; +CharReach calcTopVertexReach(const flat_set &tops, + const map &top_reach) { + CharReach top_cr; + for (u32 t : tops) { if (contains(top_reach, t)) { - top_cr = top_reach.at(t); + top_cr |= top_reach.at(t); } else { top_cr = CharReach::dot(); - } - - assert(!contains(tops, t)); - - NFAVertex s = NGHolder::null_vertex(); - flat_set succs; - insert(&succs, top.second); - - for (auto v : top.second) { - if (!top_cr.isSubsetOf(g[v].char_reach)) { - continue; - } - - flat_set vsuccs; - insert(&vsuccs, adjacent_vertices(v, g)); - - if (succs != vsuccs) { - continue; - } - - if (g[v].reports != g[g.start].reports) { - continue; - } - s = v; break; } + } + return top_cr; +} - if (!s) { - s = add_vertex(g[g.start], g); - g[s].char_reach = top_cr; - for (auto v : top.second) { - add_edge(s, v, g); +static +NFAVertex makeTopStartVertex(NGHolder &g, const flat_set &tops, + const flat_set &succs, + const map &top_reach) { + assert(!succs.empty()); + assert(!tops.empty()); + + bool reporter = false; + + NFAVertex u = add_vertex(g[g.start], g); + CharReach top_cr = calcTopVertexReach(tops, top_reach); + g[u].char_reach = top_cr; + + for (auto v : succs) { + if (v == g.accept || v == g.acceptEod) { + reporter = true; + } + add_edge(u, v, g); + } + + // Only retain reports (which we copied on add_vertex above) for new top + // vertices connected to accepts. 
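calcTopVertexReach above unions the declared reach of every top in the set and widens to dot as soon as a top has no entry. The same logic on standard types, with std::bitset standing in for CharReach and calcTopReach as an illustrative name:

    #include <bitset>
    #include <cstdint>
    #include <map>
    #include <set>

    using Reach = std::bitset<256>; // stand-in for CharReach

    Reach calcTopReach(const std::set<uint32_t> &tops,
                       const std::map<uint32_t, Reach> &top_reach) {
        Reach cr;
        for (uint32_t t : tops) {
            auto it = top_reach.find(t);
            if (it == top_reach.end()) {
                cr.set(); // top with no declared reach: assume dot
                break;
            }
            cr |= it->second;
        }
        return cr;
    }

    int main() {
        std::map<uint32_t, Reach> top_reach;
        top_reach[0].set('a');
        return calcTopReach({0, 1}, top_reach).all() ? 0 : 1; // 1 -> dot
    }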
+    if (!reporter) {
+        g[u].reports.clear();
+    }
+
+    return u;
+}
+
+static
+void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
+                              const map<NFAVertex, flat_set<u32>> &succ_tops,
+                              flat_set<u32> *picked_tops,
+                              flat_set<NFAVertex> *picked_succs) {
+    /* pick top or vertex we want to handle */
+    if (top_succs.size() < succ_tops.size()) {
+        auto best = top_succs.end();
+        for (auto it = top_succs.begin(); it != top_succs.end(); ++it) {
+            if (best == top_succs.end()
+                || it->second.size() < best->second.size()) {
+                best = it;
            }
        }
-        tops[t] = s;
+        assert(best != top_succs.end());
+        assert(!best->second.empty()); /* should already have been pruned */
+
+        *picked_tops = { best->first };
+        *picked_succs = best->second;
+    } else {
+        auto best = succ_tops.end();
+        for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) {
+            /* have to worry about determinism for this one */
+            if (best == succ_tops.end()
+                || it->second.size() < best->second.size()
+                || (it->second.size() == best->second.size()
+                    && it->second < best->second)) {
+                best = it;
+            }
+        }
+        assert(best != succ_tops.end());
+        assert(!best->second.empty()); /* should already have been pruned */
+
+        *picked_succs = { best->first };
+        *picked_tops = best->second;
    }
+}
+
+static
+void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
+                     const map<u32, flat_set<NFAVertex>> &top_succs,
+                     const map<NFAVertex, flat_set<u32>> &succ_tops,
+                     flat_set<u32> &picked_tops,
+                     flat_set<NFAVertex> &picked_succs) {
+    NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */
+    const auto &cand_tops = succ_tops.at(v);
+
+    for (u32 t : cand_tops) {
+        if (!contains(unhandled_top_succs, t)) {
+            continue;
+        }
+        if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) {
+            continue; /* not adding any useful work that hasn't already been
+                       * done */
+        }
+        if (!is_subset_of(picked_succs, top_succs.at(t))) {
+            continue; /* will not form a cbs */
+        }
+        picked_tops.insert(t);
+    }
+}
+
+static
+void expandCbsBySuccs(const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
+                      const map<u32, flat_set<NFAVertex>> &top_succs,
+                      const map<NFAVertex, flat_set<u32>> &succ_tops,
+                      flat_set<u32> &picked_tops,
+                      flat_set<NFAVertex> &picked_succs) {
+    u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */
+    const auto &cand_succs = top_succs.at(t);
+
+    for (NFAVertex v : cand_succs) {
+        if (!contains(unhandled_succ_tops, v)) {
+            continue;
+        }
+        if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) {
+            continue; /* not adding any useful work that hasn't already been
+                       * done */
+        }
+        if (!is_subset_of(picked_tops, succ_tops.at(v))) {
+            continue; /* will not form a cbs */
+        }
+        picked_succs.insert(v);
+    }
+}
+
+/* See if we can expand the complete bipartite subgraph (cbs) specified by the
+ * picked tops/succs by adding more to either of the tops or succs.
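+ *
+ * For example, if tops {t1, t2} each trigger exactly the successors {a, b},
+ * those four edges form a cbs, and a single added start vertex (whose reach
+ * is the union of the two tops' reach) can serve both tops.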
+ */
+static
+void expandTopSuccCbs(const map<u32, flat_set<NFAVertex>> &top_succs,
+                      const map<NFAVertex, flat_set<u32>> &succ_tops,
+                      const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
+                      const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
+                      flat_set<u32> &picked_tops,
+                      flat_set<NFAVertex> &picked_succs) {
+    /* Note: all picked (tops|succs) are equivalent */
+
+    /* Try to expand first (as we are more likely to succeed) on the side
+     * with fewest remaining things to be handled */
+
+    if (unhandled_top_succs.size() < unhandled_succ_tops.size()) {
+        expandCbsByTops(unhandled_top_succs, top_succs, succ_tops,
+                        picked_tops, picked_succs);
+        expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops,
+                         picked_tops, picked_succs);
+    } else {
+        expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops,
+                         picked_tops, picked_succs);
+        expandCbsByTops(unhandled_top_succs, top_succs, succ_tops,
+                        picked_tops, picked_succs);
+    }
+}
+
+static
+void markTopSuccAsHandled(NFAVertex start_v,
+                          const flat_set<u32> &handled_tops,
+                          const flat_set<NFAVertex> &handled_succs,
+                          map<u32, flat_set<NFAVertex>> &tops_out,
+                          map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
+                          map<NFAVertex, flat_set<u32>> &unhandled_succ_tops) {
+    for (u32 t : handled_tops) {
+        tops_out[t].insert(start_v);
+        assert(contains(unhandled_top_succs, t));
+        erase_all(&unhandled_top_succs[t], handled_succs);
+        if (unhandled_top_succs[t].empty()) {
+            unhandled_top_succs.erase(t);
+        }
+    }
+
+    for (NFAVertex v : handled_succs) {
+        assert(contains(unhandled_succ_tops, v));
+        erase_all(&unhandled_succ_tops[v], handled_tops);
+        if (unhandled_succ_tops[v].empty()) {
+            unhandled_succ_tops.erase(v);
+        }
+    }
+}
+
+static
+void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
+                         const map<u32, CharReach> &top_reach,
+                         map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
+                         map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
+                         map<u32, flat_set<NFAVertex>> &tops_out) {
+    flat_set<u32> top_inter = unhandled_succ_tops.at(u);
+    flat_set<NFAVertex> succs;
+    for (NFAVertex v : adjacent_vertices_range(u, g)) {
+        if (!contains(unhandled_succ_tops, v)) {
+            return;
+        }
+        /* if it has vacuous reports we need to make sure that the report sets
+         * are the same */
+        if ((v == g.accept || v == g.acceptEod)
+            && g[g.start].reports != g[u].reports) {
+            DEBUG_PRINTF("different report behaviour\n");
+            return;
+        }
+        const flat_set<u32> &v_tops = unhandled_succ_tops.at(v);
+        flat_set<u32> new_inter;
+        auto ni_inserter = inserter(new_inter, new_inter.end());
+        set_intersection(top_inter.begin(), top_inter.end(),
+                         v_tops.begin(), v_tops.end(), ni_inserter);
+        top_inter = move(new_inter);
+        succs.insert(v);
+    }
+
+    if (top_inter.empty()) {
+        return;
+    }
+
+    auto top_cr = calcTopVertexReach(top_inter, top_reach);
+    if (!top_cr.isSubsetOf(g[u].char_reach)) {
+        return;
+    }
+
+    DEBUG_PRINTF("reusing %zu as a start vertex\n", g[u].index);
+    markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs,
+                         unhandled_succ_tops);
+}
+
+/* We may have cases where a top triggers something that starts with a .* (or
+ * similar state). In these cases we can make use of that state as a start
+ * state.
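+ * For example, a top that switches on the initial state of /.*foo/ can use
+ * the self-looping dot state itself as its start state rather than adding a
+ * fresh vertex; this is why only self-looping vertices are candidates below.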
+ */
+static
+void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach,
+                        map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
+                        map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
+                        map<u32, flat_set<NFAVertex>> &tops_out) {
+    /* create list of candidates first, to avoid issues of iter invalidation */
+    DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
+    vector<NFAVertex> cand_starts;
+    for (NFAVertex u : unhandled_succ_tops | map_keys) {
+        if (hasSelfLoop(u, g)) {
+            cand_starts.push_back(u);
+        }
+    }
+
+    for (NFAVertex u : cand_starts) {
+        if (!contains(unhandled_succ_tops, u)) {
+            continue;
+        }
+        attemptToUseAsStart(g, u, top_reach, unhandled_top_succs,
+                            unhandled_succ_tops, tops_out);
+    }
+}
+
+static
+void makeTopStates(NGHolder &g, map<u32, flat_set<NFAVertex>> &tops_out,
+                   const map<u32, CharReach> &top_reach) {
+    /* Ideally, we want to add the smallest number of states to the graph for
+     * tops to turn on so that they can accurately trigger their successors.
+     *
+     * The relationships between tops and their successors form a bipartite
+     * graph. Finding the optimal number of start states to add is equivalent
+     * to finding a minimal biclique covering. Unfortunately, this is known to
+     * be NP-complete.
+     *
+     * Given this, we will just do something simple to avoid creating something
+     * truly wasteful:
+     * 1) Try to find any cyclic states which can act as their own start states
+     * 2) Pick a top or a succ to create a start state for and then try to find
+     *    the largest complete bipartite subgraph that it is part of.
+     */
+
+    map<u32, flat_set<NFAVertex>> top_succs;
+    map<NFAVertex, flat_set<u32>> succ_tops;
+    for (const auto &e : out_edges_range(g.start, g)) {
+        NFAVertex v = target(e, g);
+        for (u32 t : g[e].tops) {
+            top_succs[t].insert(v);
+            succ_tops[v].insert(t);
+        }
+    }
+
+    auto unhandled_top_succs = top_succs;
+    auto unhandled_succ_tops = succ_tops;
+
+    reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops,
+                       tops_out);
+
+    /* Note: there may be successors which are equivalent (in terms of
+       top-triggering); it may be more efficient to discover this and treat
+       them as a unit. TODO */
+
+    while (!unhandled_succ_tops.empty()) {
+        assert(!unhandled_top_succs.empty());
+        DEBUG_PRINTF("creating top start vertex\n");
+        flat_set<u32> u_tops;
+        flat_set<NFAVertex> u_succs;
+        pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops,
+                                 &u_tops, &u_succs);
+
+        expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs,
+                         unhandled_succ_tops, u_tops, u_succs);
+
+        /* create start vertex to handle this top/succ combination */
+        NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach);
+
+        /* update maps */
+        markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs,
+                             unhandled_succ_tops);
+    }
+    assert(unhandled_top_succs.empty());

    // We are completely replacing the start vertex, so clear its reports.
    clear_out_edges(g.start, g);
    add_edge(g.start, g.startDs, g);
    g[g.start].reports.clear();
-
-    // Only retain reports (which we copied on add_vertex above) for new top
    // vertices connected to accepts.
- for (const auto &m : tops) { - NFAVertex v = m.second; - if (!edge(v, g.accept, g).second && !edge(v, g.acceptEod, g).second) { - g[v].reports.clear(); - } - } } static @@ -232,7 +479,7 @@ set findZombies(const NGHolder &h, } if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) { - DEBUG_PRINTF("can be made undead - bad reports\n"); + DEBUG_PRINTF("cannot be made undead - bad reports\n"); return zombies; } @@ -321,7 +568,8 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, const map>> &triggers, bool impl_test_only, const CompileContext &cc, ue2::unordered_map &state_ids, - vector &repeats, map &tops) { + vector &repeats, + map> &tops) { assert(is_triggered(h_in) || fixed_depth_tops.empty()); unique_ptr h = cloneHolder(h_in); @@ -331,15 +579,19 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, impl_test_only, cc.grey); // If we're building a rose/suffix, do the top dance. + flat_set topVerts; if (is_triggered(*h)) { makeTopStates(*h, tops, findTopReach(triggers)); + + for (const auto &vv : tops | map_values) { + insert(&topVerts, vv); + } } dropRedundantStartEdges(*h); // Do state numbering - state_ids = numberStates(*h, tops); - dropUnusedStarts(*h, state_ids); + state_ids = numberStates(*h, topVerts); // In debugging, we sometimes like to reverse the state numbering to stress // the NFA construction code. @@ -364,7 +616,7 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { u32 program = rm.getProgramOffset(id); reports.insert(program); } - DEBUG_PRINTF("vertex %u: remapped reports {%s} to programs {%s}\n", + DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n", h[v].index, as_string_list(old_reports).c_str(), as_string_list(reports).c_str()); } @@ -385,14 +637,14 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -465,13 +717,11 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ // Do state numbering. - auto state_ids = numberStates(h); - - dropUnusedStarts(h, state_ids); + auto state_ids = numberStates(h, {}); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -479,7 +729,7 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(sanityCheckGraph(h, state_ids)); - map tops; /* only the standards tops for nfas */ + map> tops; /* only the standards tops for nfas */ set zombies; vector repeats; map reportSquashMap; @@ -508,10 +758,13 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, if (!cc.grey.allowLimExNFA) { return false; } + + assert(!can_never_match(g)); + // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. 
Note that top masks can generate extra states, so we account for // those here too. - if (num_vertices(g) + NFA_MAX_TOP_MASKS < NFA_MAX_STATES) { + if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { return true; } @@ -532,12 +785,12 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); assert(h); - u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates <= NFA_MAX_STATES) { return numStates; } @@ -579,12 +832,12 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); - if (!h || countStates(*h, state_ids, false) > NFA_MAX_STATES) { + if (!h || countStates(state_ids) > NFA_MAX_STATES) { DEBUG_PRINTF("not constructible\n"); return NFA_MAX_ACCEL_STATES + 1; } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index deaf2ffd..bfba7c71 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -69,7 +69,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, } const CharReach &acr = g[v].char_reach; - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) { DEBUG_PRINTF("bad reach %zu\n", acr.count()); @@ -86,7 +86,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr, next_preds->insert(v); insert(next_cands, adjacent_vertices(v, g)); - DEBUG_PRINTF("%u is a friend indeed\n", g[v].index); + DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); friends->insert(v); next_cand:; } @@ -675,7 +675,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { while (true) { if (hasSelfLoop(v, g)) { - DEBUG_PRINTF("woot %u\n", g[v].index); + DEBUG_PRINTF("woot %zu\n", g[v].index); return v; } if (out_degree(v, g) != 1) { @@ -837,7 +837,7 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, CharReach terminating = g[v].char_reach; terminating.flip(); - DEBUG_PRINTF("vertex %u is cyclic and has %zu stop chars%s\n", + DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", g[v].index, terminating.count(), allow_wide ? " (w)" : ""); diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 9229457c..a5f3468b 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -40,6 +40,7 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include @@ -49,7 +50,6 @@ #include using namespace std; -using boost::vertex_index; namespace ue2 { @@ -64,24 +64,29 @@ namespace { /* Small literal graph type used for the suffix tree used in * compressAndScore. 
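 * Reversed literals are added as paths from the root, so literals that share
 * a suffix share a prefix of their path (and thus vertices) in this graph.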
*/ - struct LitGraphVertexProps { - LitGraphVertexProps() {} - explicit LitGraphVertexProps(const ue2_literal::elem &c_in) : c(c_in) {} + LitGraphVertexProps() = default; + explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} ue2_literal::elem c; // string element (char + bool) + size_t index; // managed by ue2_graph }; struct LitGraphEdgeProps { - LitGraphEdgeProps() {} + LitGraphEdgeProps() = default; explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; - size_t index; /* only initialised when the reverse edges are added. */ + size_t index; // managed by ue2_graph +}; + +struct LitGraph + : public ue2_graph { + + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} + + const vertex_descriptor root; + const vertex_descriptor sink; }; -/* keep edgeList = listS as you cannot remove edges if edgeList = vecS */ -typedef boost::adjacency_list LitGraph; typedef LitGraph::vertex_descriptor LitVertex; typedef LitGraph::edge_descriptor LitEdge; @@ -94,17 +99,16 @@ typedef std::queue LitVertexQ; /** \brief Dump the literal graph in Graphviz format. */ static UNUSED -void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, - const LitVertex &sink) { +void dumpGraph(const char *filename, const LitGraph &lg) { ofstream fout(filename); fout << "digraph G {" << endl; for (auto v : vertices_range(lg)) { - fout << boost::get(vertex_index, lg, v); - if (v == root) { + fout << lg[v].index; + if (v == lg.root) { fout << "[label=\"ROOT\"];"; - } else if (v == sink) { + } else if (v == lg.sink) { fout << "[label=\"SINK\"];"; } else { ue2_literal s; @@ -116,10 +120,9 @@ void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, for (const auto &e : edges_range(lg)) { LitVertex u = source(e, lg), v = target(e, lg); - fout << boost::get(vertex_index, lg, u) << " -> " << - boost::get(vertex_index, lg, v) << - "[label=\"" << lg[e].score << "\"]" << - ";" << endl; + fout << lg[u].index << " -> " << lg[v].index << "[label=\"" + << lg[e].score << "\"]" + << ";" << endl; } fout << "}" << endl; @@ -141,11 +144,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) { } static -LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, - LitVertex pred, const ue2_literal::elem &c) { +LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, + const ue2_literal::elem &c) { // Check if we already have this in the graph. 
for (auto v : adjacent_vertices_range(pred, lg)) { - if (v == sink) { + if (v == lg.sink) { continue; } if (lg[v].c == c) { @@ -159,9 +162,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, } static -void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, - LitVertex pred, const CharReach &cr, NFAVertex v) { - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { +void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, + const CharReach &cr, NFAVertex v) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -169,14 +173,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lv = addToLitGraph(lg, sink, pred, c); + LitVertex lv = addToLitGraph(lg, pred, c); workQ.push(VertexPair(lv, v)); } } static -void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, - LitVertex sink, const NGHolder &g, const NFAEdge &e) { +void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, + const NFAEdge &e) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); const CharReach &cr = g[v].char_reach; @@ -185,7 +189,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, return; } - addToQueue(workQ, lg, sink, root, cr, u); + addToQueue(workQ, lg, lg.root, cr, u); } static @@ -197,7 +201,8 @@ u32 crCardinality(const CharReach &cr) { } u32 rv = 0; - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -212,10 +217,10 @@ u32 crCardinality(const CharReach &cr) { * identifying vertices connected to the sink and removing their other * out-edges. */ static -void filterLitGraph(LitGraph &lg, const LitVertex sink) { - for (auto v : inv_adjacent_vertices_range(sink, lg)) { - remove_out_edge_if(v, [&lg, &sink](const LitEdge &e) { - return target(e, lg) != sink; +void filterLitGraph(LitGraph &lg) { + for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { + remove_out_edge_if(v, [&lg](const LitEdge &e) { + return target(e, lg) != lg.sink; }, lg); } @@ -228,13 +233,12 @@ void filterLitGraph(LitGraph &lg, const LitVertex sink) { * from each predecessor of the sink (note: it's a suffix tree except for this * convenience) towards the source, storing each string as we go. 
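 * Each such walk yields a single literal: the in-degree assertion below
 * guarantees that the chain of predecessors back to the root is unique.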
*/ static -void extractLiterals(const LitGraph &lg, const LitVertex root, - const LitVertex sink, set &s) { +void extractLiterals(const LitGraph &lg, set &s) { ue2_literal lit; - for (auto u : inv_adjacent_vertices_range(sink, lg)) { + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { lit.clear(); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) <= 1); LitGraph::inv_adjacency_iterator ai2, ae2; @@ -276,11 +280,9 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, } LitGraph lg; - LitVertex root = add_vertex(lg); - LitVertex sink = add_vertex(lg); LitVertexQ workQ; - initWorkQueue(workQ, lg, root, sink, g, e); + initWorkQueue(workQ, lg, g, e); while (!workQ.empty()) { const LitVertex lv = workQ.front().first; @@ -289,18 +291,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, u32 cr_card = crCardinality(cr); size_t numItems = cr_card * in_degree(t, g); - size_t committed_count = workQ.size() + in_degree(sink, lg) - 1; + size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; if (g[t].index == NODE_START) { // reached start, add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); goto next_work_elem; } // Expand next vertex if (allowExpand(numItems, committed_count)) { for (auto u : inv_adjacent_vertices_range(t, g)) { - addToQueue(workQ, lg, sink, lv, cr, u); + addToQueue(workQ, lg, lv, cr, u); } goto next_work_elem; } @@ -316,26 +318,26 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lt = addToLitGraph(lg, sink, lv, c); - add_edge_if_not_present(lt, sink, lg); + LitVertex lt = addToLitGraph(lg, lv, c); + add_edge_if_not_present(lt, lg.sink, lg); } goto next_work_elem; } // add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); next_work_elem: workQ.pop(); } - filterLitGraph(lg, sink); - //dumpGraph("litgraph.dot", lg, root, sink); - extractLiterals(lg, root, sink, s); + filterLitGraph(lg); + //dumpGraph("litgraph.dot", lg); + extractLiterals(lg, s); // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); - DEBUG_PRINTF("edge %u (%u->%u) produced %zu literals\n", g[e].index, + DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index, g[source(e, g)].index, g[target(e, g)].index, s.size()); } @@ -409,16 +411,15 @@ u64a calculateScore(const ue2_literal &s) { /** Adds a literal in reverse order, building up a suffix tree. */ static -void addReversedLiteral(const ue2_literal &lit, LitGraph &lg, - const LitVertex &root, const LitVertex &sink) { +void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); ue2_literal suffix; - LitVertex v = root; + LitVertex v = lg.root; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { suffix.push_back(*it); LitVertex w; for (auto v2 : adjacent_vertices_range(v, lg)) { - if (v2 != sink && lg[v2].c == *it) { + if (v2 != lg.sink && lg[v2].c == *it) { w = v2; goto next_char; } @@ -430,17 +431,18 @@ next_char: } // Wire the last vertex to the sink. 
- add_edge(v, sink, lg); + add_edge(v, lg.sink, lg); } static void extractLiterals(const vector &cutset, const LitGraph &lg, - const LitVertex &root, set &s) { + set &s) { for (const auto &e : cutset) { - LitVertex u = source(e, lg), v = target(e, lg); + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); ue2_literal lit; lit.push_back(lg[v].c); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) == 1); LitGraph::inv_adjacency_iterator ai, ae; @@ -487,10 +489,7 @@ const char *describeColor(boost::default_color_type c) { static vector add_reverse_edges_and_index(LitGraph &lg) { vector fwd_edges; - - size_t next_index = 0; for (const auto &e : edges_range(lg)) { - lg[e].index = next_index++; fwd_edges.push_back(e); } @@ -502,9 +501,7 @@ vector add_reverse_edges_and_index(LitGraph &lg) { assert(!edge(v, u, lg).second); - LitEdge rev = add_edge(v, u, lg).first; - lg[rev].score = 0; - lg[rev].index = next_index++; + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; rev_map[lg[e].index] = rev; rev_map[lg[rev].index] = e; } @@ -513,20 +510,19 @@ vector add_reverse_edges_and_index(LitGraph &lg) { } static -void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, - vector &cutset) { +void findMinCut(LitGraph &lg, vector &cutset) { cutset.clear(); - //dumpGraph("litgraph.dot", lg, root, sink); + //dumpGraph("litgraph.dot", lg); - assert(!in_degree(root, lg)); - assert(!out_degree(sink, lg)); + assert(!in_degree(lg.root, lg)); + assert(!out_degree(lg.sink, lg)); size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. vector rev_edges = add_reverse_edges_and_index(lg); - const auto v_index_map = get(vertex_index, lg); + const auto v_index_map = get(&LitGraphVertexProps::index, lg); const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); vector colors(num_verts); @@ -541,7 +537,7 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, make_iterator_property_map(predecessors.begin(), v_index_map), make_iterator_property_map(colors.begin(), v_index_map), make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, root, sink); + v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); /* remove reverse edges */ @@ -554,21 +550,20 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = colors[boost::get(vertex_index, lg, u)]; - const auto vcolor = colors[boost::get(vertex_index, lg, v)]; + const auto ucolor = colors[lg[u].index]; + const auto vcolor = colors[lg[v].index]; - DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", - boost::get(vertex_index, lg, u), describeColor(ucolor), - boost::get(vertex_index, lg, v), describeColor(vcolor), + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, + describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); if (ucolor != boost::white_color && vcolor == boost::white_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } if (ucolor == boost::black_color && vcolor != boost::black_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; } @@ -608,21 +603,19 @@ u64a compressAndScore(set &s) { initialScore); LitGraph lg; - const LitVertex root = 
add_vertex(lg); - const LitVertex sink = add_vertex(lg); for (const auto &lit : s) { - addReversedLiteral(lit, lg, root, sink); + addReversedLiteral(lit, lg); } DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", num_vertices(lg), num_edges(lg)); vector cutset; - findMinCut(lg, root, sink, cutset); + findMinCut(lg, cutset); s.clear(); - extractLiterals(cutset, lg, root, s); + extractLiterals(cutset, lg, s); u64a score = scoreSet(s); DEBUG_PRINTF("compressed score is %llu\n", score); @@ -791,7 +784,7 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, } while (true) { - DEBUG_PRINTF("validating vertex %u\n", g[v].index); + DEBUG_PRINTF("validating vertex %zu\n", g[v].index); assert(v != g.acceptEod && v != g.accept); diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index 871c8ac7..e3cfe867 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -95,7 +95,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { static bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, set &dead) { - DEBUG_PRINTF("examine vertex %u\n", g[v].index); + DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; assert(!is_special(v, g)); @@ -109,7 +109,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, assert(edge(g.start, v, g).second); assert(edge(g.startDs, v, g).second); } - if (hasGreaterInDegree(reqInDegree, v, g)) { + if (in_degree(v, g) > reqInDegree) { DEBUG_PRINTF("extra in-edges\n"); return false; } @@ -134,7 +134,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, u = v; // previous vertex v = *(adjacent_vertices(v, g).first); - DEBUG_PRINTF("loop, v=%u\n", g[v].index); + DEBUG_PRINTF("loop, v=%zu\n", g[v].index); if (is_special(v, g)) { if (v == g.accept || v == g.acceptEod) { diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp index 02b25a73..89c01a6c 100644 --- a/src/nfagraph/ng_literal_decorated.cpp +++ b/src/nfagraph/ng_literal_decorated.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ bool findPaths(const NGHolder &g, vector &paths) { read_count[g[v].index] = out_degree(v, g); - DEBUG_PRINTF("setting read_count to %zu for %u\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); if (v == g.start || v == g.startDs) { @@ -117,7 +117,7 @@ bool findPaths(const NGHolder &g, vector &paths) { read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); built[g[u].index].clear(); built[g[u].index].shrink_to_fit(); } @@ -138,9 +138,9 @@ bool hasLargeDegreeVertex(const NGHolder &g) { if (is_special(v, g)) { // specials can have large degree continue; } - if (has_greater_degree(MAX_VERTEX_DEGREE, v, g)) { - DEBUG_PRINTF("vertex %u has degree %zu\n", g[v].index, - boost::degree(v, g.g)); + if (degree(v, g) > MAX_VERTEX_DEGREE) { + DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index, + degree(v, g)); return true; } } @@ -188,7 +188,8 @@ struct PathMask { } // Reports are attached to the second-to-last vertex. 
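 // (The final vertex of a path is an accept special, which carries no
 // reports of its own.)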
- reports = g[*next(path.rbegin())].reports; + NFAVertex u = *std::next(path.rbegin()); + reports = g[u].reports; assert(!reports.empty()); } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 39788570..375086a4 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -36,7 +36,6 @@ #include "nfa/rdfa.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_squash.h" #include "ng_util.h" #include "ue2common.h" @@ -329,7 +328,7 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, } for (const auto &trigger : triggers) { if (triggerAllowed(g, v, triggers, trigger)) { - DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index); + DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index); out->set(g[v].index); break; } @@ -348,10 +347,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), - unused(unused_in), init(Automaton_Traits::init_states(numStates)), + unused(getRedundantStarts(graph_in)), + init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), accept(Automaton_Traits::init_states(numStates)), @@ -444,7 +444,7 @@ private: public: const NGHolder &graph; u32 numStates; - const flat_set &unused; + const flat_set unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -482,9 +482,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; @@ -510,14 +510,36 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, - const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + bool single_trigger, + const vector> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; } // namespace +static +bool startIsRedundant(const NGHolder &g) { + set start; + set startDs; + + insert(&start, adjacent_vertices(g.start, g)); + insert(&startDs, adjacent_vertices(g.startDs, g)); + + return start == startDs; +} + +flat_set getRedundantStarts(const NGHolder &g) { + flat_set dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + unique_ptr buildMcClellan(const NGHolder &graph, const ReportManager *rm, bool single_trigger, const vector> &triggers, @@ -526,8 +548,6 @@ unique_ptr buildMcClellan(const NGHolder &graph, return nullptr; } - auto unused = findUnusedStates(graph); - DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); @@ -553,8 +573,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, if (numStates <= NFA_STATE_LIMIT) { /* Fast path. 
Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, unused, single_trigger, triggers, - prunable); + Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ @@ -566,7 +585,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. */ - Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable); + Automaton_Big n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index 22fcf01e..b78dac3b 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #include "ue2common.h" #include "nfa/mcclellancompile.h" #include "nfagraph/ng_holder.h" -#include "nfagraph/ng_restructuring.h" // for NO_STATE #include "util/charreach.h" #include "util/graph_range.h" #include "util/ue2_containers.h" @@ -69,6 +68,13 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, const std::vector> &triggers, boost::dynamic_bitset<> *out); +/** + * \brief Returns a set of start vertices that will not participate in an + * implementation of this graph. These are either starts with no successors or + * starts which are redundant with startDs. + */ +flat_set getRedundantStarts(const NGHolder &g); + template void transition_graph(autom &nfa, const std::vector &vByStateId, const typename autom::StateSet &in, diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 2e02933a..29939fec 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,13 +69,18 @@ #include "util/charreach.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/ue2_containers.h" #include "ue2common.h" +#include +#include + #include #include #include using namespace std; +using boost::make_filtered_graph; namespace ue2 { @@ -94,8 +99,8 @@ void findCandidates(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. - set succ_v, succ_u; - succ(g, v, &succ_v); + auto succ_v = succs(v, g); + flat_set succ_u; for (auto u : inv_adjacent_vertices_range(v, g)) { succ_u.clear(); @@ -104,7 +109,7 @@ void findCandidates(NGHolder &g, const vector &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -125,8 +130,8 @@ void findCandidates_rev(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. 
- set pred_v, pred_u; - pred(g, v, &pred_v); + auto pred_v = preds(v, g); + flat_set pred_u; for (auto u : adjacent_vertices_range(v, g)) { pred_u.clear(); @@ -135,7 +140,7 @@ void findCandidates_rev(NGHolder &g, const vector &ordering, goto next_cand; } } - DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index); + DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index); cand->push_back(v); next_cand:; } @@ -172,8 +177,7 @@ void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { static set findSustainSet(const NGHolder &g, NFAVertex p, bool ignore_starts, const CharReach &new_cr) { - set cand; - pred(g, p, &cand); + auto cand = preds>(p, g); if (ignore_starts) { cand.erase(g.startDs); } @@ -209,8 +213,7 @@ set findSustainSet(const NGHolder &g, NFAVertex p, static set findSustainSet_rev(const NGHolder &g, NFAVertex p, const CharReach &new_cr) { - set cand; - succ(g, p, &cand); + auto cand = succs>(p, g); /* remove elements from cand until the sustain set property holds */ bool changed; do { @@ -240,7 +243,7 @@ set findSustainSet_rev(const NGHolder &g, NFAVertex p, static bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { - DEBUG_PRINTF("considering vertex %u\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -259,7 +262,7 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at pred %u\n", g[p].index); + DEBUG_PRINTF("looking at pred %zu\n", g[p].index); bool ignore_sds = som; /* if we are tracking som, entries into a state from sds are significant. */ @@ -289,13 +292,13 @@ bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %u was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } static bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { - DEBUG_PRINTF("considering vertex %u\n", g[v].index); + DEBUG_PRINTF("considering vertex %zu\n", g[v].index); const CharReach &v_cr = g[v].char_reach; CharReach add; @@ -314,7 +317,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { if (p == v) { continue; } - DEBUG_PRINTF("looking at succ %u\n", g[p].index); + DEBUG_PRINTF("looking at succ %zu\n", g[p].index); set sustain = findSustainSet_rev(g, p, add); DEBUG_PRINTF("sustain set is %zu\n", sustain.size()); @@ -339,7 +342,7 @@ bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) { /* the cr can be increased */ g[v].char_reach = add; - DEBUG_PRINTF("vertex %u was widened\n", g[v].index); + DEBUG_PRINTF("vertex %zu was widened\n", g[v].index); return true; } @@ -388,7 +391,7 @@ bool improveGraph(NGHolder &g, som_type som) { * enlargeCyclicCR. */ CharReach reduced_cr(NFAVertex v, const NGHolder &g, const map &br_cyclic) { - DEBUG_PRINTF("find minimal cr for %u\n", g[v].index); + DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index); CharReach v_cr = g[v].char_reach; if (proper_in_degree(v, g) != 1) { return v_cr; @@ -546,4 +549,165 @@ bool mergeCyclicDotStars(NGHolder &g) { return true; } +/** + * Returns the set of vertices that cannot be on if v is not on. + */ +static +flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { + auto v_pred = preds(v, g); + flat_set may_be_on; + + /* We need to exclude any vertex that may be reached on a path which is + * incompatible with the vertex v being on. 
*/
+
+    /* A vertex u is bad if:
+     * 1) its reach may be incompatible with v (not a subset)
+     * 2) there is an edge from a bad vertex b and there is either not an
+     *    edge v->u or not an edge b->v.
+     * Note: 2) means v is never bad as it has a selfloop
+     *
+     * Can do this with a DFS from all the initial bad states with a
+     * conditional check down edges. Alternatively, we can just filter these
+     * edges out of the graph first.
+     */
+    flat_set<NFAEdge> no_explore;
+    for (NFAVertex t : adjacent_vertices_range(v, g)) {
+        for (NFAEdge e : in_edges_range(t, g)) {
+            NFAVertex s = source(e, g);
+            if (edge(s, v, g).second) {
+                no_explore.insert(e);
+            }
+        }
+    }
+
+    auto filtered_g = make_filtered_graph(g, make_bad_edge_filter(&no_explore));
+
+    vector<boost::default_color_type> color_raw(num_vertices(g));
+    auto color = make_iterator_property_map(color_raw.begin(),
+                                            get(vertex_index, g));
+    flat_set<NFAVertex> bad;
+    for (NFAVertex b : vertices_range(g)) {
+        if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) {
+            continue;
+        }
+        boost::depth_first_visit(filtered_g, b, make_vertex_recorder(bad),
+                                 color);
+    }
+
+    flat_set<NFAVertex> rv;
+    for (NFAVertex u : vertices_range(g)) {
+        if (!contains(bad, u)) {
+            DEBUG_PRINTF("%zu is good\n", g[u].index);
+            rv.insert(u);
+        }
+    }
+    return rv;
+}
+
+static
+bool willBeEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v,
+                               const NGHolder &g) {
+    return is_subset_of(preds(main_cyclic, g), preds(v, g));
+}
+
+static
+bool sometimesEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v,
+                                  const NGHolder &g) {
+    return has_intersection(preds(main_cyclic, g), preds(v, g));
+}
+
+static
+bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) {
+    if (som && (is_virtual_start(u, g) || u == g.startDs)) {
+        return false;
+    }
+
+    bool changed = false;
+    DEBUG_PRINTF("using cyclic %zu as base\n", g[u].index);
+    auto children = findDependentVertices(g, u);
+    vector<NFAVertex> u_succs;
+    for (NFAVertex v : adjacent_vertices_range(u, g)) {
+        if (som && is_virtual_start(v, g)) {
+            /* as v is a virtual start, its som has been reset so it cannot
+             * override existing in-progress matches. */
+            continue;
+        }
+        u_succs.push_back(v);
+    }
+    stable_sort(u_succs.begin(), u_succs.end(),
+                [&](NFAVertex a, NFAVertex b) {
+                    return g[a].char_reach.count() > g[b].char_reach.count();
+                });
+    for (NFAVertex v : u_succs) {
+        DEBUG_PRINTF("  using %zu as killer\n", g[v].index);
+        /* Need to distinguish between vertices that are switched on after the
+         * cyclic vs vertices that are switched on concurrently with the
+         * cyclic (subject to a suitable reach) */
+        bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g);
+        set<NFAEdge> dead;
+        for (NFAVertex s : adjacent_vertices_range(v, g)) {
+            DEBUG_PRINTF("    looking at preds of %zu\n", g[s].index);
+            for (NFAEdge e : in_edges_range(s, g)) {
+                NFAVertex p = source(e, g);
+                if (!contains(children, p) || p == v || p == u
+                    || p == g.accept) {
+                    DEBUG_PRINTF("%zu not a cand\n", g[p].index);
+                    continue;
+                }
+                if (is_any_accept(s, g) && g[p].reports != g[v].reports) {
+                    DEBUG_PRINTF("%zu bad reports\n", g[p].index);
+                    continue;
+                }
+                /* the out-edges of a vertex that may be enabled on the same
+                 * byte as the cyclic can only be killed by the out-edges of a
+                 * peer vertex which will be enabled with the cyclic (a non-peer
+                 * may not be switched on until another byte is processed).
*/ + if (!v_peer_of_cyclic + && sometimesEnabledConcurrently(u, p, g)) { + DEBUG_PRINTF("%zu can only be squashed by a proper peer\n", + g[p].index); + continue; + } + + if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { + dead.insert(e); + changed = true; + DEBUG_PRINTF("removing edge %zu->%zu\n", g[p].index, + g[s].index); + } else if (is_subset_of(succs(p, g), succs(u, g))) { + if (is_match_vertex(p, g) + && !is_subset_of(g[p].reports, g[v].reports)) { + continue; + } + DEBUG_PRINTF("updating reach on %zu\n", g[p].index); + changed |= (g[p].char_reach & g[v].char_reach).any(); + g[p].char_reach &= ~g[v].char_reach; + } + + } + } + remove_edges(dead, g); + } + + DEBUG_PRINTF("changed %d\n", (int)changed); + return changed; +} + +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { + /* TODO: the reverse form of this is also possible */ + bool changed = false; + for (NFAVertex v : vertices_range(g)) { + if (hasSelfLoop(v, g) && g[v].char_reach.all()) { + changed |= pruneUsingSuccessors(g, v, som); + } + } + + if (changed) { + pruneUseless(g); + clearReports(g); + } + + return changed; +} + } // namespace ue2 diff --git a/src/nfagraph/ng_misc_opt.h b/src/nfagraph/ng_misc_opt.h index 4955c7af..5ed089dc 100644 --- a/src/nfagraph/ng_misc_opt.h +++ b/src/nfagraph/ng_misc_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,6 +72,13 @@ std::vector reduced_cr(const NGHolder &g, /** Remove cyclic stars connected to start */ bool mergeCyclicDotStars(NGHolder &g); +/** + * Given a cyclic state 'c' with a broad reach and a later state 'v' that is + * only reachable if c is still on, then any edges to a successor of a direct + * successor of c with reach a superset of v are redundant. + */ +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_netflow.cpp b/src/nfagraph/ng_netflow.cpp index 9004024f..cff26358 100644 --- a/src/nfagraph/ng_netflow.cpp +++ b/src/nfagraph/ng_netflow.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -92,7 +92,7 @@ void addReverseEdges(NGHolder &g, vector &reverseEdge, if (it == allEdges.end()) { // No reverse edge, add one. NFAVertex u = source(fwd, g), v = target(fwd, g); - NFAEdge rev = add_edge(v, u, g).first; + NFAEdge rev = add_edge(v, u, g); it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first; // Add to capacity map. 
u32 revIndex = g[rev].index; @@ -111,6 +111,7 @@ static void removeEdgesFromIndex(NGHolder &g, vector &capacityMap, u32 idx) { remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g); capacityMap.resize(idx); + renumber_edges(g); } /** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and @@ -142,11 +143,10 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, vector distances(numVertices); assert(colorMap.size() == numVertices); - const NFAGraph &g = h.g; - auto v_index_map = get(&NFAGraphVertexProps::index, g); - auto e_index_map = get(&NFAGraphEdgeProps::index, g); + auto v_index_map = get(vertex_index, h); + auto e_index_map = get(edge_index, h); - u64a flow = boykov_kolmogorov_max_flow(g, + u64a flow = boykov_kolmogorov_max_flow(h, make_iterator_property_map(capacityMap.begin(), e_index_map), make_iterator_property_map(edgeResiduals.begin(), e_index_map), make_iterator_property_map(reverseEdges.begin(), e_index_map), @@ -158,7 +158,7 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, // Remove reverse edges from graph. removeEdgesFromIndex(h, capacityMap, numRealEdges); - assert(num_edges(h.g) == numRealEdges); + assert(num_edges(h) == numRealEdges); DEBUG_PRINTF("flow = %llu\n", flow); return flow; @@ -190,14 +190,14 @@ vector findMinCut(NGHolder &h, const vector &scores) { if (fromColor != boost::white_color && toColor == boost::white_color) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found white cut edge %u->%u cap %llu\n", + DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_white_flow += ec; picked_white.push_back(e); } if (fromColor == boost::black_color && toColor != boost::black_color) { assert(ec <= INVALID_EDGE_CAP); - DEBUG_PRINTF("found black cut edge %u->%u cap %llu\n", + DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_black_flow += ec; picked_black.push_back(e); diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 8abc45b3..012b4e8d 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -220,13 +220,7 @@ void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to, continue; } - // Check with edge_by_target to cope with predecessors with large - // fan-out. - if (edge_by_target(u, to, g).second) { - continue; - } - - add_edge(u, to, g[e], g); + add_edge_if_not_present(u, to, g[e], g); } } @@ -361,7 +355,7 @@ void reduceRegions(NGHolder &h) { // We may have vertices that have edges to both accept and acceptEod: in // this case, we can optimize for performance by removing the acceptEod // edges. 
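 // (Such a vertex already raises its reports via the accept edge, so for
 // prefiltering purposes the extra end-of-data report adds nothing.)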
- remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h.g); + remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h); } void prefilterReductions(NGHolder &h, const CompileContext &cc) { @@ -378,13 +372,13 @@ void prefilterReductions(NGHolder &h, const CompileContext &cc) { DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); reduceRegions(h); - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); diff --git a/src/nfagraph/ng_prune.cpp b/src/nfagraph/ng_prune.cpp index 473b9586..88f1880f 100644 --- a/src/nfagraph/ng_prune.cpp +++ b/src/nfagraph/ng_prune.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -57,9 +57,8 @@ namespace ue2 { void pruneUnreachable(NGHolder &g) { deque dead; - if (!hasGreaterInDegree(1, g.acceptEod, g) && - !hasGreaterInDegree(0, g.accept, g) && - edge(g.accept, g.acceptEod, g).second) { + if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) + && edge(g.accept, g.acceptEod, g).second) { // Trivial case: there are no in-edges to our accepts (other than // accept->acceptEod), so all non-specials are unreachable. for (auto v : vertices_range(g)) { @@ -70,10 +69,10 @@ void pruneUnreachable(NGHolder &g) { } else { // Walk a reverse graph from acceptEod with Boost's depth_first_visit // call. - typedef reverse_graph RevNFAGraph; - RevNFAGraph revg(g.g); + typedef reverse_graph RevNFAGraph; + RevNFAGraph revg(g); - map colours; + map colours; depth_first_visit(revg, g.acceptEod, make_dfs_visitor(boost::null_visitor()), @@ -104,7 +103,8 @@ void pruneUnreachable(NGHolder &g) { template static -bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s, +bool pruneForwardUseless(NGHolder &h, const nfag_t &g, + typename nfag_t::vertex_descriptor s, vector &vertexColor) { // Begin with all vertices set to white, as DFV only marks visited // vertices. 
@@ -122,9 +122,9 @@ bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s, for (auto v : vertices_range(g)) { u32 idx = g[v].index; if (!is_special(v, g) && vertexColor[idx] == boost::white_color) { - DEBUG_PRINTF("vertex %u is unreachable from %u\n", + DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", g[v].index, g[s].index); - dead.push_back(v); + dead.push_back(NFAVertex(v)); } } @@ -145,17 +145,17 @@ void pruneUseless(NGHolder &g, bool renumber) { assert(hasCorrectlyNumberedVertices(g)); vector vertexColor(num_vertices(g)); - bool work_done = pruneForwardUseless(g, g.g, g.start, vertexColor); - work_done |= pruneForwardUseless( - g, reverse_graph(g.g), g.acceptEod, vertexColor); + bool work_done = pruneForwardUseless(g, g, g.start, vertexColor); + work_done |= pruneForwardUseless(g, reverse_graph(g), + g.acceptEod, vertexColor); if (!work_done) { return; } if (renumber) { - g.renumberEdges(); - g.renumberVertices(); + renumber_edges(g); + renumber_vertices(g); } } @@ -172,7 +172,7 @@ void pruneEmptyVertices(NGHolder &g) { const CharReach &cr = g[v].char_reach; if (cr.none()) { - DEBUG_PRINTF("empty: %u\n", g[v].index); + DEBUG_PRINTF("empty: %zu\n", g[v].index); dead.push_back(v); } } @@ -234,7 +234,7 @@ bool isDominatedByReporter(const NGHolder &g, // Note: reporters with edges only to acceptEod are not considered to // dominate. if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { - DEBUG_PRINTF("%u is dominated by %u, and both report %u\n", + DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n", g[v].index, g[u].index, report_id); return true; } @@ -296,7 +296,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { } - sort(begin(reporters), end(reporters), make_index_ordering(g)); + sort(begin(reporters), end(reporters)); reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n", @@ -315,14 +315,14 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { continue; } if (isDominatedByReporter(g, dom, v, report_id)) { - DEBUG_PRINTF("removed dominated report %u from vertex %u\n", + DEBUG_PRINTF("removed dominated report %u from vertex %zu\n", report_id, g[v].index); g[v].reports.erase(report_id); } } if (g[v].reports.empty()) { - DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n", + DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n", g[v].index); remove_edge(v, g.accept, g); remove_edge(v, g.acceptEod, g); @@ -337,7 +337,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { remove_edge(v, v, g); modified = true; - DEBUG_PRINTF("removed self-loop on %u\n", g[v].index); + DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index); } } @@ -349,7 +349,7 @@ void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { // We may have only removed self-loops, in which case pruneUseless wouldn't // renumber, so we do edge renumbering explicitly here. 
- g.renumberEdges(); + renumber_edges(g); } /** Removes the given Report ID from vertices connected to accept, and then @@ -388,8 +388,8 @@ void pruneReport(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } /** Removes all Report IDs bar the given one from vertices connected to accept, @@ -431,8 +431,8 @@ void pruneAllOtherReports(NGHolder &g, ReportID report) { remove_edges(dead, g); pruneUnreachable(g); - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); } } // namespace ue2 diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 00b2e8ac..7281471f 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -59,7 +59,7 @@ static size_t countChain(const NGHolder &g, NFAVertex v) { size_t count = 0; while (v) { - DEBUG_PRINTF("counting vertex %u\n", g[v].index); + DEBUG_PRINTF("counting vertex %zu\n", g[v].index); if (is_special(v, g)) { break; } @@ -79,7 +79,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, continue; } - DEBUG_PRINTF("adding edge: %u -> accept\n", g[u].index); + DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index); assert(!edge(u, g.accept, g).second); assert(!edge(u, g.acceptEod, g).second); add_edge(u, g.accept, g); @@ -136,13 +136,13 @@ bool singleStart(const NGHolder &g) { for (auto v : adjacent_vertices_range(g.start, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %u\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } for (auto v : adjacent_vertices_range(g.startDs, g)) { if (!is_special(v, g)) { - DEBUG_PRINTF("saw %u\n", g[v].index); + DEBUG_PRINTF("saw %zu\n", g[v].index); seen.insert(v); } } @@ -158,7 +158,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { for (auto u : inv_adjacent_vertices_range(head, g)) { if (!g[u].char_reach.isSubsetOf(puff_escapes)) { - DEBUG_PRINTF("no reset on trigger %u %u\n", g[u].index, + DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index, g[head].index); return false; } @@ -172,7 +172,7 @@ bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { * */ static bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { - DEBUG_PRINTF("head = %u\n", g[head].index); + DEBUG_PRINTF("head = %zu\n", g[head].index); const CharReach &puff_cr = g[head].char_reach; @@ -186,14 +186,14 @@ bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { if (proper_in_degree(head, g) == 1 && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { head = getSoleSourceVertex(g, head); - DEBUG_PRINTF("temp new head = %u\n", g[head].index); + DEBUG_PRINTF("temp new head = %zu\n", g[head].index); } for (auto s : inv_adjacent_vertices_range(head, g)) { - DEBUG_PRINTF("s = %u\n", g[s].index); + DEBUG_PRINTF("s = %zu\n", g[s].index); if (!puff_cr.isSubsetOf(g[s].char_reach)) { - DEBUG_PRINTF("no flood on trigger %u %u\n", - g[s].index, g[head].index); + DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index, + g[head].index); return false; } @@ -268,7 +268,7 @@ void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, RoseBuild &rose, ReportManager &rm, flat_set &chain_reports, bool prefilter) { DEBUG_PRINTF("constructing Puff for report %u\n", report); - DEBUG_PRINTF("a = %u\n", g[a].index); + DEBUG_PRINTF("a = %zu\n", g[a].index); const Report &puff_report = rm.getReport(report); const bool simple_exhaust = isSimpleExhaustible(puff_report); @@ -349,7 +349,7 @@ bool doComponent(RoseBuild &rose, 
ReportManager &rm, NGHolder &g, NFAVertex a, } nodes.push_back(a); - DEBUG_PRINTF("vertex %u has in_degree %zu\n", g[a].index, + DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index, in_degree(a, g)); a = getSoleSourceVertex(g, a); @@ -387,10 +387,10 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, bool auto_restart = false; - DEBUG_PRINTF("a = %u\n", g[a].index); + DEBUG_PRINTF("a = %zu\n", g[a].index); if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { - DEBUG_PRINTF("bad %zu %u\n", nodes.size(), g[a].index); + DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index); if (nodes.size() < MIN_PUFF_LENGTH) { return false; } else { diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 26599251..76bc93da 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -307,16 +307,10 @@ void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, static bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { - bool exists; - NFAEdge e; - tie(e, exists) = edge_by_target(g.start, v, g); - if (exists && g[e].top != 0) { - return true; - } - return false; + NFAEdge e = edge(g.start, v, g); + return e && !g[e].tops.empty(); } - /** Transform (1), removal of redundant vertices. */ static bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, @@ -348,8 +342,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, } if (info.pred.empty() || info.succ.empty()) { - DEBUG_PRINTF("vertex %u has empty pred/succ list\n", - g[v].index); + DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index); assert(0); // non-special states should always have succ/pred lists continue; } @@ -448,7 +441,7 @@ bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; if (currReach.isSubsetOf(otherReach)) { - DEBUG_PRINTF("removing redundant vertex %u (keeping %u)\n", + DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -539,9 +532,6 @@ bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, continue; } - /* ensure that we look for candidates in the same order */ - sort(intersection.begin(), intersection.end(), make_index_ordering(g)); - const CharReach &currReach = g[v].char_reach; const auto &currReports = g[v].reports; for (auto t : intersection) { @@ -578,8 +568,8 @@ bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, CharReach &otherReach = g[t].char_reach; otherReach |= currReach; // v can be removed - DEBUG_PRINTF("removing redundant vertex %u and merging " - "reachability with vertex %u\n", + DEBUG_PRINTF("removing redundant vertex %zu and merging " + "reachability with vertex %zu\n", g[v].index, g[t].index); markForRemoval(v, infoMap, removable); changed = true; @@ -645,14 +635,14 @@ bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = source(e, g); - using RevGraph = boost::reverse_graph; + using RevGraph = boost::reverse_graph; map vertexColor; // Walk the graph backwards from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. 
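/* The rewritten hasInEdgeTops() above shrinks to two lines because of two
 * ue2_graph conveniences: edge(u, v, g) yields a descriptor that tests false
 * when no such edge exists, and the edge property now carries a set of tops
 * rather than a single u32. FakeEdge below is an invented stand-in, not
 * Hyperscan's NFAEdge, sketching why "return e && !g[e].tops.empty();" is
 * well-formed:
 */
#include <cassert>
#include <set>

struct FakeEdge {
    int id = -1; // -1 plays the role of "no such edge"
    explicit operator bool() const { return id >= 0; }
};

int main() {
    FakeEdge absent;
    FakeEdge present;
    present.id = 7;
    std::set<unsigned> tops{0u};
    assert(!absent);
    assert(present && !tops.empty()); // shape of the new return expression
    return 0;
}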
try { - depth_first_visit(RevGraph(g.g), start, + depth_first_visit(RevGraph(g), start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), VertexIs(dom)); @@ -674,16 +664,15 @@ bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, } NFAVertex start = target(e, g); - map vertexColor; + map vertexColor; // Walk the graph forward from v, examining each node. We fail (return // false) if we encounter a node with reach NOT a subset of domReach, and // we stop searching at dom. try { - depth_first_visit(g.g, start, - ReachSubsetVisitor(domReach), + depth_first_visit(g, start, ReachSubsetVisitor(domReach), make_assoc_property_map(vertexColor), - VertexIs(dom)); + VertexIs(dom)); } catch(ReachMismatch&) { return false; } @@ -746,11 +735,10 @@ u32 findCyclic(const NGHolder &g, vector &cyclic) { for (auto v : vertices_range(g)) { assert(g[v].index < cyclic.size()); - bool c = edge(v, v, g).second; - if (c) { + if (hasSelfLoop(v, g)) { count++; + cyclic[g[v].index] = true; } - cyclic[g[v].index] = c; } return count; @@ -775,9 +763,8 @@ void findCyclicDom(NGHolder &g, vector &cyclic, continue; } - DEBUG_PRINTF("vertex %u is dominated by directly-connected cyclic " - "vertex %u\n", g[v].index, - g[dom].index); + DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " + "vertex %zu\n", g[v].index, g[dom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. @@ -787,8 +774,8 @@ void findCyclicDom(NGHolder &g, vector &cyclic, } if (reversePathReachSubset(e, dom, g)) { - DEBUG_PRINTF("edge (%u, %u) can be removed: leading paths " - "share dom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " + "paths share dom reach\n", g[source(e, g)].index, g[target(e, g)].index); dead.insert(e); if (source(e, g) == v) { @@ -814,11 +801,9 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, // Path out through a post-dominator (e.g. a?.+foobar') NFAVertex postdom = postdominators[v]; - if (postdom && cyclic[g[postdom].index] - && edge(v, postdom, g).second) { - DEBUG_PRINTF("vertex %u is postdominated by directly-connected " - "cyclic vertex %u\n", g[v].index, - g[postdom].index); + if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { + DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " + "cyclic vertex %zu\n", g[v].index, g[postdom].index); // iff all paths through in-edge e of v involve vertices whose // reachability is a subset of reach(dom), we can delete edge e. 
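/* The try blocks above rely on a BGL idiom worth spelling out: a
 * depth_first_visit whose visitor throws to abandon the whole search, plus a
 * terminator functor that stops expansion at the dominator. A compilable
 * miniature on an invented graph; Mismatch and StopAt stand in for
 * Hyperscan's ReachMismatch and VertexIs:
 */
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/property_map/property_map.hpp>
#include <iostream>
#include <map>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::directedS>;
using Vertex = Graph::vertex_descriptor;

struct Mismatch {};

struct CheckVisitor : boost::default_dfs_visitor {
    Vertex banned;
    explicit CheckVisitor(Vertex b) : banned(b) {}
    void discover_vertex(Vertex v, const Graph &) const {
        if (v == banned) {
            throw Mismatch(); // abandon the entire walk
        }
    }
};

struct StopAt {
    Vertex stop;
    bool operator()(Vertex v, const Graph &) const { return v == stop; }
};

int main() {
    Graph g(4);
    add_edge(0, 1, g);
    add_edge(1, 2, g);
    add_edge(2, 3, g);

    std::map<Vertex, boost::default_color_type> colour;
    try {
        boost::depth_first_visit(g, Vertex(0), CheckVisitor(Vertex(3)),
                                 boost::make_assoc_property_map(colour),
                                 StopAt{Vertex(2)}); // don't expand past 2
        std::cout << "never reached vertex 3\n";
    } catch (const Mismatch &) {
        std::cout << "walk hit the banned vertex\n";
    }
    return 0;
}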
@@ -828,8 +813,8 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, } if (forwardPathReachSubset(e, postdom, g)) { - DEBUG_PRINTF("edge (%u, %u) can be removed: trailing paths " - "share postdom reach\n", + DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " + "paths share postdom reach\n", g[source(e, g)].index, g[target(e, g)].index); if (target(e, g) == v) { cyclic[g[v].index] = false; @@ -844,7 +829,7 @@ void findCyclicPostDom(NGHolder &g, vector &cyclic, bool removeRedundancy(NGHolder &g, som_type som) { DEBUG_PRINTF("rr som = %d\n", (int)som); - g.renumberVertices(); + renumber_vertices(g); // Cheap check: if all the non-special vertices have in-degree one and // out-degree one, there's no redundancy in this here graph and we can diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 124e9fa5..0ecd7bd6 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,7 +71,7 @@ using namespace std; namespace ue2 { typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> +typedef boost::filtered_graph> AcyclicGraph; namespace { @@ -92,17 +92,17 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, /* find the set of vertices reachable from v which are not in r */ for (auto w : adjacent_vertices_range(v, g)) { - if (!contains(r, w)) { + if (!contains(r, NFAVertex(w))) { if (!open) { - exits->push_back(exit_info(v)); + exits->push_back(exit_info(NFAVertex(v))); open = &exits->back().open; } - open->insert(w); + open->insert(NFAVertex(w)); } } if (open) { - DEBUG_PRINTF("exit %u\n", g[v].index); + DEBUG_PRINTF("exit %zu\n", g[v].index); } } @@ -141,7 +141,7 @@ bool exitValid(UNUSED const AcyclicGraph &g, const vector &exits, return true; } if (exits.size() == 1 && open_jumps.size() == 1) { - DEBUG_PRINTF("oj %u, e %u\n", g[*open_jumps.begin()].index, + DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index, g[exits[0].exit].index); if (*open_jumps.begin() == exits[0].exit) { return true; @@ -190,7 +190,7 @@ void buildInitialCandidate(const AcyclicGraph &g, if (exits->empty()) { DEBUG_PRINTF("odd\n"); candidate->clear(); - DEBUG_PRINTF("adding %u to initial\n", g[*it].index); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); open_jumps->erase(*it); checkAndAddExitCandidate(g, *candidate, *it, exits); @@ -202,7 +202,7 @@ void buildInitialCandidate(const AcyclicGraph &g, candidate->clear(); for (; it != ite; ++it) { - DEBUG_PRINTF("adding %u to initial\n", g[*it].index); + DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); if (contains(enters, *it)) { break; @@ -231,10 +231,10 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, vector exits; ue2::unordered_set candidate; ue2::unordered_set open_jumps; - DEBUG_PRINTF("adding %u to current\n", g[*t_it].index); + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); - DEBUG_PRINTF("adding %u to current\n", g[*t_it].index); + DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); findExits(g, candidate, &exits); @@ -257,7 +257,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, &open_jumps); } else { NFAVertex curr = *t_it; - 
DEBUG_PRINTF("adding %u to current\n", g[curr].index); + DEBUG_PRINTF("adding %zu to current\n", g[curr].index); candidate.insert(curr); open_jumps.erase(curr); refineExits(g, candidate, *t_it, &exits); @@ -284,7 +284,7 @@ void mergeUnderBackEdges(const NGHolder &g, const vector &topo, continue; } - DEBUG_PRINTF("merging v = %u(%u), u = %u(%u)\n", g[v].index, rv, + DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv, g[u].index, ru); assert(rv < ru); @@ -350,8 +350,8 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { } if (isLeafNode(v, acyclic_g)) { - DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index); - sinks.insert(v); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); } } @@ -365,18 +365,18 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { DEBUG_PRINTF("look\n"); changed = false; for (auto v : vertices_range(acyclic_g)) { - if (is_special(v, acyclic_g) || contains(sinks, v)) { + if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) { continue; } for (auto w : adjacent_vertices_range(v, acyclic_g)) { - if (!contains(sinks, w)) { + if (!contains(sinks, NFAVertex(w))) { goto next; } } - DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index); - sinks.insert(v); + DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index); + sinks.insert(NFAVertex(v)); changed = true; next:; } @@ -387,10 +387,10 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { continue; } NFAVertex s = *ri; - DEBUG_PRINTF("handling sink %u\n", acyclic_g[s].index); + DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); ue2::unordered_set parents; for (const auto &e : in_edges_range(s, acyclic_g)) { - parents.insert(source(e, acyclic_g)); + parents.insert(NFAVertex(source(e, acyclic_g))); } /* vertex has no children not reachable on a back edge, bubble the @@ -417,10 +417,9 @@ vector buildTopoOrder(const NGHolder &w, vector &colours) { vector topoOrder; - topological_sort( - acyclic_g, back_inserter(topoOrder), - color_map(make_iterator_property_map( - colours.begin(), get(&NFAGraphVertexProps::index, acyclic_g)))); + topological_sort(acyclic_g, back_inserter(topoOrder), + color_map(make_iterator_property_map(colours.begin(), + get(vertex_index, acyclic_g)))); reorderSpecials(w, acyclic_g, topoOrder); @@ -432,7 +431,7 @@ vector buildTopoOrder(const NGHolder &w, DEBUG_PRINTF("TOPO ORDER\n"); for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) { - DEBUG_PRINTF("[%u]\n", acyclic_g[*ri].index); + DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index); } DEBUG_PRINTF("----------\n"); @@ -448,14 +447,14 @@ ue2::unordered_map assignRegions(const NGHolder &g) { // Build an acyclic graph for this NGHolder. BackEdgeSet deadEdges; - depth_first_search( - g.g, visitor(BackEdges(deadEdges)) - .root_vertex(g.start) - .color_map(make_iterator_property_map( - colours.begin(), get(&NFAGraphVertexProps::index, g.g)))); + depth_first_search(g, + visitor(BackEdges(deadEdges)) + .root_vertex(g.start) + .color_map(make_iterator_property_map(colours.begin(), + get(vertex_index, g)))); - AcyclicFilter af(&deadEdges); - AcyclicGraph acyclic_g(g.g, af); + auto af = make_bad_edge_filter(&deadEdges); + AcyclicGraph acyclic_g(g, af); // Build a (reverse) topological ordering. 
vector topoOrder = buildTopoOrder(g, acyclic_g, colours); diff --git a/src/nfagraph/ng_region.h b/src/nfagraph/ng_region.h index 464a6838..a56933dc 100644 --- a/src/nfagraph/ng_region.h +++ b/src/nfagraph/ng_region.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -181,7 +181,7 @@ bool isOptionalRegion(const Graph &g, NFAVertex v, const ue2::unordered_map ®ion_map) { assert(isRegionEntry(g, v, region_map)); - DEBUG_PRINTF("check if r%u is optional (inspecting v%u)\n", + DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", region_map.at(v), g[v].index); // Region zero is never optional. @@ -198,12 +198,12 @@ bool isOptionalRegion(const Graph &g, NFAVertex v, if (inSameRegion(g, v, u, region_map)) { continue; } - DEBUG_PRINTF(" searching from u=%u\n", g[u].index); + DEBUG_PRINTF(" searching from u=%zu\n", g[u].index); assert(inEarlierRegion(g, v, u, region_map)); for (auto w : adjacent_vertices_range(u, g)) { - DEBUG_PRINTF(" searching to w=%u\n", g[w].index); + DEBUG_PRINTF(" searching to w=%zu\n", g[w].index); if (inLaterRegion(g, v, w, region_map)) { return true; } diff --git a/src/nfagraph/ng_region_redundancy.cpp b/src/nfagraph/ng_region_redundancy.cpp index 5cd266dc..264e4312 100644 --- a/src/nfagraph/ng_region_redundancy.cpp +++ b/src/nfagraph/ng_region_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -90,7 +90,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going forward from %u/%u\n", h[cyc].index, + DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index, region); map::const_iterator it; @@ -98,7 +98,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, NFAVertex v = it->second.entry; const CharReach ®ion_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %u\n", h[v].index); + DEBUG_PRINTF("checking %zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -107,8 +107,8 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %u leads to optional region leader %u\n", - h[cyc].index, h[v].index); + DEBUG_PRINTF("cyclic state %zu leads to optional region leader" + " %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { /* we can use this region as straw and suck in optional regions on @@ -136,14 +136,14 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, CharReach cr = h[cyc].char_reach; auto reports = h[cyc].reports; - DEBUG_PRINTF("going back from %u/%u\n", h[cyc].index, region); + DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region); map::const_iterator it; while ((it = info.find(--region)) != info.end()) { NFAVertex v = it->second.entry; const CharReach ®ion_cr = it->second.cr; assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); - DEBUG_PRINTF("checking %u\n", h[v].index); + DEBUG_PRINTF("checking 
%zu\n", h[v].index); if (!region_cr.isSubsetOf(cr)) { DEBUG_PRINTF("doesn't cover the reach of region %u\n", region); @@ -152,7 +152,7 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, if (isOptionalRegion(h, v, region_map) && !regionHasUnexpectedAccept(h, region, reports, region_map)) { - DEBUG_PRINTF("cyclic state %u trails optional region leader %u\n", + DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n", h[cyc].index, h[v].index); deadRegions.insert(region); } else if (isSingletonRegion(h, v, region_map)) { diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index bc7e73d3..a16e2715 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -61,6 +61,8 @@ #include using namespace std; +using boost::depth_first_search; +using boost::depth_first_visit; namespace ue2 { @@ -99,7 +101,7 @@ struct ReachFilter { const Graph *g = nullptr; }; -typedef boost::filtered_graph > RepeatGraph; +typedef boost::filtered_graph> RepeatGraph; struct ReachSubgraph { vector vertices; @@ -126,9 +128,11 @@ void findInitDepths(const NGHolder &g, } } -template static -void buildTopoOrder(const Graph &g, vector &topoOrder) { +vector buildTopoOrder(const RepeatGraph &g) { + /* Note: RepeatGraph is a filtered version of NGHolder and still has + * NFAVertex as its vertex descriptor */ + typedef ue2::unordered_set EdgeSet; EdgeSet deadEdges; @@ -138,13 +142,15 @@ void buildTopoOrder(const Graph &g, vector &topoOrder) { depth_first_search(g, visitor(BackEdges(deadEdges)). color_map(make_assoc_property_map(colours))); - AcyclicFilter af(&deadEdges); - boost::filtered_graph > acyclic_g(g, af); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); + vector topoOrder; topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_assoc_property_map(colours))); reverse(topoOrder.begin(), topoOrder.end()); + + return topoOrder; } static @@ -172,7 +178,7 @@ bool roguePredecessor(const NGHolder &g, NFAVertex v, continue; } if (!contains(pred, u)) { - DEBUG_PRINTF("%u is a rogue pred\n", g[u].index); + DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index); return true; } @@ -198,7 +204,7 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, } if (!contains(succ, w)) { - DEBUG_PRINTF("%u is a rogue succ\n", g[w].index); + DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index); return true; } @@ -215,8 +221,8 @@ bool rogueSuccessor(const NGHolder &g, NFAVertex v, static bool hasDifferentTops(const NGHolder &g, const vector &verts) { - bool found = false; - u32 top = 0; + /* TODO: check that we need this now that we allow multiple tops */ + const flat_set *tops = nullptr; for (auto v : verts) { for (const auto &e : in_edges_range(v, g)) { @@ -224,17 +230,12 @@ bool hasDifferentTops(const NGHolder &g, const vector &verts) { if (u != g.start && u != g.startDs) { continue; // Only edges from starts have valid top properties. } - u32 t = g[e].top; - DEBUG_PRINTF("edge (%u,%u) with top %u\n", g[u].index, - g[v].index, t); - assert(t < NFA_MAX_TOP_MASKS); - if (!found) { - found = true; - top = t; - } else { - if (t != top) { - return true; // More than one top. - } + DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index, + g[v].index, g[e].tops.size()); + if (!tops) { + tops = &g[e].tops; + } else if (g[e].tops != *tops) { + return true; // More than one set of tops. 
} } } @@ -249,14 +250,14 @@ bool vertexIsBad(const NGHolder &g, NFAVertex v, const ue2::unordered_set &pred, const ue2::unordered_set &succ, const flat_set &reports) { - DEBUG_PRINTF("check vertex %u\n", g[v].index); + DEBUG_PRINTF("check vertex %zu\n", g[v].index); // We must drop any vertex that is the target of a back-edge within // our subgraph. The tail set contains all vertices that are after v in a // topo ordering. for (auto u : inv_adjacent_vertices_range(v, g)) { if (contains(tail, u)) { - DEBUG_PRINTF("back-edge (%u,%u) in subgraph found\n", + DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n", g[u].index, g[v].index); return true; } @@ -266,18 +267,18 @@ bool vertexIsBad(const NGHolder &g, NFAVertex v, // edges from *all* the vertices in pred and no other external entries. // Similarly for exits. if (roguePredecessor(g, v, involved, pred)) { - DEBUG_PRINTF("preds for %u not well-formed\n", g[v].index); + DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index); return true; } if (rogueSuccessor(g, v, involved, succ)) { - DEBUG_PRINTF("succs for %u not well-formed\n", g[v].index); + DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index); return true; } // All reporting vertices should have the same reports. if (is_match_vertex(v, g) && reports != g[v].reports) { - DEBUG_PRINTF("report mismatch to %u\n", g[v].index); + DEBUG_PRINTF("report mismatch to %zu\n", g[v].index); return true; } @@ -297,8 +298,7 @@ void splitSubgraph(const NGHolder &g, const deque &verts, NFAUndirectedGraph ug; ue2::unordered_map old2new; - ue2::unordered_map newIdx2old; - createUnGraph(verts_g.g, true, true, ug, old2new, newIdx2old); + createUnGraph(verts_g, true, true, ug, old2new); ue2::unordered_map repeatMap; @@ -523,7 +523,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, if (u == first) { continue; // no self-loops } - DEBUG_PRINTF("pred vertex %u\n", g[u].index); + DEBUG_PRINTF("pred vertex %zu\n", g[u].index); dist[u].insert(0); } @@ -625,7 +625,7 @@ void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, vector &tugs) { if (allPredsInSubgraph(v, g, involved)) { // We can transform this vertex into a tug trigger in-place. 
- DEBUG_PRINTF("all preds in subgraph, vertex %u becomes tug\n", + DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n", g[v].index); add_edge(cyclic, v, g); tugs.push_back(v); @@ -637,7 +637,7 @@ void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, NFAVertex t = clone_vertex(g, v); depths[t] = depths[v]; - DEBUG_PRINTF("there are other paths, cloned tug %u from vertex %u\n", + DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n", g[t].index, g[v].index); tugs.push_back(t); @@ -654,7 +654,7 @@ NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) { NFAVertex cyclic = clone_vertex(g, last); add_edge(cyclic, cyclic, g); - DEBUG_PRINTF("created cyclic vertex %u\n", g[cyclic].index); + DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index); return cyclic; } @@ -665,7 +665,7 @@ NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) { g[pos].char_reach = g[first].char_reach; - DEBUG_PRINTF("created pos vertex %u\n", g[pos].index); + DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index); return pos; } @@ -711,7 +711,7 @@ void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, NFAVertex d = clone_vertex(g, last); depths[d] = depths[last]; - DEBUG_PRINTF("created vertex %u\n", g[d].index); + DEBUG_PRINTF("created vertex %zu\n", g[d].index); for (auto v : *succs) { add_edge(d, v, g); @@ -952,7 +952,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it; break; } else { - DEBUG_PRINTF("%u is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from front\n", @@ -969,7 +969,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, zap = it.base(); // Note: erases everything after it. break; } else { - DEBUG_PRINTF("%u is involved in another repeat\n", g[*it].index); + DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index); } } DEBUG_PRINTF("peeling %zu vertices from back\n", @@ -980,7 +980,7 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, // no-no. 
for (auto v : rsi.vertices) { if (contains(created, v)) { - DEBUG_PRINTF("vertex %u is in another repeat\n", g[v].index); + DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index); return false; } } @@ -1003,7 +1003,7 @@ void peelStartDotStar(const NGHolder &g, NFAVertex first = rsi.vertices.front(); if (depths.at(first).fromStartDotStar.min == depth(1)) { - DEBUG_PRINTF("peeling start front vertex %u\n", g[first].index); + DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index); rsi.vertices.erase(rsi.vertices.begin()); reprocessSubgraph(g, grey, rsi); } @@ -1012,8 +1012,8 @@ void peelStartDotStar(const NGHolder &g, static void buildReachSubgraphs(const NGHolder &g, vector &rs, const u32 minNumVertices) { - const ReachFilter fil(&g.g); - const RepeatGraph rg(g.g, fil); + const ReachFilter fil(&g); + const RepeatGraph rg(g, fil); if (!isCompBigEnough(rg, minNumVertices)) { DEBUG_PRINTF("component not big enough, bailing\n"); @@ -1021,19 +1021,17 @@ void buildReachSubgraphs(const NGHolder &g, vector &rs, } NFAUndirectedGraph ug; - ue2::unordered_map old2new; - ue2::unordered_map newIdx2old; - createUnGraph(rg, true, true, ug, old2new, newIdx2old); + unordered_map old2new; + createUnGraph(rg, true, true, ug, old2new); - ue2::unordered_map repeatMap; + unordered_map repeatMap; unsigned int num; num = connected_components(ug, make_assoc_property_map(repeatMap)); DEBUG_PRINTF("found %u connected repeat components\n", num); // Now, we build a set of topo-ordered ReachSubgraphs. - vector topoOrder; - buildTopoOrder(rg, topoOrder); + vector topoOrder = buildTopoOrder(rg); rs.resize(num); @@ -1084,7 +1082,7 @@ bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { /* can't do this for infix/suffixes unless we know trigger literals * can only occur at one offset */ - DEBUG_PRINTF("bad top(s) for %u\n", g[v].index); + DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index); return false; } @@ -1104,8 +1102,8 @@ bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, for (auto u : inv_adjacent_vertices_range(v, g)) { const depth &u_max_depth = depths.at(u).fromStart.max; - DEBUG_PRINTF("pred %u max depth %s from start\n", - g[u].index, u_max_depth.str().c_str()); + DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index, + u_max_depth.str().c_str()); if (u_max_depth != first - depth(1)) { return false; } @@ -1123,12 +1121,12 @@ NFAVertex buildTriggerStates(NGHolder &g, const vector &trigger, g[v].char_reach = cr; add_edge(u, v, g); if (u == g.start) { - g[edge(u, v, g).first].top = top; + g[edge(u, v, g)].tops.insert(top); } u = v; } - DEBUG_PRINTF("trigger len=%zu has sink %u\n", trigger.size(), g[u].index); + DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index); return u; } @@ -1153,18 +1151,21 @@ void addTriggers(NGHolder &g, continue; } - const auto &top = g[e].top; + const auto &tops = g[e].tops; // The caller may not have given us complete trigger information. If we // don't have any triggers for a particular top, we should just leave // it alone. 
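/* The reworked loop that follows only detaches an edge from start once
 * trigger information is known for *every* top carried on that edge; a
 * single unknown top takes the goto next_edge early-out instead. The
 * all-or-nothing test on its own, sketched with std::all_of over invented
 * types:
 */
#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <vector>

using Top = unsigned;
using TriggerMap = std::map<Top, std::vector<int>>; // stand-in payload

bool all_tops_covered(const std::set<Top> &edge_tops,
                      const TriggerMap &triggers) {
    return std::all_of(edge_tops.begin(), edge_tops.end(),
                       [&](Top t) { return triggers.count(t) != 0; });
}

int main() {
    TriggerMap triggers{{0, {}}, {1, {}}};
    assert(all_tops_covered({0, 1}, triggers));
    assert(!all_tops_covered({0, 2}, triggers)); // top 2 has no triggers
    return 0;
}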
- if (!contains(triggers, top)) { - DEBUG_PRINTF("no triggers for top %u\n", top); - continue; - } + for (u32 top : tops) { + if (!contains(triggers, top)) { + DEBUG_PRINTF("no triggers for top %u\n", top); + goto next_edge; + } - starts_by_top[top].push_back(v); + starts_by_top[top].push_back(v); + } dead.push_back(e); + next_edge:; } remove_edges(dead, g); @@ -1255,7 +1256,7 @@ void buildRepeatGraph(NGHolder &rg, if (is_triggered(rg)) { // Add vertices for all our triggers addTriggers(rg, triggers); - rg.renumberVertices(); + renumber_vertices(rg); // We don't know anything about how often this graph is triggered, so we // make the start vertex cyclic for the purposes of this analysis ONLY. @@ -1277,30 +1278,26 @@ void buildInputGraph(NGHolder &lhs, ue2::unordered_map &lhs_map, const NGHolder &g, const NFAVertex first, const map>> &triggers) { - DEBUG_PRINTF("building lhs with first=%u\n", g[first].index); + DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); cloneHolder(lhs, g, &lhs_map); assert(g.kind == lhs.kind); addTriggers(lhs, triggers); - lhs.renumberVertices(); + renumber_vertices(lhs); // Replace each back-edge (u,v) with an edge (startDs,v), which will // generate entries at at least the rate of the loop created by that // back-edge. set dead; BackEdges > backEdgeVisitor(dead); - depth_first_search( - lhs.g, visitor(backEdgeVisitor) - .root_vertex(lhs.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, lhs.g))); + depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start)); for (const auto &e : dead) { const NFAVertex u = source(e, lhs), v = target(e, lhs); if (u == v) { continue; // Self-loops are OK. } - DEBUG_PRINTF("replacing back-edge (%u,%u) with edge (startDs,%u)\n", - lhs[u].index, lhs[v].index, - lhs[v].index); + DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n", + lhs[u].index, lhs[v].index, lhs[v].index); add_edge_if_not_present(lhs.startDs, v, lhs); remove_edge(e, lhs); @@ -1387,13 +1384,13 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, for (const auto &v : rsi.vertices) { assert(!is_special(v, g)); // no specials in repeats assert(contains(rg_map, v)); - DEBUG_PRINTF("rg vertex %u in repeat\n", rg[rg_map.at(v)].index); + DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index); region_map.emplace(rg_map.at(v), repeat_region); } for (const auto &v : vertices_range(rg)) { if (!contains(region_map, v)) { - DEBUG_PRINTF("rg vertex %u in lhs (trigger)\n", rg[v].index); + DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index); region_map.emplace(v, lhs_region); } } @@ -1435,7 +1432,7 @@ struct StrawWalker { if (next == v) { // Ignore self loop. 
++ai; if (ai == ae) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } next = *ai; } @@ -1450,7 +1447,7 @@ struct StrawWalker { succs.erase(v); for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { next = *ai; - DEBUG_PRINTF("checking %u\n", g[next].index); + DEBUG_PRINTF("checking %zu\n", g[next].index); if (next == v) { continue; } @@ -1471,32 +1468,31 @@ struct StrawWalker { return next; } DEBUG_PRINTF("bailing\n"); - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } return next; } NFAVertex walk(NFAVertex v, vector &straw) const { - DEBUG_PRINTF("walk from %u\n", g[v].index); + DEBUG_PRINTF("walk from %zu\n", g[v].index); ue2::unordered_set visited; straw.clear(); while (!is_special(v, g)) { - DEBUG_PRINTF("checking %u\n", g[v].index); + DEBUG_PRINTF("checking %zu\n", g[v].index); NFAVertex next = step(v); - if (next == NFAGraph::null_vertex()) { + if (next == NGHolder::null_vertex()) { break; } if (!visited.insert(next).second) { - DEBUG_PRINTF("already visited %u, bailing\n", - g[next].index); + DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index); break; /* don't want to get stuck in any complicated loops */ } const CharReach &reach_v = g[v].char_reach; const CharReach &reach_next = g[next].char_reach; if (!reach_v.isSubsetOf(reach_next)) { - DEBUG_PRINTF("%u's reach is not a superset of %u's\n", + DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n", g[next].index, g[v].index); break; } @@ -1504,7 +1500,7 @@ struct StrawWalker { // If this is cyclic with the right reach, we're done. Note that // startDs fulfils this requirement. if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) { - DEBUG_PRINTF("found cyclic %u\n", g[next].index); + DEBUG_PRINTF("found cyclic %zu\n", g[next].index); return next; } @@ -1513,7 +1509,7 @@ struct StrawWalker { } straw.clear(); - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } private: @@ -1528,8 +1524,8 @@ static NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v, const vector &all_repeats, vector &straw) { - typedef boost::reverse_graph RevGraph; - const RevGraph revg(g.g); + typedef boost::reverse_graph RevGraph; + const RevGraph revg(g); auto cyclic = StrawWalker(g, revg, all_repeats).walk(v, straw); reverse(begin(straw), end(straw)); // path comes from cyclic @@ -1540,7 +1536,7 @@ static NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v, const vector &all_repeats, vector &straw) { - return StrawWalker(g, g.g, all_repeats).walk(v, straw); + return StrawWalker(g, g, all_repeats).walk(v, straw); } /** True if entries to this subgraph must pass through a cyclic state with @@ -1556,7 +1552,7 @@ bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi, // until we encounter our cyclic, all of which must have superset reach. vector straw; return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) != - NFAGraph::null_vertex(); + NGHolder::null_vertex(); } static @@ -1564,7 +1560,7 @@ bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi, const vector &all_repeats) { vector straw; return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) != - NFAGraph::null_vertex(); + NGHolder::null_vertex(); } static @@ -1847,7 +1843,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, add_edge(u, feeder, g); } - DEBUG_PRINTF("added feeder %u\n", g[feeder].index); + DEBUG_PRINTF("added feeder %zu\n", g[feeder].index); } else { // No neg trigger means feeder is empty, and unnecessary. 
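/* StrawWalker above is instantiated over both the holder and a
 * boost::reverse_graph view of it (the template arguments were lost in the
 * hunk text above), so one walk routine can follow a straw either forwards
 * or backwards. The adaptor in isolation, on a stock BGL graph:
 */
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/range/iterator_range.hpp>
#include <iostream>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::bidirectionalS>;

int main() {
    Graph g(2);
    add_edge(0, 1, g);

    boost::reverse_graph<Graph, const Graph &> rg(g);
    // In the reversed view, vertex 1 has an out-edge back to vertex 0.
    for (auto e : boost::make_iterator_range(out_edges(1, rg))) {
        std::cout << source(e, rg) << " -> " << target(e, rg) << "\n";
    }
    return 0;
}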
assert(g[rd.pos_trigger].char_reach.all()); @@ -1895,13 +1891,13 @@ bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, // This transformation is only safe if the straw path from startDs that // we've discovered can *only* lead to this repeat, since we're going to // remove the self-loop on startDs. - if (hasGreaterOutDegree(2, g.startDs, g)) { + if (proper_out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("startDs has other successors\n"); return false; } for (const auto &v : straw) { if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("branch between startDs and repeat, from vertex %u\n", + DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n", g[v].index); return false; } @@ -2071,8 +2067,8 @@ public: const depth &our_depth_in) : top_depths(top_depths_in), our_depth(our_depth_in) {} - void discover_vertex(NFAVertex v, UNUSED const NFAGraph &g) { - DEBUG_PRINTF("discovered %u (depth %s)\n", g[v].index, + void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) { + DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index, our_depth.str().c_str()); auto it = top_depths.find(v); @@ -2105,28 +2101,39 @@ void populateFixedTopInfo(const map &fixed_depth_tops, if (v == g.startDs) { continue; } - u32 top = g[e].top; + depth td = depth::infinity(); - if (contains(fixed_depth_tops, top)) { - td = fixed_depth_tops.at(top); + for (u32 top : g[e].tops) { + if (!contains(fixed_depth_tops, top)) { + td = depth::infinity(); + break; + } + depth td_t = fixed_depth_tops.at(top); + if (td == td_t) { + continue; + } else if (td == depth::infinity()) { + td = td_t; + } else { + td = depth::infinity(); + break; + } } - DEBUG_PRINTF("scanning from %u top=%u depth=%s\n", - g[v].index, top, td.str().c_str()); + DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index, + td.str().c_str()); /* for each vertex reachable from v update its map to reflect that it is * reachable from a top of depth td. */ - depth_first_visit( - g.g, v, pfti_visitor(top_depths, td), - make_iterator_property_map(colours.begin(), - get(&NFAGraphVertexProps::index, g.g))); + depth_first_visit(g, v, pfti_visitor(top_depths, td), + make_iterator_property_map(colours.begin(), + get(vertex_index, g))); } for (const auto &v_depth : top_depths) { const NFAVertex v = v_depth.first; const depth &d = v_depth.second; if (d.is_finite()) { - DEBUG_PRINTF("%u reached by fixed tops at depth %s\n", + DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n", g[v].index, d.str().c_str()); reached_by_fixed_tops->insert(v); } @@ -2143,19 +2150,16 @@ bool hasOverlappingRepeats(UNUSED const NGHolder &g, for (const auto &br : repeats) { if (contains(involved, br.cyclic)) { - DEBUG_PRINTF("already seen cyclic %u\n", - g[br.cyclic].index); + DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index); return true; } if (contains(involved, br.pos_trigger)) { - DEBUG_PRINTF("already seen pos %u\n", - g[br.pos_trigger].index); + DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index); return true; } for (auto v : br.tug_triggers) { if (contains(involved, v)) { - DEBUG_PRINTF("already seen tug %u\n", - g[v].index); + DEBUG_PRINTF("already seen tug %zu\n", g[v].index); return true; } } @@ -2301,7 +2305,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Go to town on the remaining acceptable subgraphs. 
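/* populateFixedTopInfo() above folds the fixed depths of all tops on an edge
 * into a single value: any top without a known fixed depth, or two tops that
 * disagree, collapse the result to infinity. The same fold with
 * std::optional standing in for ue2's depth type (nullopt plays
 * depth::infinity()):
 */
#include <cassert>
#include <map>
#include <optional>
#include <set>

using Depth = std::optional<unsigned>;

Depth fold_top_depths(const std::set<unsigned> &tops,
                      const std::map<unsigned, unsigned> &fixed_depth_tops) {
    Depth td;
    bool first = true;
    for (unsigned top : tops) {
        auto it = fixed_depth_tops.find(top);
        if (it == fixed_depth_tops.end()) {
            return std::nullopt;         // some top has no fixed depth
        }
        if (first) {
            td = it->second;
            first = false;
        } else if (td != Depth(it->second)) {
            return std::nullopt;         // tops disagree on depth
        }
    }
    return td;
}

int main() {
    std::map<unsigned, unsigned> fixed{{0, 4}, {1, 4}, {2, 9}};
    assert(fold_top_depths({0, 1}, fixed) == Depth(4));
    assert(!fold_top_depths({0, 2}, fixed).has_value());
    assert(!fold_top_depths({0, 3}, fixed).has_value());
    return 0;
}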
ue2::unordered_set created; for (auto &rsi : rs) { - DEBUG_PRINTF("subgraph (beginning vertex %u) is a {%s,%s} repeat\n", + DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", g[rsi.vertices.front()].index, rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); @@ -2334,7 +2338,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Some of our analyses require correctly numbered vertices, so we // renumber after changes. - g.renumberVertices(); + renumber_vertices(g); } bool modified_start_ds = false; @@ -2375,8 +2379,8 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // We have modified the graph, so we need to ensure that our edges // and vertices are correctly numbered. - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); // Remove stray report IDs. clearReports(g); } @@ -2415,20 +2419,20 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // Must be start anchored. assert(edge(g.startDs, g.startDs, g).second); - if (hasGreaterOutDegree(1, g.startDs, g)) { + if (out_degree(g.startDs, g) > 1) { DEBUG_PRINTF("Unanchored\n"); return false; } // Must not be EOD-anchored. assert(edge(g.accept, g.acceptEod, g).second); - if (hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("EOD anchored\n"); return false; } // Must have precisely one top. - if (!onlyOneTop(g)) { + if (is_triggered(g) && !onlyOneTop(g)) { DEBUG_PRINTF("Too many tops\n"); return false; } diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 09abf775..32cdac23 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,37 +49,71 @@ namespace ue2 { /** Connect the start vertex to each of the vertices in \p tops. This is useful * temporarily for when we need to run a graph algorithm that expects a single * source vertex. */ -void wireStartToTops(NGHolder &g, const map &tops, - vector &topEdges) { - for (const auto &top : tops) { - NFAVertex v = top.second; +static +void wireStartToTops(NGHolder &g, const flat_set &tops, + vector &tempEdges) { + for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); - const NFAEdge &e = add_edge(g.start, v, g).first; - topEdges.push_back(e); + const NFAEdge &e = add_edge(g.start, v, g); + tempEdges.push_back(e); } } +/** + * Returns true if start's successors (aside from startDs) are subset of + * startDs's proper successors or if start has no successors other than startDs. + */ static -void getStateOrdering(NGHolder &g, const map &tops, +bool startIsRedundant(const NGHolder &g) { + /* We ignore startDs as the self-loop may have been stripped as an + * optimisation for repeats (improveLeadingRepeats()). */ + set start; + insert(&start, adjacent_vertices_range(g.start, g)); + start.erase(g.startDs); + + // Trivial case: start has no successors other than startDs. 
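/* The new startIsRedundant() here relaxes the old equality requirement: it
 * asks whether start's successors (minus startDs, gathered just above) form
 * a subset of startDs's proper successors, starting with the trivial empty
 * case below. On ordered containers the subset test is std::includes:
 */
#include <algorithm>
#include <cassert>
#include <set>

int main() {
    std::set<int> start_succs{2, 3};      // succ(start) minus startDs
    std::set<int> startds_succs{2, 3, 4}; // proper succ(startDs)

    bool redundant = start_succs.empty() ||
                     std::includes(startds_succs.begin(), startds_succs.end(),
                                   start_succs.begin(), start_succs.end());
    assert(redundant);
    return 0;
}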
+ if (start.empty()) { + DEBUG_PRINTF("start has no out-edges other than to startDs\n"); + return true; + } + + set startDs; + insert(&startDs, adjacent_vertices_range(g.startDs, g)); + startDs.erase(g.startDs); + + if (!is_subset_of(start, startDs)) { + DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); + return false; + } + + return true; +} + +static +void getStateOrdering(NGHolder &g, const flat_set &tops, vector &ordering) { // First, wire up our "tops" to start so that we have a single source, // which will give a nicer topo order. - vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + wireStartToTops(g, tops, tempEdges); - renumberGraphVertices(g); + renumber_vertices(g); vector temp = getTopoOrdering(g); - remove_edges(topEdges, g); + remove_edges(tempEdges, g); // Move {start, startDs} to the end, so they'll be first when we reverse - // the ordering. + // the ordering (if they are required). temp.erase(remove(temp.begin(), temp.end(), g.startDs)); temp.erase(remove(temp.begin(), temp.end(), g.start)); - temp.push_back(g.startDs); - temp.push_back(g.start); + if (proper_out_degree(g.startDs, g)) { + temp.push_back(g.startDs); + } + if (!startIsRedundant(g)) { + temp.push_back(g.start); + } // Walk ordering, remove vertices that shouldn't be participating in state // numbering, such as accepts. @@ -106,7 +140,7 @@ getStateIndices(const NGHolder &h, const vector &ordering) { u32 stateNum = 0; for (auto v : ordering) { - DEBUG_PRINTF("assigning state num %u to vertex %u\n", stateNum, + DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum, h[v].index); states[v] = stateNum++; } @@ -149,16 +183,15 @@ void optimiseTightLoops(const NGHolder &g, vector &ordering) { continue; } - DEBUG_PRINTF("moving vertex %u next to %u\n", - g[v].index, g[u].index); + DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index); ordering.erase(v_it); ordering.insert(++u_it, v); } } -ue2::unordered_map -numberStates(NGHolder &h, const map &tops) { +unordered_map +numberStates(NGHolder &h, const flat_set &tops) { DEBUG_PRINTF("numbering states for holder %p\n", &h); vector ordering; @@ -166,14 +199,10 @@ numberStates(NGHolder &h, const map &tops) { optimiseTightLoops(h, ordering); - ue2::unordered_map states = getStateIndices(h, ordering); - - return states; + return getStateIndices(h, ordering); } -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops) { +u32 countStates(const unordered_map &state_ids) { if (state_ids.empty()) { return 0; } @@ -184,168 +213,9 @@ u32 countStates(const NGHolder &g, max_state = max(m.second, max_state); } } - u32 num_states = max_state + 1; - assert(contains(state_ids, g.start)); - if (addTops && state_ids.at(g.start) != NO_STATE) { - num_states--; - set tops; - for (auto e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); - } - num_states += tops.size(); - } - return num_states; } -/** - * Returns true if start leads to all of startDs's proper successors or if - * start has no successors other than startDs. - */ -static -bool startIsRedundant(const NGHolder &g) { - set start, startDs; - - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - start.insert(v); - } - - for (const auto &e : out_edges_range(g.startDs, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - startDs.insert(v); - } - - // Trivial case: start has no successors other than startDs. 
- if (start.empty()) { - DEBUG_PRINTF("start has no out-edges other than to startDs\n"); - return true; - } - - if (start != startDs) { - DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); - return false; - } - - return true; -} - -/** One final, FINAL optimisation. Drop either start or startDs if it's unused - * in this graph. We leave this until this late because having both vertices in - * the graph, with fixed state indices, is useful for merging and other - * analyses. */ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states) { - u32 adj = 0; - - if (startIsRedundant(g)) { - DEBUG_PRINTF("dropping unused start\n"); - states[g.start] = NO_STATE; - adj++; - } - - if (proper_out_degree(g.startDs, g) == 0) { - DEBUG_PRINTF("dropping unused startDs\n"); - states[g.startDs] = NO_STATE; - adj++; - } - - if (!adj) { - DEBUG_PRINTF("both start and startDs must remain\n"); - return; - } - - // We have removed one or both of the starts. Walk the non-special vertices - // in the graph with state indices assigned to them and subtract - // adj from all of them. - for (auto v : vertices_range(g)) { - u32 &state = states[v]; // note ref - if (state == NO_STATE) { - continue; - } - if (is_any_start(v, g)) { - assert(state <= 1); - state = 0; // one start remains - } else { - assert(!is_special(v, g)); - assert(state >= adj); - state -= adj; - } - } -} - -flat_set findUnusedStates(const NGHolder &g) { - flat_set dead; - if (startIsRedundant(g)) { - dead.insert(g.start); - } - if (proper_out_degree(g.startDs, g) == 0) { - dead.insert(g.startDs); - } - return dead; -} - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g_in, NGHolder &g) { - // Make the BGL do the grunt work. - ue2::unordered_map vertexMap; - boost::transpose_graph(g_in.g, g.g, - orig_to_copy(boost::make_assoc_property_map(vertexMap)). - vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); - - // The transpose_graph operation will have created extra copies of our - // specials. We have to rewire their neighbours to the 'real' specials and - // delete them. - NFAVertex start = vertexMap[g_in.acceptEod]; - NFAVertex startDs = vertexMap[g_in.accept]; - NFAVertex accept = vertexMap[g_in.startDs]; - NFAVertex acceptEod = vertexMap[g_in.start]; - - // Successors of starts. - for (const auto &e : out_edges_range(start, g)) { - NFAVertex v = target(e, g); - add_edge(g.start, v, g[e], g); - } - for (const auto &e : out_edges_range(startDs, g)) { - NFAVertex v = target(e, g); - add_edge(g.startDs, v, g[e], g); - } - - // Predecessors of accepts. - for (const auto &e : in_edges_range(accept, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.accept, g[e], g); - } - for (const auto &e : in_edges_range(acceptEod, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.acceptEod, g[e], g); - } - - // Remove our impostors. - clear_vertex(start, g); - remove_vertex(start, g); - clear_vertex(startDs, g); - remove_vertex(startDs, g); - clear_vertex(accept, g); - remove_vertex(accept, g); - clear_vertex(acceptEod, g); - remove_vertex(acceptEod, g); - - // Renumber so that g's properties (number of vertices, edges) are - // accurate. 
- g.renumberVertices(); - g.renumberEdges(); - - assert(num_vertices(g) == num_vertices(g_in)); - assert(num_edges(g) == num_edges(g_in)); -} - } // namespace ue2 diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index 5e244bf6..bbd478d5 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,23 +37,8 @@ #include "ue2common.h" #include "util/ue2_containers.h" -#include -#include - namespace ue2 { -class NGHolder; - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g, NGHolder &out); - -/** Connect the start vertex to each of the vertices in \p tops. This is useful - * temporarily for when we need to run a graph algorithm that expects a single - * source vertex. */ -void wireStartToTops(NGHolder &g, const std::map &tops, - std::vector &topEdges); - /** * \brief Special state index value meaning that the vertex will not * participate in an (NFA/DFA/etc) implementation. @@ -63,30 +48,14 @@ static constexpr u32 NO_STATE = ~0; /** * \brief Gives each participating vertex in the graph a unique state index. */ -ue2::unordered_map -numberStates(NGHolder &h, - const std::map &tops = std::map{}); +unordered_map +numberStates(NGHolder &h, const flat_set &tops); /** * \brief Counts the number of states (vertices with state indices) in the * graph. - * - * If addTops is true, also accounts for states that will be constructed for - * each unique top. */ -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops = true); - -/** Optimisation: drop unnecessary start states. */ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states); - -/** - * \brief Returns a set of vertices that will not participate in an - * implementation (NFA, DFA etc) of this graph. For example, starts with no - * successors. - */ -flat_set findUnusedStates(const NGHolder &g); +u32 countStates(const unordered_map &state_ids); } // namespace ue2 diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 137ac5cc..7066ab27 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -68,8 +68,6 @@ #include #include #include -#include -#include #define NDEBUG_PRINTF(x, ...) 
\ do { if (0) { DEBUG_PRINTF(x, ## __VA_ARGS__); } } while (0) @@ -540,7 +538,7 @@ void getRegionRoseLiterals(const NGHolder &g, DEBUG_PRINTF("inspecting region %u\n", region); set s; for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %u\n", g[v].index); + DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); /* Note: RHS can not be depended on to take all subsequent revisits * to this vertex */ set ss = getLiteralSet(g, v, false); @@ -575,8 +573,7 @@ void gatherBackEdges(const NGHolder &g, ue2::unordered_map> *out) { set backEdges; BackEdges> be(backEdges); - depth_first_search(g.g, visitor(be).root_vertex(g.start).vertex_index_map( - get(&NFAGraphVertexProps::index, g.g))); + depth_first_search(g, visitor(be).root_vertex(g.start)); for (const auto &e : backEdges) { (*out)[source(e, g)].push_back(target(e, g)); @@ -759,7 +756,7 @@ unique_ptr LitCollection::pickNext() { unique_ptr rv = move(lits.back()); lits.pop_back(); poisonCandidates(*rv); - DEBUG_PRINTF("best is '%s' %u a%d t%d\n", + DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", dumpString(*(rv->lit.begin())).c_str(), g[rv->vv.front()].index, (int)createsAnchoredLHS(g, rv->vv, depths, grey), @@ -811,6 +808,7 @@ bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 max_delay, bool overhang_ok) { + assert(isCorrectlyTopped(g)); if (max_delay == MO_INVALID_IDX) { max_delay--; } @@ -864,8 +862,6 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, assert(delay <= lit.length()); DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - // For determinism, we make sure that we create these edges from vertices - // in index-sorted order. set pred; for (auto v : curr) { insert(&pred, inv_adjacent_vertices_range(v, g)); @@ -874,16 +870,17 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, clear_in_edges(g.accept, g); clearReports(g); - vector verts(pred.begin(), pred.end()); - sort(verts.begin(), verts.end(), VertexIndexOrdering(g)); - - for (auto v : verts) { - add_edge(v, g.accept, g); + for (auto v : pred) { + NFAEdge e = add_edge(v, g.accept, g); g[v].reports.insert(0); + if (is_triggered(g) && v == g.start) { + g[e].tops.insert(DEFAULT_TOP); + } } pruneUseless(g); assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); return delay; @@ -892,6 +889,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 delay, const vector &preds) { assert(delay <= lit.length()); + assert(isCorrectlyTopped(g)); DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); NFAVertex prev = g.accept; @@ -906,7 +904,10 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } for (auto v : preds) { - add_edge(v, prev, g); + NFAEdge e = add_edge(v, prev, g); + if (v == g.start && is_triggered(g)) { + g[e].tops.insert(DEFAULT_TOP); + } } // Every predecessor of accept must have a report. 
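/* removeTrailingLiteralStates()/restoreTrailingLiteralStates() above now tag
 * any fresh edge out of start with DEFAULT_TOP when the graph is triggered,
 * which is what the new isCorrectlyTopped() assertions demand. The pattern
 * reduced to a stock BGL graph; EdgeProps and kDefaultTop are invented, and
 * ue2's actual DEFAULT_TOP value is assumed rather than quoted:
 */
#include <boost/graph/adjacency_list.hpp>
#include <cassert>
#include <set>

struct EdgeProps {
    std::set<unsigned> tops;
};
using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::directedS, boost::no_property,
                                    EdgeProps>;
using Vertex = Graph::vertex_descriptor;

constexpr unsigned kDefaultTop = 0; // stand-in for ue2's DEFAULT_TOP

int main() {
    Graph g(3);
    Vertex start = 0, v = 1;
    bool triggered = true; // is_triggered(g) in the real code

    auto e = add_edge(start, v, g).first;
    if (triggered) {
        g[e].tops.insert(kDefaultTop); // keep the graph correctly topped
    }
    assert(!g[e].tops.empty());
    return 0;
}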
@@ -914,9 +915,10 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, g[u].reports.insert(0); } - g.renumberVertices(); - g.renumberEdges(); + renumber_vertices(g); + renumber_edges(g); assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); } void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, @@ -1144,7 +1146,7 @@ void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { succ_g.erase(g.startDs); for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %u || =%zu\n", g[v].index, + DEBUG_PRINTF("inspecting cand %zu || =%zu\n", g[v].index, g[v].char_reach.size()); if (v == g.startDs || !g[v].char_reach.all()) { @@ -1162,7 +1164,7 @@ void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { } clear_vertex(v, g); remove_vertex(v, g); - g.renumberVertices(); + renumber_vertices(g); return; } @@ -1693,7 +1695,7 @@ void splitEdgesByCut(RoseInGraph &ig, const vector &to_cut, /* TODO need to update v_mapping (if we were doing more cuts) */ } - DEBUG_PRINTF("splitting on pivot %u\n", h[pivot].index); + DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); ue2::unordered_map temp_map; shared_ptr new_lhs = make_shared(); splitLHS(h, pivot, new_lhs.get(), &temp_map); @@ -1766,8 +1768,8 @@ bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, return false; } - h.renumberVertices(); - h.renumberEdges(); + renumber_vertices(h); + renumber_edges(h); /* Step 1: Get scores for all edges */ vector scores = scoreEdges(h); /* scores by edge_index */ /* Step 2: poison scores for edges covered by successor literal */ @@ -2366,12 +2368,17 @@ void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { h[ds].char_reach = CharReach::dot(); - add_edge(h.start, ds, h); + NFAEdge e_start_to_ds = add_edge(h.start, ds, h); add_edge(ds, ds, h); add_edge(ds, h.accept, h); h[h.start].reports.insert(0); h[ds].reports.insert(0); + + if (g[u].type == RIV_LITERAL) { + h[e_start_to_ds].tops.insert(DEFAULT_TOP); + } } else { + assert(g[u].type == RIV_ANCHORED_START); add_edge(h.start, h.accept, h); h[h.start].reports.insert(0); } @@ -2406,14 +2413,14 @@ void explodeLiteral(RoseInGraph &g, RoseInVertex v, g[v_new].s = lit; for (const auto &e : in_edges_range(v, g)) { - RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g).first; + RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g); // FIXME: are we safe to share graphs here? For now, make our very // own copy. g[e2].graph = makeGraphCopy(g[e].graph.get()); } for (const auto &e : out_edges_range(v, g)) { - RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g).first; + RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g); // FIXME: are we safe to share graphs here? For now, make our very // own copy. g[e2].graph = makeGraphCopy(g[e].graph.get()); @@ -2565,7 +2572,7 @@ bool followedByStar(const vector &vv, const NGHolder &g) { static bool isEodPrefixCandidate(const NGHolder &g) { - if (hasGreaterInDegree(0, g.accept, g)) { + if (in_degree(g.accept, g)) { DEBUG_PRINTF("graph isn't eod anchored\n"); return false; } @@ -2636,7 +2643,7 @@ void processEodPrefixes(RoseInGraph &g) { } // TODO: handle cases with multiple out-edges. 
- if (hasGreaterOutDegree(1, source(e, g), g)) { + if (out_degree(source(e, g), g) > 1) { continue; } @@ -2663,7 +2670,7 @@ void processEodPrefixes(RoseInGraph &g) { } for (auto v : accepts) { - if (!hasGreaterInDegree(0, v, g)) { + if (!in_degree(v, g)) { remove_vertex(v, g); } } @@ -2805,6 +2812,7 @@ unique_ptr buildRose(const NGHolder &h, bool desperation, dumpPreRoseGraph(ig, cc.grey); + renumber_vertices(ig); calcVertexOffsets(ig); return igp; } @@ -2821,6 +2829,7 @@ void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { handleLongMixedSensitivityLiterals(ig); dedupe(ig); pruneUseless(ig); + renumber_vertices(ig); calcVertexOffsets(ig); } @@ -2831,8 +2840,7 @@ bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, } // We should have at least one edge into accept or acceptEod! - assert(hasGreaterInDegree(0, h.accept, h) || - hasGreaterInDegree(1, h.acceptEod, h)); + assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); unique_ptr igp = buildRose(h, false, cc); if (igp && rose.addRose(*igp, prefilter)) { @@ -2924,6 +2932,7 @@ bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, add_edge(v, a, RoseInEdgeProps(rhs, 0U), ig); } + renumber_vertices(ig); calcVertexOffsets(ig); return rose.addRose(ig, prefilter, true /* final chance */); @@ -2936,8 +2945,7 @@ bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, } // We should have at least one edge into accept or acceptEod! - assert(hasGreaterInDegree(0, h.accept, h) || - hasGreaterInDegree(1, h.acceptEod, h)); + assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); unique_ptr igp; diff --git a/src/nfagraph/ng_small_literal_set.cpp b/src/nfagraph/ng_small_literal_set.cpp index b5867bb9..1d7be65b 100644 --- a/src/nfagraph/ng_small_literal_set.cpp +++ b/src/nfagraph/ng_small_literal_set.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -125,7 +125,7 @@ bool findLiterals(const NGHolder &g, set &out = built[g[v].index]; read_count[g[v].index] = out_degree(v, g); - DEBUG_PRINTF("setting read_count to %zu for %u\n", + DEBUG_PRINTF("setting read_count to %zu for %zu\n", read_count[g[v].index], g[v].index); assert(out.empty()); @@ -154,7 +154,7 @@ bool findLiterals(const NGHolder &g, } set &in = built[g[u].index]; - DEBUG_PRINTF("getting from %u (%zu reads to go)\n", + DEBUG_PRINTF("getting from %zu (%zu reads to go)\n", g[u].index, read_count[g[u].index]); assert(!in.empty()); assert(read_count[g[u].index]); @@ -188,7 +188,7 @@ bool findLiterals(const NGHolder &g, read_count[g[u].index]--; if (!read_count[g[u].index]) { - DEBUG_PRINTF("clearing %u as finished reading\n", g[u].index); + DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index); in.clear(); } } diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index ed2942bb..f6ba0fa7 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -40,7 +40,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_restructuring.h" #include "ng_rose.h" #include "ng_som.h" #include "ng_som_add_redundancy.h" @@ -111,7 +110,7 @@ bool regionCanEstablishSom(const NGHolder &g, DEBUG_PRINTF("region %u\n", region); for (UNUSED auto v : r_exits) { - DEBUG_PRINTF(" exit %u\n", g[v].index); + DEBUG_PRINTF(" exit %zu\n", g[v].index); } /* simple if each 
region exit is at fixed distance from SOM. Note SOM does @@ -120,12 +119,12 @@ bool regionCanEstablishSom(const NGHolder &g, assert(regions.at(v) == region); const DepthMinMax &d = depths.at(g[v].index); if (d.min != d.max) { - DEBUG_PRINTF("failing %u as %s != %s\n", g[v].index, + DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index, d.min.str().c_str(), d.max.str().c_str()); return false; } } - DEBUG_PRINTF("region %u/%u is good\n", regions.at(r_exits[0]), + DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]), g[r_exits[0]].index); return true; @@ -179,10 +178,7 @@ void buildRegionMapping(const NGHolder &g, set be; BackEdges > backEdgeVisitor(be); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); for (const auto &e : be) { NFAVertex u = source(e, g); @@ -209,17 +205,17 @@ void buildRegionMapping(const NGHolder &g, r_i.optional ? " (optional)" : ""); DEBUG_PRINTF(" enters:"); for (u32 i = 0; i < r_i.enters.size(); i++) { - printf(" %u", g[r_i.enters[i]].index); + printf(" %zu", g[r_i.enters[i]].index); } printf("\n"); DEBUG_PRINTF(" exits:"); for (u32 i = 0; i < r_i.exits.size(); i++) { - printf(" %u", g[r_i.exits[i]].index); + printf(" %zu", g[r_i.exits[i]].index); } printf("\n"); DEBUG_PRINTF(" all:"); for (u32 i = 0; i < r_i.full.size(); i++) { - printf(" %u", g[r_i.full[i]].index); + printf(" %zu", g[r_i.full[i]].index); } printf("\n"); } @@ -236,8 +232,7 @@ bool validateXSL(const NGHolder &g, u32 v_region = regions.at(v); if (!is_special(v, g) && v_region > region && (escapes & g[v].char_reach).any()) { - DEBUG_PRINTF("problem with escapes for %u\n", - g[v].index); + DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index); first_bad_region = MIN(first_bad_region, v_region); } } @@ -403,7 +398,7 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, vector to_clear; assert(contains(lhs_map, curr_exits.front())); NFAVertex p_u = lhs_map[curr_exits.front()]; - DEBUG_PRINTF("p_u: %u\n", prefix[p_u].index); + DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index); for (auto p_v : adjacent_vertices_range(p_u, prefix)) { auto v = rev_map.at(p_v); if (p_v == prefix.accept || regions.at(v) < dead_region) { @@ -413,7 +408,7 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, } for (auto v : to_clear) { - DEBUG_PRINTF("clearing in_edges on %u\n", prefix[v].index); + DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index); clear_in_edges(v, prefix); } @@ -576,7 +571,7 @@ void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, ir.somDistance = param; ReportID rep = rm.getInternalId(ir); - DEBUG_PRINTF("vertex %u, replacing report %u with %u (type %u)\n", + DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n", g[v].index, report_id, rep, ir_type); r_new.insert(rep); } @@ -690,31 +685,26 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, map::const_iterator picked) { /* NOTE: This is appropriate for firstMatchIsFirst */ DEBUG_PRINTF("prepping for lock check\n"); + NGHolder &midfix = *out; - add_edge(midfix.startDs, midfix.accept, midfix); map v_map; v_map[g.start] = midfix.start; v_map[g.startDs] = midfix.startDs; - map::const_iterator jt = picked; - /* include the lock region */ - assert(jt != info.end()); - ++jt; - assert(!jt->second.dag); - assert(jt->second.full.size() == 1); + assert(picked != info.end()); + auto graph_last = next(picked); - for (; ; --jt) 
{ + assert(!graph_last->second.dag); + assert(graph_last->second.full.size() == 1); + + for (auto jt = graph_last; ; --jt) { DEBUG_PRINTF("adding r %u to midfix\n", jt->first); - if (!jt->second.optional) { - clear_out_edges(midfix.startDs, midfix); - add_edge(midfix.startDs, midfix.startDs, midfix); - } /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); if (contains(v_map, v)) { continue; } @@ -746,20 +736,33 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, } } - /* add edges from startds to enters */ + if (jt == info.begin()) { + break; + } + } + + /* add edges from startds to the enters of all the initial optional + * regions and the first mandatory region. */ + for (auto jt = info.begin(); ; ++jt) { for (auto enter : jt->second.enters) { assert(contains(v_map, enter)); NFAVertex v = v_map[enter]; add_edge_if_not_present(midfix.startDs, v, midfix); } - if (jt == info.begin()) { + if (!jt->second.optional) { + break; + } + + if (jt == graph_last) { + /* all regions are optional - add a direct edge to accept */ + add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); break; } } assert(in_degree(midfix.accept, midfix)); - midfix.renumberVertices(); + renumber_vertices(midfix); } static @@ -786,7 +789,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, /* add all vertices in region, create mapping */ for (auto v : jt->second.full) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; } @@ -826,7 +829,7 @@ void fillRoughMidfix(NGHolder *out, const NGHolder &g, do { for (auto v : jt->second.exits) { - DEBUG_PRINTF("adding v %u to midfix\n", g[v].index); + DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index); NFAVertex vnew = add_vertex(g[v], midfix); v_map[v] = vnew; @@ -1013,8 +1016,7 @@ bool addPlan(vector &plan, u32 parent) { // Fetches all preds of {accept, acceptEod} for this graph. static void addReporterVertices(const NGHolder &g, vector &reporters) { - // Order reporter vertices by index for determinism. 
- set > tmp(g); + set tmp; insert(&tmp, inv_adjacent_vertices(g.accept, g)); insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); tmp.erase(g.accept); @@ -1022,7 +1024,7 @@ void addReporterVertices(const NGHolder &g, vector &reporters) { #ifdef DEBUG DEBUG_PRINTF("add reporters:"); for (UNUSED auto v : tmp) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1036,7 +1038,7 @@ void addReporterVertices(const region_info &r, const NGHolder &g, vector &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("add reporter %u\n", g[v].index); + DEBUG_PRINTF("add reporter %zu\n", g[v].index); reporters.push_back(v); } } @@ -1049,7 +1051,7 @@ void addMappedReporterVertices(const region_info &r, const NGHolder &g, vector &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { - DEBUG_PRINTF("adding v=%u\n", g[v].index); + DEBUG_PRINTF("adding v=%zu\n", g[v].index); ue2::unordered_map::const_iterator it = mapping.find(v); assert(it != mapping.end()); @@ -1106,7 +1108,7 @@ void expandGraph(NGHolder &g, ue2::unordered_map ®ions, } for (auto enter : enters) { - DEBUG_PRINTF("processing enter %u\n", g[enter].index); + DEBUG_PRINTF("processing enter %zu\n", g[enter].index); map orig_to_copy; // Make a copy of all of the tail vertices, storing region info along @@ -1156,7 +1158,7 @@ void expandGraph(NGHolder &g, ue2::unordered_map ®ions, [&](const NFAEdge &e) { NFAVertex u = source(e, g); return regions.at(u) < split_region; - }, g.g); + }, g); } new_enters.push_back(orig_to_copy[enter]); @@ -1328,7 +1330,7 @@ bool doTreePlanning(NGHolder &g, dumpHolder(g, g_regions, 14, "som_expandedtree", grey); for (auto v : enters) { - DEBUG_PRINTF("enter %u\n", g[v].index); + DEBUG_PRINTF("enter %zu\n", g[v].index); // For this entry vertex, construct a version of the graph without the // other entries in this region (g_path), and calculate its depths and @@ -1563,12 +1565,12 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, p.is_reset, p.parent); printf(" reporters:"); for (auto v : p.reporters) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); printf(" reporters_in:"); for (auto v : p.reporters_in) { - printf(" %u", g[v].index); + printf(" %zu", g[v].index); } printf("\n"); #endif @@ -1634,7 +1636,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, /* create prefix to set the som_loc */ if (!plan.front().no_implement) { - plan.front().prefix->renumberVertices(); + renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { throw CompileError(w.expressionIndex, "Pattern is too large."); @@ -1746,7 +1748,7 @@ aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, setZeroReports(g_rev); // Prep for actual construction. - g_rev.renumberVertices(); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); @@ -1786,7 +1788,7 @@ bool makeSomRevNfa(vector &som_nfas, const NGHolder &g, return true; } - g2.renumberVertices(); // for findMinWidth, findMaxWidth. + renumber_vertices(g2); // for findMinWidth, findMaxWidth. 
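A pattern worth noting in the hunks above: every structural edit to a holder is now followed by a free-function `renumber_vertices()` (and, where edges change, `renumber_edges()`) call before passes such as `calcVertexOffsets`, `findMinWidth` or `findMaxWidth` run, since those passes key side arrays by the stored vertex index. A minimal generic sketch of the idea, using a hypothetical bundled-property BGL graph rather than Hyperscan's own graph type:

```cpp
#include <boost/graph/adjacency_list.hpp>

struct VProps { size_t index = 0; };
using Graph = boost::adjacency_list<boost::listS, boost::listS,
                                    boost::bidirectionalS, VProps>;

// Reassign dense, contiguous indices after vertices have been added or
// removed; anything previously keyed by index (depths, state arrays)
// becomes stale and must be recomputed, hence "renumbering invalidates
// depths" above.
size_t renumberVertices(Graph &g) {
    size_t i = 0;
    for (auto [vi, ve] = boost::vertices(g); vi != ve; ++vi) {
        g[*vi].index = i++;
    }
    return i;
}

// A pass can then safely size a side array by vertex count:
//   std::vector<int> depth(renumberVertices(g));
//   depth[g[v].index] = ...;
```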
aligned_unique_ptr nfa = makeBareSomRevNfa(g2, cc); if (!nfa) { @@ -2221,7 +2223,7 @@ bool leadingLiterals(const NGHolder &g, set *lits, for (const auto &m : curr) { const NFAVertex u = m.first; const vector &base = m.second; - DEBUG_PRINTF("expanding from %u\n", g[u].index); + DEBUG_PRINTF("expanding from %zu\n", g[u].index); for (auto v : adjacent_vertices_range(u, g)) { if (v == g.startDs) { continue; @@ -2234,8 +2236,7 @@ bool leadingLiterals(const NGHolder &g, set *lits, DEBUG_PRINTF("match\n"); goto skip_to_next_terminal; } - if (g[v].char_reach.count() - > 2 * MAX_LEADING_LITERALS) { + if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { DEBUG_PRINTF("wide\n"); goto skip_to_next_terminal; } @@ -2251,8 +2252,8 @@ bool leadingLiterals(const NGHolder &g, set *lits, CharReach cr = g[v].char_reach; vector &out = next[v]; - DEBUG_PRINTF("expanding to %u (|| = %zu)\n", - g[v].index, cr.count()); + DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index, + cr.count()); for (size_t c = cr.find_first(); c != CharReach::npos; c = cr.find_next(c)) { bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) @@ -2328,7 +2329,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set *lit_out, set adj_term1; insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); for (auto v : terms) { - DEBUG_PRINTF("term %u\n", g[v].index); + DEBUG_PRINTF("term %zu\n", g[v].index); set temp; insert(&temp, adjacent_vertices(v, g)); if (temp != adj_term1) { @@ -2355,7 +2356,7 @@ void findBestLiteral(const NGHolder &g, buildRegionMapping(g, regions, info, false); ue2_literal best; - NFAVertex best_v = nullptr; + NFAVertex best_v = NGHolder::null_vertex(); map::const_iterator lit = info.begin(); while (1) { @@ -2391,7 +2392,7 @@ bool splitOffBestLiteral(const NGHolder &g, const ue2::unordered_map ®ions, ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, const CompileContext &cc) { - NFAVertex v = nullptr; + NFAVertex v = NGHolder::null_vertex(); findBestLiteral(g, regions, lit_out, &v, cc); if (lit_out->empty()) { @@ -2405,7 +2406,7 @@ bool splitOffBestLiteral(const NGHolder &g, splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); - DEBUG_PRINTF("v = %u\n", g[v].index); + DEBUG_PRINTF("v = %zu\n", g[v].index); return true; } @@ -2625,7 +2626,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, } } else { DEBUG_PRINTF("has start->accept edge\n"); - if (hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.acceptEod, g) > 1) { DEBUG_PRINTF("also has a path to EOD\n"); return false; } @@ -2826,7 +2827,7 @@ map::const_iterator tryForLaterRevNfaCut(const NGHolder &g, reverseHolder(*prefix, g_rev); anchorStarts(g_rev); - g_rev.renumberVertices(); + renumber_vertices(g_rev); g_rev.kind = NFA_REV_PREFIX; reduceGraphEquivalences(g_rev, cc); removeRedundancy(g_rev, SOM_NONE); @@ -2870,7 +2871,7 @@ unique_ptr makePrefixForChain(NGHolder &g, } depths->clear(); /* renumbering invalidates depths */ - prefix->renumberVertices(); + renumber_vertices(*prefix); DEBUG_PRINTF("done\n"); return prefix; @@ -2886,8 +2887,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, // Special case: if g is completely anchored or begins with a dot-star, we // know that we have an absolute SOM of zero all the time. 
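The `findBestLiteral` and `splitOffBestLiteral` hunks above replace the `nullptr` vertex sentinel with `NGHolder::null_vertex()`. This is the portable BGL idiom: a vertex descriptor need not be pointer-valued, and `graph_traits<G>::null_vertex()` is the only sanctioned "no vertex" value. A small illustrative sketch with generic BGL types (the graph and function names here are hypothetical):

```cpp
#include <boost/graph/adjacency_list.hpp>

using Graph = boost::adjacency_list<boost::listS, boost::listS,
                                    boost::bidirectionalS>;
using Vertex = boost::graph_traits<Graph>::vertex_descriptor;

// Return some vertex with no out-edges, or the null sentinel if there is
// none; callers compare the result against null_vertex(), not nullptr.
Vertex findSink(const Graph &g) {
    for (auto [vi, ve] = boost::vertices(g); vi != ve; ++vi) {
        if (boost::out_degree(*vi, g) == 0) {
            return *vi;
        }
    }
    return boost::graph_traits<Graph>::null_vertex();
}
```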
- assert(edge(g.startDs, g.startDs, g).second); - if (!hasGreaterOutDegree(1, g.startDs, g) || beginsWithDotStar(g)) { + if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { makeSomAbsReports(rm, g, g.accept); makeSomAbsReports(rm, g, g.acceptEod); return SOMBE_HANDLED_INTERNAL; @@ -3004,7 +3004,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - prefix->renumberVertices(); + renumber_vertices(*prefix); if (!ng.addHolder(*prefix)) { DEBUG_PRINTF("failed to add holder\n"); clear_graph(g); diff --git a/src/nfagraph/ng_som_add_redundancy.cpp b/src/nfagraph/ng_som_add_redundancy.cpp index 924cfad1..33544ec1 100644 --- a/src/nfagraph/ng_som_add_redundancy.cpp +++ b/src/nfagraph/ng_som_add_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -155,13 +155,13 @@ bool addSomRedundancy(NGHolder &g, vector &depths) { if (is_special(v, g)) { continue; } - if (!hasGreaterInDegree(0, v, g)) { + if (!in_degree(v, g)) { continue; // unreachable, probably killed } const DepthMinMax &d = getDepth(v, g, depths); - DEBUG_PRINTF("vertex %u has depths %s\n", g[v].index, + DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index, d.str().c_str()); if (d.min == d.max) { diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index 676fb523..c4337341 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,7 +76,7 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { clear_in_edges(v, g); } - //dumpGraph("som_depth.dot", g.g); + //dumpGraph("som_depth.dot", g); vector temp_depths; // numbered by vertex index in g calcDepthsFrom(g, g.start, temp_depths); @@ -143,7 +143,7 @@ bool firstMatchIsFirst(const NGHolder &p) { for (auto v : vertices_range(p)) { assert(!is_virtual_start(v, p)); if (!is_special(v, p)) { - DEBUG_PRINTF("turning on %u\n", p[v].index); + DEBUG_PRINTF("turning on %zu\n", p[v].index); states.insert(v); } } @@ -154,9 +154,9 @@ bool firstMatchIsFirst(const NGHolder &p) { for (auto v : states) { /* need to check if this vertex may represent an infix match - ie * it does not have an edge to accept. 
*/ - DEBUG_PRINTF("check %u\n", p[v].index); + DEBUG_PRINTF("check %zu\n", p[v].index); if (!edge(v, p.accept, p).second) { - DEBUG_PRINTF("fail %u\n", p[v].index); + DEBUG_PRINTF("fail %zu\n", p[v].index); return false; } } @@ -186,14 +186,11 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, return cache.smgb[u]; } - DEBUG_PRINTF("checking if som can go backwards on %u\n", g[u].index); + DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index); set be; BackEdges> backEdgeVisitor(be); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); bool rv; if (0) { @@ -210,8 +207,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, NFAVertex s = source(e, g); NFAVertex t = target(e, g); /* only need to worry about big cycles including/before u */ - DEBUG_PRINTF("back edge %u %u\n", g[s].index, - g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index); if (s != t && region_map.at(s) <= u_region) { DEBUG_PRINTF("eek big cycle\n"); rv = true; /* big cycle -> eek */ @@ -268,13 +264,13 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, pruneUseless(c_g); be.clear(); - depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start). - vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g))); + boost::depth_first_search(c_g, visitor(backEdgeVisitor) + .root_vertex(c_g.start)); for (const auto &e : be) { NFAVertex s = source(e, c_g); NFAVertex t = target(e, c_g); - DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index); + DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index); if (s != t) { assert(0); DEBUG_PRINTF("eek big cycle\n"); @@ -326,7 +322,7 @@ bool sentClearsTail(const NGHolder &g, } for (UNUSED auto v : states) { - DEBUG_PRINTF("start state: %u\n", g[v].index); + DEBUG_PRINTF("start state: %zu\n", g[v].index); } /* run the prefix the main graph */ @@ -338,7 +334,7 @@ bool sentClearsTail(const NGHolder &g, continue; /* not in tail */ } - DEBUG_PRINTF("v %u is still on\n", g[v].index); + DEBUG_PRINTF("v %zu is still on\n", g[v].index); assert(v != g.accept && v != g.acceptEod); /* no cr */ assert(contains(region_map, v)); diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index bce638c0..3c2baee4 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -87,7 +87,7 @@ void splitLHS(const NGHolder &base, const vector &pivots, clearAccepts(*lhs); for (auto pivot : pivots) { - DEBUG_PRINTF("pivot is %u lv %zu lm %zu\n", base[pivot].index, + DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index, num_vertices(*lhs), lhs_map->size()); assert(contains(*lhs_map, pivot)); @@ -151,7 +151,8 @@ void splitRHS(const NGHolder &base, const vector &pivots, for (auto pivot : pivots) { assert(contains(*rhs_map, pivot)); - add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs); + (*rhs)[e].tops.insert(DEFAULT_TOP); } /* should do the renumbering unconditionally as we know edges are already @@ -190,8 +191,8 @@ void findCommonSuccessors(const NGHolder &g, const vector &pivots, vector &succ) { assert(!pivots.empty()); - // Note: for determinism, we must sort our successor sets by vertex_index. 
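The comment removed just above (and its twin in `addReporterVertices` earlier) explains why these sets previously carried an ordering functor, whose template arguments have been lost to formatting here: iteration over a `std::set` of descriptors is only reproducible across runs if the comparator is deterministic, for instance by comparing stored indices. A sketch of that older approach over a generic bundled-index graph (names hypothetical):

```cpp
#include <boost/graph/adjacency_list.hpp>
#include <set>

struct VProps { size_t index = 0; };
using Graph = boost::adjacency_list<boost::listS, boost::listS,
                                    boost::bidirectionalS, VProps>;
using Vertex = boost::graph_traits<Graph>::vertex_descriptor;

// Order vertex descriptors by their stored index so that set iteration
// is stable, even when the descriptors themselves are heap pointers.
struct ByIndex {
    const Graph *g = nullptr;
    bool operator()(Vertex a, Vertex b) const {
        return (*g)[a].index < (*g)[b].index;
    }
};

using OrderedVertexSet = std::set<Vertex, ByIndex>;
// usage: OrderedVertexSet s(ByIndex{&g});
```

With descriptors that already compare deterministically, a plain `std::set<NFAVertex>` suffices, which is what these hunks switch to.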
- set > adj(g), adj_temp(g); + set adj; + set adj_temp; insert(&adj, adjacent_vertices(pivots.at(0), g)); @@ -215,6 +216,7 @@ void splitGraph(const NGHolder &base, const vector &pivots, DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); assert(!has_parallel_edge(base)); + assert(isCorrectlyTopped(base)); /* RHS pivots are built from the common set of successors of pivots. */ vector rhs_pivots; @@ -228,6 +230,8 @@ void splitGraph(const NGHolder &base, const vector &pivots, assert(!has_parallel_edge(*lhs)); assert(!has_parallel_edge(*rhs)); + assert(isCorrectlyTopped(*lhs)); + assert(isCorrectlyTopped(*rhs)); } void splitGraph(const NGHolder &base, NFAVertex pivot, diff --git a/src/nfagraph/ng_split.h b/src/nfagraph/ng_split.h index 75577e97..31c1cf35 100644 --- a/src/nfagraph/ng_split.h +++ b/src/nfagraph/ng_split.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,8 @@ class NGHolder; * is in the lhs if it is reachable from start without going through the * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS. * + * Note: The RHS is setup to be triggered by TOP 0 + * * When multiple split vertices are provided: * - RHS contains all vertices reachable from every pivot * - LHS contains all vertices which are reachable from start ignoring any diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index 6577673f..ebec3a4a 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -102,7 +102,6 @@ #include "ng_holder.h" #include "ng_prune.h" #include "ng_region.h" -#include "ng_restructuring.h" #include "ng_som_util.h" #include "ng_util.h" #include "ng_util.h" @@ -135,8 +134,7 @@ void buildPDomTree(const NGHolder &g, PostDomTree &tree) { } NFAVertex pdom = postdominators[v]; if (pdom) { - DEBUG_PRINTF("vertex %u -> %u\n", g[pdom].index, - g[v].index); + DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index); tree[pdom].insert(v); } } @@ -154,8 +152,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, som_type som, const vector &som_depths, const ue2::unordered_map ®ion_map, smgb_cache &cache) { - DEBUG_PRINTF("build base squash mask for vertex %u)\n", - g[v].index); + DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); vector q; @@ -302,7 +299,7 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, } NFAStateSet u_squash(init.size()); - u32 u_index = g[u].index; + size_t u_index = g[u].index; buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, pdom_tree, som, som_depths, region_map, cache); @@ -310,7 +307,7 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, u_squash.set(u_index); /* never clear ourselves */ if ((~u_squash).any()) { // i.e. 
some bits unset in mask - DEBUG_PRINTF("%u is an upstream squasher of %u\n", u_index, + DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index, g[v].index); (*squash)[u] = u_squash; remaining.push_back(u); @@ -522,8 +519,7 @@ void filterSquashers(const NGHolder &g, if (!contains(squash, v)) { continue; } - DEBUG_PRINTF("looking at squash set for vertex %u\n", - g[v].index); + DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index); if (!hasSelfLoop(v, g)) { DEBUG_PRINTF("acyclic\n"); @@ -601,7 +597,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %u\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -610,7 +606,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { NFAVertex u = source(e, g); const auto &r = g[u].reports; if (!r.empty() && is_subset_of(r, reports)) { - DEBUG_PRINTF("vertex %u\n", g[u].index); + DEBUG_PRINTF("vertex %zu\n", g[u].index); dead.insert(e); } } @@ -621,7 +617,7 @@ void removeEdgesToAccept(NGHolder &g, NFAVertex v) { static vector findUnreachable(const NGHolder &g) { - const boost::reverse_graph revg(g.g); + const boost::reverse_graph revg(g); ue2::unordered_map colours; colours.reserve(num_vertices(g)); @@ -634,7 +630,7 @@ vector findUnreachable(const NGHolder &g) { vector unreach; for (auto v : vertices_range(revg)) { if (!contains(colours, v)) { - unreach.push_back(v); + unreach.push_back(NFAVertex(v)); } } return unreach; @@ -657,7 +653,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { const u32 numStates = num_vertices(g); for (auto v : verts) { - DEBUG_PRINTF("vertex %u with %zu reports\n", g[v].index, + DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index, g[v].reports.size()); // Find the set of vertices that lead to v or any other reporter with a @@ -684,7 +680,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { NFAStateSet &mask = squash[v]; for (auto uv : unreach) { - DEBUG_PRINTF("squashes index %u\n", h[uv].index); + DEBUG_PRINTF("squashes index %zu\n", h[uv].index); mask.reset(h[uv].index); } } diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index 217183de..4ad5ff78 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -39,7 +39,6 @@ #include "ng_limex.h" #include "ng_redundancy.h" #include "ng_region.h" -#include "ng_restructuring.h" #include "ng_uncalc_components.h" #include "ng_util.h" #include "ue2common.h" @@ -55,42 +54,52 @@ #include #include +#include + using namespace std; +using boost::adaptors::map_values; namespace ue2 { static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ /** Sentinel value meaning no component has yet been selected. 
*/ -static const u32 NO_COMPONENT = 0xffffffffu; +static const u32 NO_COMPONENT = ~0U; -static -vector getSortedVA(const NGHolder &g, - const ue2::unordered_map &state_ids) { - vector out; - out.reserve(num_vertices(g)); +static const u32 UNUSED_STATE = ~0U; - for (auto v : vertices_range(g)) { - assert(contains(state_ids, v)); - if (state_ids.at(v) == NO_STATE) { - continue; +namespace { +struct ranking_info { + explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { + u32 rank = 0; + + reverse(to_vertex.begin(), to_vertex.end()); + + for (NFAVertex v : to_vertex) { + to_rank[v] = rank++; + } + + for (NFAVertex v : vertices_range(h)) { + if (!contains(to_rank, v)) { + to_rank[v] = UNUSED_STATE; + } } - out.push_back(v); } - // Order vertices by their state indices. - sort(begin(out), end(out), [&state_ids](NFAVertex a, NFAVertex b) { - return state_ids.at(a) < state_ids.at(b); - }); - -#ifndef NDEBUG - // State indices should match vector indices. - for (u32 i = 0; i < out.size(); i++) { - assert(state_ids.at(out.at(i)) == i); + NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } + u32 get(NFAVertex v) const { return to_rank.at(v); } + u32 size() const { return (u32)to_vertex.size(); } + u32 add_to_tail(NFAVertex v) { + u32 rank = size(); + to_rank[v] = rank; + to_vertex.push_back(v); + return rank; } -#endif - return out; +private: + vector to_vertex; + unordered_map to_rank; +}; } static never_inline @@ -122,9 +131,9 @@ bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, } static never_inline -u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, - const NGHolder &gb, const vector &b) { - u32 ml = min(a.size(), b.size()); +u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { + u32 ml = min(a_ranking.size(), b_ranking.size()); if (ml > 65535) { ml = 65535; } @@ -133,7 +142,7 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, // "startedness" properties. 
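`ranking_info` above derives dense state numbers from a topological ordering instead of a separate state map: `getTopoOrdering()` returns reverse topological order, so reversing it and counting up assigns rank 0 to a source vertex. A generic restatement in plain BGL (hypothetical function name):

```cpp
#include <algorithm>
#include <iterator>
#include <unordered_map>
#include <vector>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/topological_sort.hpp>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::directedS>;
using Vertex = boost::graph_traits<Graph>::vertex_descriptor;

// Rank vertices by topological position. topological_sort emits vertices
// in *reverse* topological order, so reversing gives sources rank 0.
// Precondition: g is a DAG (back edges already filtered out).
std::unordered_map<Vertex, unsigned> rankByTopoOrder(const Graph &g) {
    std::vector<Vertex> order;
    boost::topological_sort(g, std::back_inserter(order));
    std::reverse(order.begin(), order.end());

    std::unordered_map<Vertex, unsigned> rank;
    unsigned r = 0;
    for (Vertex v : order) {
        rank[v] = r++;
    }
    return rank;
}
```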
u32 max = 0; for (; max < ml; max++) { - if (!cplVerticesMatch(ga, a[max], gb, b[max])) { + if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { break; } } @@ -141,34 +150,30 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, return max; } -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids) { - vector a = getSortedVA(ga, a_state_ids); - vector b = getSortedVA(gb, b_state_ids); - +static +u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { /* upper bound on the common region based on local properties */ - u32 max = cplCommonReachAndSimple(ga, a, gb, b); + u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); DEBUG_PRINTF("cpl upper bound %u\n", max); while (max > 0) { - bool ok = true; - /* shrink max region based on in-edges from outside the region */ for (size_t j = max; j > 0; j--) { - for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) { - u32 state_id = a_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + NFAVertex a_v = a_ranking.at(j - 1); + NFAVertex b_v = b_ranking.at(j - 1); + for (auto u : inv_adjacent_vertices_range(a_v, ga)) { + u32 state_id = a_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; } } - for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) { - u32 state_id = b_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + for (auto u : inv_adjacent_vertices_range(b_v, gb)) { + u32 state_id = b_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; @@ -180,44 +185,37 @@ u32 commonPrefixLength(const NGHolder &ga, /* Ensure that every pair of vertices has same out-edges to vertices in the region. 
*/ - for (size_t i = 0; ok && i < max; i++) { + for (size_t i = 0; i < max; i++) { size_t a_count = 0; size_t b_count = 0; - NGHolder::out_edge_iterator ei, ee; - for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) { - u32 sid = a_state_ids.at(target(*ei, ga)); - if (sid == NO_STATE || sid >= max) { + for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { + u32 sid = a_ranking.get(target(a_edge, ga)); + if (sid == UNUSED_STATE || sid >= max) { continue; } a_count++; - NFAEdge b_edge; - bool has_b_edge; - tie(b_edge, has_b_edge) = edge(b[i], b[sid], gb); + NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb); - if (!has_b_edge) { + if (!b_edge) { max = i; - ok = false; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); - break; + goto try_smaller; } - if (ga[*ei].top != gb[b_edge].top) { + if (ga[a_edge].tops != gb[b_edge].tops) { max = i; - ok = false; - DEBUG_PRINTF("tops don't match on edge %zu->%u\n", - i, sid); + DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); + goto try_smaller; } } - NGHolder::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae; - ++ai) { - u32 sid = b_state_ids.at(*ai); - if (sid == NO_STATE || sid >= max) { + for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { + u32 sid = b_ranking.get(b_v); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -226,52 +224,54 @@ u32 commonPrefixLength(const NGHolder &ga, if (a_count != b_count) { max = i; - DEBUG_PRINTF("lowering max to %u due to a,b count " - "(a_count=%zu, b_count=%zu)\n", max, a_count, - b_count); - ok = false; + DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," + " b_count=%zu)\n", max, a_count, b_count); + goto try_smaller; } } - if (ok) { - DEBUG_PRINTF("survived checks, returning cpl %u\n", max); - return max; - } + DEBUG_PRINTF("survived checks, returning cpl %u\n", max); + return max; + try_smaller:; } DEBUG_PRINTF("failed to find any common region\n"); return 0; } +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { + return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); +} + static never_inline -void mergeNfa(NGHolder &dest, vector &destStateMap, - ue2::unordered_map &dest_state_ids, - NGHolder &vic, vector &vicStateMap, - size_t common_len) { +void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { + assert(&dest != &vic); + + auto dest_info = ranking_info(dest); + auto vic_info = ranking_info(vic); + map vmap; // vic -> dest vmap[vic.start] = dest.start; vmap[vic.startDs] = dest.startDs; vmap[vic.accept] = dest.accept; vmap[vic.acceptEod] = dest.acceptEod; - vmap[nullptr] = nullptr; - - u32 stateNum = countStates(dest, dest_state_ids); + vmap[NGHolder::null_vertex()] = NGHolder::null_vertex(); // For vertices in the common len, add to vmap and merge in the reports, if // any. for (u32 i = 0; i < common_len; i++) { - NFAVertex v_old = vicStateMap[i], v = destStateMap[i]; + NFAVertex v_old = vic_info.at(i); + NFAVertex v = dest_info.at(i); vmap[v_old] = v; const auto &reports = vic[v_old].reports; dest[v].reports.insert(reports.begin(), reports.end()); } - // Add in vertices beyond the common len, giving them state numbers - // starting at stateNum. 
- for (u32 i = common_len; i < vicStateMap.size(); i++) { - NFAVertex v_old = vicStateMap[i]; + // Add in vertices beyond the common len + for (u32 i = common_len; i < vic_info.size(); i++) { + NFAVertex v_old = vic_info.at(i); if (is_special(v_old, vic)) { // Dest already has start vertices, just merge the reports. @@ -283,15 +283,17 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, } NFAVertex v = add_vertex(vic[v_old], dest); - dest_state_ids[v] = stateNum++; + dest_info.add_to_tail(v); vmap[v_old] = v; } /* add edges */ DEBUG_PRINTF("common_len=%zu\n", common_len); for (const auto &e : edges_range(vic)) { - NFAVertex u_old = source(e, vic), v_old = target(e, vic); - NFAVertex u = vmap[u_old], v = vmap[v_old]; + NFAVertex u_old = source(e, vic); + NFAVertex v_old = target(e, vic); + NFAVertex u = vmap[u_old]; + NFAVertex v = vmap[v_old]; bool uspecial = is_special(u, dest); bool vspecial = is_special(v, dest); @@ -302,15 +304,14 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, // We're in the common region if v's state ID is low enough, unless v // is a special (an accept), in which case we use u's state ID. - assert(contains(dest_state_ids, v)); - bool in_common_region = dest_state_ids.at(v) < common_len; - if (vspecial && dest_state_ids.at(u) < common_len) { + bool in_common_region = dest_info.get(v) < common_len; + if (vspecial && dest_info.get(u) < common_len) { in_common_region = true; } - DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n", - dest[u].index, dest_state_ids.at(u), - dest[v].index, dest_state_ids.at(v), + DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n", + dest[u].index, dest_info.get(u), + dest[v].index, dest_info.get(v), in_common_region ? " [common]" : ""); if (in_common_region) { @@ -318,7 +319,7 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, DEBUG_PRINTF("skipping common edge\n"); assert(edge(u, v, dest).second); // Should never merge edges with different top values. - assert(vic[e].top == dest[edge(u, v, dest).first].top); + assert(vic[e].tops == dest[edge(u, v, dest)].tops); continue; } else { assert(is_any_accept(v, dest)); @@ -334,20 +335,8 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, add_edge(u, v, vic[e], dest); } - dest.renumberEdges(); - dest.renumberVertices(); -} - -static never_inline -void mergeNfaComponent(NGHolder &pholder, NGHolder &vholder, size_t cpl) { - assert(&pholder != &vholder); - - auto v_state_ids = numberStates(vholder); - auto p_state_ids = numberStates(pholder); - auto vhvmap = getSortedVA(vholder, v_state_ids); - auto phvmap = getSortedVA(pholder, p_state_ids); - - mergeNfa(pholder, phvmap, p_state_ids, vholder, vhvmap, cpl); + renumber_edges(dest); + renumber_vertices(dest); } namespace { @@ -374,14 +363,19 @@ struct NfaMergeCandidateH { /** Returns true if graphs \p h1 and \p h2 can (and should) be merged. 
*/ static -bool shouldMerge(NGHolder &ha, - const ue2::unordered_map &a_state_ids, - NGHolder &hb, - const ue2::unordered_map &b_state_ids, - size_t cpl, const ReportManager *rm, - const CompileContext &cc) { - size_t combinedStateCount = - countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl; +bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, + const ReportManager *rm, const CompileContext &cc) { + size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; + + combinedStateCount -= 2 * 2; /* discount accepts from both */ + + if (is_triggered(ha)) { + /* allow for a state for each top, ignore existing starts */ + combinedStateCount -= 2; /* for start, startDs */ + auto tops = getTops(ha); + insert(&tops, getTops(hb)); + combinedStateCount += tops.size(); + } if (combinedStateCount > FAST_STATE_LIMIT) { // More complex implementability check. @@ -424,11 +418,13 @@ void buildNfaMergeQueue(const vector &cluster, // First, make sure all holders have numbered states and collect their // counts. - vector> states_map(cs); + vector states_map; + states_map.reserve(cs); for (size_t i = 0; i < cs; i++) { assert(cluster[i]); - NGHolder &g = *(cluster[i]); - states_map[i] = numberStates(g); + assert(states_map.size() == i); + const NGHolder &g = *(cluster[i]); + states_map.emplace_back(g); } vector seen_cpl(cs * cs, 0); @@ -506,26 +502,25 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { return false; } + /* TODO: relax top checks if reports match */ + // If both graphs have edge (start, accept), the tops must match. - auto e1_accept = edge(h1.start, h1.accept, h1); - auto e2_accept = edge(h2.start, h2.accept, h2); - if (e1_accept.second && e2_accept.second && - h1[e1_accept.first].top != h2[e2_accept.first].top) { + NFAEdge e1_accept = edge(h1.start, h1.accept, h1); + NFAEdge e2_accept = edge(h2.start, h2.accept, h2); + if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) { return false; } // If both graphs have edge (start, acceptEod), the tops must match. - auto e1_eod = edge(h1.start, h1.acceptEod, h1); - auto e2_eod = edge(h2.start, h2.acceptEod, h2); - if (e1_eod.second && e2_eod.second && - h1[e1_eod.first].top != h2[e2_eod.first].top) { + NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1); + NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2); + if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) { return false; } // If one graph has an edge to accept and the other has an edge to // acceptEod, the reports must match for the merge to be safe. - if ((e1_accept.second && e2_eod.second) || - (e2_accept.second && e1_eod.second)) { + if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) { if (h1[h1.start].reports != h2[h2.start].reports) { return false; } @@ -535,11 +530,9 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { } /** Merge graph \p ga into graph \p gb. Returns false on failure. */ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc) { assert(ga.kind == gb.kind); - auto a_state_ids = numberStates(ga); - auto b_state_ids = numberStates(gb); // Vacuous NFAs require special checks on their starts to ensure that tops // match, and that reports match for mixed-accept cases. 
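The reworked `shouldMerge()` above estimates the merged machine's size straight from vertex counts rather than numbering states first. Condensed into a hypothetical helper, the accounting is:

```cpp
#include <cstddef>

// na, nb: num_vertices() of the two holders; cpl: common prefix length.
size_t mergedStateEstimate(size_t na, size_t nb, size_t cpl,
                           bool triggered, size_t num_distinct_tops) {
    size_t combined = na + nb - cpl; // shared prefix counted once
    combined -= 2 * 2;               // accept/acceptEod exist in both
    if (triggered) {
        combined -= 2;               // start/startDs need no states...
        combined += num_distinct_tops; // ...but each top gets an entry state
    }
    return combined;
}
// e.g. na = 12, nb = 10, cpl = 6, untriggered: 12 + 10 - 6 - 4 = 12 states,
// comfortably under FAST_STATE_LIMIT (256).
```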
@@ -548,29 +541,26 @@ bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, return false; } - u32 cpl = commonPrefixLength(ga, a_state_ids, gb, b_state_ids); - if (!shouldMerge(gb, b_state_ids, ga, a_state_ids, cpl, rm, cc)) { + u32 cpl = commonPrefixLength(ga, gb); + if (!shouldMerge(gb, ga, cpl, rm, cc)) { return false; } mergeNfaComponent(gb, ga, cpl); reduceImplementableGraph(gb, SOM_NONE, rm, cc); - b_state_ids = numberStates(gb); return true; } -/** Merge the group of graphs in \p cluster where possible. The (from, to) - * mapping of merged graphs is returned in \p merged. */ -void mergeNfaCluster(const vector &cluster, - const ReportManager *rm, - map &merged, - const CompileContext &cc) { +map mergeNfaCluster(const vector &cluster, + const ReportManager *rm, + const CompileContext &cc) { + map merged; + if (cluster.size() < 2) { - return; + return merged; } DEBUG_PRINTF("new cluster, size %zu\n", cluster.size()); - merged.clear(); priority_queue pq; buildNfaMergeQueue(cluster, &pq); @@ -599,6 +589,8 @@ void mergeNfaCluster(const vector &cluster, } } } + + return merged; } } // namespace ue2 diff --git a/src/nfagraph/ng_uncalc_components.h b/src/nfagraph/ng_uncalc_components.h index 5f341961..b0f42670 100644 --- a/src/nfagraph/ng_uncalc_components.h +++ b/src/nfagraph/ng_uncalc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,9 +36,6 @@ #include #include -#include "nfagraph/ng_graph.h" -#include "util/ue2_containers.h" - namespace ue2 { struct CompileContext; @@ -52,20 +49,16 @@ class ReportManager; * The CPL is calculated based the topological ordering given by the state * indices for each graph. */ -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids); +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); /** * \brief Merge the group of graphs in \p cluster where possible. * - * The (from, to) mapping of merged graphs is returned in \p merged. + * The (from, to) mapping of merged graphs is returned. */ -void mergeNfaCluster(const std::vector &cluster, - const ReportManager *rm, - std::map &merged, - const CompileContext &cc); +std::map +mergeNfaCluster(const std::vector &cluster, const ReportManager *rm, + const CompileContext &cc); /** * \brief Merge graph \p ga into graph \p gb. @@ -73,7 +66,7 @@ void mergeNfaCluster(const std::vector &cluster, * Returns false on failure. On success, \p gb is reduced via \ref * reduceImplementableGraph and renumbered. 
*/ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index 12632e05..7df6c7dc 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,6 +39,10 @@ #include "util/graph_range.h" #include "util/ue2_containers.h" +#include + +#include + namespace ue2 { /** @@ -51,7 +55,7 @@ namespace ue2 { typedef boost::adjacency_list > + boost::property > NFAUndirectedGraph; typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; @@ -60,16 +64,18 @@ typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; * Make a copy of an NFAGraph with undirected edges, optionally without start * vertices. Mappings from the original graph to the new one are provided. * - * Note that new vertex indices are assigned contiguously in \a vertices(g) order. + * Note that new vertex indices are assigned contiguously in \a vertices(g) + * order. */ template void createUnGraph(const GraphT &g, - bool excludeStarts, - bool excludeAccepts, - NFAUndirectedGraph &ug, - ue2::unordered_map &old2new, - ue2::unordered_map &newIdx2old) { - u32 idx = 0; + bool excludeStarts, + bool excludeAccepts, + NFAUndirectedGraph &ug, + ue2::unordered_map &old2new) { + size_t idx = 0; + typedef typename GraphT::vertex_descriptor VertexT; for (auto v : ue2::vertices_range(g)) { // skip all accept nodes @@ -84,13 +90,12 @@ void createUnGraph(const GraphT &g, NFAUndirectedVertex nuv = boost::add_vertex(ug); old2new[v] = nuv; - newIdx2old[idx] = v; boost::put(boost::vertex_index, ug, nuv, idx++); } for (const auto &e : ue2::edges_range(g)) { - NFAVertex src = source(e, g); - NFAVertex targ = target(e, g); + VertexT src = source(e, g); + VertexT targ = target(e, g); if ((excludeAccepts && is_any_accept(src, g)) || (excludeStarts && is_any_start(src, g))) { diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 719e42e2..383aa142 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -176,7 +176,7 @@ void findSeeds(const NGHolder &h, const bool som, vector *seeds) { continue; } - DEBUG_PRINTF("%u is a seed\n", h[v].index); + DEBUG_PRINTF("%zu is a seed\n", h[v].index); seeds->push_back(v); already_seeds.insert(v); } @@ -184,13 +184,12 @@ void findSeeds(const NGHolder &h, const bool som, vector *seeds) { static bool expandCyclic(NGHolder &h, NFAVertex v) { - DEBUG_PRINTF("inspecting %u\n", h[v].index); + DEBUG_PRINTF("inspecting %zu\n", h[v].index); bool changes = false; - set v_preds; - set v_succs; - pred(h, v, &v_preds); - succ(h, v, &v_succs); + auto v_preds = preds(v, h); + auto v_succs = succs(v, h); + set start_siblings; set end_siblings; @@ -199,11 +198,10 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We need to find start vertices which have all of our preds. * As we have a self loop, it must be one of our succs. 
*/ for (auto a : adjacent_vertices_range(v, h)) { - set a_preds; - pred(h, a, &a_preds); + auto a_preds = preds(a, h); if (a_preds == v_preds && isutf8start(h[a].char_reach)) { - DEBUG_PRINTF("%u is a start v\n", h[a].index); + DEBUG_PRINTF("%zu is a start v\n", h[a].index); start_siblings.insert(a); } } @@ -211,11 +209,10 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We also need to find full cont vertices which have all our own succs; * As we have a self loop, it must be one of our preds. */ for (auto a : inv_adjacent_vertices_range(v, h)) { - set a_succs; - succ(h, a, &a_succs); + auto a_succs = succs(a, h); if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { - DEBUG_PRINTF("%u is a full tail cont\n", h[a].index); + DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index); end_siblings.insert(a); } } @@ -229,7 +226,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (cr.isSubsetOf(UTF_TWO_START_CR)) { if (end_siblings.find(*adjacent_vertices(s, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_THREE_START_CR)) { @@ -241,7 +238,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { } if (end_siblings.find(*adjacent_vertices(m, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) { @@ -261,11 +258,11 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { if (end_siblings.find(*adjacent_vertices(m2, h).first) == end_siblings.end()) { - DEBUG_PRINTF("%u is odd\n", h[s].index); + DEBUG_PRINTF("%zu is odd\n", h[s].index); continue; } } else { - DEBUG_PRINTF("%u is bad\n", h[s].index); + DEBUG_PRINTF("%zu is bad\n", h[s].index); continue; } diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index c629d553..5252eb18 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -46,15 +46,13 @@ #include #include #include -#include #include #include using namespace std; using boost::default_color_type; -using boost::filtered_graph; +using boost::make_filtered_graph; using boost::make_assoc_property_map; -using boost::adaptors::map_values; namespace ue2 { @@ -146,7 +144,7 @@ void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { if (edge(dest, t, g).second) { continue; } - NFAEdge clone = add_edge(dest, t, g).first; + NFAEdge clone = add_edge(dest, t, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -157,7 +155,7 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { for (const auto &e : in_edges_range(s, g)) { NFAVertex ss = source(e, g); assert(!edge(ss, dest, g).second); - NFAEdge clone = add_edge(ss, dest, g).first; + NFAEdge clone = add_edge(ss, dest, g); u32 idx = g[clone].index; g[clone] = g[e]; g[clone].index = idx; @@ -165,27 +163,21 @@ void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { } bool onlyOneTop(const NGHolder &g) { - set tops; - for (const auto &e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); - } - assert(!tops.empty()); - return tops.size() == 1; + return getTops(g).size() == 1; } namespace { struct CycleFound {}; struct DetectCycles : public boost::default_dfs_visitor { explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} - void back_edge(const NFAEdge &e, const NFAGraph &g) const { + void back_edge(const NFAEdge &e, const NGHolder &g) const { NFAVertex u = source(e, g), v = target(e, g); // We ignore the startDs self-loop. 
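`DetectCycles` above is the stock BGL recipe for cycle detection: run a depth-first search, and any back edge other than the deliberately ignored `startDs` self-loop proves a cycle. A stripped-down, self-contained version (generic graph type, hypothetical names):

```cpp
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/depth_first_search.hpp>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::directedS>;
using Edge = boost::graph_traits<Graph>::edge_descriptor;

struct CycleFound {};

// Throw on the first back edge seen; a DFS that completes without one
// proves the graph acyclic.
struct ThrowOnBackEdge : boost::default_dfs_visitor {
    void back_edge(Edge, const Graph &) const { throw CycleFound(); }
};

bool isAcyclicGraph(const Graph &g) {
    try {
        boost::depth_first_search(g, boost::visitor(ThrowOnBackEdge()));
    } catch (const CycleFound &) {
        return false;
    }
    return true;
}
```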
if (u == startDs && v == startDs) { return; } // Any other back-edge indicates a cycle. - DEBUG_PRINTF("back edge %u->%u found\n", g[u].index, - g[v].index); + DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index); throw CycleFound(); } private: @@ -220,10 +212,8 @@ bool isFloating(const NGHolder &g) { bool isAcyclic(const NGHolder &g) { try { - depth_first_search( - g.g, visitor(DetectCycles(g)) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(DetectCycles(g)) + .root_vertex(g.start)); } catch (const CycleFound &) { return false; } @@ -239,11 +229,11 @@ bool hasReachableCycle(const NGHolder &g, NFAVertex src) { try { // Use depth_first_visit, rather than depth_first_search, so that we // only search from src. - auto index_map = get(&NFAGraphVertexProps::index, g.g); - depth_first_visit( - g.g, src, DetectCycles(g), - make_iterator_property_map(colors.begin(), index_map)); - } catch (const CycleFound&) { + auto index_map = get(vertex_index, g); + boost::depth_first_visit(g, src, DetectCycles(g), + make_iterator_property_map(colors.begin(), + index_map)); + } catch (const CycleFound &) { return true; } @@ -254,10 +244,7 @@ bool hasBigCycles(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); set dead; BackEdges> backEdgeVisitor(dead); - depth_first_search( - g.g, visitor(backEdgeVisitor) - .root_vertex(g.start) - .vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); + boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); for (const auto &e : dead) { if (source(e, g) != target(e, g)) { @@ -268,43 +255,9 @@ bool hasBigCycles(const NGHolder &g) { return false; } -set findVerticesInCycles(const NGHolder &g) { - map comp_map; - - strong_components(g.g, make_assoc_property_map(comp_map), - vertex_index_map(get(&NFAGraphVertexProps::index, g.g))); - - map > comps; - - for (const auto &e : comp_map) { - comps[e.second].insert(e.first); - } - - - set rv; - - for (const auto &comp : comps | map_values) { - /* every vertex in a strongly connected component is reachable from - * every other vertex in the component. A vertex is involved in a cycle - * therefore if it is in a strongly connected component with more than - * one vertex or if it is the only vertex and it has a self loop. 
*/ - assert(!comp.empty()); - if (comp.size() > 1) { - insert(&rv, comp); - } - NFAVertex v = *comp.begin(); - if (hasSelfLoop(v, g)) { - rv.insert(v); - } - } - - return rv; -} - bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); - if (!hasGreaterInDegree(0, g.accept, g) - && !hasGreaterInDegree(1, g.acceptEod, g)) { + if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { DEBUG_PRINTF("no paths into accept\n"); return true; } @@ -313,7 +266,7 @@ bool can_never_match(const NGHolder &g) { } bool can_match_at_eod(const NGHolder &h) { - if (hasGreaterInDegree(1, h.acceptEod, h)) { + if (in_degree(h.acceptEod, h) > 1) { DEBUG_PRINTF("more than one edge to acceptEod\n"); return true; } @@ -337,17 +290,56 @@ bool can_only_match_at_eod(const NGHolder &g) { } bool matches_everywhere(const NGHolder &h) { - NFAEdge e; - bool exists; - tie(e, exists) = edge(h.startDs, h.accept, h); + NFAEdge e = edge(h.startDs, h.accept, h); - return exists && !h[e].assert_flags; + return e && !h[e].assert_flags; } bool is_virtual_start(NFAVertex v, const NGHolder &g) { return g[v].assert_flags & POS_FLAG_VIRTUAL_START; } +static +void reorderSpecials(const NGHolder &g, vector &topoOrder) { + // Start is last element of reverse topo ordering. + auto it = find(topoOrder.begin(), topoOrder.end(), g.start); + if (it != topoOrder.end() - 1) { + DEBUG_PRINTF("repositioning start\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end(), g.start); + } + + // StartDs is second-to-last element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.startDs); + if (it != topoOrder.end() - 2) { + DEBUG_PRINTF("repositioning start ds\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end() - 1, g.startDs); + } + + // AcceptEOD is first element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); + if (it != topoOrder.begin()) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.begin(), g.acceptEod); + } + + // Accept is second element of reverse topo ordering, if it's connected. 
+ it = find(topoOrder.begin(), topoOrder.end(), g.accept); + if (it != topoOrder.begin() + 1) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + if (in_degree(g.accept, g) != 0) { + topoOrder.insert(topoOrder.begin() + 1, g.accept); + } + } +} + vector getTopoOrdering(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); @@ -360,22 +352,19 @@ vector getTopoOrdering(const NGHolder &g) { EdgeSet backEdges; BackEdges be(backEdges); - auto index_map = get(&NFAGraphVertexProps::index, g.g); - depth_first_search(g.g, visitor(be) - .root_vertex(g.start) - .color_map(make_iterator_property_map( - colour.begin(), index_map)) - .vertex_index_map(index_map)); + auto index_map = get(vertex_index, g); + depth_first_search(g, visitor(be).root_vertex(g.start) + .color_map(make_iterator_property_map( + colour.begin(), index_map))); - AcyclicFilter af(&be.backEdges); - filtered_graph> acyclic_g(g.g, af); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); vector ordering; ordering.reserve(num_verts); - topological_sort( - acyclic_g, back_inserter(ordering), - color_map(make_iterator_property_map(colour.begin(), index_map)) - .vertex_index_map(index_map)); + topological_sort(acyclic_g, back_inserter(ordering), + color_map(make_iterator_property_map(colour.begin(), index_map))); + + reorderSpecials(g, ordering); return ordering; } @@ -397,14 +386,12 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, } } - // The AcyclicFilter is badly named, it's really just an edge-set filter. - filtered_graph>> prefix(g.g, - AcyclicFilter>(&dead)); + auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); depth_first_visit( prefix, g.start, make_dfs_visitor(boost::null_visitor()), make_iterator_property_map(vertexColor.begin(), - get(&NFAGraphVertexProps::index, g.g))); + get(vertex_index, g))); } bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, @@ -421,15 +408,14 @@ bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, mustBeSetBefore_int(u, g, vertexColor); for (auto vi : vertices_range(g)) { - auto key2 = make_pair(g[u].index, - g[vi].index); - DEBUG_PRINTF("adding %u %u\n", key2.first, key2.second); + auto key2 = make_pair(g[u].index, g[vi].index); + DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); assert(!contains(cache.cache, key2)); bool value = vertexColor[g[vi].index] == boost::white_color; cache.cache[key2] = value; assert(contains(cache.cache, key2)); } - DEBUG_PRINTF("cache miss %u %u (%zu)\n", key.first, key.second, + DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second, cache.cache.size()); return cache.cache[key]; } @@ -465,17 +451,21 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) { ue2::flat_set getTops(const NGHolder &h) { ue2::flat_set tops; for (const auto &e : out_edges_range(h.start, h)) { - NFAVertex v = target(e, h); - if (v == h.startDs) { - continue; - } - u32 top = h[e].top; - assert(top < NFA_MAX_TOP_MASKS); - tops.insert(top); + insert(&tops, h[e].tops); } return tops; } +void setTops(NGHolder &h, u32 top) { + for (const auto &e : out_edges_range(h.start, h)) { + assert(h[e].tops.empty()); + if (target(e, h) == h.startDs) { + continue; + } + h[e].tops.insert(top); + } +} + void clearReports(NGHolder &g) { DEBUG_PRINTF("clearing reports without an accept edge\n"); ue2::unordered_set allow; @@ -553,12 +543,13 @@ void fillHolder(NGHolder *outp, const NGHolder &in, const deque &vv, fillHolderOutEdges(out, in, v_map, u); } - 
out.renumberEdges(); - out.renumberVertices(); + renumber_edges(out); + renumber_vertices(out); } void cloneHolder(NGHolder &out, const NGHolder &in) { assert(hasCorrectlyNumberedVertices(in)); + assert(hasCorrectlyNumberedVertices(out)); out.kind = in.kind; // Note: depending on the state of the input graph, some stylized edges @@ -568,6 +559,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { /* remove the existing special edges */ clear_vertex(out.startDs, out); clear_vertex(out.accept, out); + renumber_edges(out); vector out_mapping(num_vertices(in)); out_mapping[NODE_START] = out.start; @@ -595,16 +587,13 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { NFAVertex s = out_mapping[si]; NFAVertex t = out_mapping[ti]; - UNUSED bool added; - NFAEdge e2; - tie(e2, added) = add_edge(s, t, out); - assert(added); + NFAEdge e2 = add_edge(s, t, out); out[e2] = in[e]; } // Safety checks. - assert(num_vertices(in.g) == num_vertices(out.g)); - assert(num_edges(in.g) == num_edges(out.g)); + assert(num_vertices(in) == num_vertices(out)); + assert(num_edges(in) == num_edges(out)); assert(hasCorrectlyNumberedVertices(out)); } @@ -630,14 +619,66 @@ unique_ptr cloneHolder(const NGHolder &in) { return h; } +void reverseHolder(const NGHolder &g_in, NGHolder &g) { + // Make the BGL do the grunt work. + ue2::unordered_map vertexMap; + boost::transpose_graph(g_in, g, + orig_to_copy(boost::make_assoc_property_map(vertexMap))); + + // The transpose_graph operation will have created extra copies of our + // specials. We have to rewire their neighbours to the 'real' specials and + // delete them. + NFAVertex start = vertexMap[g_in.acceptEod]; + NFAVertex startDs = vertexMap[g_in.accept]; + NFAVertex accept = vertexMap[g_in.startDs]; + NFAVertex acceptEod = vertexMap[g_in.start]; + + // Successors of starts. + for (const auto &e : out_edges_range(start, g)) { + NFAVertex v = target(e, g); + add_edge(g.start, v, g[e], g); + } + for (const auto &e : out_edges_range(startDs, g)) { + NFAVertex v = target(e, g); + add_edge(g.startDs, v, g[e], g); + } + + // Predecessors of accepts. + for (const auto &e : in_edges_range(accept, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.accept, g[e], g); + } + for (const auto &e : in_edges_range(acceptEod, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.acceptEod, g[e], g); + } + + // Remove our impostors. + clear_vertex(start, g); + remove_vertex(start, g); + clear_vertex(startDs, g); + remove_vertex(startDs, g); + clear_vertex(accept, g); + remove_vertex(accept, g); + clear_vertex(acceptEod, g); + remove_vertex(acceptEod, g); + + // Renumber so that g's properties (number of vertices, edges) are + // accurate. 
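`reverseHolder()` above lets `boost::transpose_graph` do the heavy lifting: it copies every vertex and inserts each edge reversed (the real call also passes `orig_to_copy()` so the duplicated specials can be rewired afterwards). The core operation in minimal generic form:

```cpp
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/transpose_graph.hpp>

using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                    boost::bidirectionalS>;

// transpose_graph(g, rev) adds a vertex in rev for every vertex of g and
// an edge (v, u) for every edge (u, v); g itself is left untouched.
Graph reverseOf(const Graph &g) {
    Graph rev;
    boost::transpose_graph(g, rev);
    return rev;
}
```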
+    renumber_vertices(g);
+    renumber_edges(g);
+
+    assert(num_vertices(g) == num_vertices(g_in));
+    assert(num_edges(g) == num_edges(g_in));
+}
+
 #ifndef NDEBUG
 
 bool allMatchStatesHaveReports(const NGHolder &g) {
     unordered_set<NFAVertex> reporters;
     for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
         if (g[v].reports.empty()) {
-            DEBUG_PRINTF("vertex %u has no reports!\n",
-                         g[v].index);
+            DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
             return false;
         }
         reporters.insert(v);
@@ -648,8 +689,7 @@ bool allMatchStatesHaveReports(const NGHolder &g) {
             continue; // stylised edge
         }
         if (g[v].reports.empty()) {
-            DEBUG_PRINTF("vertex %u has no reports!\n",
-                         g[v].index);
+            DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
             return false;
         }
         reporters.insert(v);
@@ -657,7 +697,7 @@ bool allMatchStatesHaveReports(const NGHolder &g) {
 
     for (auto v : vertices_range(g)) {
         if (!contains(reporters, v) && !g[v].reports.empty()) {
-            DEBUG_PRINTF("vertex %u is not a match state, but has reports!\n",
+            DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n",
                          g[v].index);
             return false;
         }
@@ -666,32 +706,22 @@ bool allMatchStatesHaveReports(const NGHolder &g) {
     return true;
 }
 
-bool hasCorrectlyNumberedVertices(const NGHolder &g) {
-    size_t count = num_vertices(g);
-    vector<bool> ids(count, false);
-    for (auto v : vertices_range(g)) {
-        u32 id = g[v].index;
-        if (id >= count || ids[id]) {
-            return false; // duplicate
+bool isCorrectlyTopped(const NGHolder &g) {
+    if (is_triggered(g)) {
+        for (const auto &e : out_edges_range(g.start, g)) {
+            if (g[e].tops.empty() != (target(e, g) == g.startDs)) {
+                return false;
+            }
+        }
+    } else {
+        for (const auto &e : out_edges_range(g.start, g)) {
+            if (!g[e].tops.empty()) {
+                return false;
+            }
         }
-        ids[id] = true;
     }
-    return find(ids.begin(), ids.end(), false) == ids.end()
-        && num_vertices(g) == num_vertices(g.g);
-}
 
-bool hasCorrectlyNumberedEdges(const NGHolder &g) {
-    size_t count = num_edges(g);
-    vector<bool> ids(count, false);
-    for (const auto &e : edges_range(g)) {
-        u32 id = g[e].index;
-        if (id >= count || ids[id]) {
-            return false; // duplicate
-        }
-        ids[id] = true;
-    }
-    return find(ids.begin(), ids.end(), false) == ids.end()
-        && num_edges(g) == num_edges(g.g);
+    return true;
 }
 
 #endif // NDEBUG
diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h
index 4f58dc45..a0752533 100644
--- a/src/nfagraph/ng_util.h
+++ b/src/nfagraph/ng_util.h
@@ -65,18 +65,30 @@ bool is_dot(NFAVertex v, const GraphT &g) {
 
 template<class U>
 static really_inline
 void succ(const NGHolder &g, NFAVertex v, U *s) {
-    NGHolder::adjacency_iterator ai, ae;
-    tie(ai, ae) = adjacent_vertices(v, g);
-    s->insert(ai, ae);
+    auto rv = adjacent_vertices(v, g);
+    s->insert(rv.first, rv.second);
+}
+
+template<class ContTemp = flat_set<NFAVertex>>
+ContTemp succs(NFAVertex u, const NGHolder &g) {
+    ContTemp rv;
+    succ(g, u, &rv);
+    return rv;
 }
 
 /** adds predecessors of v to s */
 template<class U>
 static really_inline
 void pred(const NGHolder &g, NFAVertex v, U *p) {
-    NGHolder::inv_adjacency_iterator it, ite;
-    tie(it, ite) = inv_adjacent_vertices(v, g);
-    p->insert(it, ite);
+    auto rv = inv_adjacent_vertices(v, g);
+    p->insert(rv.first, rv.second);
+}
+
+template<class ContTemp = flat_set<NFAVertex>>
+ContTemp preds(NFAVertex u, const NGHolder &g) {
+    ContTemp rv;
+    pred(g, u, &rv);
+    return rv;
 }
 
 /** returns a vertex with an out edge from v and is not v.
@@ -88,6 +100,30 @@
 NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
 
 /** Like getSoleDestVertex but for in-edges */
 NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
 
+/** \brief edge filtered graph.
+ *
+ * This gives you a view over the graph that excludes the edges in the
+ * provided set.
+ *
+ * If the set contains the graph's back edges, the result is an acyclic
+ * subgraph view. This is useful for topological_sort and other algorithms
+ * that require a DAG.
+ */
+template<typename EdgeSet>
+struct bad_edge_filter {
+    bad_edge_filter() {}
+    explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {}
+    bool operator()(const typename EdgeSet::value_type &e) const {
+        return !contains(*bad_edges, e); /* keep edges not in the bad set */
+    }
+    const EdgeSet *bad_edges = nullptr;
+};
+
+template<typename EdgeSet>
+bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) {
+    return bad_edge_filter<EdgeSet>(e);
+}
+
 /** Visitor that records back edges */
 template<class BackEdgeSet>
 class BackEdges : public boost::default_dfs_visitor {
@@ -100,59 +136,11 @@ public:
     BackEdgeSet &backEdges;
 };
 
-/** \brief Acyclic filtered graph.
- *
- * This will give you a view over the graph that is directed and acyclic:
- * useful for topological_sort and other algorithms that require a DAG.
- */
-template<class BackEdgeSet>
-struct AcyclicFilter {
-    AcyclicFilter() {}
-    explicit AcyclicFilter(const BackEdgeSet *edges) : backEdges(edges) {}
-    template<class EdgeT>
-    bool operator()(const EdgeT &e) const {
-        // Only keep edges that aren't in the back edge set.
-        return (backEdges->find(e) == backEdges->end());
-    }
-    const BackEdgeSet *backEdges = nullptr;
-};
-
-/**
- * Generic code to renumber all the vertices in a graph. Assumes that we're
- * using a vertex_index property of type u32, and that we always have
- * N_SPECIALS special vertices already present (which we don't want to
- * renumber).
- */
-template<class GraphT>
-static really_inline
-size_t renumberGraphVertices(GraphT &g) {
-    size_t num = N_SPECIALS;
-    for (const auto &v : vertices_range(g)) {
-        if (!is_special(v, g)) {
-            g[v].index = num++;
-            assert(num > 0); // no wrapping
-        }
-    }
-    return num;
-}
-
-/** Renumber all the edges in a graph. */
-template<class GraphT>
-static really_inline
-size_t renumberGraphEdges(GraphT &g) {
-    size_t num = 0;
-    for (const auto &e : edges_range(g)) {
-        g[e].index = num++;
-        assert(num > 0); // no wrapping
-    }
-    return num;
-}
-
 /** Returns true if the vertex is either of the real starts (NODE_START,
  * NODE_START_DOTSTAR). */
 template<class GraphT>
 static really_inline
-bool is_any_start(const NFAVertex v, const GraphT &g) {
+bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
     u32 i = g[v].index;
     return i == NODE_START || i == NODE_START_DOTSTAR;
 }
@@ -160,47 +148,34 @@ bool is_virtual_start(NFAVertex v, const NGHolder &g);
 
 template<class GraphT>
-static really_inline
-bool is_any_accept(const NFAVertex v, const GraphT &g) {
+bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
     u32 i = g[v].index;
     return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
 }
 
 /** returns true iff v has an edge to accept or acceptEod */
 template<class GraphT>
-static really_inline
-bool is_match_vertex(NFAVertex v, const GraphT &g) {
+bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
     return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
 }
 
 /** Generate a reverse topological ordering for a back-edge filtered version of
- * our graph (as it must be a DAG and correctly numbered) */
+ * our graph (as it must be a DAG and correctly numbered).
+ *
+ * Note: we ensure that we produce a topo ordering that begins with acceptEod
+ * and accept (if present) and ends with startDs followed by start.
+ */
 std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
 
-/** Comparison functor used to sort by vertex_index. */
-template<typename Graph>
-struct VertexIndexOrdering {
-    VertexIndexOrdering(const Graph &g_in) : g(&g_in) {}
-    bool operator()(typename Graph::vertex_descriptor a,
-                    typename Graph::vertex_descriptor b) const {
-        assert(a == b || (*g)[a].index != (*g)[b].index);
-        return (*g)[a].index < (*g)[b].index;
-    }
-private:
-    const Graph *g;
-};
-
-template<typename Graph>
-static
-VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) {
-    return VertexIndexOrdering<Graph>(g);
-}
-
 bool onlyOneTop(const NGHolder &g);
 
-/** Return a mask of the tops on the given graph. */
+/** Return the set of the tops on the given graph. */
 flat_set<u32> getTops(const NGHolder &h);
 
+/** Initialise the tops on h to the provided top. Assumes that h is triggered
+ * and that no tops have been set on h yet. */
+void setTops(NGHolder &h, u32 top = DEFAULT_TOP);
+
 /** adds a vertex to g with all the same vertex properties as \p v (aside from
  * index) */
 NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
@@ -296,6 +271,10 @@ void clearReports(NGHolder &g);
  * r_old. */
 void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
 
+/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
+ * accepts. */
+void reverseHolder(const NGHolder &g, NGHolder &out);
+
 #ifndef NDEBUG
 // Assertions: only available in internal builds.
@@ -308,17 +287,11 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
 bool allMatchStatesHaveReports(const NGHolder &g);
 
 /**
- * Assertion: returns true if the vertices in this graph are contiguously (and
- * uniquely) numbered from zero.
+ * Assertion: returns true if the graph is triggered and all edges out of start
+ * have tops, OR if the graph is not triggered and all edges out of start have
+ * no tops.
  */
-bool hasCorrectlyNumberedVertices(const NGHolder &g);
-
-/**
- * Assertion: returns true if the edges in this graph are contiguously (and
- * uniquely) numbered from zero.
- */
-bool hasCorrectlyNumberedEdges(const NGHolder &g);
-
+bool isCorrectlyTopped(const NGHolder &g);
 #endif // NDEBUG
 
 } // namespace ue2
diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp
index 94e0a998..985246f0 100644
--- a/src/nfagraph/ng_violet.cpp
+++ b/src/nfagraph/ng_violet.cpp
@@ -67,8 +67,6 @@
 #include
 #include
 #include
-#include
-#include
 #include
 
 #define STAGE_DEBUG_PRINTF DEBUG_PRINTF
@@ -466,7 +464,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored,
         DEBUG_PRINTF("inspecting region %u\n", region);
         set<ue2_literal> s;
         for (auto v : vv) {
-            DEBUG_PRINTF("   exit vertex: %u\n", g[v].index);
+            DEBUG_PRINTF("   exit vertex: %zu\n", g[v].index);
             /* Note: RHS can not be depended on to take all subsequent revisits
              * to this vertex */
             set<ue2_literal> ss = getLiteralSet(g, v, false);
@@ -671,7 +669,7 @@ unique_ptr<VertLitInfo> findBestSplit(const NGHolder &g,
         lits.pop_back();
     }
 
-    DEBUG_PRINTF("best is '%s' %u a%d t%d\n",
+    DEBUG_PRINTF("best is '%s' %zu a%d t%d\n",
                  dumpString(*best->lit.begin()).c_str(),
                  g[best->vv.front()].index, depths ?
                     (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0,
@@ -779,7 +777,7 @@ set<NFAVertex> poisonVertices(const NGHolder &h, const RoseInGraph &vg,
     set<NFAVertex> bad_vertices;
     for (const NFAEdge &e : bad_edges) {
         bad_vertices.insert(target(e, h));
-        DEBUG_PRINTF("bad: %u->%u\n", h[source(e, h)].index,
+        DEBUG_PRINTF("bad: %zu->%zu\n", h[source(e, h)].index,
                      h[target(e, h)].index);
     }
 
@@ -1076,8 +1074,10 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg,
 
     assert(hasCorrectlyNumberedVertices(*rhs));
     assert(hasCorrectlyNumberedEdges(*rhs));
+    assert(isCorrectlyTopped(*rhs));
     assert(hasCorrectlyNumberedVertices(*lhs));
     assert(hasCorrectlyNumberedEdges(*lhs));
+    assert(isCorrectlyTopped(*lhs));
 
     return true;
 }
@@ -1144,7 +1144,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
         NFAVertex prev_v = source(e, h);
         NFAVertex pivot = target(e, h);
 
-        DEBUG_PRINTF("splitting on pivot %u\n", h[pivot].index);
+        DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index);
         ue2::unordered_map<NFAVertex, NFAVertex> temp_map;
         shared_ptr<NGHolder> new_lhs = make_shared<NGHolder>();
         splitLHS(h, pivot, new_lhs.get(), &temp_map);
@@ -1152,7 +1152,11 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
         /* want to cut off paths to pivot from things other than the pivot -
          * makes a more svelte graph */
         clear_in_edges(temp_map[pivot], *new_lhs);
-        add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs);
+        NFAEdge pivot_edge = add_edge(temp_map[prev_v], temp_map[pivot],
+                                      *new_lhs);
+        if (is_triggered(h) && prev_v == h.start) {
+            (*new_lhs)[pivot_edge].tops.insert(DEFAULT_TOP);
+        }
 
         pruneUseless(*new_lhs, false);
         renumber_vertices(*new_lhs);
@@ -1162,6 +1166,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
 
         assert(hasCorrectlyNumberedVertices(*new_lhs));
         assert(hasCorrectlyNumberedEdges(*new_lhs));
+        assert(isCorrectlyTopped(*new_lhs));
 
         const set<ue2_literal> &lits = cut_lits.at(e);
         for (const auto &lit : lits) {
@@ -1228,6 +1233,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
                 DEBUG_PRINTF("  into rhs %s\n",
                              to_string(new_rhs->kind).c_str());
                 done_rhs.emplace(adj, new_rhs);
+                assert(isCorrectlyTopped(*new_rhs));
             }
 
             assert(done_rhs[adj].get());
@@ -1235,6 +1241,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg,
 
             assert(hasCorrectlyNumberedVertices(*new_rhs));
             assert(hasCorrectlyNumberedEdges(*new_rhs));
+            assert(isCorrectlyTopped(*new_rhs));
 
             if (vg[dest].type == RIV_LITERAL
                 && !can_match(*new_rhs, vg[dest].s, true)) {
@@ -1317,7 +1324,7 @@ bool deanchorIfNeeded(NGHolder &g) {
     succ_g.erase(g.startDs);
 
     for (auto v : adjacent_vertices_range(g.start, g)) {
-        DEBUG_PRINTF("inspecting cand %u || = %zu\n", g[v].index,
+        DEBUG_PRINTF("inspecting cand %zu || = %zu\n", g[v].index,
                      g[v].char_reach.count());
 
         if (v == g.startDs || !g[v].char_reach.all()) {
@@ -1380,6 +1387,7 @@ void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) {
     RoseInEdge e = *edges(vg).first;
 
     NGHolder &h = *vg[e].graph;
+    assert(isCorrectlyTopped(h));
     renumber_vertices(h);
     renumber_edges(h);
 
@@ -1602,6 +1610,7 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig,
             continue;
         }
 
+        assert(isCorrectlyTopped(*h_new));
         graphs[right] = make_pair(h_new, delay);
     }
 
@@ -1720,6 +1729,8 @@ unique_ptr<NGHolder> make_chain(u32 count) {
     h[u].reports.insert(0);
     add_edge(u, h.accept, h);
 
+    setTops(h);
+
     return rv;
 }
 
@@ -1777,6 +1788,7 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg,
 
         assert(willBeTransient(findMaxWidth(*h_new), cc)
                || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey));
+        assert(isCorrectlyTopped(*h_new));
         graphs[v] = h_new;
     }
 
@@ -1811,6 +1823,7 @@ bool
improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, num_vertices(h)); + assert(isCorrectlyTopped(h)); renumber_vertices(h); renumber_edges(h); @@ -1860,6 +1873,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, for (const auto &e : ee) { shared_ptr hh = cloneHolder(h); auto succ_lit = vg[target(e, vg)].s; + assert(isCorrectlyTopped(*hh)); u32 delay = removeTrailingLiteralStates(*hh, succ_lit, succ_lit.length(), false /* can't overhang start */); @@ -1868,6 +1882,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, continue; } + assert(isCorrectlyTopped(*hh)); trimmed[hh].emplace_back(e, delay); } @@ -2110,10 +2125,15 @@ void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, add_edge(lhs->accept, lhs->acceptEod, *lhs); clearReports(*lhs); for (NFAVertex v : splitters) { - add_edge(v_map[v], lhs->accept, *lhs); + NFAEdge e = add_edge(v_map[v], lhs->accept, *lhs); + if (v == base_graph.start) { + (*lhs)[e].tops.insert(DEFAULT_TOP); + } (*lhs)[v_map[v]].reports.insert(0); + } pruneUseless(*lhs); + assert(isCorrectlyTopped(*lhs)); /* create literal vertices and connect preds */ for (const auto &lit : split.lit) { @@ -2319,7 +2339,7 @@ bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { make_nocase(&lit); } - DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); out->vv = {v}; out->lit = {lit}; return true; @@ -2448,7 +2468,7 @@ bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { } ue2_literal lit = reverse_literal(rv.second); - DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + DEBUG_PRINTF("%zu found %s\n", h[v].index, dumpString(lit).c_str()); if (bad_mixed_sensitivity(lit)) { make_nocase(&lit); @@ -2652,6 +2672,7 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, pruneUseless(vg); dumpPreRoseGraph(vg, cc.grey); + renumber_vertices(vg); calcVertexOffsets(vg); bool rv = rose.addRose(vg, prefilter); DEBUG_PRINTF("violet: %s\n", rv ? 
"success" : "fail"); diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index 470f9343..d596b7b5 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,18 +58,18 @@ namespace { struct SpecialEdgeFilter { SpecialEdgeFilter() {} explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {} - explicit SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) + SpecialEdgeFilter(const NGHolder &h_in, u32 top_in) : h(&h_in), single_top(true), top(top_in) {} bool operator()(const NFAEdge &e) const { - const NFAGraph &g = h->g; - NFAVertex u = source(e, g), v = target(e, g); - if ((is_any_start(u, g) && is_any_start(v, g)) || - (is_any_accept(u, g) && is_any_accept(v, g))) { + NFAVertex u = source(e, *h); + NFAVertex v = target(e, *h); + if ((is_any_start(u, *h) && is_any_start(v, *h)) || + (is_any_accept(u, *h) && is_any_accept(v, *h))) { return false; } if (single_top) { - if (u == h->start && g[e].top != top) { + if (u == h->start && !contains((*h)[e].tops, top)) { return false; } if (u == h->startDs) { @@ -94,7 +94,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::unreachable(); } - boost::filtered_graph g(h.g, filter); + boost::filtered_graph g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -106,11 +106,10 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, // Since we are interested in the single-source shortest paths on a graph // with the same weight on every edge, using BFS will be faster than // Dijkstra here. - breadth_first_search( - g, src, + breadth_first_search(g, src, visitor(make_bfs_visitor(record_distances( make_iterator_property_map(distance.begin(), index_map), - boost::on_tree_edge()))).vertex_index_map(index_map)); + boost::on_tree_edge())))); DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n", distance.at(NODE_ACCEPT).str().c_str(), @@ -130,7 +129,7 @@ depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter, static depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, NFAVertex src) { - if (isLeafNode(src, h.g)) { + if (isLeafNode(src, h)) { return depth::unreachable(); } @@ -139,7 +138,7 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, return depth::infinity(); } - boost::filtered_graph g(h.g, filter); + boost::filtered_graph g(h, filter); assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); @@ -149,11 +148,9 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, auto index_map = get(&NFAGraphVertexProps::index, g); // DAG shortest paths with negative edge weights. 
- dag_shortest_paths( - g, src, + dag_shortest_paths(g, src, distance_map(make_iterator_property_map(distance.begin(), index_map)) .weight_map(boost::make_constant_property(-1)) - .vertex_index_map(index_map) .color_map(make_iterator_property_map(colors.begin(), index_map))); depth acceptDepth, acceptEodDepth; diff --git a/src/parser/prefilter.cpp b/src/parser/prefilter.cpp index ea58a134..f69362e4 100644 --- a/src/parser/prefilter.cpp +++ b/src/parser/prefilter.cpp @@ -295,6 +295,16 @@ public: Component *visit(ComponentWordBoundary *c) override { assert(c); + + // TODO: Right now, we do not have correct code for resolving these + // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we + // just replace with an empty sequence (as that will return a superset + // of matches). + if (mode.ucp && !mode.utf8) { + return new ComponentSequence(); + } + + // All other cases can be prefiltered. c->setPrefilter(true); return c; } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 017a6bf0..82537241 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -401,7 +401,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, scratch->tctxt.mpv_inactive = 0; /* we know it is going to be an mpv, skip the indirection */ - next_pos_match_loc = nfaExecMpv0_QueueExecRaw(q->nfa, q, loc); + next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc); assert(!q->report_current); if (!next_pos_match_loc) { /* 0 means dead */ @@ -441,7 +441,7 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { const struct RoseContext *tctxt = &scratch->tctxt; assert(tctxt->curr_qi < rose->queueCount); if (tctxt->curr_qi < rose->outfixBeginQueue) { - assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA_0); + assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA); return 1; } return 0; diff --git a/src/rose/init.c b/src/rose/init.c index 511eafe4..025ecca0 100644 --- a/src/rose/init.c +++ b/src/rose/init.c @@ -85,9 +85,4 @@ void roseInitState(const struct RoseEngine *t, char *state) { init_state(t, state); init_outfixes(t, state); - - // Clear the floating matcher state, if any. - DEBUG_PRINTF("clearing %u bytes of floating matcher state\n", - t->floatingStreamState); - memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState); } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 8bf41715..5b2c829f 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -45,11 +45,13 @@ #include "rose_program.h" #include "rose_types.h" #include "validate_mask.h" +#include "validate_shufti.h" #include "runtime.h" #include "scratch.h" #include "ue2common.h" #include "hwlm/hwlm.h" // for hwlmcb_rv_t #include "util/compare.h" +#include "util/copybytes.h" #include "util/fatbit.h" #include "util/multibit.h" @@ -70,73 +72,6 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, /* Inline implementation follows. */ -static rose_inline -int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, - const u8 *and_mask, const u8 *exp_mask) { - const u8 *data; - - // If the check works over part of the history and part of the buffer, we - // create a temporary copy of the data in here so it's contiguous. 
-    u8 temp[MAX_MASK2_WIDTH];
-
-    s64a buffer_offset = (s64a)end - ci->buf_offset;
-    DEBUG_PRINTF("rel offset %lld\n", buffer_offset);
-    if (buffer_offset >= mask_rewind) {
-        data = ci->buf + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else if (buffer_offset <= 0) {
-        data = ci->hbuf + ci->hlen + buffer_offset - mask_rewind;
-        DEBUG_PRINTF("all in one case data=%p buf=%p rewind=%u\n", data,
-                     ci->buf, mask_rewind);
-    } else {
-        u32 shortfall = mask_rewind - buffer_offset;
-        DEBUG_PRINTF("shortfall of %u, rewind %u hlen %zu\n", shortfall,
-                     mask_rewind, ci->hlen);
-        data = temp;
-        memcpy(temp, ci->hbuf + ci->hlen - shortfall, shortfall);
-        memcpy(temp + shortfall, ci->buf, mask_rewind - shortfall);
-    }
-
-#ifdef DEBUG
-    DEBUG_PRINTF("DATA: ");
-    for (u32 i = 0; i < mask_rewind; i++) {
-        printf("%c", ourisprint(data[i]) ? data[i] : '?');
-    }
-    printf(" (len=%u)\n", mask_rewind);
-#endif
-
-    u32 len = mask_rewind;
-    while (len >= sizeof(u64a)) {
-        u64a a = unaligned_load_u64a(data);
-        a &= *(const u64a *)and_mask;
-        if (a != *(const u64a *)exp_mask) {
-            DEBUG_PRINTF("argh %016llx %016llx\n", a, *(const u64a *)exp_mask);
-            return 0;
-        }
-        data += sizeof(u64a);
-        and_mask += sizeof(u64a);
-        exp_mask += sizeof(u64a);
-        len -= sizeof(u64a);
-    }
-
-    while (len) {
-        u8 a = *data;
-        a &= *and_mask;
-        if (a != *exp_mask) {
-            DEBUG_PRINTF("argh d%02hhx =%02hhx am%02hhx em%02hhx\n", a,
-                         *data, *and_mask, *exp_mask);
-            return 0;
-        }
-        data++;
-        and_mask++;
-        exp_mask++;
-        len--;
-    }
-
-    return 1;
-}
-
 static rose_inline
 void rosePushDelayedMatch(const struct RoseEngine *t,
                           struct hs_scratch *scratch, u32 delay,
@@ -783,6 +718,347 @@ int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
         return 0;
     }
 }
+
+static rose_inline
+int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
+                    const u8 *cmp_mask, const u32 neg_mask,
+                    s32 checkOffset, u64a end) {
+    const s64a base_offset = (s64a)end - ci->buf_offset;
+    s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
+    DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    m256 data = zeroes256(); // consists of the following four parts.
+    s32 c_shift = 0; // blank bytes after current.
+    s32 h_shift = 0; // blank bytes before history.
+    s32 h_len = 32; // number of bytes from history buffer.
+    s32 c_len = 0; // number of bytes from current buffer.
+    /* h_shift + h_len + c_len + c_shift == 32 must hold. */
+
+    if (offset < 0) {
+        s32 h_offset = 0; // the start offset in history buffer.
+        if (offset < -(s64a)ci->hlen) {
+            if (offset + 32 <= -(s64a)ci->hlen) {
+                DEBUG_PRINTF("all before history\n");
+                return 1;
+            }
+            h_shift = -(offset + (s64a)ci->hlen);
+            h_len = 32 - h_shift;
+        } else {
+            h_offset = ci->hlen + offset;
+        }
+        if (offset + 32 > 0) {
+            // part in current buffer.
+            c_len = offset + 32;
+            h_len = -(offset + h_shift);
+            if (c_len > (s64a)ci->len) {
+                // out of current buffer.
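+                // Clamp the copy to the scanned buffer; the c_shift bytes
+                // beyond it stay zero and are excluded via valid_data_mask.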
+ c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == 32); + copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (offset + 32 > (s64a)ci->len) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 1; + } + c_len = ci->len - offset; + c_shift = 32 - c_len; + copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len); + } else { + data = loadu256(ci->buf + offset); + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + // we use valid_data_mask to blind bytes before history/in the future. + u32 valid_data_mask; + valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift); + + m256 and_mask_m256 = loadu256(and_mask); + m256 cmp_mask_m256 = loadu256(cmp_mask); + if (validateMask32(data, valid_data_mask, and_mask_m256, + cmp_mask_m256, neg_mask)) { + DEBUG_PRINTF("Mask32 passed\n"); + return 1; + } + return 0; +} + +// get 128/256 bits data from history and current buffer. +// return data and valid_data_mask. +static rose_inline +u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, + u8 *data, const u32 data_len) { + assert(data_len == 16 || data_len == 32); + s32 c_shift = 0; // blank bytes after current. + s32 h_shift = 0; // blank bytes before history. + s32 h_len = data_len; // number of bytes from history buffer. + s32 c_len = 0; // number of bytes from current buffer. + if (loc < 0) { + s32 h_offset = 0; // the start offset in history buffer. + if (loc < -(s64a)ci->hlen) { + if (loc + data_len <= -(s64a)ci->hlen) { + DEBUG_PRINTF("all before history\n"); + return 0; + } + h_shift = -(loc + (s64a)ci->hlen); + h_len = data_len - h_shift; + } else { + h_offset = ci->hlen + loc; + } + if (loc + data_len > 0) { + // part in current buffer. + c_len = loc + data_len; + h_len = -(loc + h_shift); + if (c_len > (s64a)ci->len) { + // out of current buffer. 
+ c_shift = c_len - ci->len; + c_len = ci->len; + } + copy_upto_32_bytes(data - loc, ci->buf, c_len); + } + assert(h_shift + h_len + c_len + c_shift == (s32)data_len); + copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len); + } else { + if (loc + data_len > (s64a)ci->len) { + if (loc >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future.\n"); + return 0; + } + c_len = ci->len - loc; + c_shift = data_len - c_len; + copy_upto_32_bytes(data, ci->buf + loc, c_len); + } else { + if (data_len == 16) { + storeu128(data, loadu128(ci->buf + loc)); + return 0xffff; + } else { + storeu256(data, loadu256(ci->buf + loc)); + return 0xffffffff; + } + } + } + DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift); + DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len); + + if (data_len == 16) { + return (u16)(0xffff << (h_shift + c_shift)) >> c_shift; + } else { + return (~0u) << (h_shift + c_shift) >> c_shift; + } +} + +static rose_inline +m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m128) <= ci->len) { + *valid_data_mask = 0xffff; + return loadu128(ci->buf + offset); + } + ALIGN_DIRECTIVE u8 data[sizeof(m128)]; + *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); + return *(m128 *)data; +} + +static rose_inline +m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { + if (offset > 0 && offset + sizeof(m256) <= ci->len) { + *valid_data_mask = ~0u; + return loadu256(ci->buf + offset); + } + ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)]; + *valid_data_mask = getBufferDataComplex(ci, offset, data, 32); + return *(m256 *)data; +} + +static rose_inline +int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, + const u8 *bucket_select_mask, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u16 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 nib_mask_m256 = loadu256(nib_mask); + m128 bucket_select_mask_m128 = loadu128(bucket_select_mask); + if (validateShuftiMask16x8(data, nib_mask_m256, + bucket_select_mask_m128, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u16 valid_data_mask = 0; + m128 data = getData128(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 data_m256 = set2x128(data); + m256 hi_mask_m256 = loadu256(hi_mask); + m256 lo_mask_m256 = loadu256(lo_mask); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask16x16(data_m256, hi_mask_m256, 
lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 16x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask, + u32 neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m128 hi_mask_m128 = loadu128(hi_mask); + m128 lo_mask_m128 = loadu128(lo_mask); + m256 hi_mask_m256 = set2x128(hi_mask_m128); + m256 lo_mask_m256 = set2x128(lo_mask_m128); + m256 bucket_select_mask_m256 = loadu256(bucket_select_mask); + if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256, + bucket_select_mask_m256, + neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, + const u8 *lo_mask, const u8 *bucket_select_mask_hi, + const u8 *bucket_select_mask_lo, u32 neg_mask, + s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 valid_data_mask = 0; + m256 data = getData256(ci, offset, &valid_data_mask); + if (unlikely(!valid_data_mask)) { + return 1; + } + + m256 hi_mask_1 = loadu2x128(hi_mask); + m256 hi_mask_2 = loadu2x128(hi_mask + 16); + m256 lo_mask_1 = loadu2x128(lo_mask); + m256 lo_mask_2 = loadu2x128(lo_mask + 16); + + m256 bucket_mask_hi = loadu256(bucket_select_mask_hi); + m256 bucket_mask_lo = loadu256(bucket_select_mask_lo); + if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, bucket_mask_hi, + bucket_mask_lo, neg_mask, valid_data_mask)) { + DEBUG_PRINTF("check shufti 32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static rose_inline +int roseCheckSingleLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + s8 checkOffset, u32 lookaroundIndex, u64a end) { + assert(lookaroundIndex != MO_INVALID_IDX); + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s64a base_offset = end - ci->buf_offset; + const s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset; + const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + + u8 c; + if (offset >= 0 && offset < (s64a)ci->len) { + c = ci->buf[offset]; + } else if (offset < 0 && offset >= -(s64a)ci->hlen) { + c 
= ci->hbuf[ci->hlen + offset]; + } else { + return 1; + } + + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + /** * \brief Scan around a literal, checking that that "lookaround" reach masks * are satisfied. @@ -1055,6 +1331,78 @@ hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, return HWLM_CONTINUE_MATCHING; } +static rose_inline +int roseCheckLongLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the buffer prepared by + // the long literal table. This is only done in streaming mode. + + assert(t->mode != HS_MODE_BLOCK); + + const u8 *ll_buf; + size_t ll_len; + if (nocase) { + ll_buf = scratch->tctxt.ll_buf_nocase; + ll_len = scratch->tctxt.ll_len_nocase; + } else { + ll_buf = scratch->tctxt.ll_buf; + ll_len = scratch->tctxt.ll_len; + } + + assert(ll_buf); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind); + if (hist_rewind > ll_len) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ll_len, hist_rewind); + assert(hist_rewind <= ll_len); + if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1080,7 +1428,7 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, static rose_inline hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, size_t match_len, + u64a som, u64a end, UNUSED size_t match_len, u8 prog_flags) { DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); @@ -1113,9 +1461,15 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, assert(pc >= pc_base); assert((size_t)(pc - pc_base) < t->size); const u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); + assert(code <= LAST_ROSE_INSTRUCTION); switch ((enum RoseInstructionCode)code) { + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + 
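The CHECK_MASK_32, CHECK_SHUFTI_* and lookaround instructions dispatched below all lean on the same windowing idea as the helpers above (roseCheckMask32, getBufferDataComplex, getData128/getData256): assemble a fixed-width block of bytes that may straddle the history buffer and the current scan buffer, zero-fill whatever lies outside both, and hand the SIMD validator a mask telling it which bytes are real. A standalone sketch of that assembly step, stripped of SIMD; buildWindow is an illustrative name, not a Hyperscan function:

```cpp
#include <cstdint>
#include <cstring>

// Assemble a window of width (<= 32) bytes starting at signed position loc:
// negative positions reach back into the history buffer hist (length hlen),
// non-negative ones index the current buffer buf (length len). Bytes outside
// both buffers stay zero; bit i of the returned mask is set iff window[i]
// holds real data.
static uint32_t buildWindow(uint8_t *window, int width,
                            const uint8_t *hist, size_t hlen,
                            const uint8_t *buf, size_t len, int64_t loc) {
    std::memset(window, 0, size_t(width));
    uint32_t mask = 0;
    for (int i = 0; i < width; i++) {
        int64_t pos = loc + i;
        if (pos < 0) {
            if (uint64_t(-pos) <= hlen) {
                window[i] = hist[hlen + pos]; // inside history
                mask |= 1u << i;
            }
        } else if (uint64_t(pos) < len) {
            window[i] = buf[pos]; // inside current buffer
            mask |= 1u << i;
        }
    }
    return mask;
}
```

The real helpers perform the same partition with at most two copy_upto_32_bytes calls, deriving the mask from the h_shift/c_shift byte counts rather than testing per byte.
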
        PROGRAM_CASE(ANCHORED_DELAY) {
                if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
                    DEBUG_PRINTF("delay until playback\n");
@@ -1128,17 +1482,6 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
            }
            PROGRAM_NEXT_INSTRUCTION
 
-           PROGRAM_CASE(CHECK_LIT_MASK) {
-               assert(match_len);
-               struct core_info *ci = &scratch->core_info;
-               if (!roseCheckBenefits(ci, end, match_len, ri->and_mask.a8,
-                                      ri->cmp_mask.a8)) {
-                   DEBUG_PRINTF("halt: failed mask check\n");
-                   return HWLM_CONTINUE_MATCHING;
-               }
-           }
-           PROGRAM_NEXT_INSTRUCTION
-
            PROGRAM_CASE(CHECK_LIT_EARLY) {
                if (end < ri->min_offset) {
                    DEBUG_PRINTF("halt: before min_offset=%u\n",
@@ -1190,6 +1533,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
            }
            PROGRAM_NEXT_INSTRUCTION
 
+           PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+               if (!roseCheckSingleLookaround(t, scratch, ri->offset,
+                                              ri->reach_index, end)) {
+                   DEBUG_PRINTF("failed lookaround check\n");
+                   assert(ri->fail_jump); // must progress
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
            PROGRAM_CASE(CHECK_LOOKAROUND) {
                if (!roseCheckLookaround(t, scratch, ri->index, ri->count,
                                         end)) {
@@ -1213,6 +1567,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
            }
            PROGRAM_NEXT_INSTRUCTION
 
+           PROGRAM_CASE(CHECK_MASK_32) {
+               struct core_info *ci = &scratch->core_info;
+               if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
+                                    ri->neg_mask, ri->offset, end)) {
+                   assert(ri->fail_jump);
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
            PROGRAM_CASE(CHECK_BYTE) {
                const struct core_info *ci = &scratch->core_info;
                if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
@@ -1225,6 +1590,55 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
            }
            PROGRAM_NEXT_INSTRUCTION
 
+           PROGRAM_CASE(CHECK_SHUFTI_16x8) {
+               const struct core_info *ci = &scratch->core_info;
+               if (!roseCheckShufti16x8(ci, ri->nib_mask,
+                                        ri->bucket_select_mask,
+                                        ri->neg_mask, ri->offset, end)) {
+                   assert(ri->fail_jump);
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
+           PROGRAM_CASE(CHECK_SHUFTI_32x8) {
+               const struct core_info *ci = &scratch->core_info;
+               if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask,
+                                        ri->bucket_select_mask,
+                                        ri->neg_mask, ri->offset, end)) {
+                   assert(ri->fail_jump);
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
+           PROGRAM_CASE(CHECK_SHUFTI_16x16) {
+               const struct core_info *ci = &scratch->core_info;
+               if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask,
+                                         ri->bucket_select_mask,
+                                         ri->neg_mask, ri->offset, end)) {
+                   assert(ri->fail_jump);
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
+           PROGRAM_CASE(CHECK_SHUFTI_32x16) {
+               const struct core_info *ci = &scratch->core_info;
+               if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask,
+                                         ri->bucket_select_mask_hi,
+                                         ri->bucket_select_mask_lo,
+                                         ri->neg_mask, ri->offset, end)) {
+                   assert(ri->fail_jump);
+                   pc += ri->fail_jump;
+                   continue;
+               }
+           }
+           PROGRAM_NEXT_INSTRUCTION
+
            PROGRAM_CASE(CHECK_INFIX) {
                if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
                                   end)) {
@@ -1590,6 +2004,28 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
            }
            PROGRAM_NEXT_INSTRUCTION
 
+           PROGRAM_CASE(SPARSE_ITER_ANY) {
+               DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
+               const struct mmbit_sparse_iter *it =
+                   getByOffset(t, ri->iter_offset);
+               assert(ISALIGNED(it));
+
+               const u8 *roles = getRoleState(scratch->core_info.state);
+
+               u32 idx = 0;
+               u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount,
+                                               &idx, it, si_state);
+               if (i
== MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); + fatbit_clear(scratch->handled_roles); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { if (roseEnginesEod(t, scratch, end, ri->iter_offset) == HWLM_TERMINATE_MATCHING) { @@ -1614,9 +2050,23 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; + PROGRAM_CASE(CHECK_LONG_LIT) { + const char nocase = 0; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("halt: failed nocase long lit check\n"); + return HWLM_CONTINUE_MATCHING; + } } PROGRAM_NEXT_INSTRUCTION } diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 0f0e8d18..8b10bc7d 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -112,11 +112,10 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, RoseGraph &g = build->g; // add to tree RoseVertex v = add_vertex(g); - g[v].idx = build->vertexIndex++; g[v].min_offset = min_offset; g[v].max_offset = max_offset; - DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].idx, + DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index, literalId); g[v].literals.insert(literalId); build->literal_info[literalId].vertices.insert(v); @@ -137,10 +136,7 @@ RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, /* fill in report information */ g[v].reports.insert(reports.begin(), reports.end()); - RoseEdge e; - bool added; - tie(e, added) = add_edge(parent, v, g); - assert(added); + RoseEdge e = add_edge(parent, v, g); DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound); g[e].minBound = minBound; @@ -167,10 +163,10 @@ RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, RoseGraph &g = build->g; RoseVertex v = createVertex(build, literalId, min_offset, max_offset); - DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].idx, + DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, literalId); - RoseEdge e = add_edge(build->anchored_root, v, g).first; + RoseEdge e = add_edge(build->anchored_root, v, g); g[e].minBound = min_offset; g[e].maxBound = max_offset; @@ -181,8 +177,7 @@ static RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { RoseGraph &g = build->g; RoseVertex w = add_vertex(g[v], g); - g[w].idx = build->vertexIndex++; - DEBUG_PRINTF("added vertex %zu\n", g[w].idx); + DEBUG_PRINTF("added vertex %zu\n", g[w].index); for (auto lit_id : g[w].literals) { build->literal_info[lit_id].vertices.insert(w); @@ -191,7 +186,7 @@ RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { for (const auto &e : in_edges_range(v, g)) { RoseVertex s = source(e, g); add_edge(s, w, g[e], g); - DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].idx, g[w].idx); + DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index); } return w; @@ -227,7 +222,7 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool has_bounds = g[e].minBound || 
(g[e].maxBound != ROSE_BOUND_INF); DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", - g[u].idx, g[v].idx, g[e].minBound, g[e].maxBound, + g[u].index, g[v].index, g[e].minBound, g[e].maxBound, (int)g[u].fixedOffset(), (int)g[v].left); if (g[v].left) { @@ -309,7 +304,7 @@ void createVertices(RoseBuildImpl *tbi, DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust); } - DEBUG_PRINTF(" adding new vertex idx=%zu\n", tbi->g[w].idx); + DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index); vertex_map[iv].push_back(w); } else { w = created[key]; @@ -317,10 +312,7 @@ void createVertices(RoseBuildImpl *tbi, RoseVertex p = pv.first; - RoseEdge e; - bool added; - tie(e, added) = add_edge(p, w, g); - assert(added); + RoseEdge e = add_edge(p, w, g); DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound, edge_props.maxBound); g[e].minBound = edge_props.minBound; @@ -358,7 +350,7 @@ void createVertices(RoseBuildImpl *tbi, for (const auto &pv : parents) { const RoseInEdgeProps &edge_props = bd.ig[pv.second]; - RoseEdge e = add_edge(pv.first, g_v, tbi->g).first; + RoseEdge e = add_edge(pv.first, g_v, tbi->g); g[e].minBound = edge_props.minBound; g[e].maxBound = edge_props.maxBound; g[e].history = selectHistory(*tbi, bd, pv.second, e); @@ -383,7 +375,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { next.clear(); for (auto curr_v : curr) { - DEBUG_PRINTF("handling %u\n", g[curr_v].index); + DEBUG_PRINTF("handling %zu\n", g[curr_v].index); vector next_cand; insert(&next_cand, next_cand.end(), inv_adjacent_vertices(curr_v, g)); @@ -401,7 +393,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { const CharReach &cr = g[v].char_reach; if (!overlaps(*it, cr)) { - DEBUG_PRINTF("false edge %u\n", g[v].index); + DEBUG_PRINTF("false edge %zu\n", g[v].index); continue; } @@ -409,7 +401,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { clone_in_edges(g, v, v2); add_edge(v2, curr_v, g); g[v2].char_reach &= *it; - DEBUG_PRINTF("next <- %u\n", g[v2].index); + DEBUG_PRINTF("next <- %zu\n", g[v2].index); next.insert(v2); } } @@ -557,7 +549,7 @@ void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector &msk, next.clear(); CharReach cr; for (auto v : curr) { - DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, describeClass(h[v].char_reach).c_str()); cr |= h[v].char_reach; insert(&next, inv_adjacent_vertices(v, h)); @@ -705,14 +697,13 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, for (const auto &report_mapping : report_remap) { RoseVertex v = add_vertex(g); - g[v].idx = build.vertexIndex++; g[v].literals.insert(eod_event); build.literal_info[eod_event].vertices.insert(v); g[v].left.graph = eod_leftfix; g[v].left.leftfix_report = report_mapping.second; g[v].left.lag = 0; - RoseEdge e1 = add_edge(u, v, g).first; + RoseEdge e1 = add_edge(u, v, g); g[e1].minBound = 0; g[e1].maxBound = ROSE_BOUND_INF; g[v].min_offset = add_rose_depth(g[u].min_offset, @@ -728,16 +719,15 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix RoseVertex w = add_vertex(g); - g[w].idx = build.vertexIndex++; g[w].eod_accept = true; g[w].reports = report_mapping.first; g[w].min_offset = g[v].min_offset; g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g).first; + RoseEdge e = add_edge(v, w, g); g[e].minBound = 0; g[e].maxBound = 0; 
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); } } @@ -769,7 +759,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) && !edge_props.graph) || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) { - DEBUG_PRINTF("duplicating for parent %zu\n", g[u].idx); + DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index); assert(!tbi->isAnyStart(u)); u = duplicate(tbi, u); g[u].suffix.reset(); @@ -780,20 +770,20 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, if (ig[iv].type == RIV_ACCEPT) { assert(!tbi->isAnyStart(u)); if (contains(bd.early_dfas, edge_props.graph.get())) { - DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); g[u].suffix.rdfa = bd.early_dfas.at(edge_props.graph.get()); g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); } else if (edge_props.graph) { - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; assert(g[u].suffix.graph->kind == NFA_SUFFIX); /* TODO: set dfa_(min|max)_width */ } else if (edge_props.haig) { - DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index); g[u].suffix.haig = edge_props.haig; } else { - DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index); assert(!g[u].eod_accept); g[u].reports = ig[iv].reports; } @@ -803,16 +793,15 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, if (!edge_props.graph) { RoseVertex w = add_vertex(g); - g[w].idx = tbi->vertexIndex++; g[w].eod_accept = true; g[w].reports = ig[iv].reports; g[w].min_offset = g[u].min_offset; g[w].max_offset = g[u].max_offset; - RoseEdge e = add_edge(u, w, g).first; + RoseEdge e = add_edge(u, w, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); continue; } @@ -824,7 +813,7 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, assert(h.kind == NFA_SUFFIX); assert(!tbi->isAnyStart(u)); /* etable can't/shouldn't use eod event */ - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; continue; } @@ -976,7 +965,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { || ig[v_order.front()].type == RIV_ANCHORED_START); for (RoseInVertex iv : v_order) { - DEBUG_PRINTF("vertex %p\n", iv); + DEBUG_PRINTF("vertex %zu\n", ig[iv].index); if (ig[iv].type == RIV_START) { DEBUG_PRINTF("is root\n"); @@ -1588,6 +1577,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, bool finalChance) { DEBUG_PRINTF("trying to rose\n"); assert(validateKinds(ig)); + assert(hasCorrectlyNumberedVertices(ig)); if (::ue2::empty(ig)) { assert(0); @@ -1603,7 +1593,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, transformAnchoredLiteralOverlap(in, bd, cc); transformSuffixDelay(in, cc); - assert(validateKinds(ig)); + renumber_vertices(in); + assert(validateKinds(in)); map > graphs; vector ordered_graphs; // Stored in first-encounter order. 
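A side note on the `ordered_graphs` vector kept alongside `graphs` above: the map is keyed on NGHolder pointers, so iterating it directly would visit graphs in allocation-address order, which can differ from run to run. Recording each graph at first encounter keeps the later passes deterministic. A minimal sketch of the pattern; `Graph` and the `int` edge payload are stand-ins, not Hyperscan types:

```cpp
#include <map>
#include <vector>

struct Graph; // stand-in for NGHolder

// Remember each graph once, in first-encounter order, so later passes can
// walk the graphs in a stable order; the pointer-keyed map alone would
// iterate in address order.
static void recordGraph(std::map<Graph *, std::vector<int>> &graphs,
                        std::vector<Graph *> &ordered_graphs,
                        Graph *g, int edge) {
    if (graphs.find(g) == graphs.end()) {
        ordered_graphs.push_back(g); // first time we have seen this graph
    }
    graphs[g].push_back(edge);
}
```
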
@@ -1619,6 +1610,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, } NGHolder *h = in[e].graph.get(); + + assert(isCorrectlyTopped(*h)); if (!contains(graphs, h)) { ordered_graphs.push_back(h); } @@ -1760,8 +1753,7 @@ static u32 findMaxBAWidth(const NGHolder &h) { // Must be bi-anchored: no out-edges from startDs (other than its // self-loop), no in-edges to accept. - if (hasGreaterOutDegree(1, h.startDs, h) || - hasGreaterInDegree(0, h.accept, h)) { + if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) { return ROSE_BOUND_INF; } depth d = findMaxWidth(h); @@ -1887,9 +1879,9 @@ bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w, map &allocated_reports, flat_set &added_lit_ids) { const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion); - const u32 idx = w[u].index; - assert(idx < vertexDepths.size()); - const DepthMinMax &d = vertexDepths.at(idx); + const size_t index = w[u].index; + assert(index < vertexDepths.size()); + const DepthMinMax &d = vertexDepths.at(index); for (const auto &int_report : w[u].reports) { assert(int_report != MO_INVALID_IDX); @@ -2006,7 +1998,6 @@ bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { RoseVertex v = createAnchoredVertex(this, lit_id, minBound, maxBound); RoseVertex eod = add_vertex(g); - g[eod].idx = vertexIndex++; g[eod].eod_accept = true; g[eod].reports.insert(report); g[eod].min_offset = g[v].min_offset; diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index 45333a38..de3bdf0a 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -532,7 +532,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].left.leftfix_report = mask_report; } else { // Make sure our edge bounds are correct. - auto e = edge_by_target(parent, v, g).first; + RoseEdge e = edge(parent, v, g); g[e].minBound = 0; g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF; g[e].history = anchored ? 
ROSE_ROLE_HISTORY_ANCH @@ -544,7 +544,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, g[v].max_offset = v_max_offset; if (eod) { - auto e = add_edge(v, eod_v, g).first; + RoseEdge e = add_edge(v, eod_v, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -574,7 +574,8 @@ unique_ptr buildMaskRhs(const ue2::flat_set &reports, succ = u; } - add_edge(h.start, succ, h); + NFAEdge e = add_edge(h.start, succ, h); + h[e].tops.insert(DEFAULT_TOP); return rhs; } @@ -632,6 +633,7 @@ void doAddMask(RoseBuildImpl &tbi, bool anchored, = buildMaskLhs(true, minBound - prefix2_len + overlap, mask3); mhs->kind = NFA_INFIX; + setTops(*mhs); add_edge(u, v, RoseInEdgeProps(mhs, delay), ig); DEBUG_PRINTF("add anch literal too!\n"); diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 60732ff9..3d0affc6 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -549,7 +549,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, /* lit should only be connected to dot vertices */ for (auto u : inv_adjacent_vertices_range(lit_head, h)) { - DEBUG_PRINTF("checking %u\n", h[u].index); + DEBUG_PRINTF("checking %zu\n", h[u].index); if (!h[u].char_reach.all()) { return false; } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 56591de8..9f4abcad 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,20 +33,26 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" +#include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" +#include "rose_build_program.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" +#include "rose_internal.h" #include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" +#include "nfa/mcsheng_compile.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" @@ -75,6 +81,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" +#include "util/fatbit_build.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/multibit_build.h" @@ -86,6 +93,7 @@ #include "util/verify_types.h" #include +#include #include #include #include @@ -147,218 +155,6 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; -/** - * \brief Possible jump targets for roles that perform checks. - * - * Fixed up into offsets before the program is written to bytecode. - */ -enum class JumpTarget { - NO_JUMP, //!< Instruction does not jump. - PROGRAM_END, //!< Jump to end of program. - NEXT_BLOCK, //!< Jump to start of next block (sparse iter check, etc). - FIXUP_DONE, //!< Target fixup already applied. -}; - -/** \brief Role instruction model used at compile time. 
*/ -class RoseInstruction { -public: - RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) { - memset(&u, 0, sizeof(u)); - u.end.code = c; - } - - explicit RoseInstruction(enum RoseInstructionCode c) - : RoseInstruction(c, JumpTarget::NO_JUMP) {} - - bool operator<(const RoseInstruction &a) const { - if (code() != a.code()) { - return code() < a.code(); - } - if (target != a.target) { - return target < a.target; - } - return memcmp(&u, &a.u, sizeof(u)) < 0; - } - - bool operator==(const RoseInstruction &a) const { - return code() == a.code() && target == a.target && - memcmp(&u, &a.u, sizeof(u)) == 0; - } - - enum RoseInstructionCode code() const { - // Note that this sort of type-punning (relying on identical initial - // layout) is explicitly allowed by the C++11 standard. - return (enum RoseInstructionCode)u.end.code; - } - - const void *get() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_MASK: return &u.checkLitMask; - case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; - case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; - case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; - case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; - case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; - case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_INSTR_CHECK_MASK: return &u.checkMask; - case ROSE_INSTR_CHECK_BYTE: return &u.checkByte; - case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; - case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; - case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; - case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; - case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored; - case ROSE_INSTR_CATCH_UP: return &u.catchUp; - case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; - case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; - case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; - case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; - case ROSE_INSTR_SOM_ZERO: return &u.somZero; - case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; - case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_INSTR_DEDUPE: return &u.dedupe; - case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; - case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSomAware; - case ROSE_INSTR_REPORT: return &u.report; - case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; - case ROSE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; - case ROSE_INSTR_DEDUPE_AND_REPORT: return &u.dedupeAndReport; - case ROSE_INSTR_FINAL_REPORT: return &u.finalReport; - case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; - case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; - case ROSE_INSTR_SET_STATE: return &u.setState; - case ROSE_INSTR_SET_GROUPS: return &u.setGroups; - case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; - case ROSE_INSTR_CHECK_STATE: return &u.checkState; - case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; - case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; - case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; - case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod; - case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod; - case ROSE_INSTR_END: return &u.end; - } - assert(0); - return &u.end; - } - - size_t length() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_MASK: return sizeof(u.checkLitMask); - 
case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); - case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); - case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); - case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); - case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); - case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask); - case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte); - case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); - case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); - case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); - case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); - case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored); - case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); - case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); - case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); - case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); - case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); - case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero); - case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); - case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe); - case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); - case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSomAware); - case ROSE_INSTR_REPORT: return sizeof(u.report); - case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); - case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); - case ROSE_INSTR_DEDUPE_AND_REPORT: return sizeof(u.dedupeAndReport); - case ROSE_INSTR_FINAL_REPORT: return sizeof(u.finalReport); - case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); - case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); - case ROSE_INSTR_SET_STATE: return sizeof(u.setState); - case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); - case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); - case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); - case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); - case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); - case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); - case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod); - case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod); - case ROSE_INSTR_END: return sizeof(u.end); - } - assert(0); - return 0; - } - - union { - ROSE_STRUCT_CHECK_LIT_MASK checkLitMask; - ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly; - ROSE_STRUCT_CHECK_GROUPS checkGroups; - ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; - ROSE_STRUCT_CHECK_BOUNDS checkBounds; - ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; - ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_STRUCT_CHECK_MASK checkMask; - ROSE_STRUCT_CHECK_BYTE checkByte; - ROSE_STRUCT_CHECK_INFIX checkInfix; - ROSE_STRUCT_CHECK_PREFIX checkPrefix; - ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; - ROSE_STRUCT_PUSH_DELAYED pushDelayed; - ROSE_STRUCT_RECORD_ANCHORED recordAnchored; - ROSE_STRUCT_CATCH_UP catchUp; - ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; - ROSE_STRUCT_SOM_ADJUST somAdjust; - ROSE_STRUCT_SOM_LEFTFIX somLeftfix; - ROSE_STRUCT_SOM_FROM_REPORT 
somFromReport; - ROSE_STRUCT_SOM_ZERO somZero; - ROSE_STRUCT_TRIGGER_INFIX triggerInfix; - ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_STRUCT_DEDUPE dedupe; - ROSE_STRUCT_DEDUPE_SOM dedupeSom; - ROSE_STRUCT_REPORT_CHAIN reportChain; - ROSE_STRUCT_REPORT_SOM_INT reportSomInt; - ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware; - ROSE_STRUCT_REPORT report; - ROSE_STRUCT_REPORT_EXHAUST reportExhaust; - ROSE_STRUCT_REPORT_SOM reportSom; - ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; - ROSE_STRUCT_DEDUPE_AND_REPORT dedupeAndReport; - ROSE_STRUCT_FINAL_REPORT finalReport; - ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; - ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; - ROSE_STRUCT_SET_STATE setState; - ROSE_STRUCT_SET_GROUPS setGroups; - ROSE_STRUCT_SQUASH_GROUPS squashGroups; - ROSE_STRUCT_CHECK_STATE checkState; - ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; - ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; - ROSE_STRUCT_ENGINES_EOD enginesEod; - ROSE_STRUCT_SUFFIXES_EOD suffixesEod; - ROSE_STRUCT_MATCHER_EOD matcherEod; - ROSE_STRUCT_END end; - } u; - - JumpTarget target; -}; - -static -size_t hash_value(const RoseInstruction &ri) { - size_t val = 0; - boost::hash_combine(val, ri.code()); - boost::hash_combine(val, ri.target); - const char *bytes = (const char *)ri.get(); - const size_t len = ri.length(); - for (size_t i = 0; i < len; i++) { - boost::hash_combine(val, bytes[i]); - } - return val; -} - /** * \brief Structure tracking which resources are used by this Rose instance at * runtime. @@ -374,7 +170,7 @@ struct RoseResources { bool has_states = false; bool checks_groups = false; bool has_lit_delay = false; - bool has_lit_mask = false; + bool has_lit_check = false; // long literal support bool has_anchored = false; bool has_eod = false; }; @@ -397,13 +193,10 @@ struct build_context : boost::noncopyable { */ size_t numStates = 0; - /** \brief Very simple cache from sparse iter to offset, used when building - * up iterators in early misc. */ - map, u32> iterCache; - /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ - ue2::unordered_map, u32> program_cache; + ue2::unordered_map program_cache; /** \brief LookEntry list cache, so that we don't have to go scanning * through the full list to find cases we've used already. */ @@ -423,12 +216,19 @@ struct build_context : boost::noncopyable { * written to the engine_blob. */ vector litPrograms; + /** \brief List of long literals (ones with CHECK_LITERAL instructions) + * that need hash table support. */ + vector longLiterals; + /** \brief Minimum offset of a match from the floating table. */ u32 floatingMinLiteralMatchOffset = 0; + /** \brief Long literal length threshold, used in streaming mode. */ + size_t longLitLengthThreshold = 0; + /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ - vector> engine_blob; + RoseEngineBlob engine_blob; /** \brief True if reports need CATCH_UP instructions, to catch up anchored * matches, suffixes, outfixes etc. */ @@ -446,81 +246,17 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; - - /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ - static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; } -static -void pad_engine_blob(build_context &bc, size_t align) { - assert(ISALIGNED_N(bc.engine_blob_base, align)); - size_t s = bc.engine_blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - bc.engine_blob.resize(s + align - s % align); -} - -static -u32 add_to_engine_blob(build_context &bc, const void *a, const size_t len, - const size_t align) { - pad_engine_blob(bc, align); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); - - assert(ISALIGNED_N(bc.engine_blob.size(), align)); - - bc.engine_blob.resize(bc.engine_blob.size() + len); - memcpy(&bc.engine_blob.back() - len + 1, a, len); - - return verify_u32(rv); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, sizeof(a), alignof(T)); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, len, alignof(T)); -} - -template -static -u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename std::iterator_traits::value_type; - static_assert(is_pod::value, "should be pod"); - - if (b == e) { - return 0; - } - - u32 offset = add_to_engine_blob(bc, *b); - for (++b; b != e; ++b) { - add_to_engine_blob(bc, *b); - } - - return offset; -} - static const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { assert(contains(bc.engineOffsets, qi)); u32 nfa_offset = bc.engineOffsets.at(qi); - assert(nfa_offset >= bc.engine_blob_base); + assert(nfa_offset >= bc.engine_blob.base_offset); const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - - bc.engine_blob_base); + bc.engine_blob.base_offset); assert(n->queueIndex == qi); return n; } @@ -528,7 +264,7 @@ const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { static const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { u32 qi = nfa.queueIndex; - u32 nfa_offset = add_to_engine_blob(bc, nfa, nfa.length); + u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, nfa.type, nfa.length, nfa_offset); @@ -555,35 +291,32 @@ u32 countRosePrefixes(const vector &roses) { * \brief True if this Rose engine needs to run a catch up whenever a report is * generated. * - * This is only the case if there are no anchored literals, suffixes, outfixes - * etc. + * Catch up is necessary if there are output-exposed engines (suffixes, + * outfixes) or an anchored table (anchored literals, acyclic DFAs). 
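 *
 * (Editorial note: in both cases matches can be raised behind the point the
 * literal matchers have already scanned to, so the interpreter must run
 * catch-up before delivering such reports to keep matches ordered.)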
*/ static -bool needsCatchup(const RoseBuildImpl &build) { +bool needsCatchup(const RoseBuildImpl &build, + const vector &anchored_dfas) { if (!build.outfixes.empty()) { DEBUG_PRINTF("has outfixes\n"); return true; } + if (!anchored_dfas.empty()) { + DEBUG_PRINTF("has anchored dfas\n"); + return true; + } const RoseGraph &g = build.g; - if (!isLeafNode(build.anchored_root, g)) { - DEBUG_PRINTF("has anchored vertices\n"); - return true; - } - for (auto v : vertices_range(g)) { if (build.root == v) { continue; } - if (build.anchored_root == v) { - assert(isLeafNode(v, g)); continue; } - if (g[v].suffix) { - DEBUG_PRINTF("vertex %zu has suffix\n", g[v].idx); + DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); return true; } @@ -594,7 +327,7 @@ bool needsCatchup(const RoseBuildImpl &build) { } static -bool isPureFloating(const RoseResources &resources) { +bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -621,6 +354,12 @@ bool isPureFloating(const RoseResources &resources) { return false; } + if (cc.streaming && resources.has_lit_check) { + DEBUG_PRINTF("has long literals in streaming mode, which needs " + "long literal table support\n"); + return false; + } + if (resources.checks_groups) { DEBUG_PRINTF("has group checks\n"); return false; @@ -664,11 +403,11 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); - DEBUG_PRINTF("has_lit_mask=%d\n", bc.resources.has_lit_mask); + DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); - if (isPureFloating(bc.resources)) { + if (isPureFloating(bc.resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } @@ -708,7 +447,7 @@ static void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, - u32 floatingStreamStateRequired, u32 historyRequired, + u32 longLitStreamStateRequired, u32 historyRequired, RoseStateOffsets *so) { u32 curr_offset = 0; @@ -726,8 +465,8 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, so->activeLeftArray_size = mmbit_size(activeLeftCount); curr_offset += so->activeLeftArray_size; - so->floatingMatcherState = curr_offset; - curr_offset += floatingStreamStateRequired; + so->longLitState = curr_offset; + curr_offset += longLitStreamStateRequired; // ONE WHOLE BYTE for each active leftfix with lag. 
so->leftfixLagTable = curr_offset; @@ -793,7 +532,7 @@ bool nfaStuckOn(const NGHolder &g) { set done_tops; for (const auto &e : out_edges_range(g.start, g)) { - tops.insert(g[e].top); + insert(&tops, g[e].tops); if (!g[target(e, g)].char_reach.all()) { continue; } @@ -802,7 +541,7 @@ bool nfaStuckOn(const NGHolder &g) { insert(&asucc, adjacent_vertices(target(e, g), g)); if (asucc == succ) { - done_tops.insert(g[e].top); + insert(&done_tops, g[e].tops); } } @@ -878,7 +617,7 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); - bool d_big = dfa_impl->type == MCCLELLAN_NFA_16; + bool d_big = isBigDfaType(dfa_impl->type); bool n_vsmall = nfa_impl->nPositions <= 32; bool n_br = has_bounded_repeats(*nfa_impl); DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel, @@ -929,10 +668,17 @@ buildRepeatEngine(const CastleProto &proto, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, +aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, + const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! auto dfa = shengCompile(rdfa, cc, rm); + if (!dfa && !is_transient) { + // Sheng wasn't successful, so unleash McClellan! + /* We don't try the hybrid for transient prefixes due to the extra + * bytecode and that they are usually run on small blocks */ + dfa = mcshengCompile(rdfa, cc, rm); + } if (!dfa) { // Sheng wasn't successful, so unleash McClellan! dfa = mcclellanCompile(rdfa, cc, rm); @@ -960,7 +706,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.dfa()) { - auto d = getDfa(*suff.dfa(), cc, rm); + auto d = getDfa(*suff.dfa(), false, cc, rm); assert(d); return d; } @@ -989,7 +735,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -1109,12 +855,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = getDfa(*left.dfa(), cc, rm); + n = getDfa(*left.dfa(), is_transient, cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = getDfa(*rdfa, cc, rm); + n = getDfa(*rdfa, is_transient, cc, rm); assert(n); } } @@ -1123,8 +869,8 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) { map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - assert(contains(triggers, 0)); // single top - n = constructLBR(*left.graph(), triggers[0], cc, rm); + assert(triggers.size() == 1); // single top + n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm); } if (!n && left.graph()) { @@ -1141,7 +887,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, is_transient, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1210,7 +956,7 @@ void appendTailToHolder(NGHolder &h, const vector &tail) { appendTailToHolder(h, e.first, e.second, tail); } - h.renumberEdges(); + renumber_edges(h); } static @@ -1495,11 +1241,11 @@ void updateTops(const 
RoseGraph &g, const TamaInfo &tamaInfo, for (const auto &n : tamaInfo.subengines) { for (const auto &v : subengines[i].vertices) { if (is_suffix) { - tamaProto.add(n, g[v].idx, g[v].suffix.top, + tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); } else { for (const auto &e : in_edges_range(v, g)) { - tamaProto.add(n, g[v].idx, g[e].rose_top, + tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); } } @@ -1543,7 +1289,7 @@ void buildInfixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].index); g[v].left.tamarama = tamaProto; } } @@ -1562,7 +1308,7 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc, for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + DEBUG_PRINTF("vert id:%zu\n", g[v].index); g[v].suffix.tamarama = tamaProto; } const auto &v = verts[0]; @@ -1716,7 +1462,7 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, // Sanity check: our NFA should contain each of the tops mentioned on // our in-edges. - assert(roseHasTops(g, v)); + assert(roseHasTops(build, v)); if (contains(leftfixes, leftfix)) { // NFA already built. @@ -1743,7 +1489,7 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, } if (leftfixes.size() > 1) { - DEBUG_PRINTF("leftfix size:%lu\n", leftfixes.size()); + DEBUG_PRINTF("leftfix size:%zu\n", leftfixes.size()); vector> groups; exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups); buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map, @@ -1785,7 +1531,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, // Sanity check: our NFA should contain each of the tops mentioned on // our in-edges. - assert(roseHasTops(g, v)); + assert(roseHasTops(tbi, v)); bool is_transient = contains(tbi.transient, leftfix); @@ -1877,7 +1623,7 @@ public: aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the mighty DFA! - return getDfa(*rdfa, build.cc, build.rm); + return getDfa(*rdfa, false, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { @@ -1905,7 +1651,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } @@ -2053,7 +1799,7 @@ void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { const suffix_id s(g[v].suffix); - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. if (contains(bc.suffixes, s)) { @@ -2150,7 +1896,7 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, const suffix_id s(g[v].suffix); - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. 
if (contains(suffixes, s)) { @@ -2180,7 +1926,7 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, } if (suffixes.size() > 1) { - DEBUG_PRINTF("suffix size:%lu\n", suffixes.size()); + DEBUG_PRINTF("suffix size:%zu\n", suffixes.size()); vector> groups; exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups); buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map, @@ -2240,24 +1986,13 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, } static -void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { +void buildCountingMiracles(build_context &bc) { map, u32> pre_built; - // To ensure compile determinism, we need to iterate over our leftfixes in - // a stronger order than directly over bc.leftfix_info. - vector cm_vertices; - for (const auto &m : bc.leftfix_info) { - if (m.second.countingMiracleCount) { - cm_vertices.push_back(m.first); + for (left_build_info &lbi : bc.leftfix_info | map_values) { + if (!lbi.countingMiracleCount) { + continue; } - } - sort(begin(cm_vertices), end(cm_vertices), VertexIndexComp(build.g)); - - DEBUG_PRINTF("%zu vertices with counting miracles\n", cm_vertices.size()); - - for (const auto &v : cm_vertices) { - auto &lbi = bc.leftfix_info.at(v); - assert(lbi.countingMiracleCount); const CharReach &cr = lbi.countingMiracleReach; assert(!cr.all() && !cr.none()); @@ -2275,7 +2010,7 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { rcm.c = cr.find_first(); } else { rcm.shufti = 1; - int rv = shuftiBuildMasks(cr, &rcm.lo, &rcm.hi); + int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi); if (rv == -1) { DEBUG_PRINTF("failed to build shufti\n"); lbi.countingMiracleCount = 0; /* remove counting miracle */ @@ -2287,7 +2022,7 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { rcm.count = lbi.countingMiracleCount; - lbi.countingMiracleOffset = add_to_engine_blob(bc, rcm); + lbi.countingMiracleOffset = bc.engine_blob.add(rcm); pre_built[key] = lbi.countingMiracleOffset; DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count, lbi.countingMiracleOffset); @@ -2456,24 +2191,6 @@ u32 RoseBuildImpl::calcHistoryRequired() const { return m ? m - 1 : 0; } -// Adds a sparse iterator to the end of the iterator table, returning its -// offset. 
-static -u32 addIteratorToTable(build_context &bc, - const vector &iter) { - if (contains(bc.iterCache, iter)) { - DEBUG_PRINTF("cache hit\n"); - u32 offset = bc.iterCache.at(iter); - return offset; - } - - u32 offset = add_to_engine_blob(bc, iter.begin(), iter.end()); - - bc.iterCache.insert(make_pair(iter, offset)); - - return offset; -} - static u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { vector lb_roles; @@ -2495,7 +2212,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { vector iter; mmbBuildSparseIterator(iter, lb_roles, bc.numStates); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); } static @@ -2536,12 +2253,12 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, u32 minWidth = ROSE_BOUND_INF; for (auto v : vertices_range(g)) { if (build.isAnchored(v) || build.isVirtualVertex(v)) { - DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].idx); + DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index); continue; } u32 w = g[v].min_offset; - DEBUG_PRINTF("%zu m_o = %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w); if (w < minWidth) { minWidth = w; @@ -2582,7 +2299,7 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, for (auto &e : qi_to_ekeys) { assert(!e.second.empty()); e.second.push_back(INVALID_EKEY); /* terminator */ - (*out)[e.first] = add_to_engine_blob(bc, e.second.begin(), + (*out)[e.first] = bc.engine_blob.add(e.second.begin(), e.second.end()); } } @@ -2607,7 +2324,7 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { vector iter; mmbBuildSparseIterator(iter, keys, activeQueueCount); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); } static @@ -2770,129 +2487,8 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { return out; } -/** - * \brief Flattens a list of role programs into one finalised program with its - * fail_jump/done_jump targets set correctly. - */ static -vector -flattenProgram(const vector> &programs) { - vector out; - - vector offsets; // offset of each instruction (bytes) - vector blocks; // track which block we're in - vector block_offsets; // start offsets for each block - - DEBUG_PRINTF("%zu program blocks\n", programs.size()); - - size_t curr_offset = 0; - for (const auto &program : programs) { - DEBUG_PRINTF("block with %zu instructions\n", program.size()); - block_offsets.push_back(curr_offset); - for (const auto &ri : program) { - assert(ri.code() != ROSE_INSTR_END); - out.push_back(ri); - offsets.push_back(curr_offset); - blocks.push_back(block_offsets.size() - 1); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - } - - // Add a final END instruction, which is its own block. - out.emplace_back(ROSE_INSTR_END); - block_offsets.push_back(curr_offset); - offsets.push_back(curr_offset); - - assert(offsets.size() == out.size()); - - for (size_t i = 0; i < out.size(); i++) { - auto &ri = out[i]; - - u32 jump_target = 0; - switch (ri.target) { - case JumpTarget::NO_JUMP: - case JumpTarget::FIXUP_DONE: - continue; // Next instruction. - case JumpTarget::PROGRAM_END: - assert(i != out.size() - 1); - jump_target = offsets.back(); - break; - case JumpTarget::NEXT_BLOCK: - assert(blocks[i] + 1 < block_offsets.size()); - jump_target = block_offsets[blocks[i] + 1]; - break; - } - - // We currently always make progress and never jump backwards. 
- assert(jump_target > offsets[i]); - assert(jump_target <= offsets.back()); - u32 jump_val = jump_target - offsets[i]; - - switch (ri.code()) { - case ROSE_INSTR_ANCHORED_DELAY: - ri.u.anchoredDelay.done_jump = jump_val; - break; - case ROSE_INSTR_CHECK_ONLY_EOD: - ri.u.checkOnlyEod.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BOUNDS: - ri.u.checkBounds.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_NOT_HANDLED: - ri.u.checkNotHandled.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_LOOKAROUND: - ri.u.checkLookaround.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MASK: - ri.u.checkMask.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BYTE: - ri.u.checkByte.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_INFIX: - ri.u.checkInfix.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_PREFIX: - ri.u.checkPrefix.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE: - ri.u.dedupe.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_SOM: - ri.u.dedupeSom.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_AND_REPORT: - ri.u.dedupeAndReport.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_EXHAUSTED: - ri.u.checkExhausted.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MIN_LENGTH: - ri.u.checkMinLength.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_STATE: - ri.u.checkState.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.fail_jump = jump_val; - break; - default: - assert(0); // Unhandled opcode? - break; - } - - ri.target = JumpTarget::FIXUP_DONE; - } - - return out; -} - -static -void applyFinalSpecialisation(vector &program) { +void applyFinalSpecialisation(RoseProgram &program) { assert(!program.empty()); assert(program.back().code() == ROSE_INSTR_END); if (program.size() < 2) { @@ -2901,26 +2497,18 @@ void applyFinalSpecialisation(vector &program) { /* Replace the second-to-last instruction (before END) with a one-shot * specialisation if available. 
*/ - auto &ri = *(next(program.rbegin())); - switch (ri.code()) { - case ROSE_INSTR_REPORT: { + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast(it->get())) { DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - auto ri2 = RoseInstruction(ROSE_INSTR_FINAL_REPORT); - ri2.u.finalReport.onmatch = ri.u.report.onmatch; - ri2.u.finalReport.offset_adjust = ri.u.report.offset_adjust; - ri = ri2; - break; - } - default: - break; + program.replace(it, make_unique( + ri->onmatch, ri->offset_adjust)); } } static -void recordResources(RoseResources &resources, - const vector &program) { +void recordResources(RoseResources &resources, const RoseProgram &program) { for (const auto &ri : program) { - switch (ri.code()) { + switch (ri->code()) { case ROSE_INSTR_TRIGGER_SUFFIX: resources.has_suffixes = true; break; @@ -2942,8 +2530,9 @@ void recordResources(RoseResources &resources, case ROSE_INSTR_PUSH_DELAYED: resources.has_lit_delay = true; break; - case ROSE_INSTR_CHECK_LIT_MASK: - resources.has_lit_mask = true; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; break; default: break; @@ -2978,22 +2567,31 @@ void recordResources(RoseResources &resources, } static -u32 writeProgram(build_context &bc, const vector &program) { +void recordLongLiterals(build_context &bc, const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + bc.longLiterals.emplace_back(ri_check->literal, true); + } + } +} + +static +u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { DEBUG_PRINTF("no program\n"); return 0; } - assert(program.back().code() == ROSE_INSTR_END); - assert(program.size() >= 1); - - // This program must have been flattened; i.e. all check instructions must - // have their jump offsets set. 
- assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) { - return ri.target == JumpTarget::NO_JUMP || - ri.target == JumpTarget::FIXUP_DONE; - })); - auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -3001,21 +2599,15 @@ u32 writeProgram(build_context &bc, const vector &program) { } recordResources(bc.resources, program); + recordLongLiterals(bc, program); - DEBUG_PRINTF("writing %zu instructions\n", program.size()); - u32 programOffset = 0; - for (const auto &ri : program) { - u32 offset = - add_to_engine_blob(bc, ri.get(), ri.length(), ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("code %u len %zu written at offset %u\n", ri.code(), - ri.length(), offset); - if (!programOffset) { - programOffset = offset; - } - } - DEBUG_PRINTF("program begins at offset %u\n", programOffset); - bc.program_cache.emplace(program, programOffset); - return programOffset; + u32 len = 0; + auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); + u32 offset = bc.engine_blob.add(prog_bytecode.get(), len, + ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("prog len %u written at offset %u\n", len, offset); + bc.program_cache.emplace(move(program), offset); + return offset; } static @@ -3233,8 +2825,7 @@ bool checkReachWithFlip(const CharReach &cr, u8 &andmask, } static -bool makeRoleByte(const vector &look, - vector &program) { +bool makeRoleByte(const vector &look, RoseProgram &program) { if (look.size() == 1) { const auto &entry = look[0]; u8 andmask_u8, cmpmask_u8; @@ -3244,21 +2835,17 @@ bool makeRoleByte(const vector &look, } s32 checkbyte_offset = verify_s32(entry.offset); DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BYTE, - JumpTarget::NEXT_BLOCK); - ri.u.checkByte.and_mask = andmask_u8; - ri.u.checkByte.cmp_mask = cmpmask_u8; - ri.u.checkByte.negation = flip; - ri.u.checkByte.offset = checkbyte_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(andmask_u8, cmpmask_u8, flip, + checkbyte_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; } static -bool makeRoleMask(const vector &look, - vector &program) { +bool makeRoleMask(const vector &look, RoseProgram &program) { if (look.back().offset < look.front().offset + 8) { s32 base_offset = verify_s32(look.front().offset); u64a and_mask = 0; @@ -3280,21 +2867,287 @@ bool makeRoleMask(const vector &look, } DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", and_mask, cmp_mask); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK, - JumpTarget::NEXT_BLOCK); - ri.u.checkMask.and_mask = and_mask; - ri.u.checkMask.cmp_mask = cmp_mask; - ri.u.checkMask.neg_mask = neg_mask; - ri.u.checkMask.offset = base_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; } +static UNUSED +string convertMaskstoString(u8 *p, int byte_len) { + string s; + for (int i = 0; i < byte_len; i++) { + u8 hi = *p >> 4; + u8 lo = *p & 0xf; + s += (char)(hi + (hi < 10 ? 48 : 87)); + s += (char)(lo + (lo < 10 ? 
48 : 87));
+        p++;
+    }
+    return s;
+}
+
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    array<u8, 32> and_mask, cmp_mask;
+    and_mask.fill(0);
+    cmp_mask.fill(0);
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n",
+                 convertMaskstoString(and_mask.data(), 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n",
+                 convertMaskstoString(cmp_mask.data(), 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
+                                                base_offset, end_inst);
+    program.add_before_end(move(ri));
+    return true;
+}
+
+// Sorting by the size of every bucket.
+// Used in map<u32, vector<s8>, cmpNibble>.
+struct cmpNibble {
+    bool operator()(const u32 data1, const u32 data2) const{
+        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
+        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
+        return std::tie(size1, data1) < std::tie(size2, data2);
+    }
+};
+
+// Insert all pairs of bucket and offset into buckets.
+static really_inline
+void getAllBuckets(const vector<LookEntry> &look,
+                   map<u32, vector<s8>, cmpNibble> &buckets, u32 &neg_mask) {
+    s32 base_offset = verify_s32(look.front().offset);
+    for (const auto &entry : look) {
+        CharReach cr = entry.reach;
+        // Flip heavy character classes to save buckets.
+        if (cr.count() > 128 ) {
+            cr.flip();
+        } else {
+            neg_mask ^= 1 << (entry.offset - base_offset);
+        }
+        map<u16, u16> lo2hi;
+        // We treat Ascii Table as a 16x16 grid.
+        // Push every row in cr into lo2hi and mark the row number.
+        for (size_t i = cr.find_first(); i != CharReach::npos;) {
+            u8 it_hi = i >> 4;
+            u16 low_encode = 0;
+            while (i != CharReach::npos && (i >> 4) == it_hi) {
+                low_encode |= 1 << (i & 0xf);
+                i = cr.find_next(i);
+            }
+            lo2hi[low_encode] |= 1 << it_hi;
+        }
+        for (const auto &it : lo2hi) {
+            u32 hi_lo = (it.second << 16) | it.first;
+            buckets[hi_lo].push_back(entry.offset);
+        }
+    }
+}
+
+// Once we have a new bucket, we'll try to combine it with all old buckets.
+static really_inline
+void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
+    u16 hi = hi_lo >> 16;
+    u16 lo = hi_lo & 0xffff;
+    for (const auto pairs : nib) {
+        u32 old = pairs.first;
+        if ((old >> 16) == hi || (old & 0xffff) == lo) {
+            if (!nib[old | hi_lo]) {
+                nib[old | hi_lo] = nib[old] | nib[hi_lo];
+            }
+        }
+    }
+}
+
+static really_inline
+void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
+    for (u8 index = 0; data > 0; data >>= 1, index++) {
+        if (data & 1) {
+            // 0 ~ 7 bucket in first 16 bytes,
+            // 8 ~ 15 bucket in second 16 bytes.
+            if (bit_index >= 8) {
+                mask[index + 16] |= 1 << (bit_index - 8);
+            } else {
+                mask[index] |= 1 << bit_index;
+            }
+        }
+    }
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+
+    s32 base_offset = verify_s32(look.front().offset);
+    if (look.back().offset >= base_offset + 32) {
+        return false;
+    }
+    array<u8, 32> hi_mask, lo_mask;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    array<u8, 32> bucket_select_hi, bucket_select_lo;
+    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
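    // (Editorial worked example, not part of the patch: getAllBuckets()
    // above encodes each reach as hi_lo = (high-nibble rows << 16) |
    // low-nibble pattern. For the reach {'A','a'} = {0x41, 0x61}, both
    // bytes share the low-nibble pattern 1 << 1 = 0x0002, seen from high
    // nibbles 4 and 6, i.e. (1 << 4) | (1 << 6) = 0x0050, so the class
    // encodes as hi_lo = 0x00500002 and consumes a single shufti bucket.)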
+    bucket_select_lo.fill(0);
+    u8 bit_index = 0; // number of buckets
+    map<u32, u16> nib; // map every bucket to its bucket number.
+    map<u32, vector<s8>, cmpNibble> bucket2offsets;
+    u32 neg_mask = ~0u;
+
+    getAllBuckets(look, bucket2offsets, neg_mask);
+
+    for (const auto &it : bucket2offsets) {
+        u32 hi_lo = it.first;
+        // New bucket.
+        if (!nib[hi_lo]) {
+            if (bit_index >= 16) {
+                return false;
+            }
+            nib[hi_lo] = 1 << bit_index;
+
+            nibUpdate(nib, hi_lo);
+            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index);
+            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index);
+            bit_index++;
+        }
+
+        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
+
+        // Update bucket_select_mask.
+        u8 nib_hi = nib[hi_lo] >> 8;
+        u8 nib_lo = nib[hi_lo] & 0xff;
+        for (const auto offset : it.second) {
+            bucket_select_hi[offset - base_offset] |= nib_hi;
+            bucket_select_lo[offset - base_offset] |= nib_lo;
+        }
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 32).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
+
+    const auto *end_inst = program.end_instruction();
+    if (bit_index < 8) {
+        if (look.back().offset < base_offset + 16) {
+            neg_mask &= 0xffff;
+            array<u8, 32> nib_mask;
+            array<u8, 16> bucket_select_mask_16;
+            copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+            copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
+            copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
+                 bucket_select_mask_16.begin());
+            auto ri = make_unique<RoseInstrCheckShufti16x8>
+                      (nib_mask, bucket_select_mask_16,
+                       neg_mask, base_offset, end_inst);
+            program.add_before_end(move(ri));
+        } else {
+            array<u8, 16> hi_mask_16;
+            array<u8, 16> lo_mask_16;
+            copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
+            copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
+            auto ri = make_unique<RoseInstrCheckShufti32x8>
+                      (hi_mask_16, lo_mask_16, bucket_select_lo,
+                       neg_mask, base_offset, end_inst);
+            program.add_before_end(move(ri));
+        }
+    } else {
+        if (look.back().offset < base_offset + 16) {
+            neg_mask &= 0xffff;
+            array<u8, 32> bucket_select_mask_32;
+            copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
+                 bucket_select_mask_32.begin());
+            copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16,
+                 bucket_select_mask_32.begin() + 16);
+            auto ri = make_unique<RoseInstrCheckShufti16x16>
+                      (hi_mask, lo_mask, bucket_select_mask_32,
+                       neg_mask, base_offset, end_inst);
+            program.add_before_end(move(ri));
+        } else {
+            auto ri = make_unique<RoseInstrCheckShufti32x16>
+                      (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
+                       neg_mask, base_offset, end_inst);
+            program.add_before_end(move(ri));
+        }
+    }
+    return true;
+}
+
+/**
+ * Builds a lookaround instruction, or an appropriate specialization if one is
+ * available.
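 * (Editorial note: the helpers above are tried cheapest-first in the body
 * below: a single-byte check, a one-entry lookaround, an 8-byte CHECK_MASK,
 * a 32-byte CHECK_MASK_32, then the shufti variants, with the generic
 * CHECK_LOOKAROUND instruction as the fallback.)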
+ */ +static +void makeLookaroundInstruction(build_context &bc, const vector &look, + RoseProgram &program) { + assert(!look.empty()); + + if (makeRoleByte(look, program)) { + return; + } + + if (look.size() == 1) { + s8 offset = look.begin()->offset; + u32 look_idx = addLookaround(bc, look); + auto ri = make_unique(offset, look_idx, + program.end_instruction()); + program.add_before_end(move(ri)); + return; + } + + if (makeRoleMask(look, program)) { + return; + } + + if (makeRoleMask32(look, program)) { + return; + } + + if (makeRoleShufti(look, program)) { + return; + } + + u32 look_idx = addLookaround(bc, look); + u32 look_count = verify_u32(look.size()); + + auto ri = make_unique(look_idx, look_count, + program.end_instruction()); + program.add_before_end(move(ri)); +} + static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3317,28 +3170,12 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - if (makeRoleByte(look, program)) { - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - DEBUG_PRINTF("role has lookaround\n"); - u32 look_idx = addLookaround(bc, look); - u32 look_count = verify_u32(look.size()); - - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, - JumpTarget::NEXT_BLOCK); - ri.u.checkLookaround.index = look_idx; - ri.u.checkLookaround.count = look_count; - program.push_back(ri); + makeLookaroundInstruction(bc, look, program); } static void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; @@ -3352,26 +3189,24 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); bool is_prefix = build.isRootSuccessor(v); + const auto *end_inst = program.end_instruction(); + + unique_ptr ri; if (is_prefix) { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_PREFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkPrefix.queue = lni.queue; - ri.u.checkPrefix.lag = build.g[v].left.lag; - ri.u.checkPrefix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } else { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_INFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkInfix.queue = lni.queue; - ri.u.checkInfix.lag = build.g[v].left.lag; - ri.u.checkInfix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } + program.add_before_end(move(ri)); } static void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { + RoseVertex v, RoseProgram &program) { // Only relevant for roles that can be triggered by the anchored table. 
if (!build.isAnchored(v)) { return; @@ -3383,36 +3218,34 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, return; } - auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY, - JumpTarget::NEXT_BLOCK); - ri.u.anchoredDelay.groups = build.g[v].groups; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(build.g[v].groups, end_inst); + program.add_before_end(move(ri)); } static void makeDedupe(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); - ri.u.dedupe.quash_som = report.quashSom; - ri.u.dedupe.dkey = build.rm.getDkey(report); - ri.u.dedupe.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + make_unique(report.quashSom, build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); - ri.u.dedupeSom.quash_som = report.quashSom; - ri.u.dedupeSom.dkey = build.rm.getDkey(report); - ri.u.dedupeSom.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(report.quashSom, + build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeCatchup(RoseBuildImpl &build, build_context &bc, - const flat_set &reports, - vector &program) { + const flat_set &reports, RoseProgram &program) { if (!bc.needs_catchup) { return; } @@ -3430,12 +3263,12 @@ void makeCatchup(RoseBuildImpl &build, build_context &bc, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP); + program.add_before_end(make_unique()); } static void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, - vector &program) { + RoseProgram &program) { if (!bc.needs_mpv_catchup) { return; } @@ -3445,13 +3278,15 @@ void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP_MPV); + program.add_before_end(make_unique()); } static void writeSomOperation(const Report &report, som_operation *op) { assert(op); + memset(op, 0, sizeof(*op)); + switch (report.type) { case EXTERNAL_CALLBACK_SOM_REL: op->type = SOM_EXTERNAL_CALLBACK_REL; @@ -3521,51 +3356,46 @@ void writeSomOperation(const Report &report, som_operation *op) { static void makeReport(RoseBuildImpl &build, const ReportID id, - const bool has_som, vector &program) { + const bool has_som, RoseProgram &program) { assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); - vector report_block; + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); // Handle min/max offset checks. if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, - JumpTarget::NEXT_BLOCK); - ri.u.checkBounds.min_bound = report.minOffset; - ri.u.checkBounds.max_bound = report.maxOffset; - report_block.push_back(move(ri)); + auto ri = make_unique(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); } // If this report has an exhaustion key, we can check it in the program // rather than waiting until we're in the callback adaptor. 
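    // (Editorial sketch: the CHECK_EXHAUSTED instruction added below boils
    // down to a bit-test on the exhaustion vector held in stream state,
    // roughly:
    //
    //     if (evec[ekey / 8] & (1U << (ekey % 8))) {
    //         // report already exhausted: jump to the end of this block
    //     }
    //
    // The exact vector layout belongs to the runtime; the point is that the
    // test is hoisted out of the match-callback adaptor into the program.)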
if (report.ekey != INVALID_EKEY) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, - JumpTarget::NEXT_BLOCK); - ri.u.checkExhausted.ekey = report.ekey; - report_block.push_back(move(ri)); + auto ri = make_unique(report.ekey, end_inst); + report_block.add_before_end(move(ri)); } // External SOM reports that aren't passthrough need their SOM value // calculated. if (isExternalSomReport(report) && report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); - writeSomOperation(report, &ri.u.somFromReport.som); - report_block.push_back(move(ri)); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } // Min length constraint. if (report.minLength > 0) { assert(build.hasSom); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH, - JumpTarget::NEXT_BLOCK); - ri.u.checkMinLength.end_adj = report.offsetAdjust; - ri.u.checkMinLength.min_length = report.minLength; - report_block.push_back(move(ri)); + auto ri = make_unique( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); } if (report.quashSom) { - report_block.emplace_back(ROSE_INSTR_SOM_ZERO); + report_block.add_before_end(make_unique()); } switch (report.type) { @@ -3576,42 +3406,30 @@ void makeReport(RoseBuildImpl &build, const ReportID id, bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { if (needs_dedupe) { - report_block.emplace_back(ROSE_INSTR_DEDUPE_AND_REPORT, - JumpTarget::NEXT_BLOCK); - auto &ri = report_block.back(); - ri.u.dedupeAndReport.quash_som = report.quashSom; - ri.u.dedupeAndReport.dkey = build.rm.getDkey(report); - ri.u.dedupeAndReport.onmatch = report.onmatch; - ri.u.dedupeAndReport.offset_adjust = report.offsetAdjust; + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT); - auto &ri = report_block.back(); - ri.u.report.onmatch = report.onmatch; - ri.u.report.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } } else { if (needs_dedupe) { makeDedupe(build, report, report_block); } - report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportExhaust.onmatch = report.onmatch; - ri.u.reportExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } else { // has_som makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } break; @@ -3627,20 +3445,18 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_SET_FROM: case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: if (has_som) { - 
report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomAware.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomInt.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } break; case INTERNAL_ROSE_CHAIN: { - report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); - auto &ri = report_block.back(); - ri.u.reportChain.event = report.onmatch; - ri.u.reportChain.top_squash_distance = report.topSquashDistance; + report_block.add_before_end(make_unique( + report.onmatch, report.topSquashDistance)); break; } case EXTERNAL_CALLBACK_SOM_REL: @@ -3649,31 +3465,21 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case EXTERNAL_CALLBACK_SOM_REV_NFA: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; case EXTERNAL_CALLBACK_SOM_PASS: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; @@ -3683,15 +3489,12 @@ void makeReport(RoseBuildImpl &build, const ReportID id, } assert(!report_block.empty()); - report_block = flattenProgram({report_block}); - assert(report_block.back().code() == ROSE_INSTR_END); - report_block.pop_back(); - insert(&program, program.end(), report_block); + program.add_block(move(report_block)); } static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3700,29 +3503,28 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, if (g[v].left.tracksSom()) { assert(contains(bc.leftfix_info, v)); const left_build_info &lni = bc.leftfix_info.at(v); - auto ri = RoseInstruction(ROSE_INSTR_SOM_LEFTFIX); - ri.u.somLeftfix.queue = lni.queue; - ri.u.somLeftfix.lag = g[v].left.lag; - program.push_back(ri); + program.add_before_end( + make_unique(lni.queue, g[v].left.lag)); has_som = true; } else if (g[v].som_adjust) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_ADJUST); - 
ri.u.somAdjust.distance = g[v].som_adjust; - program.push_back(ri); + program.add_before_end( + make_unique(g[v].som_adjust)); has_som = true; } const auto &reports = g[v].reports; makeCatchup(build, bc, reports, program); + RoseProgram report_block; for (ReportID id : reports) { - makeReport(build, id, has_som, program); + makeReport(build, id, has_som, report_block); } + program.add_before_end(move(report_block)); } static void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; if (!g[v].suffix) { return; @@ -3736,7 +3538,7 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, auto tamaProto = g[v].suffix.tamarama.get(); assert(tamaProto); u32 top = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].idx, + tamaProto->top_remap.at(make_pair(g[v].index, g[v].suffix.top)); assert(top < MQE_INVALID); suffixEvent = top; @@ -3751,15 +3553,13 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); suffixEvent = MQE_TOP; } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_SUFFIX); - ri.u.triggerSuffix.queue = qi; - ri.u.triggerSuffix.event = suffixEvent; - program.push_back(ri); + program.add_before_end( + make_unique(qi, suffixEvent)); } static void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; rose_group groups = g[v].groups; if (!groups) { @@ -3790,17 +3590,15 @@ void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); - ri.u.setGroups.groups = groups; - program.push_back(ri); + program.add_before_end(make_unique(groups)); } static void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, vector &program) { + RoseVertex u, RoseProgram &program) { const auto &g = build.g; - vector infix_program; + vector infix_program; for (const auto &e : out_edges_range(u, g)) { RoseVertex v = target(e, g); @@ -3822,7 +3620,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, auto tamaProto = g[v].left.tamarama.get(); assert(tamaProto); top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].idx, g[e].rose_top)); + make_pair(g[v].index, g[e].rose_top)); assert(top < MQE_INVALID); } else if (!isMultiTopType(nfa->type)) { assert(num_tops(g[v].left) == 1); @@ -3832,11 +3630,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, assert(top < MQE_INVALID); } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_INFIX); - ri.u.triggerInfix.queue = lbi.queue; - ri.u.triggerInfix.event = top; - ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; - infix_program.push_back(ri); + infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); } if (infix_program.empty()) { @@ -3844,30 +3638,33 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } // Order, de-dupe and add instructions to the end of program. 
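    // (Editorial note: with the POD instruction union gone, ordering must be
    // spelled out explicitly -- std::tie below builds a lexicographic
    // (cancel, queue, event) comparison, and the usual sort/unique/erase
    // idiom then drops duplicate triggers so that the emitted bytecode stays
    // deterministic across compiles.)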
 
     // Order, de-dupe and add instructions to the end of program.
-    sort(begin(infix_program), end(infix_program));
-    unique_copy(begin(infix_program), end(infix_program),
-                back_inserter(program));
+    sort(begin(infix_program), end(infix_program),
+         [](const RoseInstrTriggerInfix &a, const RoseInstrTriggerInfix &b) {
+             return tie(a.cancel, a.queue, a.event) <
+                    tie(b.cancel, b.queue, b.event);
+         });
+    infix_program.erase(unique(begin(infix_program), end(infix_program)),
+                        end(infix_program));
+    for (const auto &ri : infix_program) {
+        program.add_before_end(make_unique<RoseInstrTriggerInfix>(ri));
+    }
 }
 
 static
 void makeRoleSetState(const build_context &bc, RoseVertex v,
-                      vector<RoseInstruction> &program) {
+                      RoseProgram &program) {
     // We only need this instruction if a state index has been assigned to this
     // vertex.
     auto it = bc.roleStateIndices.find(v);
     if (it == end(bc.roleStateIndices)) {
         return;
     }
-
-    u32 idx = it->second;
-    auto ri = RoseInstruction(ROSE_INSTR_SET_STATE);
-    ri.u.setState.index = idx;
-    program.push_back(ri);
+    program.add_before_end(make_unique<RoseInstrSetState>(it->second));
 }
 
 static
 void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
-                         const RoseEdge &e, vector<RoseInstruction> &program) {
+                         const RoseEdge &e, RoseProgram &program) {
     const RoseGraph &g = build.g;
     const RoseVertex u = source(e, g);
 
@@ -3908,19 +3705,14 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
     // than just {length, inf}.
     assert(min_bound > lit_length || max_bound < MAX_OFFSET);
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK);
-    ri.u.checkBounds.min_bound = min_bound;
-    ri.u.checkBounds.max_bound = max_bound;
-
-    program.push_back(move(ri));
+    const auto *end_inst = program.end_instruction();
+    program.add_before_end(
+        make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst));
 }
 
 static
 void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
-                             vector<RoseInstruction> &program) {
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED,
-                              JumpTarget::NEXT_BLOCK);
-
+                             RoseProgram &program) {
     u32 handled_key;
     if (contains(bc.handledKeys, v)) {
         handled_key = bc.handledKeys.at(v);
@@ -3929,19 +3721,21 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
         bc.handledKeys.emplace(v, handled_key);
     }
 
-    ri.u.checkNotHandled.key = handled_key;
-
-    program.push_back(move(ri));
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst);
+    program.add_before_end(move(ri));
 }
 
 static
 void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc,
-                             RoseVertex v, vector<RoseInstruction> &program) {
-    vector<RoseInstruction> eod_program;
+                             RoseVertex v, RoseProgram &program) {
+    RoseProgram eod_program;
 
     for (const auto &e : out_edges_range(v, build.g)) {
         if (canEagerlyReportAtEod(build, e)) {
-            makeRoleReports(build, bc, target(e, build.g), eod_program);
+            RoseProgram block;
+            makeRoleReports(build, bc, target(e, build.g), block);
+            eod_program.add_block(move(block));
         }
     }
 
@@ -3952,19 +3746,21 @@
     if (!onlyAtEod(build, v)) {
         // The rest of our program wasn't EOD anchored, so we need to guard
         // these reports with a check.
-        program.emplace_back(ROSE_INSTR_CHECK_ONLY_EOD, JumpTarget::NEXT_BLOCK);
+        const auto *end_inst = eod_program.end_instruction();
+        eod_program.insert(begin(eod_program),
+                           make_unique<RoseInstrCheckOnlyEod>(end_inst));
     }
 
-    program.insert(end(program), begin(eod_program), end(eod_program));
+    program.add_before_end(move(eod_program));
 }
 
 static
-vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
-                                    const RoseEdge &e) {
+RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc,
+                        const RoseEdge &e) {
     const RoseGraph &g = build.g;
     auto v = target(e, g);
 
-    vector<RoseInstruction> program;
+    RoseProgram program;
 
     // First, add program instructions that enforce preconditions without
     // effects.
 
@@ -3973,8 +3769,8 @@ vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
     if (onlyAtEod(build, v)) {
         DEBUG_PRINTF("only at eod\n");
-        program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD,
-                                          JumpTarget::NEXT_BLOCK));
+        const auto *end_inst = program.end_instruction();
+        program.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
     }
 
     if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
@@ -3984,31 +3780,48 @@
     // This program may be triggered by different predecessors, with different
     // offset bounds. We must ensure we put this check/set operation after the
     // bounds check to deal with this case.
-    if (hasGreaterInDegree(1, v, g)) {
+    if (in_degree(v, g) > 1) {
         makeRoleCheckNotHandled(bc, v, program);
     }
 
     makeRoleLookaround(build, bc, v, program);
     makeRoleCheckLeftfix(build, bc, v, program);
 
-    // Next, we can add program instructions that have effects.
+    // Next, we can add program instructions that have effects. This must be
+    // done as a series of blocks, as some of them (like reports) are
+    // escapable.
 
-    makeRoleReports(build, bc, v, program);
+    RoseProgram effects_block;
 
-    makeRoleInfixTriggers(build, bc, v, program);
+    RoseProgram reports_block;
+    makeRoleReports(build, bc, v, reports_block);
+    effects_block.add_block(move(reports_block));
+
+    RoseProgram infix_block;
+    makeRoleInfixTriggers(build, bc, v, infix_block);
+    effects_block.add_block(move(infix_block));
 
     // Note: SET_GROUPS instruction must be after infix triggers, as an infix
     // going dead may switch off groups.
-    makeRoleGroups(build, bc, v, program);
+    RoseProgram groups_block;
+    makeRoleGroups(build, bc, v, groups_block);
+    effects_block.add_block(move(groups_block));
 
-    makeRoleSuffix(build, bc, v, program);
+    RoseProgram suffix_block;
+    makeRoleSuffix(build, bc, v, suffix_block);
+    effects_block.add_block(move(suffix_block));
 
-    makeRoleSetState(bc, v, program);
+    RoseProgram state_block;
+    makeRoleSetState(bc, v, state_block);
+    effects_block.add_block(move(state_block));
 
     // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
    // the program doesn't have one already).
-    makeRoleEagerEodReports(build, bc, v, program);
+    RoseProgram eod_block;
+    makeRoleEagerEodReports(build, bc, v, eod_block);
+    effects_block.add_block(move(eod_block));
+
+    program.add_before_end(move(effects_block));
 
     return program;
 }
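// ---- Editor's aside (not part of the patch): a toy model, under assumed
// semantics, of the control flow makeProgram() above arranges. Conditional
// instructions carry a fail target; the builder points it at the END of the
// enclosing block, so a failed check in (say) the reports block skips only
// that block and the infix/suffix/groups blocks still run. Names here are
// illustrative, not Hyperscan's interpreter.
#include <cstddef>
#include <functional>
#include <vector>

struct ToyInstr {
    std::function<bool()> exec; // returns false when a check fails
    std::size_t fail_target;    // index to resume at on failure; set to
                                // prog.size() to model a jump to END
};

void runToyProgram(const std::vector<ToyInstr> &prog) {
    std::size_t i = 0;
    while (i < prog.size()) {
        i = prog[i].exec() ? i + 1 : prog[i].fail_target;
    }
}
// ---- End editor's aside.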
 
@@ -4024,13 +3837,12 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc,
     // scratch to support it).
     const bool has_som = false;
-    vector<RoseInstruction> program;
+    RoseProgram program;
     for (const auto &id : reports) {
         makeReport(build, id, has_som, program);
     }
-    program = flattenProgram({program});
     applyFinalSpecialisation(program);
-    return writeProgram(bc, program);
+    return writeProgram(bc, move(program));
 }
 
 static
@@ -4153,7 +3965,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
 
         if (hasUsefulStops(lbi)) {
             assert(lbi.stopAlphabet.size() == N_CHARS);
-            left.stopTable = add_to_engine_blob(bc, lbi.stopAlphabet.begin(),
+            left.stopTable = bc.engine_blob.add(lbi.stopAlphabet.begin(),
                                                 lbi.stopAlphabet.end());
         }
 
@@ -4194,174 +4006,125 @@
 }
 
 static
-void addPredBlocksSingle(
-    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-    vector<RoseInstruction> &program) {
-
-    vector<vector<RoseInstruction>> prog_blocks;
-
-    for (const auto &m : predProgramLists) {
-        const u32 &pred_state = m.first;
-        assert(!m.second.empty());
-        auto subprog = flattenProgram(m.second);
-
-        // Check our pred state.
-        auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE,
-                                  JumpTarget::NEXT_BLOCK);
-        ri.u.checkState.index = pred_state;
-        subprog.insert(begin(subprog), ri);
-        assert(subprog.back().code() == ROSE_INSTR_END);
-        subprog.pop_back();
-        prog_blocks.push_back(move(subprog));
-    }
-
-    auto prog = flattenProgram(prog_blocks);
-    program.insert(end(program), begin(prog), end(prog));
+void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
+                        RoseProgram &program) {
+    // Prepend an instruction to check the pred state is on.
+    const auto *end_inst = pred_block.end_instruction();
+    pred_block.insert(begin(pred_block),
+                      make_unique<RoseInstrCheckState>(pred_state, end_inst));
+    program.add_block(move(pred_block));
 }
 
 static
-u32 programLength(const vector<RoseInstruction> &program) {
-    u32 len = 0;
-    for (const auto &ri : program) {
-        len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
-    }
-    return len;
-}
+void addPredBlocksAny(build_context &bc, map<u32, RoseProgram> &pred_blocks,
+                      RoseProgram &program) {
+    RoseProgram sparse_program;
 
-static
-void addPredBlocksMulti(build_context &bc,
-                        map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                        vector<RoseInstruction> &program) {
-    assert(!predProgramLists.empty());
-
-    // First, add the iterator itself.
     vector<u32> keys;
-    for (const auto &elem : predProgramLists) {
-        keys.push_back(elem.first);
-    }
-    DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str());
-
-    vector<mmbit_sparse_iter> iter;
-    mmbBuildSparseIterator(iter, keys, bc.numStates);
-    assert(!iter.empty());
-    u32 iter_offset = addIteratorToTable(bc, iter);
-
-    // Construct our program, starting with the SPARSE_ITER_BEGIN
-    // instruction, keeping track of the jump offset for each sub-program.
-    vector<RoseInstruction> sparse_program;
-    vector<u32> jump_table;
-
-    sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN,
-                                             JumpTarget::PROGRAM_END));
-    u32 curr_offset = programLength(program) + programLength(sparse_program);
-
-    for (const auto &e : predProgramLists) {
-        DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
-                     curr_offset);
-        jump_table.push_back(curr_offset);
-        assert(!e.second.empty());
-        auto subprog = flattenProgram(e.second);
-
-        if (e.first != keys.back()) {
-            // For all but the last subprogram, replace the END instruction
-            // with a SPARSE_ITER_NEXT.
-            assert(!subprog.empty());
-            assert(subprog.back().code() == ROSE_INSTR_END);
-            subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT,
-                                             JumpTarget::PROGRAM_END);
-        }
-
-        curr_offset += programLength(subprog);
-        insert(&sparse_program, end(sparse_program), subprog);
+    for (const u32 &key : pred_blocks | map_keys) {
+        keys.push_back(key);
     }
 
-    // Strip the END instruction from the last block.
-    assert(sparse_program.back().code() == ROSE_INSTR_END);
-    sparse_program.pop_back();
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    auto ri = make_unique<RoseInstrSparseIterAny>(bc.numStates, keys, end_inst);
+    sparse_program.add_before_end(move(ri));
 
-    sparse_program = flattenProgram({sparse_program});
-
-    // Write the jump table into the bytecode.
-    const u32 jump_table_offset =
-        add_to_engine_blob(bc, begin(jump_table), end(jump_table));
-
-    // Write jump table and iterator offset into sparse iter instructions.
-    auto keys_it = begin(keys);
-    for (auto &ri : sparse_program) {
-        switch (ri.code()) {
-        case ROSE_INSTR_SPARSE_ITER_BEGIN:
-            ri.u.sparseIterBegin.iter_offset = iter_offset;
-            ri.u.sparseIterBegin.jump_table = jump_table_offset;
-            break;
-        case ROSE_INSTR_SPARSE_ITER_NEXT:
-            ri.u.sparseIterNext.iter_offset = iter_offset;
-            ri.u.sparseIterNext.jump_table = jump_table_offset;
-            assert(keys_it != end(keys));
-            ri.u.sparseIterNext.state = *keys_it++;
-            break;
-        default:
-            break;
-        }
-    }
-
-    program.insert(end(program), begin(sparse_program), end(sparse_program));
+    RoseProgram &block = pred_blocks.begin()->second;
+    sparse_program.add_before_end(move(block));
+    program.add_block(move(sparse_program));
 }
 
 static
-void addPredBlocks(build_context &bc,
-                   map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                   vector<RoseInstruction> &program) {
-    const size_t num_preds = predProgramLists.size();
+void addPredBlocksMulti(build_context &bc, map<u32, RoseProgram> &pred_blocks,
+                        RoseProgram &program) {
+    assert(!pred_blocks.empty());
+
+    RoseProgram sparse_program;
+    const RoseInstruction *end_inst = sparse_program.end_instruction();
+    vector<pair<u32, const RoseInstruction *>> jump_table;
+
+    // BEGIN instruction.
+    auto ri_begin =
+        make_unique<RoseInstrSparseIterBegin>(bc.numStates, end_inst);
+    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
+    sparse_program.add_before_end(move(ri_begin));
+
+    // NEXT instructions, one per pred program.
+    u32 prev_key = pred_blocks.begin()->first;
+    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
+        auto ri = make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst,
+                                                       end_inst);
+        sparse_program.add_before_end(move(ri));
+        prev_key = it->first;
+    }
+
+    // Splice in each pred program after its BEGIN/NEXT.
+    auto out_it = begin(sparse_program);
+    for (auto &m : pred_blocks) {
+        u32 key = m.first;
+        RoseProgram &flat_prog = m.second;
+        assert(!flat_prog.empty());
+        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.
+
+        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
+               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
+        out_it = sparse_program.insert(++out_it, move(flat_prog));
+
+        // Jump table target for this key is the beginning of the block we just
+        // spliced in.
+        jump_table.emplace_back(key, out_it->get());
+
+        assert(distance(begin(sparse_program), out_it) + block_len <=
+               sparse_program.size());
+        advance(out_it, block_len);
+    }
+
+    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
+    begin_inst->jump_table = move(jump_table);
+
+    program.add_block(move(sparse_program));
+}
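// ---- Editor's aside (not part of the patch): simplified sketch of the
// layout addPredBlocksMulti() above encodes. The jump table pairs each pred
// state key with the first instruction of the block spliced in after its
// BEGIN/NEXT; the types below are stand-ins for the real
// (u32, const RoseInstruction *) pairs.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

using ToyJumpTable = std::vector<std::pair<uint32_t, std::size_t>>;

// Look up the block entry point for a switched-on pred state; the table is
// sorted by key because it is built from std::map iteration order.
std::size_t blockFor(const ToyJumpTable &jt, uint32_t key) {
    for (const auto &e : jt) {
        if (e.first == key) {
            return e.second;
        }
    }
    return SIZE_MAX; // state not in the table: nothing to run
}
// ---- End editor's aside.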
+
+static
+void addPredBlocks(build_context &bc, map<u32, RoseProgram> &pred_blocks,
+                   RoseProgram &program) {
+    // Trim empty blocks, if any exist.
+    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
+        if (it->second.empty()) {
+            it = pred_blocks.erase(it);
+        } else {
+            ++it;
+        }
+    }
+
+    const size_t num_preds = pred_blocks.size();
     if (num_preds == 0) {
-        program = flattenProgram({program});
         return;
     }
 
     if (num_preds == 1) {
-        addPredBlocksSingle(predProgramLists, program);
+        const auto head = pred_blocks.begin();
+        addPredBlockSingle(head->first, head->second, program);
         return;
     }
 
-    addPredBlocksMulti(bc, predProgramLists, program);
-}
-
-/**
- * Returns the pair (program offset, sparse iter offset).
- */
-static
-vector<RoseInstruction> makeSparseIterProgram(build_context &bc,
-                    map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
-                    const vector<RoseInstruction> &root_program,
-                    const vector<RoseInstruction> &pre_program) {
-    vector<RoseInstruction> program;
-    u32 curr_offset = 0;
-
-    // Add pre-program first.
-    for (const auto &ri : pre_program) {
-        program.push_back(ri);
-        curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
+    // First, see if all our blocks are equivalent, in which case we can
+    // collapse them down into one.
+    const auto &blocks = pred_blocks | map_values;
+    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
+            return RoseProgramEquivalence()(*begin(blocks), block);
+        })) {
+        DEBUG_PRINTF("all blocks equiv\n");
+        addPredBlocksAny(bc, pred_blocks, program);
+        return;
     }
 
-    // Add blocks to deal with non-root edges (triggered by sparse iterator or
-    // mmbit_isset checks). This operation will flatten the program up to this
-    // point.
-    addPredBlocks(bc, predProgramLists, program);
-
-    // If we have a root program, replace the END instruction with it. Note
-    // that the root program has already been flattened.
-    assert(!program.empty());
-    assert(program.back().code() == ROSE_INSTR_END);
-    if (!root_program.empty()) {
-        program.pop_back();
-        program.insert(end(program), begin(root_program), end(root_program));
-    }
-
-    return program;
+    addPredBlocksMulti(bc, pred_blocks, program);
 }
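// ---- Editor's aside (not part of the patch): standalone illustration of the
// collapse addPredBlocks() above performs. When every predecessor's block
// compares equivalent, a single copy guarded by an "any of these states on?"
// check suffices. std::string stands in for RoseProgram and operator== for
// RoseProgramEquivalence; pred_blocks is assumed non-empty.
#include <algorithm>
#include <map>
#include <string>

bool allBlocksEquivalent(const std::map<unsigned, std::string> &pred_blocks) {
    const std::string &first = pred_blocks.begin()->second;
    return std::all_of(pred_blocks.begin(), pred_blocks.end(),
                       [&first](const std::pair<const unsigned, std::string> &m) {
                           return m.second == first;
                       });
}
// ---- End editor's aside.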
 
 static
 void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
-                                 vector<RoseInstruction> &program) {
+                                 RoseProgram &program) {
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
     const auto &arb_lit_info = **lit_infos.begin();
     if (arb_lit_info.delayed_ids.empty()) {
@@ -4376,10 +4139,9 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id,
         DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id,
                      child_literal.delay, child_id);
 
-        auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED);
-        ri.u.pushDelayed.delay = verify_u8(child_literal.delay);
-        ri.u.pushDelayed.index = delay_index;
-        program.push_back(move(ri));
+        auto ri = make_unique<RoseInstrPushDelayed>(
+            verify_u8(child_literal.delay), delay_index);
+        program.add_before_end(move(ri));
     }
 }
 
@@ -4397,20 +4159,17 @@ rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) {
 
 static
 void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id,
-                               vector<RoseInstruction> &program) {
+                               RoseProgram &program) {
     rose_group groups = getFinalIdGroupsUnion(build, final_id);
     if (!groups) {
         return;
     }
-
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS);
-    ri.u.checkGroups.groups = groups;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrCheckGroups>(groups));
 }
 
 static
-void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id,
-                                 vector<RoseInstruction> &program) {
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc,
+                                 u32 final_id, RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
     assert(!lit_infos.empty());
 
@@ -4419,7 +4178,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 final_id,
         return;
     }
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_MASK);
+    vector<LookEntry> look;
 
     assert(build.final_id_to_literal.at(final_id).size() == 1);
     u32 lit_id = *build.final_id_to_literal.at(final_id).begin();
@@ -4427,19 +4186,21 @@
     DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id,
                  final_id, dumpString(s).c_str());
     assert(s.length() <= MAX_MASK2_WIDTH);
-    u32 i = 0;
+    s32 i = 0 - s.length();
     for (const auto &e : s) {
-        ri.u.checkLitMask.and_mask.a8[i] = e.nocase ? 0 : CASE_BIT;
-        ri.u.checkLitMask.cmp_mask.a8[i] = e.nocase ? 0 : (CASE_BIT & e.c);
+        if (!e.nocase) {
+            look.emplace_back(verify_s8(i), e);
+        }
         i++;
     }
 
-    program.push_back(move(ri));
+    assert(!look.empty());
+    makeLookaroundInstruction(bc, look, program);
 }
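// ---- Editor's aside (not part of the patch): worked example of the offset
// arithmetic in makeCheckLitMaskInstruction() above. A literal of length N
// that has just matched occupies offsets -N .. -1 relative to the match end,
// hence `s32 i = 0 - s.length()`; only case-sensitive positions get an entry.
// The uppercase test below is a stand-in for the real per-char nocase flag.
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<int, char>> caseSensitiveLook(const std::string &s) {
    std::vector<std::pair<int, char>> look;
    int i = -static_cast<int>(s.length());
    for (char c : s) {
        if (c >= 'A' && c <= 'Z') {
            look.emplace_back(i, c);
        }
        i++;
    }
    return look; // e.g. "aBc" -> {(-2, 'B')}
}
// ---- End editor's aside.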
 
 static
 void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
-                                vector<RoseInstruction> &program) {
+                                RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_infos = getLiteralInfoByFinalId(build, final_id);
 
@@ -4453,10 +4214,8 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id,
     }
 
     DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups);
-
-    auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS);
-    ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in.
-    program.push_back(move(ri));
+    program.add_before_end(
+        make_unique<RoseInstrSquashGroups>(~groups)); // Note negated.
 }
 
 static
@@ -4475,7 +4234,7 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
 static
 void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
                                    build_context &bc, u32 final_id,
-                                   vector<RoseInstruction> &program) {
+                                   RoseProgram &program) {
     assert(contains(build.final_id_to_literal, final_id));
     const auto &lit_ids = build.final_id_to_literal.at(final_id);
 
@@ -4497,9 +4256,7 @@ void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
         return;
     }
 
-    auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED);
-    ri.u.recordAnchored.id = final_id;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrRecordAnchored>(final_id));
 }
 
 static
@@ -4519,7 +4276,7 @@ static
 void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
                                   u32 final_id,
                                   const vector<RoseEdge> &lit_edges,
-                                  vector<RoseInstruction> &program) {
+                                  RoseProgram &program) {
     if (lit_edges.empty()) {
         return;
     }
 
@@ -4565,9 +4322,50 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc,
     assert(min_offset < UINT32_MAX);
     DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
 
-    auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY);
-    ri.u.checkLitEarly.min_offset = min_offset;
-    program.push_back(move(ri));
+    program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset));
+}
+
+static
+void makeCheckLiteralInstruction(const RoseBuildImpl &build,
+                                 const build_context &bc, u32 final_id,
+                                 RoseProgram &program) {
+    const auto &lits = build.final_id_to_literal.at(final_id);
+    if (lits.size() != 1) {
+        // Long literals should not share a final_id.
+        assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
+            const rose_literal_id &lit = build.literals.right.at(lit_id);
+            return lit.table != ROSE_FLOATING ||
+                   lit.s.length() <= bc.longLitLengthThreshold;
+        }));
+        return;
+    }
+
+    u32 lit_id = *lits.begin();
+    if (build.isDelayed(lit_id)) {
+        return;
+    }
+
+    const rose_literal_id &lit = build.literals.right.at(lit_id);
+    if (lit.table != ROSE_FLOATING) {
+        return;
+    }
+    assert(bc.longLitLengthThreshold > 0);
+    if (lit.s.length() <= bc.longLitLengthThreshold) {
+        return;
+    }
+
+    // Check resource limits as well.
+    if (lit.s.length() > build.cc.grey.limitLiteralLength) {
+        throw ResourceLimitError();
+    }
+
+    unique_ptr<RoseInstruction> ri;
+    if (lit.s.any_nocase()) {
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
+    } else {
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
    }
+    program.add_before_end(move(ri));
 }
 
 static
@@ -4585,47 +4383,52 @@ bool hasDelayedLiteral(RoseBuildImpl &build,
 }
 
 static
-vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
-                                               build_context &bc, u32 final_id,
-                                               const vector<RoseEdge> &lit_edges) {
-    vector<RoseInstruction> pre_program;
+RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc,
+                                   u32 final_id,
+                                   const vector<RoseEdge> &lit_edges) {
+    RoseProgram program;
 
     // No initial program for EOD.
     if (final_id == MO_INVALID_IDX) {
-        return pre_program;
+        return program;
     }
 
     DEBUG_PRINTF("final_id %u\n", final_id);
 
+    // Check long literal info.
+    makeCheckLiteralInstruction(build, bc, final_id, program);
+
     // Check lit mask.
-    makeCheckLitMaskInstruction(build, final_id, pre_program);
+    makeCheckLitMaskInstruction(build, bc, final_id, program);
 
     // Check literal groups. This is an optimisation that we only perform for
     // delayed literals, as their groups may be switched off; ordinarily, we
     // can trust the HWLM matcher.
     if (hasDelayedLiteral(build, lit_edges)) {
-        makeGroupCheckInstruction(build, final_id, pre_program);
+        makeGroupCheckInstruction(build, final_id, program);
     }
 
     // Add instructions for pushing delayed matches, if there are any.
-    makePushDelayedInstructions(build, final_id, pre_program);
+    makePushDelayedInstructions(build, final_id, program);
 
     // Add pre-check for early literals in the floating table.
-    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program);
+    makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program);
 
-    return pre_program;
+    return program;
 }
 
 static
-vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
-                                            build_context &bc, u32 final_id,
-                                            const vector<RoseEdge> &lit_edges) {
+RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc,
+                                u32 final_id,
+                                const vector<RoseEdge> &lit_edges) {
     const auto &g = build.g;
 
     DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
 
-    // pred state id -> list of programs
-    map<u32, vector<vector<RoseInstruction>>> predProgramLists;
+    RoseProgram program;
+
+    // Predecessor state id -> program block.
+    map<u32, RoseProgram> pred_blocks;
 
     // Construct sparse iter sub-programs.
     for (const auto &e : lit_edges) {
@@ -4633,68 +4436,56 @@ vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
         if (build.isAnyStart(u)) {
             continue; // Root roles are not handled with sparse iterator.
         }
-        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx,
-                     g[target(e, g)].idx);
+        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
         assert(contains(bc.roleStateIndices, u));
         u32 pred_state = bc.roleStateIndices.at(u);
-        auto program = makeProgram(build, bc, e);
-        if (program.empty()) {
-            continue;
-        }
-        predProgramLists[pred_state].push_back(program);
+        pred_blocks[pred_state].add_block(makeProgram(build, bc, e));
     }
 
-    // Construct sub-program for handling root roles.
-    vector<vector<RoseInstruction>> root_programs;
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(bc, pred_blocks, program);
+
+    // Add blocks to handle root roles.
     for (const auto &e : lit_edges) {
         const auto &u = source(e, g);
         if (!build.isAnyStart(u)) {
             continue;
         }
-        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx);
-        auto role_prog = makeProgram(build, bc, e);
-        if (role_prog.empty()) {
-            continue;
-        }
-        root_programs.push_back(role_prog);
+        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        program.add_block(makeProgram(build, bc, e));
     }
 
     if (final_id != MO_INVALID_IDX) {
-        vector<RoseInstruction> prog;
+        RoseProgram root_block;
 
         // Literal may squash groups.
-        makeGroupSquashInstruction(build, final_id, prog);
+        makeGroupSquashInstruction(build, final_id, root_block);
 
         // Literal may be anchored and need to be recorded.
-        makeRecordAnchoredInstruction(build, bc, final_id, prog);
+        makeRecordAnchoredInstruction(build, bc, final_id, root_block);
 
-        if (!prog.empty()) {
-            root_programs.push_back(move(prog));
-        }
+        program.add_block(move(root_block));
     }
 
-    vector<RoseInstruction> root_program;
-    if (!root_programs.empty()) {
-        root_program = flattenProgram(root_programs);
-    }
-
-    auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
-
-    // Put it all together.
-    return makeSparseIterProgram(bc, predProgramLists, root_program,
-                                 pre_program);
+    // Construct initial program up front, as its early checks must be able to
+    // jump to end and terminate processing for this literal.
+    auto lit_program = buildLitInitialProgram(build, bc, final_id, lit_edges);
+    lit_program.add_before_end(move(program));
+
+    return lit_program;
 }
 
 static
 u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
                         const vector<RoseEdge> &lit_edges) {
-    auto program = buildLiteralProgram(build, bc, final_id, lit_edges);
+    RoseProgram program = buildLiteralProgram(build, bc, final_id, lit_edges);
     if (program.empty()) {
         return 0;
     }
-    // Note: already flattened.
     applyFinalSpecialisation(program);
-    return writeProgram(bc, program);
+    return writeProgram(bc, move(program));
 }
 
 static
@@ -4706,13 +4497,12 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
         return 0; // No delayed IDs, no work to do.
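// ---- Editor's aside (not part of the patch): condensed restatement, as a
// hypothetical helper, of when makeCheckLiteralInstruction() above emits a
// confirm instruction: only a single, undelayed, floating-table literal
// longer than the long-literal threshold needs one, since beyond that length
// the HWLM matcher alone cannot confirm the match.
#include <cstddef>
#include <string>

bool needsLongLitConfirm(const std::string &lit, bool is_floating,
                         bool is_delayed, std::size_t longLitLengthThreshold) {
    return is_floating && !is_delayed && lit.length() > longLitLengthThreshold;
}
// ---- End editor's aside.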
} - vector program; - makeCheckLitMaskInstruction(build, final_id, program); + RoseProgram program; + makeCheckLitMaskInstruction(build, bc, final_id, program); makePushDelayedInstructions(build, final_id, program); assert(!program.empty()); - program = flattenProgram({program}); applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -4740,8 +4530,8 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { auto edge_list = vector(begin(m.second), end(m.second)); sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); }); lit_edge_map.emplace(m.first, edge_list); } @@ -4773,9 +4563,9 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { } u32 litProgramsOffset = - add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms)); - u32 delayRebuildProgramsOffset = add_to_engine_blob( - bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); + bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms)); + u32 delayRebuildProgramsOffset = bc.engine_blob.add( + begin(delayRebuildPrograms), end(delayRebuildPrograms)); return {litProgramsOffset, delayRebuildProgramsOffset}; } @@ -4813,35 +4603,31 @@ pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { vector programs; programs.reserve(reports.size()); - vector program; for (ReportID id : reports) { - program.clear(); + RoseProgram program; const bool has_som = false; makeCatchupMpv(build, bc, id, program); makeReport(build, id, has_som, program); - program = flattenProgram({program}); applyFinalSpecialisation(program); - u32 offset = writeProgram(bc, program); + u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, offset); DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, programs.back(), program.size()); } - u32 offset = add_to_engine_blob(bc, begin(programs), end(programs)); + u32 offset = bc.engine_blob.add(begin(programs), end(programs)); u32 count = verify_u32(programs.size()); return {offset, count}; } static -vector makeEodAnchorProgram(RoseBuildImpl &build, - build_context &bc, - const RoseEdge &e, - const bool multiple_preds) { +RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e, const bool multiple_preds) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); - vector program; + RoseProgram program; if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { makeRoleCheckBounds(build, v, e, program); @@ -4856,9 +4642,11 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeCatchup(build, bc, reports, program); const bool has_som = false; + RoseProgram report_block; for (const auto &id : reports) { - makeReport(build, id, has_som, program); + makeReport(build, id, has_som, report_block); } + program.add_before_end(move(report_block)); return program; } @@ -4869,7 +4657,7 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { for (auto v : vertices_range(g)) { if (g[v].suffix && build.isInETable(v)) { DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n", - g[v].idx); + g[v].index); return true; } } @@ -4881,7 +4669,7 @@ bool hasEodMatcher(const RoseBuildImpl &build) { const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (build.isInETable(v)) { - 
DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].idx); + DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index); return true; } } @@ -4890,30 +4678,30 @@ bool hasEodMatcher(const RoseBuildImpl &build) { static void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - bool in_etable, vector &program) { + bool in_etable, RoseProgram &program) { const RoseGraph &g = build.g; - // pred state id -> list of programs - map>> predProgramLists; + // Predecessor state id -> program block. + map pred_blocks; for (auto v : vertices_range(g)) { if (!g[v].eod_accept) { continue; } - DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, + DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index, in_degree(v, g)); vector edge_list; for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); if (build.isInETable(u) != in_etable) { - DEBUG_PRINTF("pred %zu %s in etable\n", g[u].idx, + DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index, in_etable ? "is not" : "is"); continue; } if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", g[u].idx); + DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index); continue; } edge_list.push_back(e); @@ -4923,29 +4711,18 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, for (const auto &e : edge_list) { RoseVertex u = source(e, g); assert(contains(bc.roleStateIndices, u)); - u32 predStateIdx = bc.roleStateIndices.at(u); - - auto prog = makeEodAnchorProgram(build, bc, e, multiple_preds); - if (prog.empty()) { - continue; - } - predProgramLists[predStateIdx].push_back(prog); + u32 pred_state = bc.roleStateIndices.at(u); + pred_blocks[pred_state].add_block( + makeEodAnchorProgram(build, bc, e, multiple_preds)); } } - if (predProgramLists.empty()) { - return; - } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - addPredBlocks(bc, predProgramLists, program); + addPredBlocks(bc, pred_blocks, program); } static void addEodEventProgram(RoseBuildImpl &build, build_context &bc, - vector &program) { + RoseProgram &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { return; } @@ -4967,65 +4744,51 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, // Sort edge list for determinism, prettiness. 
sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); }); - auto prog = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); - program.insert(end(program), begin(prog), end(prog)); + program.add_block( + buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list)); } static -void addEnginesEodProgram(u32 eodNfaIterOffset, - vector &program) { +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { if (!eodNfaIterOffset) { return; } - auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD); - ri.u.enginesEod.iter_offset = eodNfaIterOffset; - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.push_back(move(ri)); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique(eodNfaIterOffset)); + program.add_block(move(block)); } static -void addSuffixesEodProgram(const RoseBuildImpl &build, - vector &program) { +void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) { if (!hasEodAnchoredSuffix(build)) { return; } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.emplace_back(ROSE_INSTR_SUFFIXES_EOD); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); } static -void addMatcherEodProgram(const RoseBuildImpl &build, - vector &program) { +void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { if (!hasEodMatcher(build)) { return; } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.emplace_back(ROSE_INSTR_MATCHER_EOD); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); } static u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, u32 eodNfaIterOffset) { - vector program; + RoseProgram program; addEodEventProgram(build, bc, program); addEnginesEodProgram(eodNfaIterOffset, program); @@ -5034,17 +4797,12 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, addEodAnchorProgram(build, bc, true, program); addSuffixesEodProgram(build, program); - if (program.size() == 1) { - assert(program.back().code() == ROSE_INSTR_END); - return 0; - } - if (program.empty()) { return 0; } applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -5164,7 +4922,175 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, vector iter; mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue); - return addIteratorToTable(bc, iter); + return bc.engine_blob.add_iterator(iter); +} + +static +void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, + size_t longLitLengthThreshold, u32 *next_final_id) { + const auto &g = build.g; + auto &literal_info = build.literal_info; + auto &final_id_to_literal = build.final_id_to_literal; + + /* We can allocate the same final id to multiple literals of the same type + * if they share the same vertex set and trigger the same delayed literal + * ids and squash the same roles and have the same group squashing + * behaviour. Benefits literals cannot be merged. 
+ */
+
+    assert(longLitLengthThreshold > 0);
+
+    for (u32 int_id : lits) {
+        rose_literal_info &curr_info = literal_info[int_id];
+        const rose_literal_id &lit = build.literals.right.at(int_id);
+        const auto &verts = curr_info.vertices;
+
+        // Literals with benefits cannot be merged.
+        if (curr_info.requires_benefits) {
+            DEBUG_PRINTF("id %u has benefits\n", int_id);
+            goto assign_new_id;
+        }
+
+        // Long literals (that require CHECK_LITERAL instructions) cannot be
+        // merged.
+        if (lit.s.length() > longLitLengthThreshold) {
+            DEBUG_PRINTF("id %u is a long literal\n", int_id);
+            goto assign_new_id;
+        }
+
+        if (!verts.empty() && curr_info.delayed_ids.empty()) {
+            vector<u32> cand;
+            insert(&cand, cand.end(), g[*verts.begin()].literals);
+            for (auto v : verts) {
+                vector<u32> temp;
+                set_intersection(cand.begin(), cand.end(),
+                                 g[v].literals.begin(),
+                                 g[v].literals.end(),
+                                 inserter(temp, temp.end()));
+                cand.swap(temp);
+            }
+
+            for (u32 cand_id : cand) {
+                if (cand_id >= int_id) {
+                    break;
+                }
+
+                const auto &cand_info = literal_info[cand_id];
+                const auto &cand_lit = build.literals.right.at(cand_id);
+
+                if (cand_lit.s.length() > longLitLengthThreshold) {
+                    continue;
+                }
+
+                if (cand_info.requires_benefits) {
+                    continue;
+                }
+
+                if (!cand_info.delayed_ids.empty()) {
+                    /* TODO: allow cases where delayed ids are equivalent.
+                     * This is awkward currently as they have not had their
+                     * final ids allocated yet */
+                    continue;
+                }
+
+                if (lits.find(cand_id) == lits.end()
+                    || cand_info.vertices.size() != verts.size()
+                    || cand_info.squash_group != curr_info.squash_group) {
+                    continue;
+                }
+
+                /* if we are squashing groups we need to check if they are the
+                 * same group */
+                if (cand_info.squash_group
+                    && cand_info.group_mask != curr_info.group_mask) {
+                    continue;
+                }
+
+                u32 final_id = cand_info.final_id;
+                assert(final_id != MO_INVALID_IDX);
+                assert(curr_info.final_id == MO_INVALID_IDX);
+                curr_info.final_id = final_id;
+                final_id_to_literal[final_id].insert(int_id);
+                goto next_lit;
+            }
+        }
+
+    assign_new_id:
+        /* oh well, have to give it a fresh one, hang the expense */
+        DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
+        assert(curr_info.final_id == MO_INVALID_IDX);
+        curr_info.final_id = *next_final_id;
+        final_id_to_literal[*next_final_id].insert(int_id);
+        (*next_final_id)++;
+    next_lit:;
+    }
+}
+
+static
+bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
+    assert(lit_id < build.literal_info.size());
+    const auto &info = build.literal_info[lit_id];
+    if (!info.vertices.empty()) {
+        return true;
+    }
+
+    for (const u32 &delayed_id : info.delayed_ids) {
+        assert(delayed_id < build.literal_info.size());
+        const rose_literal_info &delayed_info = build.literal_info[delayed_id];
+        if (!delayed_info.vertices.empty()) {
+            return true;
+        }
+    }
+
+    DEBUG_PRINTF("literal %u has no refs\n", lit_id);
+    return false;
+}
+
+/** \brief Allocate final literal IDs for all literals. */
+static
+void allocateFinalLiteralId(RoseBuildImpl &build,
+                            size_t longLitLengthThreshold) {
+    set<u32> anch;
+    set<u32> norm;
+    set<u32> delay;
+
+    /* undelayed ids come first */
+    assert(build.final_id_to_literal.empty());
+    u32 next_final_id = 0;
+    for (u32 i = 0; i < build.literal_info.size(); i++) {
+        assert(!build.hasFinalId(i));
+
+        if (!isUsedLiteral(build, i)) {
+            /* what is this literal good for? absolutely nothing */
+            continue;
+        }
+
+        // The special EOD event literal has its own program and does not need
+        // a real literal ID.
+ if (i == build.eod_event_literal_id) { + assert(build.eod_event_literal_id != MO_INVALID_IDX); + continue; + } + + if (build.isDelayed(i)) { + assert(!build.literal_info[i].requires_benefits); + delay.insert(i); + } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { + anch.insert(i); + } else { + norm.insert(i); + } + } + + /* normal lits */ + allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id); + + /* next anchored stuff */ + build.anchored_base_id = next_final_id; + allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id); + + /* delayed ids come last */ + build.delay_base_id = next_final_id; + allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id); } static @@ -5202,17 +5128,90 @@ aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, return rose2; } +/** + * \brief Returns the pair (number of literals, max length) for all real + * literals in the floating table that are in-use. + */ +static +pair floatingCountAndMaxLen(const RoseBuildImpl &build) { + size_t num = 0; + size_t max_len = 0; + + for (const auto &e : build.literals.right) { + const u32 id = e.first; + const rose_literal_id &lit = e.second; + + if (lit.table != ROSE_FLOATING) { + continue; + } + if (lit.delay) { + // Skip delayed literals, so that we only count the undelayed + // version that ends up in the HWLM table. + continue; + } + if (!isUsedLiteral(build, id)) { + continue; + } + + num++; + max_len = max(max_len, lit.s.length()); + } + DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len); + return {num, max_len}; +} + +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired) { + const auto &cc = build.cc; + + // In block mode, we should only use the long literal support for literals + // that cannot be handled by HWLM. + if (!cc.streaming) { + return HWLM_LITERAL_MAX_LEN; + } + + size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; + + // Expand to size of history we've already allocated. Note that we need N-1 + // bytes of history to match a literal of length N. + longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1); + + // If we only have one literal, allow for a larger value in order to avoid + // building a long literal table for a trivial Noodle case that we could + // fit in history. + const auto num_len = floatingCountAndMaxLen(build); + if (num_len.first == 1) { + if (num_len.second > longLitLengthThreshold) { + DEBUG_PRINTF("expanding for single literal of length %zu\n", + num_len.second); + longLitLengthThreshold = num_len.second; + } + } + + // Clamp to max history available. + longLitLengthThreshold = + min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1); + + return longLitLengthThreshold; +} + aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. 
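// ---- Editor's aside (not part of the patch): restatement of the
// calcLongLitThreshold() policy above with a worked example; parameter names
// and the numbers below are illustrative. Matching a length-N literal needs
// N-1 bytes of history, hence the +1 terms.
#include <algorithm>
#include <cstddef>

std::size_t toyLongLitThreshold(bool streaming, std::size_t minThreshold,
                                std::size_t historyRequired,
                                std::size_t numFloatingLits,
                                std::size_t maxFloatingLen,
                                std::size_t maxHistoryAvailable,
                                std::size_t hwlmMaxLen) {
    if (!streaming) {
        return hwlmMaxLen; // block mode: only literals HWLM cannot handle
    }
    std::size_t t = std::max(minThreshold, historyRequired + 1);
    if (numFloatingLits == 1) {
        t = std::max(t, maxFloatingLen); // lone Noodle literal fits history
    }
    return std::min(t, maxHistoryAvailable + 1);
}
// Example: streaming, minThreshold=33, historyRequired=110 -> t=111; a single
// floating literal of length 200 raises t to 200; maxHistoryAvailable=109
// would then clamp t down to 110.
// ---- End editor's aside.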
+ size_t longLitLengthThreshold = calcLongLitThreshold(*this, + historyRequired); + DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); + + allocateFinalLiteralId(*this, longLitLengthThreshold); auto anchored_dfas = buildAnchoredDfas(*this); build_context bc; bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); - bc.needs_catchup = needsCatchup(*this); + bc.longLitLengthThreshold = longLitLengthThreshold; + bc.needs_catchup = needsCatchup(*this, anchored_dfas); recordResources(bc.resources, *this); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; @@ -5247,7 +5246,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { return nullptr; } u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue); - buildCountingMiracles(*this, bc); + buildCountingMiracles(bc); u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; * som rev nfas */ @@ -5273,6 +5272,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); + size_t longLitStreamStateRequired = 0; + u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, + bc.longLiterals, longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); + vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -5287,13 +5291,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 currOffset; /* relative to base of RoseEngine */ if (!bc.engine_blob.empty()) { - currOffset = bc.engine_blob_base + byte_length(bc.engine_blob); + currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); } else { currOffset = sizeof(RoseEngine); } - UNUSED const size_t engineBlobSize = - byte_length(bc.engine_blob); // test later + UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); @@ -5312,9 +5315,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. rose_group fgroups = 0; size_t fsize = 0; - size_t floatingStreamStateRequired = 0; - auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired, - &floatingStreamStateRequired); + auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, + &fgroups, &fsize, &historyRequired); u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); @@ -5387,7 +5389,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { memset(&stateOffsets, 0, sizeof(stateOffsets)); fillStateOffsets(*this, bc.numStates, anchorStateSize, activeArrayCount, activeLeftCount, laggedRoseCount, - floatingStreamStateRequired, historyRequired, + longLitStreamStateRequired, historyRequired, &stateOffsets); scatter_plan_raw state_scatter; @@ -5434,11 +5436,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ekeyCount = rm.numEkeys(); engine->dkeyCount = rm.numDkeys(); + engine->dkeyLogSize = fatbit_size(engine->dkeyCount); engine->invDkeyOffset = dkeyOffset; copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); engine->somHorizon = ssm.somPrecision(); engine->somLocationCount = ssm.numSomSlots(); + engine->somLocationFatbitSize = fatbit_size(engine->somLocationCount); engine->needsCatchup = bc.needs_catchup ? 
1 : 0; @@ -5453,8 +5457,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; + engine->activeQueueArraySize = fatbit_size(queue_count); engine->eagerIterOffset = eagerIterOffset; engine->handledKeyCount = bc.handledKeys.size(); + engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount); engine->rolesWithStateCount = bc.numStates; @@ -5474,11 +5480,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id); - engine->delay_count = delay_count; + engine->delay_count = + verify_u32(final_id_to_literal.size() - delay_base_id); + engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; engine->anchored_count = delay_base_id - anchored_base_id; + engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); engine->rosePrefixCount = rosePrefixCount; @@ -5503,6 +5511,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; + engine->longLitTableOffset = longLitTableOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); @@ -5528,7 +5537,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; - engine->floatingStreamState = verify_u32(floatingStreamStateRequired); + engine->longLitStreamState = verify_u32(longLitStreamStateRequired); engine->boundary.reportEodOffset = boundary_out.reportEodOffset; engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; @@ -5545,7 +5554,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &engine->tStateSize); // Copy in other tables - copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); + bc.engine_blob.write_bytes(engine.get()); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, @@ -5556,7 +5565,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. - assert(byte_length(bc.engine_blob) == engineBlobSize); + assert(bc.engine_blob.size() == engineBlobSize); // Add a small write engine if appropriate. 
engine = addSmallWriteEngine(*this, move(engine)); diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index c65e840d..7987b0f6 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -163,7 +163,7 @@ void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, for (RoseVertex v : verts) { assert(g[v].left.castle.get() == castle); - DEBUG_PRINTF("%zu checks at lag %u\n", g[v].idx, g[v].left.lag); + DEBUG_PRINTF("%zu checks at lag %u\n", g[v].index, g[v].left.lag); vector lits = literals_for_vertex(tbi, v); for (const auto &e : lits) { DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 3f82a9cc..e13d7c5c 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -43,11 +43,11 @@ #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" -#include "nfagraph/ng_dump.h" #include "nfagraph/ng_execute.h" #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_prune.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" #include "nfagraph/ng_stop.h" @@ -88,172 +88,6 @@ namespace ue2 { #define ANCHORED_REHOME_DEEP 25 #define ANCHORED_REHOME_SHORT_LEN 3 -#ifdef DEBUG -static UNUSED -void printLitInfo(const rose_literal_info &li, u32 id) { - DEBUG_PRINTF("lit_info %u\n", id); - DEBUG_PRINTF(" parent %u%s", li.undelayed_id, - li.delayed_ids.empty() ? "":", children:"); - for (u32 d_id : li.delayed_ids) { - printf(" %u", d_id); - } - printf("\n"); - DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":""); -} -#endif - -static -void allocateFinalIdToSet(const RoseGraph &g, const set &lits, - deque *literal_info, - map > *final_id_to_literal, - u32 *next_final_id) { - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. */ - - for (u32 int_id : lits) { - rose_literal_info &curr_info = (*literal_info)[int_id]; - const auto &verts = curr_info.vertices; - - if (!verts.empty() && !curr_info.requires_benefits - && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const rose_literal_info &cand_info = (*literal_info)[cand_id]; - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. 
- * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - (*final_id_to_literal)[final_id].insert(int_id); - goto next_lit; - } - } - - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - (*final_id_to_literal)[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. - * - * These are the literal ids used in the bytecode. - */ -static -void allocateFinalLiteralId(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(tbi.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < tbi.literal_info.size(); i++) { - assert(!tbi.hasFinalId(i)); - - if (!isUsedLiteral(tbi, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. - if (i == tbi.eod_event_literal_id) { - assert(tbi.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (tbi.isDelayed(i)) { - assert(!tbi.literal_info[i].requires_benefits); - delay.insert(i); - } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* next anchored stuff */ - tbi.anchored_base_id = next_final_id; - allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); - - /* delayed ids come last */ - tbi.delay_base_id = next_final_id; - allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal, - &next_final_id); -} - #define MAX_EXPLOSION_NC 3 static bool limited_explosion(const ue2_literal &s) { @@ -285,7 +119,12 @@ void RoseBuildImpl::handleMixedSensitivity(void) { continue; } - if (limited_explosion(lit.s)) { + // We don't want to explode long literals, as they require confirmation + // with a CHECK_LITERAL instruction and need unique final_ids. + // TODO: we could allow explosion for literals where the prefixes + // covered by CHECK_LITERAL are identical. 
+ if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + limited_explosion(lit.s)) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); literal_info[id].requires_explode = true; @@ -366,14 +205,6 @@ bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const { return true; } -void RoseBuildImpl::renumberVertices() { - vertexIndex = 0; - DEBUG_PRINTF("renumbering vertices\n"); - for (auto v : vertices_range(g)) { - g[v].idx = vertexIndex++; - } -} - static size_t trailerDueToSelf(const rose_literal_id &lit) { size_t trailer = lit.s.length() - maxPeriod(lit.s); @@ -392,7 +223,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { const RoseVertex u = source(e, g); /* pred role */ const RoseVertex v = target(e, g); /* current role */ - DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index); DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset, g[u].max_offset); @@ -446,7 +277,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { // Non-EOD cases. DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n", - g[u].idx, g[v].idx, g[e].minBound, g[e].maxBound); + g[u].index, g[v].index, g[e].minBound, g[e].maxBound); if (tbi.isAnchored(v)) { // Matches for literals in the anchored table will always arrive at the @@ -950,19 +781,230 @@ void RoseBuildImpl::findTransientLeftfixes(void) { /** Find all the different roses and their associated literals. */ static -map> findLeftSucc(RoseBuildImpl &tbi) { +map> findLeftSucc(const RoseBuildImpl &build) { map> leftfixes; - for (auto v : vertices_range(tbi.g)) { - if (tbi.g[v].left) { - const LeftEngInfo &lei = tbi.g[v].left; + for (auto v : vertices_range(build.g)) { + if (build.g[v].left) { + const LeftEngInfo &lei = build.g[v].left; leftfixes[lei].push_back(v); } } return leftfixes; } +namespace { +struct infix_info { + set preds; + set succs; +}; +} + static -bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, +map findInfixGraphInfo(const RoseBuildImpl &build) { + map rv; + + for (auto v : vertices_range(build.g)) { + if (!build.g[v].left) { + continue; + } + + if (build.isRootSuccessor(v)) { + DEBUG_PRINTF("a prefix is never an infix\n"); + continue; + } + + /* ensure only proper nfas */ + const LeftEngInfo &lei = build.g[v].left; + if (!lei.graph) { + continue; + } + if (lei.haig || lei.dfa) { + continue; + } + assert(!lei.castle); + infix_info &info = rv[lei.graph.get()]; + insert(&info.preds, inv_adjacent_vertices_range(v, build.g)); + info.succs.insert(v); + } + + return rv; +} + +static +map> getTopInfo(const NGHolder &h) { + map> rv; + for (NFAEdge e : out_edges_range(h.start, h)) { + for (u32 t : h[e].tops) { + rv[t].insert(e); + } + } + return rv; +} + +static +u32 findUnusedTop(const map> &tops) { + u32 i = 0; + while (contains(tops, i)) { + i++; + } + return i; +} + +static +bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { + RoseGraph &g = build.g; + + set tops; /* tops triggered by u */ + for (RoseEdge e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; + } + tops.insert(g[e].rose_top); + } + + assert(!tops.empty()); + if (tops.size() <= 1) { + return false; + } + DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index, + tops.size(), &h); + + auto h_top_info = getTopInfo(h); + flat_set edges_to_trigger; + for (u32 t : tops) { 
+        insert(&edges_to_trigger, h_top_info[t]);
+    }
+
+    u32 new_top = ~0U;
+    /* check if there is already a top with the right successor set */
+    for (const auto &elem : h_top_info) {
+        if (elem.second == edges_to_trigger) {
+            new_top = elem.first;
+            break;
+        }
+    }
+
+    /* if no existing suitable top, add a new top for us */
+    if (new_top == ~0U) {
+        new_top = findUnusedTop(h_top_info);
+
+        /* add top to edges out of start */
+        for (NFAEdge e : out_edges_range(h.start, h)) {
+            if (has_intersection(tops, h[e].tops)) {
+                h[e].tops.insert(new_top);
+            }
+        }
+
+        /* check still implementable if we add a new top */
+        if (!isImplementableNFA(h, nullptr, build.cc)) {
+            DEBUG_PRINTF("unable to add new top\n");
+            for (NFAEdge e : out_edges_range(h.start, h)) {
+                h[e].tops.erase(new_top);
+            }
+            /* we should be back to the original graph */
+            assert(isImplementableNFA(h, nullptr, build.cc));
+            return false;
+        }
+    }
+
+    DEBUG_PRINTF("using new merged top %u\n", new_top);
+    assert(new_top != ~0U);
+    for (RoseEdge e: out_edges_range(u, g)) {
+        RoseVertex v = target(e, g);
+        if (g[v].left.graph.get() != &h) {
+            continue;
+        }
+        g[e].rose_top = new_top;
+    }
+
+    return true;
+}
+
+static
+void packInfixTops(NGHolder &h, RoseGraph &g,
+                   const set<RoseVertex> &verts) {
+    if (!is_triggered(h)) {
+        DEBUG_PRINTF("not triggered, no tops\n");
+        return;
+    }
+    assert(isCorrectlyTopped(h));
+    DEBUG_PRINTF("pruning unused tops\n");
+    flat_set<u32> used_tops;
+    for (auto v : verts) {
+        assert(g[v].left.graph.get() == &h);
+
+        for (const auto &e : in_edges_range(v, g)) {
+            u32 top = g[e].rose_top;
+            used_tops.insert(top);
+        }
+    }
+
+    map<u32, u32> top_mapping;
+    for (u32 t : used_tops) {
+        u32 new_top = top_mapping.size();
+        top_mapping[t] = new_top;
+    }
+
+    for (auto v : verts) {
+        assert(g[v].left.graph.get() == &h);
+
+        for (const auto &e : in_edges_range(v, g)) {
+            g[e].rose_top = top_mapping.at(g[e].rose_top);
+        }
+    }
+
+    vector<NFAEdge> dead;
+    for (const auto &e : out_edges_range(h.start, h)) {
+        NFAVertex v = target(e, h);
+        if (v == h.startDs) {
+            continue; // stylised edge, leave it alone.
+        }
+        flat_set<u32> updated_tops;
+        for (u32 t : h[e].tops) {
+            if (contains(top_mapping, t)) {
+                updated_tops.insert(top_mapping.at(t));
+            }
+        }
+        h[e].tops = move(updated_tops);
+        if (h[e].tops.empty()) {
+            DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
+            dead.push_back(e);
+        }
+    }
+
+    if (dead.empty()) {
+        return;
+    }
+
+    remove_edges(dead, h);
+    pruneUseless(h);
+    clearReports(h); // As we may have removed vacuous edges.
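    /* (Illustrative aside, not from the patch.) The remapping above packs a
     * sparse set of used tops into a dense range; flat_set iterates in sorted
     * order, so relative order is preserved. A minimal standalone sketch with
     * hypothetical values:
     *
     *     ue2::flat_set<u32> used = {9, 2, 5};
     *     std::map<u32, u32> remap;
     *     for (u32 t : used) {
     *         remap[t] = remap.size(); // 2->0, 5->1, 9->2
     *     }
     *
     * Dense numbering keeps the infix's top count small after the merges
     * performed by reduceTopTriggerLoad(). */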
+}
+
+static
+void reduceTopTriggerLoad(RoseBuildImpl &build) {
+    auto infixes = findInfixGraphInfo(build);
+
+    for (auto &p : infixes) {
+        if (onlyOneTop(*p.first)) {
+            continue;
+        }
+
+        bool changed = false;
+        for (RoseVertex v : p.second.preds) {
+            changed |= reduceTopTriggerLoad(build, *p.first, v);
+        }
+
+        if (changed) {
+            packInfixTops(*p.first, build.g, p.second.succs);
+            reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc);
+        }
+    }
+}
+
+static
+bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left,
                            const set<ue2_literal> &all_lits,
                            const RoseEdge &e) {
     assert(left.graph());
@@ -978,8 +1020,8 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
 
     /* check each pred literal to see if they all kill previous graph
      * state */
-    for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
-        const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
+    for (u32 lit_id : build.g[source(e, build.g)].literals) {
+        const rose_literal_id &pred_lit = build.literals.right.at(lit_id);
         const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
 
         DEBUG_PRINTF("running graph %zu\n", states.size());
@@ -995,7 +1037,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
 }
 
 static
-bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left,
+bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left,
                       const set<ue2_literal> &all_lits, const RoseEdge &e) {
     if (left.haig()) {
         /* TODO: To allow this for som-based engines we would also need to
@@ -1005,32 +1047,30 @@ bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left,
     }
 
     if (left.graph()) {
-        return triggerKillsRoseGraph(tbi, left, all_lits, e);
+        return triggerKillsRoseGraph(build, left, all_lits, e);
     }
 
     if (left.castle()) {
-        return triggerKillsRoseCastle(tbi, left, all_lits, e);
+        return triggerKillsRoseCastle(build, left, all_lits, e);
     }
 
     return false;
 }
 
+/* Sometimes the arrival of a top for a rose infix can ensure that the nfa
+ * would be dead at that time. In the case of multiple trigger literals, we
+ * can only base our decision on that portion of the literal after any
+ * overlapping literals.
+ */
 static
-void inspectRoseTops(RoseBuildImpl &tbi) {
-    /* Sometimes the arrival of a top for a rose infix can ensure that the nfa
-     * would be dead at that time.
-     * In the case of multiple trigger literals we can only base our decision
-     * on that portion of literal after any overlapping literals */
+void findTopTriggerCancels(RoseBuildImpl &build) {
+    auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */
-    map<left_id, vector<RoseVertex>> roses =
-        findLeftSucc(tbi); /* rose -> succ verts */
-
-    for (const auto &r : roses) {
+    for (const auto &r : left_succ) {
         const left_id &left = r.first;
         const vector<RoseVertex> &succs = r.second;
         assert(!succs.empty());
 
-        if (tbi.isRootSuccessor(*succs.begin())) {
+        if (build.isRootSuccessor(*succs.begin())) {
             /* a prefix is never an infix */
             continue;
         }
@@ -1040,10 +1080,10 @@ void inspectRoseTops(RoseBuildImpl &tbi) {
         set<u32> pred_lit_ids;
 
         for (auto v : succs) {
-            for (const auto &e : in_edges_range(v, tbi.g)) {
-                RoseVertex u = source(e, tbi.g);
-                tops_seen.insert(tbi.g[e].rose_top);
-                insert(&pred_lit_ids, tbi.g[u].literals);
+            for (const auto &e : in_edges_range(v, build.g)) {
+                RoseVertex u = source(e, build.g);
+                tops_seen.insert(build.g[e].rose_top);
+                insert(&pred_lit_ids, build.g[u].literals);
                 rose_edges.insert(e);
             }
         }
@@ -1055,7 +1095,7 @@ void inspectRoseTops(RoseBuildImpl &tbi) {
 
         for (u32 lit_id : pred_lit_ids) {
-            const rose_literal_id &p_lit = tbi.literals.right.at(lit_id);
+            const rose_literal_id &p_lit = build.literals.right.at(lit_id);
             if (p_lit.delay || p_lit.table == ROSE_ANCHORED) {
                 goto next_rose;
             }
@@ -1067,15 +1107,22 @@ void inspectRoseTops(RoseBuildImpl &tbi) {
                      all_lits.size(), rose_edges.size());
 
         for (const auto &e : rose_edges) {
-            if (triggerKillsRose(tbi, left, all_lits, e)) {
+            if (triggerKillsRose(build, left, all_lits, e)) {
                 DEBUG_PRINTF("top will override previous rose state\n");
-                tbi.g[e].rose_cancel_prev_top = true;
+                build.g[e].rose_cancel_prev_top = true;
             }
         }
     next_rose:;
     }
 }
 
+static
+void optimiseRoseTops(RoseBuildImpl &build) {
+    reduceTopTriggerLoad(build);
+    /* prune unused tops ? */
+    findTopTriggerCancels(build);
+}
+
 static
 void buildRoseSquashMasks(RoseBuildImpl &tbi) {
     /* Rose nfa squash masks are applied to the groups when the nfa can no
@@ -1256,22 +1303,16 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
     assert(old_id < tbi.literal_info.size());
     const rose_literal_info &li = tbi.literal_info[old_id];
 
-    // For compile determinism, operate over literal vertices in index
-    // order.
-    vector<RoseVertex> lit_verts(begin(li.vertices), end(li.vertices));
-    sort(begin(lit_verts), end(lit_verts), VertexIndexComp(g));
-
-    for (auto lit_v : lit_verts) {
+    for (auto lit_v : li.vertices) {
         // Clone vertex with the new literal ID.
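        // (An aside on the mechanics, inferred from the call below: the
        // property-copying add_vertex(g[lit_v], g) overload duplicates all of
        // lit_v's vertex properties; the code then rewrites only the literal
        // set and the min/max offsets for the new anchored context.)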
RoseVertex v = add_vertex(g[lit_v], g); - g[v].idx = tbi.vertexIndex++; g[v].literals.clear(); g[v].literals.insert(lit_id); g[v].min_offset = sai.min_bound + sai.literal.length(); g[v].max_offset = sai.max_bound + sai.literal.length(); lit_info.vertices.insert(v); - RoseEdge e = add_edge(anchored_root, v, g).first; + RoseEdge e = add_edge(anchored_root, v, g); g[e].minBound = sai.min_bound; g[e].maxBound = sai.max_bound; } @@ -1292,11 +1333,10 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit, RoseGraph &g = tbi.g; RoseVertex v = add_vertex(g); - g[v].idx = tbi.vertexIndex++; g[v].literals.insert(lit_id); g[v].reports = reports; - RoseEdge e = add_edge(tbi.root, v, g).first; + RoseEdge e = add_edge(tbi.root, v, g); g[e].minBound = 0; g[e].maxBound = ROSE_BOUND_INF; g[v].min_offset = 1; @@ -1502,7 +1542,7 @@ bool historiesAreValid(const RoseGraph &g) { for (const auto &e : edges_range(g)) { if (g[e].history == ROSE_ROLE_HISTORY_INVALID) { DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n", - g[source(e, g)].idx, g[target(e, g)].idx); + g[source(e, g)].index, g[target(e, g)].index); return false; } } @@ -1521,18 +1561,20 @@ bool danglingVertexRef(RoseBuildImpl &tbi) { const ue2::unordered_set valid_vertices(vi, ve); if (!contains(valid_vertices, tbi.anchored_root)) { - DEBUG_PRINTF("anchored root vertex %p not in graph\n", - tbi.anchored_root); + DEBUG_PRINTF("anchored root vertex %zu not in graph\n", + tbi.g[tbi.anchored_root].index); return true; } for (const auto &e : tbi.ghost) { if (!contains(valid_vertices, e.first)) { - DEBUG_PRINTF("ghost key vertex %p not in graph\n", e.first); + DEBUG_PRINTF("ghost key vertex %zu not in graph\n", + tbi.g[e.first].index); return true; } if (!contains(valid_vertices, e.second)) { - DEBUG_PRINTF("ghost value vertex %p not in graph\n", e.second); + DEBUG_PRINTF("ghost value vertex %zu not in graph\n", + tbi.g[e.second].index); return true; } } @@ -1544,63 +1586,16 @@ static bool roleOffsetsAreValid(const RoseGraph &g) { for (auto v : vertices_range(g)) { if (g[v].min_offset >= ROSE_BOUND_INF) { - DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].idx); + DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index); return false; } if (g[v].min_offset > g[v].max_offset) { - DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].idx); + DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index); return false; } } return true; } - -static UNUSED -bool hasOrphanedTops(const RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; - - ue2::unordered_map > roses; - ue2::unordered_map > suffixes; - - for (auto v : vertices_range(g)) { - if (g[v].left) { - set &tops = roses[g[v].left]; - if (tbi.isRootSuccessor(v)) { - // Prefix, has only one top. - tops.insert(0); - } else { - // Tops for infixes come from the in-edges. 
- for (const auto &e : in_edges_range(v, g)) { - tops.insert(g[e].rose_top); - } - } - } - if (g[v].suffix) { - suffixes[g[v].suffix].insert(g[v].suffix.top); - } - } - - for (const auto &e : roses) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - for (const auto &e : suffixes) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - return false; -} - #endif // NDEBUG aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { @@ -1681,13 +1676,17 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { mergeSmallLeftfixes(*this); } + assert(!hasOrphanedTops(*this)); + // Do a rose-merging aliasing pass. aliasRoles(*this, true); + assert(!hasOrphanedTops(*this)); // Run a merge pass over the outfixes as well. mergeOutfixes(*this); assert(!danglingVertexRef(*this)); + assert(!hasOrphanedTops(*this)); findMoreLiteralMasks(*this); @@ -1697,8 +1696,7 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { /* final prep work */ remapCastleTops(*this); - allocateFinalLiteralId(*this); - inspectRoseTops(*this); + optimiseRoseTops(*this); buildRoseSquashMasks(*this); rm.assignDkeys(this); diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 1578dda1..b151c0c9 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -163,6 +163,8 @@ unique_ptr convertLeafToHolder(const RoseGraph &g, } } + setTops(*out); + // Literal vertices wired to accept. NFAVertex litfirst, litlast; tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out); @@ -288,7 +290,7 @@ bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) { // Find all of the leaves with literals whose length is <= len. static -void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { +void findBadLeaves(RoseBuildImpl &tbi, set &bad) { RoseGraph &g = tbi.g; u32 len = tbi.cc.grey.roseMaxBadLeafLength; @@ -307,15 +309,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { const rose_literal_info &info = tbi.literal_info[lid]; - // Because we do the "clone pred and re-home" trick below, we need to - // iterate over our vertices in a defined ordering, otherwise we'll get - // non-determinism in our bytecode. So, copy and sort this literal's - // vertices. 
- - vector verts(info.vertices.begin(), info.vertices.end()); - sort(verts.begin(), verts.end(), VertexIndexComp(g)); - - for (auto v : verts) { + for (auto v : info.vertices) { if (!isLeafNode(v, g)) { continue; } @@ -329,7 +323,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { const RoseEdge &e = *in_edges(v, g).first; RoseVertex u = source(e, g); if (out_degree(u, g) != 1) { - DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].idx); + DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index); RoseVertex u2 = tbi.cloneVertex(u); for (const auto &e_in : in_edges_range(u, g)) { add_edge(source(e_in, g), u2, g[e_in], g); @@ -338,7 +332,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { remove_edge(e, g); } - DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].idx); + DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index); bad.insert(v); } } @@ -346,7 +340,7 @@ void findBadLeaves(RoseBuildImpl &tbi, RoseVertexSet &bad) { void convertBadLeaves(RoseBuildImpl &tbi) { RoseGraph &g = tbi.g; - RoseVertexSet bad(g); + set bad; findBadLeaves(tbi, bad); DEBUG_PRINTF("found %zu bad leaves\n", bad.size()); @@ -369,7 +363,7 @@ void convertBadLeaves(RoseBuildImpl &tbi) { RoseVertex u = source(e, g); assert(!g[u].suffix); g[u].suffix.graph = h; - DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].idx, h.get()); + DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get()); dead.push_back(v); } @@ -400,7 +394,10 @@ unique_ptr makeFloodProneSuffix(const ue2_literal &s, size_t len, NFAVertex u = h->start; for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) { NFAVertex v = addHolderVertex(*it, *h); - add_edge(u, v, *h); + NFAEdge e = add_edge(u, v, *h); + if (u == h->start) { + (*h)[e].tops.insert(DEFAULT_TOP); + } u = v; } @@ -708,10 +705,7 @@ bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(g[e_old].maxBound >= bound_max); setEdgeBounds(g, e_old, bound_min, bound_max); } else { - RoseEdge e_new; - UNUSED bool added; - tie(e_new, added) = add_edge(ar, v, g); - assert(added); + RoseEdge e_new = add_edge(ar, v, g); setEdgeBounds(g, e_new, bound_min, bound_max); to_delete->push_back(e_old); } @@ -728,10 +722,8 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, u32 repeatCount = 0; NFAVertex hu = h.startDs; - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -781,14 +773,12 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(in_degree(h.acceptEod, h) == 1); bool anchored = !proper_out_degree(h.startDs, h); - NFAVertex key = nullptr; + NFAVertex key = NGHolder::null_vertex(); NFAVertex base = anchored ? 
h.start : h.startDs; if (!anchored) { - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -797,7 +787,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, } for (auto w : adjacent_vertices_range(base, h)) { - DEBUG_PRINTF("checking %u\n", h[w].index); + DEBUG_PRINTF("checking %zu\n", h[w].index); if (!h[w].char_reach.all()) { continue; } @@ -832,7 +822,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, set exits_and_repeat_verts; for (auto repeat_v : ri.vertices) { - DEBUG_PRINTF("repeat vertex %u\n", h[repeat_v].index); + DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index); succ(h, repeat_v, &exits_and_repeat_verts); exits_and_repeat_verts.insert(repeat_v); } @@ -847,8 +837,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, exits = exits_and_repeat_verts; erase_all(&exits, rep_verts); - set base_succ; - succ(h, base, &base_succ); + auto base_succ = succs>(base, h); base_succ.erase(h.startDs); if (is_subset_of(base_succ, rep_verts)) { @@ -908,10 +897,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, if (source(e_old, g) == ar) { setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); } else { - RoseEdge e_new; - UNUSED bool added; - tie(e_new, added) = add_edge(ar, v, g); - assert(added); + RoseEdge e_new = add_edge(ar, v, g); setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); to_delete->push_back(e_old); } @@ -963,7 +949,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -1009,7 +995,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -1044,7 +1030,7 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); // This pass runs after makeCastles, so we use the fact that bounded // repeat detection has already been done for us. 
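Aside: the hunks above swap the old out-parameter helper succ(h, v, &container) for a container-returning form; in the un-mangled source these calls read succs<set<NFAVertex>>(h.start, h), matching the set<NFAVertex> locals they replace. The helper's real definition lives in the graph utility headers and is not part of this diff; a minimal sketch of the assumed shape, in plain Boost.Graph terms:

#include <set>
#include <tuple>
#include <boost/graph/graph_traits.hpp>

// Sketch only (assumed shape, not the library's definition): collect the
// successors of v into a caller-chosen insertable container, e.g.
//     auto start_succ = succs<std::set<NFAVertex>>(h.start, h);
template <class Container, class Graph>
Container succs(typename boost::graph_traits<Graph>::vertex_descriptor v,
                const Graph &g) {
    Container rv;
    typename boost::graph_traits<Graph>::adjacency_iterator ai, ae;
    for (std::tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
        rv.insert(*ai); // insert() keeps set-like semantics
    }
    return rv;
}

Returning the container by value keeps call sites terse and lets the caller pick an ordered set, flat_set, or similar as needed.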
diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5fb27c55..105ee338 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -104,7 +104,7 @@ public: } os << "[label=\""; - os << "idx=" << g[v].idx <<"\\n"; + os << "index=" << g[v].index <<"\\n"; for (u32 lit_id : g[v].literals) { writeLiteral(os, lit_id); @@ -267,14 +267,14 @@ void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t, ofstream os(ss.str()); RoseGraphWriter writer(build, t); - writeGraphviz(os, build.g, writer, get(&RoseVertexProps::idx, build.g)); + writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g)); } namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} inline bool operator()(const RoseVertex &a, const RoseVertex &b) const { - return g[a].idx < g[b].idx; + return g[a].index < g[b].index; } private: const RoseGraph &g; @@ -372,7 +372,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { for (RoseVertex v : verts) { // role info - os << " Index " << g[v].idx << ": groups=0x" << hex << setw(16) + os << " Index " << g[v].index << ": groups=0x" << hex << setw(16) << setfill('0') << g[v].groups << dec; if (g[v].reports.empty()) { @@ -386,13 +386,13 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { // pred info for (const auto &ie : in_edges_range(v, g)) { const auto &u = source(ie, g); - os << " Predecessor idx="; + os << " Predecessor index="; if (u == build.root) { os << "ROOT"; } else if (u == build.anchored_root) { os << "ANCHORED_ROOT"; } else { - os << g[u].idx; + os << g[u].index; } os << ": bounds [" << g[ie].minBound << ", "; if (g[ie].maxBound == ROSE_BOUND_INF) { @@ -442,20 +442,26 @@ void dumpTestLiterals(const string &filename, const vector &lits) { static void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED); + size_t historyRequired = build.calcHistoryRequired(); + size_t longLitLengthThreshold = + calcLongLitThreshold(build, historyRequired); + + auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, + longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING); + lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); if (!build.cc.streaming) { lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN); + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); lits.insert(end(lits), begin(lits2), end(lits2)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); } diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h new file mode 100644 index 00000000..8542b87b --- /dev/null +++ b/src/rose/rose_build_engine_blob.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above 
copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_ENGINE_BLOB_H +#define ROSE_BUILD_ENGINE_BLOB_H + +#include "rose_internal.h" + +#include "ue2common.h" +#include "util/alloc.h" +#include "util/container.h" +#include "util/multibit_build.h" +#include "util/ue2_containers.h" +#include "util/verify_types.h" + +#include +#include + +#include + +namespace ue2 { + +class RoseEngineBlob : boost::noncopyable { +public: + /** \brief Base offset of engine_blob in the Rose engine bytecode. */ + static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine)); + + bool empty() const { + return blob.empty(); + } + + size_t size() const { + return blob.size(); + } + + const char *data() const { + return blob.data(); + } + + u32 add(const void *a, const size_t len, const size_t align) { + pad(align); + + size_t rv = base_offset + blob.size(); + assert(rv >= base_offset); + DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); + + assert(ISALIGNED_N(blob.size(), align)); + + blob.resize(blob.size() + len); + memcpy(&blob.back() - len + 1, a, len); + + return verify_u32(rv); + } + + template + u32 add(const T &a) { + static_assert(std::is_pod::value, "should be pod"); + return add(&a, sizeof(a), alignof(T)); + } + + template + u32 add(const T &a, const size_t len) { + static_assert(std::is_pod::value, "should be pod"); + return add(&a, len, alignof(T)); + } + + template + u32 add(Iter b, const Iter &e) { + using value_type = typename std::iterator_traits::value_type; + static_assert(std::is_pod::value, "should be pod"); + + if (b == e) { + return 0; + } + + u32 offset = add(*b); + for (++b; b != e; ++b) { + add(*b); + } + + return offset; + } + + u32 add_iterator(const std::vector &iter) { + auto cache_it = cached_iters.find(iter); + if (cache_it != cached_iters.end()) { + u32 offset = cache_it->second; + DEBUG_PRINTF("cache hit for iter at %u\n", offset); + return offset; + } + + u32 offset = add(iter.begin(), iter.end()); + cached_iters.emplace(iter, offset); + return offset; + } + + void write_bytes(RoseEngine *engine) { + copy_bytes((char *)engine + base_offset, blob); + } + +private: + void pad(size_t align) { + assert(ISALIGNED_N(base_offset, align)); + size_t s = blob.size(); + + if (ISALIGNED_N(s, align)) { + return; + } + + blob.resize(s + 
align - s % align);
+    }
+
+    /** \brief Cache of previously-written sparse iterators. */
+    unordered_map<std::vector<mmbit_sparse_iter>, u32> cached_iters;
+
+    /**
+     * \brief Contents of the Rose bytecode immediately following the
+     * RoseEngine.
+     */
+    std::vector<char, AlignedAllocator<char, 64>> blob;
+};
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_ENGINE_BLOB_H
diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp
index c9e8d215..e91cc297 100644
--- a/src/rose/rose_build_exclusive.cpp
+++ b/src/rose/rose_build_exclusive.cpp
@@ -306,12 +306,12 @@ void findCliques(const map<u32, set<u32>> &exclusiveGroups,
     // Find clique groups
     const auto &clique = removeClique(*cg);
     for (const auto &i : clique) {
-        DEBUG_PRINTF("cliq:%lu\n", i.size());
+        DEBUG_PRINTF("cliq:%zu\n", i.size());
         if (i.size() > 1) {
             exclusive_roles.push_back(i);
         }
     }
-    DEBUG_PRINTF("Clique graph size:%lu\n", exclusive_roles.size());
+    DEBUG_PRINTF("Clique graph size:%zu\n", exclusive_roles.size());
 }
 
 static
@@ -326,7 +326,7 @@ map<u32, vector<u32>> findExclusiveGroups(const RoseBuildImpl &build,
         set<u32> group;
         set<RoseVertex> q1(vertex_map.at(i).begin(), vertex_map.at(i).end());
-        DEBUG_PRINTF("vertex set:%lu\n", q1.size());
+        DEBUG_PRINTF("vertex set:%zu\n", q1.size());
         for (const auto &val : s) {
             set<RoseVertex> q2(vertex_map.at(val).begin(),
                                vertex_map.at(val).end());
diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp
index 5e477e3b..0a1c501f 100644
--- a/src/rose/rose_build_groups.cpp
+++ b/src/rose/rose_build_groups.cpp
@@ -136,7 +136,7 @@ rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g,
             }
         } else {
             DEBUG_PRINTF("not sibling different mother %zu %zu\n",
-                         g[v].idx, g[w].idx);
+                         g[v].index, g[w].index);
         }
     }
 }
@@ -382,7 +382,7 @@ void assignGroupsToRoles(RoseBuildImpl &build) {
             g[ghost_it->second].groups |= succ_groups;
         }
 
-        DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].idx, g[v].groups);
+        DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].index, g[v].groups);
     }
 }
 
@@ -397,8 +397,7 @@ getVertexGroupMap(const RoseBuildImpl &build) {
     vector<RoseVertex> v_order;
     v_order.reserve(num_vertices(g));
 
-    boost::topological_sort(g, back_inserter(v_order),
-                            vertex_index_map(get(&RoseVertexProps::idx, g)));
+    boost::topological_sort(g, back_inserter(v_order));
 
     unordered_map<RoseVertex, rose_group> vertex_group_map;
     vertex_group_map.reserve(num_vertices(g));
@@ -406,7 +405,7 @@ getVertexGroupMap(const RoseBuildImpl &build) {
     const rose_group initial_groups = build.getInitialGroups();
 
     for (const auto &v : boost::adaptors::reverse(v_order)) {
-        DEBUG_PRINTF("vertex %zu\n", g[v].idx);
+        DEBUG_PRINTF("vertex %zu\n", g[v].index);
 
         if (build.isAnyStart(v)) {
             DEBUG_PRINTF("start vertex, groups=0x%llx\n", initial_groups);
@@ -419,7 +418,7 @@ getVertexGroupMap(const RoseBuildImpl &build) {
         assert(in_degree(v, g) > 0);
         rose_group pred_groups = ~rose_group{0};
         for (auto u : inv_adjacent_vertices_range(v, g)) {
-            DEBUG_PRINTF("pred %zu\n", g[u].idx);
+            DEBUG_PRINTF("pred %zu\n", g[u].index);
             assert(contains(vertex_group_map, u));
             pred_groups &= vertex_group_map.at(u);
         }
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index d239a698..6b326d34 100644
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -56,6 +56,8 @@ namespace ue2 {
 
 #define ROSE_GROUPS_MAX 64
 
+#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
+
 struct BoundaryReports;
 struct CastleProto;
 struct CompileContext;
@@ -525,8 +527,6 @@ public:
     // max overlap considered for every pair (ulit, vlit).
size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; - void renumberVertices(void); - bool isPseudoStar(const RoseEdge &e) const; bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; bool hasOnlyPseudoStarInEdges(RoseVertex v) const; @@ -549,7 +549,6 @@ public: const RoseVertex anchored_root; RoseLiteralMap literals; std::map ghost; - size_t vertexIndex; ReportID getNewNfaReport() override { return next_nfa_report++; } @@ -603,6 +602,9 @@ private: ReportID next_nfa_report; }; +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired); + // Free functions, in rose_build_misc.cpp bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); @@ -615,7 +617,8 @@ ue2_literal findNonOverlappingTail(const std::set &lits, void setReportId(NGHolder &g, ReportID id); #ifndef NDEBUG -bool roseHasTops(const RoseGraph &g, RoseVertex v); +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); +bool hasOrphanedTops(const RoseBuildImpl &build); #endif u64a findMaxOffset(const std::set &reports, const ReportManager &rm); diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index e81a7b00..4bbb3525 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -108,14 +108,9 @@ void contractVertex(NGHolder &g, NFAVertex v, } static -u32 findMaxInfixMatches(const NGHolder &h, const set &lits) { +u32 findMaxLiteralMatches(const NGHolder &h, const set &lits) { DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size()); - //dumpGraph("infix.dot", h.g); - - if (!onlyOneTop(h)) { - DEBUG_PRINTF("more than one top!n"); - return NO_MATCH_LIMIT; - } + //dumpGraph("infix.dot", h); // Indices of vertices that could terminate any of the literals in 'lits'. 
    set<u32> terms;
     for (const auto &lit : lits) {
@@ -168,7 +163,7 @@ u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
     }
     remove_vertices(dead, g);
 
-    //dumpGraph("relaxed.dot", g.g);
+    //dumpGraph("relaxed.dot", g);
 
     depth maxWidth = findMaxWidth(g);
     DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
@@ -262,7 +257,11 @@ u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
         return findMaxInfixMatches(*left.castle(), lits);
     }
     if (left.graph()) {
-        return findMaxInfixMatches(*left.graph(), lits);
+        if (!onlyOneTop(*left.graph())) {
+            DEBUG_PRINTF("more than one top!\n");
+            return NO_MATCH_LIMIT;
+        }
+        return findMaxLiteralMatches(*left.graph(), lits);
     }
 
     return NO_MATCH_LIMIT;
@@ -279,7 +278,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
 
     const NGHolder &g = *left.graph();
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
     if (!proper_out_degree(g.startDs, g)) {
         cyclics.erase(g.startDs);
@@ -287,7 +286,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
     CharReach cyclic_cr;
     for (NFAVertex v : cyclics) {
-        DEBUG_PRINTF("considering %u ||=%zu\n", g[v].index,
+        DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index,
                      g[v].char_reach.count());
         cyclic_cr |= g[v].char_reach;
     }
@@ -315,7 +314,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
         lits.insert(ue2_literal(c, false));
     }
 
-    u32 count = findMaxInfixMatches(*left.graph(), lits);
+    u32 count = findMaxLiteralMatches(*left.graph(), lits);
     DEBUG_PRINTF("counting miracle %u\n", count + 1);
     if (count && count < 50) {
         *cm_count = count + 1;
diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp
new file mode 100644
index 00000000..c32f49d0
--- /dev/null
+++ b/src/rose/rose_build_long_lit.cpp
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "rose_build_long_lit.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_impl.h" +#include "stream_long_lit_hash.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/verify_types.h" +#include "util/compile_context.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +/** \brief Minimum size for a non-empty hash table. Must be a power of two. */ +static constexpr u32 MIN_HASH_TABLE_SIZE = 128; + +/** \brief Maximum load factor (between zero and one) for a hash table. */ +static constexpr double MAX_HASH_TABLE_LOAD = 0.7; + +/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */ +static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256; + +/** \brief Maximum load factor (between zero and one) for a bloom filter. */ +static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25; + +struct LongLitModeInfo { + u32 num_literals = 0; //!< Number of strings for this mode. + u32 hashed_positions = 0; //!< Number of hashable string positions. +}; + +struct LongLitInfo { + LongLitModeInfo caseful; + LongLitModeInfo nocase; +}; + +static +u32 roundUpToPowerOfTwo(u32 x) { + assert(x != 0); + u32 bits = lg2(x - 1) + 1; + assert(bits < 32); + return 1U << bits; +} + +static +LongLitInfo analyzeLongLits(const vector &lits, + size_t max_len) { + LongLitInfo info; + + for (const auto &lit : lits) { + auto &lit_info = lit.nocase ? info.nocase : info.caseful; + assert(lit.s.size() > max_len); + lit_info.num_literals++; + lit_info.hashed_positions += lit.s.size() - max_len; + } + + DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions); + DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions); + + return info; +} + +static +void addToBloomFilter(vector &bloom, const u8 *substr, bool nocase) { + const u32 num_keys = verify_u32(bloom.size() * 8); + const u32 key_mask = (1U << lg2(num_keys)) -1; + + const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; + for (const auto &hash_func : hash_functions) { + u32 hash = hash_func(substr, nocase); + u32 key = hash & key_mask; + DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); + bloom[key / 8] |= 1U << (key % 8); + } +} + +static +size_t bloomOccupancy(const vector &bloom) { + return accumulate(begin(bloom), end(bloom), 0, + [](const size_t &sum, const u8 &elem) { + return sum + popcount32(elem); + }); +} + +static +double bloomLoad(const vector &bloom) { + return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); +} + +static +vector buildBloomFilter(const vector &lits, size_t max_len, + size_t num_entries, bool nocase) { + assert(num_entries % 8 == 0); + assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. + + vector bloom(num_entries / 8, 0); + + if (!num_entries) { + return bloom; + } + + for (const auto &lit : lits) { + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + addToBloomFilter(bloom, substr, nocase); + } + } + + DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", bloomOccupancy(bloom), + num_entries); + + return bloom; +} + + +static +vector makeBloomFilter(const vector &lits, + size_t max_len, bool nocase) { + vector bloom; + + size_t num_entries = MIN_BLOOM_FILTER_SIZE; + for (;;) { + bloom = buildBloomFilter(lits, max_len, num_entries, nocase); + DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n", + nocase ? 
"nocase" : "caseful", num_entries, + bloomLoad(bloom)); + if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { + break; + } + num_entries *= 2; + } + return bloom; +} + +static +size_t hashTableOccupancy(const vector &tab) { + return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); +} + +static +double hashTableLoad(const vector &tab) { + return (double)hashTableOccupancy(tab) / (double)(tab.size()); +} + +static +vector buildHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + size_t numEntries, bool nocase) { + vector tab(numEntries, {0,0}); + + if (!numEntries) { + return tab; + } + + map>> hashToLitOffPairs; + + for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { + const ue2_case_string &lit = lits[lit_id]; + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + u32 hash = hashLongLiteral(substr, max_len, lit.nocase); + hashToLitOffPairs[hash].emplace_back(lit_id, offset); + } + } + + for (auto &m : hashToLitOffPairs) { + u32 hash = m.first; + vector> &d = m.second; + + // Sort by (offset, string) so that we'll be able to remove identical + // string prefixes. + stable_sort(begin(d), end(d), + [&](const pair &a, const pair &b) { + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + return tie(a.second, str_a) < tie(b.second, str_b); + }); + + // Remove entries that point to the same literal prefix. + d.erase(unique(begin(d), end(d), + [&](const pair &a, const pair &b) { + if (a.second != b.second) { + return false; + } + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + const size_t len = max_len + a.second; + return equal(begin(str_a), begin(str_a) + len, + begin(str_b)); + }), + end(d)); + + // Sort d by distance of the residual string (len minus our depth into + // the string). We need to put the 'furthest back' string first. + stable_sort(begin(d), end(d), + [](const pair &a, const pair &b) { + if (a.second != b.second) { + return a.second > b.second; /* longest is first */ + } + return a.first < b.first; + }); + + u32 bucket = hash % numEntries; + + // Placement via linear probing. + for (const auto &lit_offset : d) { + while (tab[bucket].str_offset != 0) { + bucket++; + if (bucket == numEntries) { + bucket = 0; + } + } + + u32 lit_id = lit_offset.first; + u32 offset = lit_offset.second; + + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, + lit_id, offset, bucket); + + auto &entry = tab[bucket]; + entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); + assert(entry.str_offset != 0); + entry.str_len = offset + max_len; + } + } + + DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", hashTableOccupancy(tab), + numEntries); + + return tab; +} + +static +vector makeHashTable(const vector &lits, + size_t max_len, + const vector &litToOffsetVal, + u32 numPositions, bool nocase) { + vector tab; + + // Note: for the hash table, we must always have at least enough entries + // for the number of hashable positions. + size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, + numPositions)); + + for (;;) { + tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries, + nocase); + DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", + nocase ? 
"nocase" : "caseful", num_entries, + hashTableLoad(tab)); + if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) { + break; + } + num_entries *= 2; + } + return tab; +} + +static +vector buildLits(const vector &lits, u32 baseOffset, + vector &litToOffsetVal) { + vector blob; + litToOffsetVal.resize(lits.size(), 0); + + u32 lit_id = 0; + for (const auto &lit : lits) { + u32 offset = baseOffset + verify_u32(blob.size()); + blob.insert(blob.end(), begin(lit.s), end(lit.s)); + litToOffsetVal[lit_id] = offset; + lit_id++; + } + + DEBUG_PRINTF("built %zu bytes of strings\n", blob.size()); + return blob; +} + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired) { + // Work in terms of history requirement (i.e. literal len - 1). + const size_t max_len = longLitLengthThreshold - 1; + + // We should only be building the long literal hash table in streaming mode. + if (!build.cc.streaming) { + return 0; + } + + if (lits.empty()) { + DEBUG_PRINTF("no long literals\n"); + return 0; + } + + // The last char of each literal is trimmed as we're not interested in full + // matches, only partial matches. + for (auto &lit : lits) { + assert(!lit.s.empty()); + lit.s.pop_back(); + } + + // Sort by caseful/caseless and in lexicographical order. + stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, + const ue2_case_string &b) { + if (a.nocase != b.nocase) { + return a.nocase < b.nocase; + } + return a.s < b.s; + }); + + // Find literals that are prefixes of other literals (including + // duplicates). Note that we iterate in reverse, since we want to retain + // only the longest string from a set of prefixes. + auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, + const ue2_case_string &b) { + return a.nocase == b.nocase && a.s.size() >= b.s.size() && + equal(b.s.begin(), b.s.end(), a.s.begin()); + }); + + // Erase dupes found by unique(). + lits.erase(lits.begin(), it.base()); + + LongLitInfo info = analyzeLongLits(lits, max_len); + + vector litToOffsetVal; + const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector lit_blob = buildLits(lits, headerSize, litToOffsetVal); + + // Build caseful bloom filter and hash table. + vector bloom_case; + vector tab_case; + if (info.caseful.num_literals) { + bloom_case = makeBloomFilter(lits, max_len, false); + tab_case = makeHashTable(lits, max_len, litToOffsetVal, + info.caseful.hashed_positions, false); + } + + // Build nocase bloom filter and hash table. 
+ vector bloom_nocase; + vector tab_nocase; + if (info.nocase.num_literals) { + bloom_nocase = makeBloomFilter(lits, max_len, true); + tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, + info.nocase.hashed_positions, true); + } + + size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); + size_t htOffsetCase = headerSize + wholeLitTabSize; + size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); + size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); + size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); + + size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); + + // need to add +2 to both of these to allow space for the actual largest + // value as well as handling the fact that we add one to the space when + // storing out a position to allow zero to mean "no stream state value" + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); + u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); + u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; + + auto table = aligned_zmalloc_unique(tabSize); + assert(table); // otherwise would have thrown std::bad_alloc + + // Fill in the RoseLongLitTable header structure. + RoseLongLitTable *header = (RoseLongLitTable *)(table.get()); + header->size = verify_u32(tabSize); + header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 + header->caseful.hashOffset = verify_u32(htOffsetCase); + header->caseful.hashBits = lg2(tab_case.size()); + header->caseful.streamStateBits = streamBitsCase; + header->caseful.bloomOffset = verify_u32(bloomOffsetCase); + header->caseful.bloomBits = lg2(bloom_case.size() * 8); + header->nocase.hashOffset = verify_u32(htOffsetNocase); + header->nocase.hashBits = lg2(tab_nocase.size()); + header->nocase.streamStateBits = streamBitsNocase; + header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); + header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); + assert(tot_state_bytes < sizeof(u64a)); + header->streamStateBytes = verify_u8(tot_state_bytes); // u8 + + // Copy in the literal strings, hash tables and bloom filters, + copy_bytes(table.get() + headerSize, lit_blob); + copy_bytes(table.get() + htOffsetCase, tab_case); + copy_bytes(table.get() + bloomOffsetCase, bloom_case); + copy_bytes(table.get() + htOffsetNocase, tab_nocase); + copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); + + DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); + DEBUG_PRINTF("requires %zu bytes of history\n", max_len); + DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes); + + *historyRequired = max(*historyRequired, max_len); + *longLitStreamStateRequired = tot_state_bytes; + + return blob.add(table.get(), tabSize, 16); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_long_lit.h b/src/rose/rose_build_long_lit.h new file mode 100644 index 00000000..a77b1b69 --- /dev/null +++ b/src/rose/rose_build_long_lit.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_LONG_LIT_H +#define ROSE_BUILD_LONG_LIT_H + +#include "ue2common.h" + +#include + +namespace ue2 { + +class RoseBuildImpl; +class RoseEngineBlob; +struct ue2_case_string; + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + std::vector &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired); + +} // namespace ue2 + + +#endif // ROSE_BUILD_LONG_LIT_H diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index ba77b402..10bd59de 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,7 +72,7 @@ void getForwardReach(const NGHolder &g, u32 top, map &look) { if (v == g.startDs) { continue; } - if (g[e].top == top) { + if (contains(g[e].tops, top)) { curr.insert(v); } } @@ -261,7 +261,7 @@ void findForwardReach(const RoseGraph &g, const RoseVertex v, for (const auto &e : out_edges_range(v, g)) { RoseVertex t = target(e, g); if (!g[t].left) { - DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].idx); + DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index); return; } rose_look.push_back(map()); @@ -460,17 +460,41 @@ void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, } } +static +map findLiteralReach(const rose_literal_id &lit) { + map look; + + u32 i = lit.delay + 1; + for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) { + look[0 - i] |= *it; + i++; + } + + return look; +} + static map findLiteralReach(const RoseBuildImpl &build, const RoseVertex v) { + bool first = true; map look; for (u32 lit_id : build.g[v].literals) { const rose_literal_id &lit = build.literals.right.at(lit_id); + auto lit_look = findLiteralReach(lit); - u32 i = lit.delay + 1; - for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) { - look[0 - i] |= *it; - i++; + if (first) { + look = move(lit_look); + first = false; + } else { + for (auto it = look.begin(); it != look.end();) { + auto l_it = lit_look.find(it->first); + if (l_it == lit_look.end()) { + 
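                    /* This offset is not reachable under the current literal,
                     * so nothing can be asserted about it for the role as a
                     * whole: the merged map keeps only offsets common to all
                     * literals, intersecting keys and unioning reach. */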
it = look.erase(it); + } else { + it->second |= l_it->second; + ++it; + } + } } } @@ -585,7 +609,7 @@ bool getTransientPrefixReach(const NGHolder &g, u32 lag, NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first); u32 i = lag + 1; while (v != g.startDs) { - DEBUG_PRINTF("i=%u, v=%u\n", i, g[v].index); + DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index); if (is_special(v, g)) { DEBUG_PRINTF("special\n"); return false; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 2eb70f60..01633c06 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -102,7 +102,7 @@ bool maskFromLeftGraph(const LeftEngInfo &left, vector &msk, CharReach cr; for (NFAVertex v : curr) { const auto &v_cr = h[v].char_reach; - DEBUG_PRINTF("vertex %u, reach %s\n", h[v].index, + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, describeClass(v_cr).c_str()); cr |= v_cr; insert(&next, inv_adjacent_vertices(v, h)); @@ -438,45 +438,43 @@ static bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { const RoseGraph &g = build.g; if (!g[u].isBoring()) { - DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx); + DEBUG_PRINTF("u=%zu is not boring\n", g[u].index); return false; } if (!g[u].reports.empty()) { - DEBUG_PRINTF("u=%zu has accept\n", g[u].idx); + DEBUG_PRINTF("u=%zu has accept\n", g[u].index); return false; } /* TODO: handle non-root roles as well. It can't be that difficult... */ - if (!in_degree_equal_to(u, g, 1)) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + if (in_degree(u, g) != 1) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } - RoseEdge e; - bool exists; - tie(e, exists) = edge_by_target(build.root, u, g); + RoseEdge e = edge(build.root, u, g); - if (!exists) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].idx); + if (!e) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); return false; } if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].idx); + DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].index); return false; } for (const auto &oe : out_edges_range(u, g)) { RoseVertex v = target(oe, g); if (g[oe].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].idx, - g[target(oe, g)].idx); + DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].index, + g[v].index); return false; } if (g[v].left) { - DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].idx); + DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].index); return false; } } @@ -485,7 +483,7 @@ bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { static bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info) { + const rose_literal_info &info, const size_t max_len) { DEBUG_PRINTF("lit id %u\n", id); if (info.requires_benefits) { @@ -493,6 +491,11 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } + if (build.literals.right.at(id).s.length() > max_len) { + DEBUG_PRINTF("requires literal check\n"); + return false; + } + if (isDirectHighlander(build, id, info)) { DEBUG_PRINTF("highlander direct report\n"); return true; @@ -558,7 +561,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, u64a lit_min_offset = UINT64_MAX; for (const auto &v : info.vertices) { - DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].idx, g[v].min_offset); + DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].index, g[v].min_offset); u64a vert_offset = g[v].min_offset; @@ -625,7 +628,7 @@ u64a 
literalMinReportOffset(const RoseBuildImpl &build, vector fillHamsterLiteralList(const RoseBuildImpl &build, rose_literal_table table, - u32 max_offset) { + size_t max_len, u32 max_offset) { vector lits; for (const auto &e : build.literals.right) { @@ -663,10 +666,14 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, const vector &msk = e.second.msk; const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info); + bool noruns = isNoRunsLiteral(build, id, info, max_len); if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); + + // We do not require_explode for long literals. + assert(lit.length() <= max_len); + case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); for (; cit != cite; ++cit) { @@ -687,20 +694,28 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, msk, cmp); } } else { - const std::string &s = lit.get_string(); - const bool nocase = lit.any_nocase(); + string s = lit.get_string(); + bool nocase = lit.any_nocase(); DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " "cmp=%s\n", final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + if (s.length() > max_len) { + DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); + s.erase(0, s.length() - max_len); + // We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= max_len); + } + if (!maskIsConsistent(s, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; } - lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp); + lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } } @@ -708,14 +723,15 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired) { + size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING); + auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, + longLitLengthThreshold); if (fl.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -747,13 +763,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, if (build.cc.streaming) { DEBUG_PRINTF("literal_history_required=%zu\n", ctl.literal_history_required); - DEBUG_PRINTF("literal_stream_state_required=%zu\n", - ctl.literal_stream_state_required); assert(ctl.literal_history_required <= build.cc.grey.maxHistoryAvailable); *historyRequired = max(*historyRequired, ctl.literal_history_required); - *streamStateRequired = ctl.literal_stream_state_required; } *fsize = hwlmSize(ftable.get()); @@ -778,8 +791,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList(build, ROSE_FLOATING, - ROSE_SMALL_BLOCK_LEN); + auto lits = fillHamsterLiteralList( + build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; @@ -788,8 +801,9 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto anchored_lits = fillHamsterLiteralList(build, - ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN); + auto anchored_lits = + fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (anchored_lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); 
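        /* (Rationale inferred, not stated in the patch: with no anchored
         * small-block literals, a dedicated small-block matcher would add
         * nothing over the regular floating table, so none is built.) */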
return nullptr; @@ -823,7 +837,8 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize) { *esize = 0; - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); + auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, + build.ematcher_region_size); if (el.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 2a225bf5..a25dbca3 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -51,13 +51,14 @@ struct hwlmLiteral; * only lead to a pattern match after max_offset may be excluded. */ std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, u32 max_offset = ROSE_BOUND_INF); + rose_literal_table table, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, - size_t *historyRequired, - size_t *streamStateRequired); + size_t *historyRequired); aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, size_t *sbsize); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 759e0dbe..54a7390e 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -53,7 +53,6 @@ #include "nfagraph/ng_redundancy.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_stop.h" #include "nfagraph/ng_uncalc_components.h" #include "nfagraph/ng_util.h" @@ -207,8 +206,9 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting vertex idx=%zu in_degree %zu out_degree %zu\n", - g[v].idx, in_degree(v, g), out_degree(v, g)); + DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " + "out_degree %zu\n", g[v].index, in_degree(v, g), + out_degree(v, g)); // Vertex must be a reporting leaf node if (g[v].reports.empty() || !isLeafNode(v, g)) { @@ -228,24 +228,22 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { } RoseVertex t = leaves.find(dupe)->second; - DEBUG_PRINTF("found two leaf dupe roles, idx=%zu,%zu\n", g[v].idx, - g[t].idx); + DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, + g[t].index); vector deadEdges; for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); - DEBUG_PRINTF("u idx=%zu\n", g[u].idx); - RoseEdge et; - bool exists; - tie (et, exists) = edge(u, t, g); - if (exists) { + DEBUG_PRINTF("u index=%zu\n", g[u].index); + if (RoseEdge et = edge(u, t, g)) { if (g[et].minBound <= g[e].minBound && g[et].maxBound >= g[e].maxBound) { DEBUG_PRINTF("remove more constrained edge\n"); deadEdges.push_back(e); } } else { - DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].idx, g[t].idx); + DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, + g[t].index); add_edge(u, t, g[e], g); deadEdges.push_back(e); } @@ -280,7 +278,7 @@ void mergeDupeLeaves(RoseBuildImpl &tbi) { // if we've removed anything, we need to renumber vertices if (countRemovals) { - tbi.renumberVertices(); + renumber_vertices(g); DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); } } @@ -313,8 +311,7 @@ void mergeCluster(RoseGraph &g, const ReportManager &rm, it = it2; DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); - map merged; - mergeNfaCluster(cluster, &rm, merged, cc); + auto merged = mergeNfaCluster(cluster, &rm, cc); DEBUG_PRINTF("done\n"); for (const auto &m : merged) { @@ -351,7 +348,7 @@ void 
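mergeDupeLeaves() above uses the idiom `if (RoseEdge et = edge(u, t, g))`, which relies on the ue2_graph edge descriptor being contextually convertible to bool (null when the edge is absent), replacing the `tie(e, exists)` pair returned by the Boost adjacency_list API. A toy illustration of the idiom, assuming nothing about the real descriptor beyond that conversion:

#include <cstdio>

// Toy edge descriptor: contextually convertible to bool, null when the
// edge does not exist. The real ue2_graph descriptor is more involved.
struct Edge {
    const void *p;
    explicit operator bool() const { return p != nullptr; }
};

Edge lookup_edge(bool exists) {
    static const int dummy = 0;
    return exists ? Edge{&dummy} : Edge{nullptr};
}

int main() {
    if (Edge e = lookup_edge(true)) {
        std::printf("edge found: %p\n", e.p); // declare and test in one step
    }
}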
findUncalcLeavesCandidates(RoseBuildImpl &tbi, // Ref count all suffixes, as we don't want to merge a suffix // that happens to be shared with a non-leaf vertex somewhere. - DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].idx, + DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, g[v].suffix.graph.get()); fcount[g[v].suffix.graph.get()]++; @@ -460,7 +457,7 @@ struct RoseGroup { const RoseGraph &g = build.g; assert(in_degree(v, g) == 1); RoseVertex u = *inv_adjacent_vertices(v, g).first; - parent = g[u].idx; + parent = g[u].index; } bool operator<(const RoseGroup &b) const { @@ -581,14 +578,14 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { } // Scan the rest of the list for dupes. - for (auto kt = next(jt); kt != jte; ++kt) { + for (auto kt = std::next(jt); kt != jte; ++kt) { if (g[v].left == g[*kt].left || !rosecmp(v, *kt)) { continue; } // Dupe found. DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", - g[*kt].idx, g[v].idx); + g[*kt].index, g[v].index); assert(g[v].left.lag == g[*kt].left.lag); g[*kt].left = g[v].left; work_done = true; @@ -1071,8 +1068,8 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, return false; } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx, - tbi.g[v].idx); + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, + tbi.g[v].index); return true; } @@ -1388,7 +1385,7 @@ void processMergeQueue(RoseBuildImpl &tbi, RoseBouquet &roses, static bool nfaHasNarrowStart(const NGHolder &g) { - if (hasGreaterOutDegree(1, g.startDs, g)) { + if (out_degree(g.startDs, g) > 1) { return false; // unanchored } @@ -1410,7 +1407,7 @@ bool nfaHasFiniteMaxWidth(const NGHolder &g) { namespace { struct RoseMergeKey { - RoseMergeKey(const RoseVertexSet &parents_in, + RoseMergeKey(const set &parents_in, bool narrowStart_in, bool hasMaxWidth_in) : narrowStart(narrowStart_in), hasMaxWidth(hasMaxWidth_in), @@ -1428,7 +1425,7 @@ struct RoseMergeKey { bool narrowStart; bool hasMaxWidth; - RoseVertexSet parents; + set parents; }; } @@ -1457,11 +1454,7 @@ bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { static u32 commonPrefixLength(left_id &r1, left_id &r2) { if (r1.graph() && r2.graph()) { - auto &g1 = *r1.graph(); - auto &g2 = *r2.graph(); - auto state_ids_1 = numberStates(g1); - auto state_ids_2 = numberStates(g2); - return commonPrefixLength(g1, state_ids_1, g2, state_ids_2); + return commonPrefixLength(*r1.graph(), *r2.graph()); } else if (r1.castle() && r2.castle()) { return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); } @@ -1496,7 +1489,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { map rosesByParent; RoseGraph &g = tbi.g; - RoseVertexSet parents(g); + set parents; DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); @@ -1631,7 +1624,7 @@ struct DedupeLeftKey { : left_hash(hashLeftfix(build.g[v].left)) { const auto &g = build.g; for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].idx, g[e].rose_top); + preds.emplace(g[source(e, g)].index, g[e].rose_top); } } @@ -1731,7 +1724,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { for (auto v : verts1) { DEBUG_PRINTF("replacing report %u with %u on %zu\n", g[v].left.leftfix_report, - v2_left.leftfix_report, g[v].idx); + v2_left.leftfix_report, g[v].index); u32 orig_lag = g[v].left.lag; g[v].left = v2_left; g[v].left.lag = orig_lag; @@ -1750,7 +1743,6 @@ u32 findUnusedTop(const ue2::flat_set &tops) { while (contains(tops, i)) { i++; } - assert(i < NFA_MAX_TOP_MASKS); return i; } @@ -1762,9 +1754,12 @@ void 
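findUnusedTop() above simply scans upward for the first top value not in use; with the per-NFA top ceiling removed elsewhere in this change, the old `assert(i < NFA_MAX_TOP_MASKS)` guard is gone. The same loop, sketched over a plain std::set:

#include <set>

// Sketch of findUnusedTop(): return the smallest top value not already in
// use. No upper bound is asserted now that the top-count ceiling is gone.
unsigned findUnusedTopSketch(const std::set<unsigned> &tops) {
    unsigned i = 0;
    while (tops.count(i)) {
        ++i;
    }
    return i;
}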
replaceTops(NGHolder &h, const map &top_mapping) { if (v == h.startDs) { continue; } - DEBUG_PRINTF("vertex %u has top %u\n", h[v].index, h[e].top); - assert(contains(top_mapping, h[e].top)); - h[e].top = top_mapping.at(h[e].top); + flat_set new_tops; + for (u32 t : h[e].tops) { + DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); + new_tops.insert(top_mapping.at(t)); + } + h[e].tops = move(new_tops); } } @@ -1776,11 +1771,6 @@ bool setDistinctTops(NGHolder &h1, const NGHolder &h2, DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); - if (tops1.size() + tops2.size() > NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("too many tops!\n"); - return false; - } - // If our tops don't intersect, we're OK to merge with no changes. if (!has_intersection(tops1, tops2)) { DEBUG_PRINTF("tops don't intersect\n"); @@ -1814,7 +1804,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); assert(!g[v].left.haig); assert(!g[v].left.dfa); for (const auto &e : in_edges_range(v, g)) { @@ -1823,7 +1813,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, assert(contains(top_mapping, t)); g[e].rose_top = top_mapping[t]; DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", - g[source(e, g)].idx, g[target(e, g)].idx, t, + g[source(e, g)].index, g[target(e, g)].index, t, top_mapping[t]); } } @@ -1844,7 +1834,7 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); + DEBUG_PRINTF("vertex %zu\n", g[v].index); u32 t = g[v].suffix.top; assert(contains(top_mapping, t)); g[v].suffix.top = top_mapping[t]; @@ -1853,11 +1843,6 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, return true; } -static -bool hasMaxTops(const NGHolder &h) { - return getTops(h).size() == NFA_MAX_TOP_MASKS; -} - /** \brief Estimate the number of accel states in the given graph when built as * an NFA. * @@ -1896,11 +1881,6 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { "with %p (%zu verts)\n", r1.graph(), verts1.size(), r2.graph(), verts2.size()); - if (hasMaxTops(*r1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - u32 accel1 = accel_count[r1]; if (accel1 >= NFA_MAX_ACCEL_STATES) { DEBUG_PRINTF("h1 has hit max accel\n"); @@ -2189,17 +2169,17 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, suffix_id s1 = *it; const deque &verts1 = suffixes.vertices(s1); assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); + + // Caller should ensure that we don't propose merges of graphs that are + // already too big. 
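With the NFA_MAX_TOP_MASKS check deleted from setDistinctTops() above, two graphs can always be given disjoint top sets before merging. A rough sketch of one remapping strategy using plain containers; this is an assumption-laden illustration (the real code only builds a mapping when the top sets intersect, and rewrites the per-edge `tops` sets via replaceTops()):

#include <map>
#include <set>

// Sketch: given the tops used by two graphs, build a mapping that shifts
// h2's tops above h1's range so the merged graph's tops stay distinct.
std::map<unsigned, unsigned>
makeDistinctTopMapping(const std::set<unsigned> &tops1,
                       const std::set<unsigned> &tops2) {
    std::map<unsigned, unsigned> mapping;
    unsigned next = tops1.empty() ? 0 : *tops1.rbegin() + 1;
    for (unsigned t : tops2) {
        mapping[t] = next++; // remap each of h2's tops to a fresh value
    }
    return mapping;
}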
+ assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); + deque merged; for (auto jt = next(it); jt != suffixes.end(); ++jt) { suffix_id s2 = *jt; const deque &verts2 = suffixes.vertices(s2); assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); - if (hasMaxTops(*s1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - if (!acyclic) { u32 accel1 = accel_count[s1]; if (accel1 >= NFA_MAX_ACCEL_STATES) { @@ -2306,6 +2286,10 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { assert(!g[v].suffix.haig); + if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { + continue; + } + if (!isAcyclic(*h)) { continue; } @@ -2429,7 +2413,8 @@ map chunkedNfaMerge(RoseBuildImpl &build, batch.push_back(*it); assert((*it)->kind == NFA_OUTFIX); if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { - mergeNfaCluster(batch, &build.rm, merged, build.cc); + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); batch.clear(); } } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index c2f9f580..28b885bd 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -75,7 +75,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, : cc(cc_in), root(add_vertex(g)), anchored_root(add_vertex(g)), - vertexIndex(0), delay_base_id(MO_INVALID_IDX), hasSom(false), group_end(0), @@ -89,11 +88,9 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, boundary(boundary_in), next_nfa_report(0) { // add root vertices to graph - g[root].idx = vertexIndex++; g[root].min_offset = 0; g[root].max_offset = 0; - g[anchored_root].idx = vertexIndex++; g[anchored_root].min_offset = 0; g[anchored_root].max_offset = 0; } @@ -193,7 +190,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v, bool RoseBuildImpl::hasNoFloatingRoots() const { for (auto v : adjacent_vertices_range(root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("direct floating root %zu\n", g[v].idx); + DEBUG_PRINTF("direct floating root %zu\n", g[v].index); return false; } } @@ -201,7 +198,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { /* need to check if the anchored_root has any literals which are too deep */ for (auto v : adjacent_vertices_range(anchored_root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("indirect floating root %zu\n", g[v].idx); + DEBUG_PRINTF("indirect floating root %zu\n", g[v].index); return false; } } @@ -337,14 +334,14 @@ size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const { void RoseBuildImpl::removeVertices(const vector &dead) { for (auto v : dead) { assert(!isAnyStart(v)); - DEBUG_PRINTF("removing vertex %zu\n", g[v].idx); + DEBUG_PRINTF("removing vertex %zu\n", g[v].index); for (auto lit_id : g[v].literals) { literal_info[lit_id].vertices.erase(v); } - clear_vertex_faster(v, g); + clear_vertex(v, g); remove_vertex(v, g); } - renumberVertices(); + renumber_vertices(g); } // Find the maximum bound on the edges to this vertex's successors ignoring @@ -893,7 +890,6 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { // Note: only clones the vertex, you'll have to wire up your own edges. 
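chunkedNfaMerge() above feeds candidates to mergeNfaCluster() in batches of MERGE_GROUP_SIZE_MAX, accumulating the merged results as it goes. The batching skeleton, written generically (mergeInChunks and kGroupMax are illustrative names):

#include <cstddef>
#include <vector>

// Sketch of the batching pattern in chunkedNfaMerge: process candidates
// in groups of at most kGroupMax so each merge pass stays tractable.
template <typename T, typename MergeFn>
void mergeInChunks(const std::vector<T> &items, std::size_t kGroupMax,
                   MergeFn merge) {
    std::vector<T> batch;
    for (std::size_t i = 0; i < items.size(); ++i) {
        batch.push_back(items[i]);
        if (batch.size() == kGroupMax || i + 1 == items.size()) {
            merge(batch); // merge this chunk, then start a fresh one
            batch.clear();
        }
    }
}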
RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { RoseVertex v2 = add_vertex(g[v], g); - g[v2].idx = vertexIndex++; for (const auto &lit_id : g[v2].literals) { literal_info[lit_id].vertices.insert(v2); @@ -903,12 +899,15 @@ RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { } #ifndef NDEBUG -bool roseHasTops(const RoseGraph &g, RoseVertex v) { +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { + const RoseGraph &g = build.g; assert(g[v].left); set graph_tops; - for (const auto &e : in_edges_range(v, g)) { - graph_tops.insert(g[e].rose_top); + if (!build.isRootSuccessor(v)) { + for (const auto &e : in_edges_range(v, g)) { + graph_tops.insert(g[e].rose_top); + } } return is_subset_of(graph_tops, all_tops(g[v].left)); @@ -1073,18 +1072,9 @@ bool has_non_eod_accepts(const suffix_id &s) { set all_tops(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { - set tops; - const NGHolder &h = *s.graph(); - for (const auto &e : out_edges_range(h.start, h)) { - if (target(e, h) == h.startDs) { - continue; - } - tops.insert(h[e].top); - } - if (tops.empty()) { - tops.insert(0); // Vacuous graph, triggered on zero top. - } - return tops; + flat_set tops = getTops(*s.graph()); + assert(!tops.empty()); + return {tops.begin(), tops.end()}; } if (s.castle()) { @@ -1142,18 +1132,8 @@ depth findMaxWidth(const left_id &r) { set all_tops(const left_id &r) { assert(r.graph() || r.castle() || r.haig() || r.dfa()); if (r.graph()) { - set tops; - const NGHolder &h = *r.graph(); - for (const auto &e : out_edges_range(h.start, h)) { - if (target(e, h) == h.startDs) { - continue; - } - tops.insert(h[e].top); - } - if (tops.empty()) { - tops.insert(0); // Vacuous graph, triggered on zero top. - } - return tops; + flat_set tops = getTops(*r.graph()); + return {tops.begin(), tops.end()}; } if (r.castle()) { @@ -1226,7 +1206,7 @@ u32 roseQuality(const RoseEngine *t) { } const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable)); - if (nfa->type != MCCLELLAN_NFA_8) { + if (!isSmallDfaType(nfa->type)) { DEBUG_PRINTF("m16 atable engine\n"); return 0; } @@ -1293,7 +1273,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // First, check the Rose leftfixes. for (auto v : vertices_range(g)) { - DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].idx); + DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index); if (g[v].left.castle) { DEBUG_PRINTF("castle ok\n"); @@ -1309,10 +1289,10 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { } if (g[v].left.graph) { assert(g[v].left.graph->kind - == tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX); + == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) { - DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", g[v].idx, - num_vertices(*g[v].left.graph)); + DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*g[v].left.graph)); return false; } } @@ -1321,7 +1301,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // Suffix graphs. 
for (auto v : vertices_range(g)) { - DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].idx); + DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index); const RoseSuffixInfo &suffix = g[v].suffix; if (suffix.castle) { @@ -1339,8 +1319,8 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { if (suffix.graph) { assert(suffix.graph->kind == NFA_SUFFIX); if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) { - DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", g[v].idx, - num_vertices(*suffix.graph)); + DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*suffix.graph)); return false; } } @@ -1348,6 +1328,49 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { return true; } + +bool hasOrphanedTops(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + + ue2::unordered_map > roses; + ue2::unordered_map > suffixes; + + for (auto v : vertices_range(g)) { + if (g[v].left) { + set &tops = roses[g[v].left]; + if (!build.isRootSuccessor(v)) { + // Tops for infixes come from the in-edges. + for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + if (g[v].suffix) { + suffixes[g[v].suffix].insert(g[v].suffix.top); + } + } + + for (const auto &e : roses) { + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + for (const auto &e : suffixes) { + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + return false; +} + #endif // NDEBUG } // namespace ue2 diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp new file mode 100644 index 00000000..ee237639 --- /dev/null +++ b/src/rose/rose_build_program.cpp @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
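hasOrphanedTops() above is a debug-build consistency check: the tops recorded on the Rose graph for each engine must exactly match that engine's declared all_tops() set. A simplified sketch of the comparison, with Engine standing in for the left_id/suffix_id keys:

#include <map>
#include <set>

// Sketch of the hasOrphanedTops() check: gather the tops actually used by
// the graph per engine, then flag any engine whose declared top set
// differs. Engine is a hypothetical stand-in for the real key types.
template <typename Engine>
bool topsAreOrphaned(const std::map<Engine, std::set<unsigned>> &used,
                     const std::map<Engine, std::set<unsigned>> &declared) {
    for (const auto &e : used) {
        auto it = declared.find(e.first);
        if (it == declared.end() || it->second != e.second) {
            return true; // mismatch: some top is orphaned or missing
        }
    }
    return false;
}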
+ */ + +#include "rose_build_engine_blob.h" +#include "rose_build_program.h" +#include "util/container.h" +#include "util/multibit_build.h" +#include "util/verify_types.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +/* Destructors to avoid weak vtables. */ + +RoseInstruction::~RoseInstruction() = default; +RoseInstrCatchUp::~RoseInstrCatchUp() = default; +RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; +RoseInstrSomZero::~RoseInstrSomZero() = default; +RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; +RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; +RoseInstrEnd::~RoseInstrEnd() = default; + +using OffsetMap = RoseInstruction::OffsetMap; + +static +u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, + const RoseInstruction *to) { + DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); + assert(from && contains(offset_map, from)); + assert(to && contains(offset_map, to)); + + u32 from_offset = offset_map.at(from); + u32 to_offset = offset_map.at(to); + DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); + assert(from_offset <= to_offset); + + return to_offset - from_offset; +} + +void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; + inst->done_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_offset = min_offset; +} + +void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_bound = min_bound; + inst->max_bound = max_bound; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->key = key; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->offset = offset; + inst->reach_index = reach_index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->count = count; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = 
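calc_jump() above encodes control transfers as byte distances: every instruction's offset is recorded in an OffsetMap during layout, and a jump is simply the difference between the target's and the source's offsets. A standalone sketch (calcJump is an illustrative name):

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Sketch of the calc_jump scheme: instructions are laid out first, then
// each jump becomes the byte distance between the recorded offsets of the
// source and target. Only forward jumps are expected, hence the assert.
using OffsetMap = std::unordered_map<const void *, std::uint32_t>;

std::uint32_t calcJump(const OffsetMap &offsets, const void *from,
                       const void *to) {
    std::uint32_t from_off = offsets.at(from);
    std::uint32_t to_off = offsets.at(to);
    assert(from_off <= to_off);
    return to_off - from_off;
}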
static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(and_mask), end(and_mask), inst->and_mask); + copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->negation = negation; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), + inst->bucket_select_mask_lo); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + 
inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->delay = delay; + inst->index = index; +} + +void RoseInstrRecordAnchored::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->id = id; +} + +void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->distance = distance; +} + +void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; +} + +void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->cancel = cancel; + inst->queue = queue; + inst->event = event; +} + +void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->event = event; +} + +void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->event = event; + inst->top_squash_distance = top_squash_distance; +} + +void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto 
*inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ekey = ekey; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->end_adj = end_adj; + inst->min_length = min_length; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; +} + +void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Resolve and write the multibit sparse iterator and the jump table. 
+ vector keys; + vector jump_offsets; + for (const auto &jump : jump_table) { + keys.push_back(jump.first); + assert(contains(offset_map, jump.second)); + jump_offsets.push_back(offset_map.at(jump.second)); + } + + vector iter; + mmbBuildSparseIterator(iter, keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); + inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); + + // Store offsets for corresponding SPARSE_ITER_NEXT operations. + is_written = true; + iter_offset = inst->iter_offset; + jump_table_offset = inst->jump_table; +} + +void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->state = state; + inst->fail_jump = calc_jump(offset_map, this, target); + + // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN + // instruction. + assert(begin); + assert(contains(offset_map, begin)); + assert(begin->is_written); + inst->iter_offset = begin->iter_offset; + inst->jump_table = begin->jump_table_offset; +} + +void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Write the multibit sparse iterator. + vector iter; + mmbBuildSparseIterator(iter, keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); +} + +void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->iter_offset = iter_offset; +} + +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); +} + +static +OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { + OffsetMap offset_map; + u32 offset = 0; + for (const auto &ri : program) { + offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(), + ri->code(), offset); + assert(!contains(offset_map, ri.get())); + offset_map.emplace(ri.get(), offset); + offset += ri->byte_length(); + } + *total_len = offset; + return offset_map; +} + +aligned_unique_ptr +writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) { + const auto offset_map = makeOffsetMap(program, total_len); + DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), *total_len); + + auto bytecode = aligned_zmalloc_unique(*total_len); + char *ptr = bytecode.get(); + + for (const auto &ri : program) { + assert(contains(offset_map, ri.get())); + const u32 offset = offset_map.at(ri.get()); + ri->write(ptr + offset, blob, offset_map); + } + + return bytecode; +} + +bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, + const RoseProgram 
&prog2) const { + if (prog1.size() != prog2.size()) { + return false; + } + + u32 len_1 = 0, len_2 = 0; + const auto offset_map_1 = makeOffsetMap(prog1, &len_1); + const auto offset_map_2 = makeOffsetMap(prog2, &len_2); + + if (len_1 != len_2) { + return false; + } + + auto is_equiv = [&](const unique_ptr &a, + const unique_ptr &b) { + assert(a && b); + return a->equiv(*b, offset_map_1, offset_map_2); + }; + + return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h new file mode 100644 index 00000000..0c725b46 --- /dev/null +++ b/src/rose/rose_build_program.h @@ -0,0 +1,1967 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_PROGRAM_H +#define ROSE_BUILD_PROGRAM_H + +#include "rose_build_impl.h" +#include "rose_program.h" +#include "som/som_operation.h" +#include "util/alloc.h" +#include "util/container.h" +#include "util/hash.h" +#include "util/make_unique.h" +#include "util/ue2_containers.h" +#include "util/ue2string.h" + +#include +#include +#include + +#include +#include + +namespace ue2 { + +class RoseEngineBlob; + +/** + * \brief Abstract base class representing a single Rose instruction. + */ +class RoseInstruction { +public: + virtual ~RoseInstruction(); + + /** \brief Opcode used for the instruction in the bytecode. */ + virtual RoseInstructionCode code() const = 0; + + /** + * \brief Simple hash used for program equivalence. + * + * Note that pointers (jumps, for example) should not be used when + * calculating the hash: they will be converted to instruction offsets when + * compared later. + */ + virtual size_t hash() const = 0; + + /** \brief Length of the bytecode instruction in bytes. */ + virtual size_t byte_length() const = 0; + + using OffsetMap = unordered_map; + + /** + * \brief Writes a concrete implementation of this instruction. 
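makeOffsetMap() and writeProgram() above implement a two-pass serializer: the first pass assigns each instruction a byte offset rounded up to ROSE_INSTR_MIN_ALIGN, and the second writes each instruction at its recorded position, resolving jumps through the map. A sketch of the layout pass under those assumptions (Instr, layoutProgram and kAlign are illustrative names):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Sketch of the layout pass: assign each instruction an aligned offset
// and record the total encoded length. The write pass (not shown) then
// emits each instruction at its recorded offset.
struct Instr {
    std::uint32_t len; // byte_length() of the encoded instruction
};

std::unordered_map<const Instr *, std::uint32_t>
layoutProgram(const std::vector<Instr> &prog, std::uint32_t kAlign,
              std::uint32_t *total_len) {
    std::unordered_map<const Instr *, std::uint32_t> offsets;
    std::uint32_t off = 0;
    for (const Instr &ri : prog) {
        off = (off + kAlign - 1) / kAlign * kAlign; // round up to alignment
        offsets.emplace(&ri, off);
        off += ri.len;
    }
    *total_len = off;
    return offsets;
}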
+ * + * Other data that this instruction depends on is written directly into the + * blob, while the instruction structure itself (of size given by + * the byte_length() function) is written to dest. + */ + virtual void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const = 0; + + /** + * \brief Update a target pointer. + * + * If this instruction contains any reference to the old target, replace it + * with the new one. + */ + virtual void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) = 0; + + /** + * \brief True if these instructions are equivalent within their own + * programs. + * + * Checks that any pointers to other instructions point to the same + * offsets. + */ + bool equiv(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return equiv_impl(other, offsets, other_offsets); + } + +private: + virtual bool equiv_impl(const RoseInstruction &other, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const = 0; +}; + +/** + * \brief Templated implementation class to handle boring boilerplate code. + */ +template +class RoseInstrBase : public RoseInstruction { +protected: + static constexpr RoseInstructionCode opcode = Opcode; + using impl_type = ImplType; + +public: + RoseInstructionCode code() const override { return opcode; } + + size_t byte_length() const override { + return sizeof(impl_type); + } + + /** + * Note: this implementation simply zeroes the destination region and + * writes in the correct opcode. This is sufficient for trivial + * instructions, but instructions with data members will want to override + * it. + */ + void write(void *dest, RoseEngineBlob &, + const RoseInstruction::OffsetMap &) const override { + assert(dest != nullptr); + assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); + + impl_type *inst = static_cast(dest); + memset(inst, 0, sizeof(impl_type)); + inst->code = verify_u8(opcode); + } + +private: + bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const override { + const auto *ri_that = dynamic_cast(&other); + if (!ri_that) { + return false; + } + const auto *ri_this = dynamic_cast(this); + assert(ri_this); + return ri_this->equiv_to(*ri_that, offsets, other_offsets); + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have + * just a single target member, called "target". + */ +template +class RoseInstrBaseOneTarget + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + RoseInstrType *ri = dynamic_cast(this); + assert(ri); + if (ri->target == old_target) { + ri->target = new_target; + } + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have no + * targets. + */ +template +class RoseInstrBaseNoTargets + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *, + const RoseInstruction *) override {} +}; + +/** + * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that + * have no members at all, just an opcode. 
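RoseInstrBase above centralises the boilerplate: equiv() dispatches through a private virtual equiv_impl() that uses dynamic_cast to recover the concrete type, so instructions of different types never compare equivalent and each concrete class only supplies a plain equiv_to(). A minimal reproduction of that dispatch pattern, with toy names:

#include <cassert>

// Sketch of the equiv_impl() dispatch: the base class owns the virtual
// entry point; a templated middle layer recovers the concrete type with
// dynamic_cast and defers to a non-virtual equiv_to() on matching types.
struct Base {
    virtual ~Base() = default;
    bool equiv(const Base &other) const { return equiv_impl(other); }
private:
    virtual bool equiv_impl(const Base &other) const = 0;
};

template <typename Derived>
struct BaseImpl : Base {
private:
    bool equiv_impl(const Base &other) const override {
        const auto *that = dynamic_cast<const Derived *>(&other);
        if (!that) {
            return false; // different instruction types: never equivalent
        }
        return static_cast<const Derived *>(this)->equiv_to(*that);
    }
};

struct Nop : BaseImpl<Nop> {
    bool equiv_to(const Nop &) const { return true; }
};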
+ */ +template +class RoseInstrBaseTrivial + : public RoseInstrBaseNoTargets { +public: + virtual bool operator==(const RoseInstrType &) const { return true; } + + size_t hash() const override { + return boost::hash_value(static_cast(Opcode)); + } + + bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, + const RoseInstruction::OffsetMap &) const { + return true; + } +}; + +//// +//// Concrete implementation classes start here. +//// + +class RoseInstrAnchoredDelay + : public RoseInstrBaseOneTarget { +public: + rose_group groups; + const RoseInstruction *target; + + RoseInstrAnchoredDelay(rose_group groups_in, + const RoseInstruction *target_in) + : groups(groups_in), target(target_in) {} + + bool operator==(const RoseInstrAnchoredDelay &ri) const { + return groups == ri.groups && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return groups == ri.groups && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLitEarly + : public RoseInstrBaseNoTargets { +public: + u32 min_offset; + + explicit RoseInstrCheckLitEarly(u32 min) : min_offset(min) {} + + bool operator==(const RoseInstrCheckLitEarly &ri) const { + return min_offset == ri.min_offset; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), min_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &, + const OffsetMap &) const { + return min_offset == ri.min_offset; + } +}; + +class RoseInstrCheckGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrCheckGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckOnlyEod + : public RoseInstrBaseOneTarget { +public: + const RoseInstruction *target; + + explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) + : target(target_in) {} + + bool operator==(const RoseInstrCheckOnlyEod &ri) const { + return target == ri.target; + } + + size_t hash() const override { + return boost::hash_value(static_cast(opcode)); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckBounds + : public RoseInstrBaseOneTarget { +public: + u64a min_bound; + u64a max_bound; + const RoseInstruction *target; + + RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) + : min_bound(min), max_bound(max), target(target_in) {} + + bool operator==(const RoseInstrCheckBounds &ri) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + target == ri.target; + } + + size_t hash() const override 
{ + return hash_all(static_cast(opcode), min_bound, max_bound); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckNotHandled + : public RoseInstrBaseOneTarget { +public: + u32 key; + const RoseInstruction *target; + + RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) + : key(key_in), target(target_in) {} + + bool operator==(const RoseInstrCheckNotHandled &ri) const { + return key == ri.key && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), key); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return key == ri.key && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckSingleLookaround + : public RoseInstrBaseOneTarget { +public: + s8 offset; + u32 reach_index; + const RoseInstruction *target; + + RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, + const RoseInstruction *target_in) + : offset(offset_in), reach_index(reach_index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckSingleLookaround &ri) const { + return offset == ri.offset && reach_index == ri.reach_index && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), offset, reach_index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckSingleLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offset == ri.offset && reach_index == ri.reach_index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLookaround + : public RoseInstrBaseOneTarget { +public: + u32 index; + u32 count; + const RoseInstruction *target; + + RoseInstrCheckLookaround(u32 index_in, u32 count_in, + const RoseInstruction *target_in) + : index(index_in), count(count_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLookaround &ri) const { + return index == ri.index && count == ri.count && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), index, count); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && count == ri.count && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask + : public RoseInstrBaseOneTarget { +public: + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; 
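Note how every equiv_to() above compares jump targets through the two programs' offset maps rather than by pointer: instructions from different programs are equivalent only if their targets land at the same byte offset in each program. Distilled to its core (sameTarget is an illustrative name):

#include <cstdint>
#include <unordered_map>

// Sketch of the target comparison in the equiv_to() methods: raw target
// pointers are program-local, so they are translated to byte offsets via
// each program's own offset map before comparing.
struct Instr;
using OffsetMap = std::unordered_map<const Instr *, std::uint32_t>;

bool sameTarget(const OffsetMap &a, const Instr *ta,
                const OffsetMap &b, const Instr *tb) {
    return a.at(ta) == b.at(tb);
}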
+ } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask32 + : public RoseInstrBaseOneTarget { +public: + std::array and_mask; + std::array cmp_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask32(std::array and_mask_in, + std::array cmp_mask_in, u32 neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask32 &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckByte + : public RoseInstrBaseOneTarget { +public: + u8 and_mask; + u8 cmp_mask; + u8 negation; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckByte &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x8 + : public RoseInstrBaseOneTarget { +public: + std::array nib_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x8(std::array nib_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : nib_mask(move(nib_mask_in)), + bucket_select_mask(move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { 
+ return hash_all(static_cast(opcode), nib_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x8 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti32x8(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), + bucket_select_mask(move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x16(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), + bucket_select_mask(move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask_hi; + std::array bucket_select_mask_lo; + u32 neg_mask; + s32 offset; + const 
+
+class RoseInstrCheckShufti16x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_16x16,
+                                    RoseInstrCheckShufti16x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti16x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
+          bucket_select_mask(move(bucket_select_mask_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti16x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask, neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask == ri.bucket_select_mask &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckShufti32x16
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x16,
+                                    ROSE_STRUCT_CHECK_SHUFTI_32x16,
+                                    RoseInstrCheckShufti32x16> {
+public:
+    std::array<u8, 32> hi_mask;
+    std::array<u8, 32> lo_mask;
+    std::array<u8, 32> bucket_select_mask_hi;
+    std::array<u8, 32> bucket_select_mask_lo;
+    u32 neg_mask;
+    s32 offset;
+    const RoseInstruction *target;
+
+    RoseInstrCheckShufti32x16(std::array<u8, 32> hi_mask_in,
+                              std::array<u8, 32> lo_mask_in,
+                              std::array<u8, 32> bucket_select_mask_hi_in,
+                              std::array<u8, 32> bucket_select_mask_lo_in,
+                              u32 neg_mask_in, s32 offset_in,
+                              const RoseInstruction *target_in)
+        : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)),
+          bucket_select_mask_hi(move(bucket_select_mask_hi_in)),
+          bucket_select_mask_lo(move(bucket_select_mask_lo_in)),
+          neg_mask(neg_mask_in), offset(offset_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckShufti32x16 &ri) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), hi_mask, lo_mask,
+                        bucket_select_mask_hi, bucket_select_mask_lo,
+                        neg_mask, offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask &&
+               bucket_select_mask_hi == ri.bucket_select_mask_hi &&
+               bucket_select_mask_lo == ri.bucket_select_mask_lo &&
+               neg_mask == ri.neg_mask && offset == ri.offset &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
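Every instruction class here implements the same pair of comparisons: operator== is an exact match that compares jump targets by raw pointer, while equiv_to() compares targets through the supplied OffsetMaps, so that two programs built from distinct objects are recognised as equivalent when their instructions serialise to the same offsets. The intent, as a free-standing sketch (helper name invented):

    // Two targets are equivalent if they land at the same bytecode offset in
    // their respective programs, whatever their builder-object addresses are.
    static bool targets_equiv(const OffsetMap &offsets,
                              const RoseInstruction *t,
                              const OffsetMap &other_offsets,
                              const RoseInstruction *other_t) {
        return offsets.at(t) == other_offsets.at(other_t);
    }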
+
+class RoseInstrCheckInfix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX,
+                                    ROSE_STRUCT_CHECK_INFIX,
+                                    RoseInstrCheckInfix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in,
+                        const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckInfix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), queue, lag, report);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckPrefix
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_PREFIX,
+                                    ROSE_STRUCT_CHECK_PREFIX,
+                                    RoseInstrCheckPrefix> {
+public:
+    u32 queue;
+    u32 lag;
+    ReportID report;
+    const RoseInstruction *target;
+
+    RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in,
+                         const RoseInstruction *target_in)
+        : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckPrefix &ri) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), queue, lag, report);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return queue == ri.queue && lag == ri.lag && report == ri.report &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
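CHECK_INFIX and CHECK_PREFIX carry identical payloads: at run time they ask whether the leftfix engine on `queue` could have matched `report` ending `lag` bytes before the current position, branching to `target` (the serialised fail_jump) otherwise. Roughly, in invented pseudo-code rather than the runtime's actual API:

    // u64a end = offset of the current literal match;
    if (!leftfix_accepts_at(queue, report, end - lag)) {
        pc = target; // fail_jump
    }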
+
+class RoseInstrPushDelayed
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_PUSH_DELAYED,
+                                    ROSE_STRUCT_PUSH_DELAYED,
+                                    RoseInstrPushDelayed> {
+public:
+    u8 delay;
+    u32 index;
+
+    RoseInstrPushDelayed(u8 delay_in, u32 index_in)
+        : delay(delay_in), index(index_in) {}
+
+    bool operator==(const RoseInstrPushDelayed &ri) const {
+        return delay == ri.delay && index == ri.index;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), delay, index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return delay == ri.delay && index == ri.index;
+    }
+};
+
+class RoseInstrRecordAnchored
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_RECORD_ANCHORED,
+                                    ROSE_STRUCT_RECORD_ANCHORED,
+                                    RoseInstrRecordAnchored> {
+public:
+    u32 id;
+
+    explicit RoseInstrRecordAnchored(u32 id_in) : id(id_in) {}
+
+    bool operator==(const RoseInstrRecordAnchored &ri) const {
+        return id == ri.id;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), id);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrRecordAnchored &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return id == ri.id;
+    }
+};
+
+class RoseInstrCatchUp
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP, ROSE_STRUCT_CATCH_UP,
+                                  RoseInstrCatchUp> {
+public:
+    ~RoseInstrCatchUp() override;
+};
+
+class RoseInstrCatchUpMpv
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP_MPV,
+                                  ROSE_STRUCT_CATCH_UP_MPV,
+                                  RoseInstrCatchUpMpv> {
+public:
+    ~RoseInstrCatchUpMpv() override;
+};
+
+class RoseInstrSomAdjust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_ADJUST,
+                                    ROSE_STRUCT_SOM_ADJUST,
+                                    RoseInstrSomAdjust> {
+public:
+    u32 distance;
+
+    explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {}
+
+    bool operator==(const RoseInstrSomAdjust &ri) const {
+        return distance == ri.distance;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), distance);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return distance == ri.distance;
+    }
+};
+
+class RoseInstrSomLeftfix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_LEFTFIX,
+                                    ROSE_STRUCT_SOM_LEFTFIX,
+                                    RoseInstrSomLeftfix> {
+public:
+    u32 queue;
+    u32 lag;
+
+    RoseInstrSomLeftfix(u32 queue_in, u32 lag_in)
+        : queue(queue_in), lag(lag_in) {}
+
+    bool operator==(const RoseInstrSomLeftfix &ri) const {
+        return queue == ri.queue && lag == ri.lag;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), queue, lag);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return queue == ri.queue && lag == ri.lag;
+    }
+};
+
+class RoseInstrSomFromReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_FROM_REPORT,
+                                    ROSE_STRUCT_SOM_FROM_REPORT,
+                                    RoseInstrSomFromReport> {
+public:
+    som_operation som;
+
+    RoseInstrSomFromReport() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrSomFromReport &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrSomZero
+    : public RoseInstrBaseTrivial<ROSE_INSTR_SOM_ZERO, ROSE_STRUCT_SOM_ZERO,
+                                  RoseInstrSomZero> {
+public:
+    ~RoseInstrSomZero() override;
+};
+
+class RoseInstrTriggerInfix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_INFIX,
+                                    ROSE_STRUCT_TRIGGER_INFIX,
+                                    RoseInstrTriggerInfix> {
+public:
+    u8 cancel;
+    u32 queue;
+    u32 event;
+
+    RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in)
+        : cancel(cancel_in), queue(queue_in), event(event_in) {}
+
+    bool operator==(const RoseInstrTriggerInfix &ri) const {
+        return cancel == ri.cancel && queue == ri.queue && event == ri.event;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), cancel, queue, event);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return cancel == ri.cancel && queue == ri.queue && event == ri.event;
+    }
+};
+
+class RoseInstrTriggerSuffix
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_SUFFIX,
+                                    ROSE_STRUCT_TRIGGER_SUFFIX,
+                                    RoseInstrTriggerSuffix> {
+public:
+    u32 queue;
+    u32 event;
+
+    RoseInstrTriggerSuffix(u32 queue_in, u32 event_in)
+        : queue(queue_in), event(event_in) {}
+
+    bool operator==(const RoseInstrTriggerSuffix &ri) const {
+        return queue == ri.queue && event == ri.event;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), queue, event);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return queue == ri.queue && event == ri.event;
+    }
+};
+
+class RoseInstrDedupe
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE, ROSE_STRUCT_DEDUPE,
+                                    RoseInstrDedupe> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in,
+                    const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupe &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), quash_som, dkey,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrDedupeSom
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_SOM,
+                                    ROSE_STRUCT_DEDUPE_SOM,
+                                    RoseInstrDedupeSom> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in,
+                       const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupeSom &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), quash_som, dkey,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrReportChain
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_CHAIN,
+                                    ROSE_STRUCT_REPORT_CHAIN,
+                                    RoseInstrReportChain> {
+public:
+    u32 event;
+    u64a top_squash_distance;
+
+    RoseInstrReportChain(u32 event_in, u64a top_squash_distance_in)
+        : event(event_in), top_squash_distance(top_squash_distance_in) {}
+
+    bool operator==(const RoseInstrReportChain &ri) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), event,
+                        top_squash_distance);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return event == ri.event &&
+               top_squash_distance == ri.top_squash_distance;
+    }
+};
+
+class RoseInstrReportSomInt
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_INT,
+                                    ROSE_STRUCT_REPORT_SOM_INT,
+                                    RoseInstrReportSomInt> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomInt() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomInt &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReportSomAware
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_AWARE,
+                                    ROSE_STRUCT_REPORT_SOM_AWARE,
+                                    RoseInstrReportSomAware> {
+public:
+    som_operation som;
+
+    RoseInstrReportSomAware() {
+        std::memset(&som, 0, sizeof(som));
+    }
+
+    bool operator==(const RoseInstrReportSomAware &ri) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), som.type, som.onmatch);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return std::memcmp(&som, &ri.som, sizeof(som)) == 0;
+    }
+};
+
+class RoseInstrReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT, ROSE_STRUCT_REPORT,
+                                    RoseInstrReport> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrReport &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrReportExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_EXHAUST,
+                                    ROSE_STRUCT_REPORT_EXHAUST,
+                                    RoseInstrReportExhaust> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+    u32 ekey;
+
+    RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in,
+                           u32 ekey_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {}
+
+    bool operator==(const RoseInstrReportExhaust &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust, ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+};
+
+class RoseInstrReportSom
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM,
+                                    ROSE_STRUCT_REPORT_SOM,
+                                    RoseInstrReportSom> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrReportSom &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
+
+class RoseInstrReportSomExhaust
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_EXHAUST,
+                                    ROSE_STRUCT_REPORT_SOM_EXHAUST,
+                                    RoseInstrReportSomExhaust> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+    u32 ekey;
+
+    RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in,
+                              u32 ekey_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {}
+
+    bool operator==(const RoseInstrReportSomExhaust &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust, ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               ekey == ri.ekey;
+    }
+};
+
+class RoseInstrDedupeAndReport
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_AND_REPORT,
+                                    ROSE_STRUCT_DEDUPE_AND_REPORT,
+                                    RoseInstrDedupeAndReport> {
+public:
+    u8 quash_som;
+    u32 dkey;
+    ReportID onmatch;
+    s32 offset_adjust;
+    const RoseInstruction *target;
+
+    RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in,
+                             s32 offset_adjust_in,
+                             const RoseInstruction *target_in)
+        : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in),
+          offset_adjust(offset_adjust_in), target(target_in) {}
+
+    bool operator==(const RoseInstrDedupeAndReport &ri) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), quash_som, dkey, onmatch,
+                        offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return quash_som == ri.quash_som && dkey == ri.dkey &&
+               onmatch == ri.onmatch && offset_adjust == ri.offset_adjust &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrFinalReport
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_FINAL_REPORT,
+                                    ROSE_STRUCT_FINAL_REPORT,
+                                    RoseInstrFinalReport> {
+public:
+    ReportID onmatch;
+    s32 offset_adjust;
+
+    RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in)
+        : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {}
+
+    bool operator==(const RoseInstrFinalReport &ri) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), onmatch, offset_adjust);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust;
+    }
+};
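The *_EXHAUST report instructions pair with CHECK_EXHAUSTED just below: a report carrying an exhaustion key (ekey) may fire at most once, after which any program guarded on that key skips straight to its fail target. Schematically (helper names invented, not the runtime's real API):

    if (ekey_is_set(stream_state, ekey)) {  // CHECK_EXHAUSTED
        pc = target;
    } else {
        fire_report(onmatch, end + offset_adjust);
        set_ekey(stream_state, ekey);       // REPORT_EXHAUST and friends
    }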
+
+class RoseInstrCheckExhausted
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_EXHAUSTED,
+                                    ROSE_STRUCT_CHECK_EXHAUSTED,
+                                    RoseInstrCheckExhausted> {
+public:
+    u32 ekey;
+    const RoseInstruction *target;
+
+    RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in)
+        : ekey(ekey_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckExhausted &ri) const {
+        return ekey == ri.ekey && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), ekey);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return ekey == ri.ekey &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrCheckMinLength
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MIN_LENGTH,
+                                    ROSE_STRUCT_CHECK_MIN_LENGTH,
+                                    RoseInstrCheckMinLength> {
+public:
+    s32 end_adj;
+    u64a min_length;
+    const RoseInstruction *target;
+
+    RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in,
+                            const RoseInstruction *target_in)
+        : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckMinLength &ri) const {
+        return end_adj == ri.end_adj && min_length == ri.min_length &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), end_adj, min_length);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return end_adj == ri.end_adj && min_length == ri.min_length &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSetState
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_STATE,
+                                    ROSE_STRUCT_SET_STATE,
+                                    RoseInstrSetState> {
+public:
+    u32 index;
+
+    explicit RoseInstrSetState(u32 index_in) : index(index_in) {}
+
+    bool operator==(const RoseInstrSetState &ri) const {
+        return index == ri.index;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return index == ri.index;
+    }
+};
+
+class RoseInstrSetGroups
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_GROUPS,
+                                    ROSE_STRUCT_SET_GROUPS,
+                                    RoseInstrSetGroups> {
+public:
+    rose_group groups;
+
+    explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {}
+
+    bool operator==(const RoseInstrSetGroups &ri) const {
+        return groups == ri.groups;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), groups);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return groups == ri.groups;
+    }
+};
+
+class RoseInstrSquashGroups
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_SQUASH_GROUPS,
+                                    ROSE_STRUCT_SQUASH_GROUPS,
+                                    RoseInstrSquashGroups> {
+public:
+    rose_group groups;
+
+    explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {}
+
+    bool operator==(const RoseInstrSquashGroups &ri) const {
+        return groups == ri.groups;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), groups);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return groups == ri.groups;
+    }
+};
+
+class RoseInstrCheckState
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_STATE,
+                                    ROSE_STRUCT_CHECK_STATE,
+                                    RoseInstrCheckState> {
+public:
+    u32 index;
+    const RoseInstruction *target;
+
+    RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in)
+        : index(index_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckState &ri) const {
+        return index == ri.index && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return index == ri.index &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSparseIterBegin
+    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_BEGIN,
+                           ROSE_STRUCT_SPARSE_ITER_BEGIN,
+                           RoseInstrSparseIterBegin> {
+public:
+    u32 num_keys; // total number of multibit keys
+    std::vector<std::pair<u32, const RoseInstruction *>> jump_table;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterBegin(u32 num_keys_in,
+                             const RoseInstruction *target_in)
+        : num_keys(num_keys_in), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterBegin &ri) const {
+        return num_keys == ri.num_keys && jump_table == ri.jump_table &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        size_t v = hash_all(static_cast<int>(opcode), num_keys);
+        for (const u32 &key : jump_table | boost::adaptors::map_keys) {
+            boost::hash_combine(v, key);
+        }
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        if (target == old_target) {
+            target = new_target;
+        }
+        for (auto &jump : jump_table) {
+            if (jump.second == old_target) {
+                jump.second = new_target;
+            }
+        }
+    }
+
+    bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        if (iter_offset != ri.iter_offset ||
+            offsets.at(target) != other_offsets.at(ri.target)) {
+            return false;
+        }
+        if (jump_table.size() != ri.jump_table.size()) {
+            return false;
+        }
+        auto it1 = jump_table.begin(), it2 = ri.jump_table.begin();
+        for (; it1 != jump_table.end(); ++it1, ++it2) {
+            if (it1->first != it2->first) {
+                return false;
+            }
+            if (offsets.at(it1->second) != other_offsets.at(it2->second)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+private:
+    friend class RoseInstrSparseIterNext;
+
+    // These variables allow us to use the same multibit iterator and jump
+    // table in subsequent SPARSE_ITER_NEXT write() operations.
+    mutable bool is_written = false;
+    mutable u32 iter_offset = 0;
+    mutable u32 jump_table_offset = 0;
+};
+
+class RoseInstrSparseIterNext
+    : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_NEXT,
+                           ROSE_STRUCT_SPARSE_ITER_NEXT,
+                           RoseInstrSparseIterNext> {
+public:
+    u32 state;
+    const RoseInstrSparseIterBegin *begin;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterNext(u32 state_in,
+                            const RoseInstrSparseIterBegin *begin_in,
+                            const RoseInstruction *target_in)
+        : state(state_in), begin(begin_in), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterNext &ri) const {
+        return state == ri.state && begin == ri.begin && target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), state);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        if (target == old_target) {
+            target = new_target;
+        }
+        if (begin == old_target) {
+            assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN);
+            begin = static_cast<const RoseInstrSparseIterBegin *>(new_target);
+        }
+    }
+
+    bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return state == ri.state &&
+               offsets.at(begin) == other_offsets.at(ri.begin) &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+class RoseInstrSparseIterAny
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_SPARSE_ITER_ANY,
+                                    ROSE_STRUCT_SPARSE_ITER_ANY,
+                                    RoseInstrSparseIterAny> {
+public:
+    u32 num_keys; // total number of multibit keys
+    std::vector<u32> keys;
+    const RoseInstruction *target;
+
+    RoseInstrSparseIterAny(u32 num_keys_in, std::vector<u32> keys_in,
+                           const RoseInstruction *target_in)
+        : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {}
+
+    bool operator==(const RoseInstrSparseIterAny &ri) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), num_keys, keys);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
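A SPARSE_ITER_BEGIN instruction and the SPARSE_ITER_NEXT instructions that follow it share a single multibit sparse iterator and jump table; the mutable offsets cached in RoseInstrSparseIterBegin record where write() laid those structures out so later NEXT instructions can reuse them rather than emit copies. A hypothetical construction, with all values as placeholders:

    auto begin = make_unique<RoseInstrSparseIterBegin>(num_keys, end_inst);
    begin->jump_table.emplace_back(state_key_0, target_0);
    begin->jump_table.emplace_back(state_key_1, target_1);
    // Each RoseInstrSparseIterNext then names this begin plus one state key.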
+
+class RoseInstrEnginesEod
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_ENGINES_EOD,
+                                    ROSE_STRUCT_ENGINES_EOD,
+                                    RoseInstrEnginesEod> {
+public:
+    u32 iter_offset;
+
+    explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {}
+
+    bool operator==(const RoseInstrEnginesEod &ri) const {
+        return iter_offset == ri.iter_offset;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), iter_offset);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return iter_offset == ri.iter_offset;
+    }
+};
+
+class RoseInstrSuffixesEod
+    : public RoseInstrBaseTrivial<ROSE_INSTR_SUFFIXES_EOD,
+                                  ROSE_STRUCT_SUFFIXES_EOD,
+                                  RoseInstrSuffixesEod> {
+public:
+    ~RoseInstrSuffixesEod() override;
+};
+
+class RoseInstrMatcherEod
+    : public RoseInstrBaseTrivial<ROSE_INSTR_MATCHER_EOD,
+                                  ROSE_STRUCT_MATCHER_EOD,
+                                  RoseInstrMatcherEod> {
+public:
+    ~RoseInstrMatcherEod() override;
+};
+
+class RoseInstrCheckLongLit
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
+                                    ROSE_STRUCT_CHECK_LONG_LIT,
+                                    RoseInstrCheckLongLit> {
+public:
+    std::string literal;
+
+    explicit RoseInstrCheckLongLit(std::string literal_in)
+        : literal(std::move(literal_in)) {}
+
+    bool operator==(const RoseInstrCheckLongLit &ri) const {
+        return literal == ri.literal;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return literal == ri.literal;
+    }
+};
+
+class RoseInstrCheckLongLitNocase
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+                                    ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
+                                    RoseInstrCheckLongLitNocase> {
+public:
+    std::string literal;
+
+    explicit RoseInstrCheckLongLitNocase(std::string literal_in)
+        : literal(std::move(literal_in)) {
+        upperString(literal);
+    }
+
+    bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
+        return literal == ri.literal;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return literal == ri.literal;
+    }
+};
+
+class RoseInstrEnd
+    : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
+                                  RoseInstrEnd> {
+public:
+    ~RoseInstrEnd() override;
+};
+
+/**
+ * \brief Container for a list of program instructions.
+ */
+class RoseProgram {
+private:
+    std::vector<std::unique_ptr<RoseInstruction>> prog;
+
+public:
+    RoseProgram() {
+        prog.push_back(make_unique<RoseInstrEnd>());
+    }
+
+    bool empty() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+        // Empty if we only have one element, the END instruction.
+        return std::next(prog.begin()) == prog.end();
+    }
+
+    size_t size() const { return prog.size(); }
+
+    const RoseInstruction &back() const { return *prog.back(); }
+    const RoseInstruction &front() const { return *prog.front(); }
+
+    using iterator = decltype(prog)::iterator;
+    iterator begin() { return prog.begin(); }
+    iterator end() { return prog.end(); }
+
+    using const_iterator = decltype(prog)::const_iterator;
+    const_iterator begin() const { return prog.begin(); }
+    const_iterator end() const { return prog.end(); }
+
+    using reverse_iterator = decltype(prog)::reverse_iterator;
+    reverse_iterator rbegin() { return prog.rbegin(); }
+    reverse_iterator rend() { return prog.rend(); }
+
+    using const_reverse_iterator = decltype(prog)::const_reverse_iterator;
+    const_reverse_iterator rbegin() const { return prog.rbegin(); }
+    const_reverse_iterator rend() const { return prog.rend(); }
+
+    /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */
+    const RoseInstruction *end_instruction() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.back().get();
+    }
+
+private:
+    static void update_targets(iterator it, iterator it_end,
+                               const RoseInstruction *old_target,
+                               const RoseInstruction *new_target) {
+        assert(old_target && new_target && old_target != new_target);
+        for (; it != it_end; ++it) {
+            std::unique_ptr<RoseInstruction> &ri = *it;
+            assert(ri);
+            ri->update_target(old_target, new_target);
+        }
+    }
+
+public:
+    iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.insert(it, std::move(ri));
+    }
+
+    iterator insert(iterator it, RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return it;
+        }
+
+        const RoseInstruction *end_ptr = block.end_instruction();
+        assert(end_ptr->code() == ROSE_INSTR_END);
+        block.prog.pop_back();
+
+        const RoseInstruction *new_target = it->get();
+        update_targets(block.prog.begin(), block.prog.end(), end_ptr,
+                       new_target);
+
+        // Workaround: container insert() for ranges doesn't return an iterator
+        // in the version of the STL distributed with gcc 4.8.
+        auto dist = distance(prog.begin(), it);
+        prog.insert(it, std::make_move_iterator(block.prog.begin()),
+                    std::make_move_iterator(block.prog.end()));
+        it = prog.begin();
+        std::advance(it, dist);
+        return it;
+    }
+
+    /**
+     * \brief Adds this instruction to the program just before the terminating
+     * ROSE_INSTR_END.
+     */
+    void add_before_end(std::unique_ptr<RoseInstruction> ri) {
+        assert(!prog.empty());
+        insert(std::prev(prog.end()), std::move(ri));
+    }
+
+    /**
+     * \brief Adds this block to the program just before the terminating
+     * ROSE_INSTR_END.
+     */
+    void add_before_end(RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return;
+        }
+
+        insert(std::prev(prog.end()), std::move(block));
+    }
+
+    /**
+     * \brief Append this program block, replacing our current ROSE_INSTR_END.
+     */
+    void add_block(RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return;
+        }
+
+        // Replace pointers to the current END with pointers to the first
+        // instruction in the new sequence.
+        const RoseInstruction *end_ptr = end_instruction();
+        prog.pop_back();
+        update_targets(prog.begin(), prog.end(), end_ptr,
+                       block.prog.front().get());
+        prog.insert(prog.end(), std::make_move_iterator(block.prog.begin()),
+                    std::make_move_iterator(block.prog.end()));
+    }
+
+    /**
+     * \brief Replace the instruction pointed to by the given iterator.
+     */
+    template<class Iter>
+    void replace(Iter it, std::unique_ptr<RoseInstruction> ri) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        const RoseInstruction *old_ptr = it->get();
+        *it = move(ri);
+        update_targets(prog.begin(), prog.end(), old_ptr, it->get());
+
+        assert(prog.back()->code() == ROSE_INSTR_END);
+    }
+};
+
+aligned_unique_ptr<char>
+writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len);
+
+class RoseProgramHash {
+public:
+    size_t operator()(const RoseProgram &program) const {
+        size_t v = 0;
+        for (const auto &ri : program) {
+            assert(ri);
+            boost::hash_combine(v, ri->hash());
+        }
+        return v;
+    }
+};
+
+class RoseProgramEquivalence {
+public:
+    bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
+};
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_PROGRAM_H
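Taken together, RoseProgram and the hash()/equiv_to() machinery let the compiler assemble programs from spliced blocks and then share bytecode between identical ones. An illustrative use of the API, not taken from this patch (values are placeholders):

    RoseProgram block;
    block.add_before_end(make_unique<RoseInstrSomAdjust>(distance));

    RoseProgram program;
    program.add_block(std::move(block)); // users of the old END are retargeted

    // One plausible dedupe structure: identical programs hash and compare
    // equal, so a single bytecode offset can be reused for all of them.
    std::unordered_map<RoseProgram, u32, RoseProgramHash,
                       RoseProgramEquivalence> cache;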
diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp
index c2366f0e..c6139097 100644
--- a/src/rose/rose_build_role_aliasing.cpp
+++ b/src/rose/rose_build_role_aliasing.cpp
@@ -40,7 +40,6 @@
 #include "nfagraph/ng_is_equal.h"
 #include "nfagraph/ng_limex.h"
 #include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_restructuring.h"
 #include "nfagraph/ng_uncalc_components.h"
 #include "nfagraph/ng_util.h"
 #include "util/bitutils.h"
@@ -112,11 +111,9 @@ struct AliasInEdge : EdgeAndVertex {
 class CandidateSet {
 public:
-    typedef RoseVertexSet::iterator iterator;
+    typedef set<RoseVertex>::iterator iterator;
     typedef RoseVertex key_type;
 
-    explicit CandidateSet(const VertexIndexComp &comp) : main_cont(comp) {}
-
     iterator begin() { return main_cont.begin(); }
     iterator end() { return main_cont.end(); }
@@ -152,7 +149,7 @@ public:
 private:
     /* if a vertex is worth storing, it is worth storing twice */
-    RoseVertexSet main_cont; /* deterministic iterator */
+    set<RoseVertex> main_cont; /* deterministic iterator */
     ue2::unordered_set<RoseVertex> hash_cont; /* member checks */
 };
@@ -257,10 +254,8 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
     }
 
     for (const auto &e_a : in_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e,
exists) = edge_by_target(source(e_a, g), b, g); - if (!exists || g[e].rose_top != g[e_a].rose_top) { + RoseEdge e = edge(source(e_a, g), b, g); + if (!e || g[e].rose_top != g[e_a].rose_top) { DEBUG_PRINTF("bad tops\n"); return false; } @@ -274,10 +269,7 @@ static bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { for (const auto &e_a : out_edges_range(a, g)) { - bool exists; - RoseEdge e; - tie(e, exists) = edge(b, target(e_a, g), g); - if (exists) { + if (RoseEdge e = edge(b, target(e_a, g), g)) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -296,10 +288,7 @@ static bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { for (const auto &e_a : in_edges_range(a, g)) { - bool exists; - RoseEdge e; - tie(e, exists) = edge_by_target(source(e_a, g), b, g); - if (exists) { + if (RoseEdge e = edge(source(e_a, g), b, g)) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -499,11 +488,11 @@ void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge, const RoseEdgeProps &from_props = g[from_edge]; if (!to_edge) { - DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index); add_edge(u, v, from_props, g); } else { // union of the two edges. - DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].idx, g[v].idx); + DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index); RoseEdgeProps &to_props = g[*to_edge]; to_props.minBound = min(to_props.minBound, from_props.minBound); to_props.maxBound = max(to_props.maxBound, from_props.maxBound); @@ -627,7 +616,7 @@ static void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); insert(&g[b].reports, g[a].reports); @@ -649,7 +638,7 @@ static void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); insert(&g[b].reports, g[a].reports); g[b].min_offset = min(g[a].min_offset, g[b].min_offset); @@ -667,7 +656,7 @@ static void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build, RoseAliasingInfo &rai) { RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); // For a diamond merge, most properties are already the same (with the // notable exception of the literal set). 
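The repeated rewrites in this file from the old tie(e, exists) = edge_by_target(...) pattern to `if (RoseEdge e = edge(...))` lean on the new ue2_graph edge descriptor being contextually convertible to bool (false when no such edge exists), which is what makes the one-liner form safe. Schematically:

    if (RoseEdge e = edge(u, v, g)) {
        // the edge u->v exists and g[e] is valid here
    }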
@@ -684,7 +673,7 @@ static never_inline
 void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) {
     for (auto v : vertices_range(build.g)) {
         if (isAliasingCandidate(v, build)) {
-            DEBUG_PRINTF("candidate %zu\n", build.g[v].idx);
+            DEBUG_PRINTF("candidate %zu\n", build.g[v].index);
             DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin());
             candidates->insert(v);
         }
@@ -747,10 +736,7 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
     const bool equal_roses = hasEqualLeftfixes(a, b, g);
 
     for (const auto &e_a : in_edges_range(a, g)) {
-        bool exists;
-        RoseEdge e;
-        tie(e, exists) = edge_by_target(source(e_a, g), b, g);
-        if (exists) {
+        if (RoseEdge e = edge(source(e_a, g), b, g)) {
             DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n", (int)equal_roses,
                          g[e].rose_top, g[e_a].rose_top);
             if (!equal_roses) {
@@ -786,8 +772,8 @@ void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h,
     // unimplementable.
     DEBUG_PRINTF("report %u has been merged away, pruning\n", report);
-    assert(h->kind == build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX
-                                                            : NFA_INFIX);
+    assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX
+                                                             : NFA_INFIX));
     unique_ptr<NGHolder> h_new = cloneHolder(*h);
     pruneReport(*h_new, report);
@@ -863,7 +849,13 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
 static
 void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
                      const set<RoseVertex> &verts) {
-    ue2::unordered_set<u32> used_tops;
+    if (!is_triggered(h)) {
+        DEBUG_PRINTF("not triggered, no tops\n");
+        return;
+    }
+    assert(isCorrectlyTopped(h));
+    DEBUG_PRINTF("prunning unused tops\n");
+    ue2::flat_set<u32> used_tops;
     for (auto v : verts) {
         assert(g[v].left.graph.get() == &h);
 
@@ -879,10 +871,13 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
         if (v == h.startDs) {
            continue; // stylised edge, leave it alone.
        }
-        u32 top = h[e].top;
-        if (!contains(used_tops, top)) {
-            DEBUG_PRINTF("edge (start,%u) has unused top %u\n",
-                         h[v].index, top);
+        flat_set<u32> pruned_tops;
+        auto pt_inserter = inserter(pruned_tops, pruned_tops.end());
+        set_intersection(h[e].tops.begin(), h[e].tops.end(),
+                         used_tops.begin(), used_tops.end(), pt_inserter);
+        h[e].tops = move(pruned_tops);
+        if (h[e].tops.empty()) {
+            DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
             dead.push_back(e);
         }
     }
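With edges now carrying a set of tops rather than a single `top` value, the new pruneUnusedTops body above intersects each start edge's tops with the tops that some vertex still triggers, and kills the edge only when nothing survives. For instance, tops {0, 2, 5} on an edge intersected with used tops {2, 7} leave {2}; an empty intersection marks the edge dead.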
@@ -1116,8 +1111,7 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
             // We should be protected from merging common preds with tops leading
             // to completely different repeats by earlier checks, but just in
             // case...
-            if (edge(source(e, g), a, g).second) {
-                RoseEdge a_edge = edge(source(e, g), a, g).first;
+            if (RoseEdge a_edge = edge(source(e, g), a, g)) {
                 u32 a_top = g[a_edge].rose_top;
                 const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
                 if (pr != a_pr) {
@@ -1287,7 +1281,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     }
 
     DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n",
-                 g[a].idx, g[b].idx);
+                 g[a].index, g[b].index);
 
     set<RoseVertex> &b_verts = rai.rev_leftfix[b_left];
     set<RoseVertex> aa;
@@ -1327,8 +1321,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h));
 
     if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) {
-        assert(roseHasTops(g, a));
-        assert(roseHasTops(g, b));
+        assert(roseHasTops(build, a));
+        assert(roseHasTops(build, b));
         return false;
     }
@@ -1341,8 +1335,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
     for (const auto &e : in_edges_range(a, g)) {
         g[e] = a_props[source(e, g)];
     }
-    assert(roseHasTops(g, a));
-    assert(roseHasTops(g, b));
+    assert(roseHasTops(build, a));
+    assert(roseHasTops(build, b));
     return false;
 }
@@ -1365,8 +1359,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
 
     reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc);
 
-    assert(roseHasTops(g, a));
-    assert(roseHasTops(g, b));
+    assert(roseHasTops(build, a));
+    assert(roseHasTops(build, b));
     assert(isImplementableNFA(*b_h, nullptr, build.cc));
     return true;
 }
@@ -1379,7 +1373,7 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
                       RoseVertex b, bool trivialCasesOnly,
                       RoseAliasingInfo &rai) {
     DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n",
-                 build.g[a].idx, build.g[b].idx);
+                 build.g[a].index, build.g[b].index);
     assert(a != b);
 
     RoseGraph &g = build.g;
@@ -1417,8 +1411,8 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
         return false;
     }
 
-    assert(roseHasTops(g, a));
-    assert(roseHasTops(g, b));
+    assert(roseHasTops(build, a));
+    assert(roseHasTops(build, b));
 
     if (a_left_id.graph() && b_left_id.graph()) {
         return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly,
@@ -1592,7 +1586,7 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build,
 
         assert(contains(candidates, a));
 
-        DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].idx);
+        DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index);
         for (auto jt = it; jt != siblings.end(); ++jt) {
             RoseVertex b = *jt;
             assert(contains(candidates, b));
@@ -1706,8 +1700,8 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
         RoseVertex pred = pickPred(a, g, build);
         siblings.clear();
 
-        if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) ||
-            hasGreaterOutDegree(verts.size(), pred, g)) {
+        if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred)
+            || out_degree(pred, g) > verts.size()) {
             // Select sibling from amongst the vertices that share a literal.
siblings.insert(siblings.end(), verts.begin(), verts.end()); } else { @@ -1716,8 +1710,6 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); } - sort(siblings.begin(), siblings.end(), VertexIndexComp(g)); - auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a, build, rai, candidates); if (jt == siblings.end()) { @@ -1737,6 +1729,7 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); + assert(!hasOrphanedTops(build)); } // Can't merge vertices with different root predecessors. @@ -1745,12 +1738,12 @@ bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) { set a_roots, b_roots; for (auto u : inv_adjacent_vertices_range(a, g)) { - if (!hasGreaterInDegree(0, u, g)) { + if (!in_degree(u, g)) { a_roots.insert(u); } } for (auto u : inv_adjacent_vertices_range(b, g)) { - if (!hasGreaterInDegree(0, u, g)) { + if (!in_degree(u, g)) { b_roots.insert(u); } } @@ -1858,8 +1851,8 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, u32 lit_id = *g[a].literals.begin(); RoseVertex succ = pickSucc(a, g); const auto &verts = build.literal_info.at(lit_id).vertices; - if (succ != RoseGraph::null_vertex() && - !hasGreaterInDegree(verts.size(), succ, g)) { + if (succ != RoseGraph::null_vertex() + && in_degree(succ, g) < verts.size()) { if (!done_succ.insert(succ).second) { continue; // succ already in done_succ. } @@ -1892,7 +1885,7 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, } for (auto &siblings : sibling_cache | map_values) { - sort(siblings.begin(), siblings.end(), VertexIndexComp(build.g)); + sort(siblings.begin(), siblings.end()); } } @@ -1952,6 +1945,7 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); + assert(!hasOrphanedTops(build)); } /** @@ -1966,7 +1960,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { if (has_successor(v, g)) { bool only_succ = true; for (const auto &w : adjacent_vertices_range(v, g)) { - if (hasGreaterInDegree(1, w, g)) { + if (in_degree(w, g) > 1) { only_succ = false; break; } @@ -1982,7 +1976,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { bool only_pred = true; for (const auto &u : inv_adjacent_vertices_range(v, g)) { - if (hasGreaterOutDegree(1, u, g)) { + if (out_degree(u, g) > 1) { only_pred = false; break; } @@ -2017,6 +2011,8 @@ void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) { void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { const CompileContext &cc = build.cc; RoseGraph &g = build.g; + assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) { return; @@ -2028,7 +2024,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing; - CandidateSet candidates(g); + CandidateSet candidates; findCandidates(build, &candidates); DEBUG_PRINTF("candidates %zu\n", candidates.size()); @@ -2050,6 +2046,8 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { DEBUG_PRINTF("killed %zu vertices\n", dead.size()); build.removeVertices(dead); + assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); } } // namespace ue2 diff --git a/src/rose/rose_build_util.h b/src/rose/rose_build_util.h index 85cfc010..81bb6845 100644 --- 
a/src/rose/rose_build_util.h +++ b/src/rose/rose_build_util.h @@ -39,31 +39,6 @@ namespace ue2 { /** Max allowed width for transient graphs in block mode */ #define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U -// Comparator for vertices using their index property. -struct VertexIndexComp { - VertexIndexComp(const RoseGraph &gg) : g(gg) {} - - bool operator()(const RoseVertex &a, const RoseVertex &b) const { - const RoseVertexProps &pa = g[a]; - const RoseVertexProps &pb = g[b]; - - if (pa.idx < pb.idx) { - return true; - } - if (pa.idx > pb.idx) { - return false; - } - - assert(a == b); // All vertex indices should be distinct. - return a < b; - } - - const RoseGraph &g; -}; - -// Vertex set type, ordered by index. Construct with a graph reference. -typedef std::set RoseVertexSet; - /** * \brief Add two Rose depths together, coping correctly with infinity at * ROSE_BOUND_INF. diff --git a/src/rose/rose_build_width.cpp b/src/rose/rose_build_width.cpp index 6bfcee48..182b62ee 100644 --- a/src/rose/rose_build_width.cpp +++ b/src/rose/rose_build_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,19 +77,20 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { u32 minWidth = ROSE_BOUND_INF; for (auto v : reachable) { if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } const u32 w = g[v].min_offset; if (!g[v].reports.empty()) { - DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].index, w); minWidth = min(minWidth, w); } if (is_end_anchored(g, v)) { - DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].idx, w); + DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index, + w); minWidth = min(minWidth, w); } @@ -98,7 +99,7 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { assert(suffix_width.is_reachable()); DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " "report at %u\n", - g[v].idx, g[v].suffix.top, suffix_width.str().c_str(), + g[v].index, g[v].suffix.top, suffix_width.str().c_str(), w + suffix_width); minWidth = min(minWidth, w + suffix_width); } @@ -203,10 +204,10 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { // Everyone's anchored, so the max width can be taken from the max // max_offset on our vertices (so long as all accepts are ACCEPT_EOD). 
for (auto v : reachable) { - DEBUG_PRINTF("inspecting vert %zu\n", g[v].idx); + DEBUG_PRINTF("inspecting vert %zu\n", g[v].index); if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index a3d00943..1867be50 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -42,16 +42,17 @@ #include "nfa/nfa_internal.h" #include "nfa/nfa_kind.h" #include "util/dump_charclass.h" -#include "util/multibit_internal.h" +#include "util/multibit_build.h" #include "util/multibit.h" #include #include #include #include +#include #include -#include #include +#include #include #ifndef DUMP_SUPPORT @@ -234,9 +235,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); + assert(code <= LAST_ROSE_INSTRUCTION); const size_t offset = pc - pc_base; switch (code) { + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ANCHORED_DELAY) { os << " groups 0x" << std::hex << ri->groups << std::dec << endl; @@ -244,16 +248,6 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_MASK) { - os << " and_mask " - << dumpStrMask(ri->and_mask.a8, sizeof(ri->and_mask.a8)) - << endl; - os << " cmp_mask " - << dumpStrMask(ri->cmp_mask.a8, sizeof(ri->cmp_mask.a8)) - << endl; - } - PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_EARLY) { os << " min_offset " << ri->min_offset << endl; } @@ -283,6 +277,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { + os << " offset " << int{ri->offset} << endl; + os << " reach_index " << ri->reach_index << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + const u8 *base = (const u8 *)t; + const u8 *reach_base = base + t->lookaroundReachOffset; + const u8 *reach = reach_base + + ri->reach_index * REACH_BITVECTOR_LEN; + os << " contents "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_LOOKAROUND) { os << " index " << ri->index << endl; os << " count " << ri->count << endl; @@ -303,6 +311,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_MASK_32) { + os << " and_mask " + << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_BYTE) { os << " and_mask 0x" << std::hex << std::setw(2) << std::setfill('0') << u32{ri->and_mask} << std::dec @@ -316,6 +338,71 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + 
PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_INFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; @@ -507,6 +594,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_ANY) { + os << " iter_offset " << ri->iter_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { os << " iter_offset " << ri->iter_offset << endl; } @@ -518,7 +611,22 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(MATCHER_EOD) {} PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(END) { return; } + PROGRAM_CASE(CHECK_LONG_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + } PROGRAM_NEXT_INSTRUCTION default: @@ -573,9 +681,8 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const char *base = (const char *)t; - os << "EOD Program:" << endl; - if (t->eodProgramOffset) { + os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; dumpProgram(os, t, base + t->eodProgramOffset); os << endl; } else { @@ -810,24 +917,14 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); const NFA *n = getNfaByInfo(t, nfa_info); - stringstream sstxt, ssdot, ssraw; - - sstxt << base << "rose_nfa_" << i << ".txt"; - ssdot << base << "rose_nfa_" << i << ".dot"; - ssraw << base << "rose_nfa_" << i << ".raw"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - 
nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + stringstream ssbase; + ssbase << base << "rose_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { - f = fopen(ssraw.str().c_str(), "w"); + stringstream ssraw; + ssraw << base << "rose_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); fwrite(n, 1, n->length, f); fclose(f); } @@ -870,24 +967,14 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { for (u32 i = 0; i < t->somRevCount; i++) { const NFA *n = (const NFA *)(tp + rev_offsets[i]); - stringstream sstxt, ssdot, ssraw; - - sstxt << base << "som_rev_nfa_" << i << ".txt"; - ssdot << base << "som_rev_nfa_" << i << ".dot"; - ssraw << base << "som_nfa_nfa_" << i << ".raw"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + stringstream ssbase; + ssbase << base << "som_rev_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); if (dump_raw) { - f = fopen(ssraw.str().c_str(), "w"); + stringstream ssraw; + ssraw << base << "som_rev_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); fwrite(n, 1, n->length, f); fclose(f); } @@ -902,20 +989,10 @@ void dumpAnchored(const RoseEngine *t, const string &base) { while (curr) { const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); - stringstream sstxt, ssdot; - sstxt << base << "anchored_" << i << ".txt"; - ssdot << base << "anchored_" << i << ".dot"; - - FILE *f; - - f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f, base); - fclose(f); - - f = fopen(sstxt.str().c_str(), "w"); - nfaDumpText(n, f); - fclose(f); + stringstream ssbase; + ssbase << base << "anchored_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); curr = curr->next_offset ? 
(const anchored_matcher_info *) ((const char *)curr + curr->next_offset) : nullptr; @@ -943,6 +1020,63 @@ void dumpAnchoredStats(const void *atable, FILE *f) { } +static +void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, + const RoseLongLitSubtable *ll_sub, FILE *f) { + if (!ll_sub->hashBits) { + fprintf(f, " \n"); + return; + } + + const char *base = (const char *)ll_table; + + u32 nbits = ll_sub->hashBits; + u32 num_entries = 1U << nbits; + const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); + u32 hash_occ = + count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); + float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; + + fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", + nbits, hash_occ, num_entries, hash_occ_percent); + + u32 bloom_bits = ll_sub->bloomBits; + u32 bloom_size = 1U << bloom_bits; + const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; + u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, + [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); + float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; + + fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", + bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); +} + +static +void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { + if (!t->longLitTableOffset) { + return; + } + + fprintf(f, "\n"); + fprintf(f, "Long literal table (streaming):\n"); + + const auto *ll_table = + (const struct RoseLongLitTable *)loadFromByteCodeOffset( + t, t->longLitTableOffset); + + fprintf(f, " total size : %u bytes\n", ll_table->size); + fprintf(f, " longest len : %u\n", ll_table->maxLen); + fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); + + fprintf(f, " caseful:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); + + fprintf(f, " nocase:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); +} + // Externally accessible functions void roseDumpText(const RoseEngine *t, FILE *f) { @@ -1018,7 +1152,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); + fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", mmbit_size(t->activeArrayCount)); fprintf(f, " - active rose : %u bytes\n", @@ -1072,6 +1206,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\nSmall-block literal matcher stats:\n\n"); hwlmPrintStats(sbtable, f); } + + dumpLongLiteralTable(t, f); } #define DUMP_U8(o, member) \ @@ -1096,8 +1232,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); DUMP_U32(t, dkeyCount); + DUMP_U32(t, dkeyLogSize); DUMP_U32(t, invDkeyOffset); DUMP_U32(t, somLocationCount); + DUMP_U32(t, somLocationFatbitSize); DUMP_U32(t, rolesWithStateCount); DUMP_U32(t, stateSize); DUMP_U32(t, anchorStateSize); @@ -1108,6 +1246,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); DUMP_U32(t, sbmatcherOffset); + DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); DUMP_U32(t, fmatcherMinWidth); DUMP_U32(t, eodmatcherMinWidth); @@ -1121,8 +1260,10 @@ void 
roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, activeArrayCount);
     DUMP_U32(t, activeLeftCount);
     DUMP_U32(t, queueCount);
+    DUMP_U32(t, activeQueueArraySize);
     DUMP_U32(t, eagerIterOffset);
     DUMP_U32(t, handledKeyCount);
+    DUMP_U32(t, handledKeyFatbitSize);
     DUMP_U32(t, leftOffset);
     DUMP_U32(t, roseCount);
     DUMP_U32(t, lookaroundTableOffset);
@@ -1143,8 +1284,10 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U64(t, floating_group_mask);
     DUMP_U32(t, size);
     DUMP_U32(t, delay_count);
+    DUMP_U32(t, delay_fatbit_size);
     DUMP_U32(t, delay_base_id);
     DUMP_U32(t, anchored_count);
+    DUMP_U32(t, anchored_fatbit_size);
     DUMP_U32(t, anchored_base_id);
     DUMP_U32(t, maxFloatingDelayedMatch);
     DUMP_U32(t, delayRebuildLength);
@@ -1157,7 +1300,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, stateOffsets.anchorState);
     DUMP_U32(t, stateOffsets.groups);
     DUMP_U32(t, stateOffsets.groups_size);
-    DUMP_U32(t, stateOffsets.floatingMatcherState);
+    DUMP_U32(t, stateOffsets.longLitState);
     DUMP_U32(t, stateOffsets.somLocation);
     DUMP_U32(t, stateOffsets.somValid);
     DUMP_U32(t, stateOffsets.somWritable);
@@ -1176,7 +1319,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, ematcherRegionSize);
     DUMP_U32(t, somRevCount);
     DUMP_U32(t, somRevOffsetOffset);
-    DUMP_U32(t, floatingStreamState);
+    DUMP_U32(t, longLitStreamState);
     fprintf(f, "}\n");
     fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
 }
diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h
index 6abe629b..c3af749f 100644
--- a/src/rose/rose_graph.h
+++ b/src/rose/rose_graph.h
@@ -44,11 +44,10 @@
 #include "util/charreach.h"
 #include "util/depth.h"
 #include "util/ue2_containers.h"
+#include "util/ue2_graph.h"
 #include <memory>
 #include <set>
-#include <boost/graph/adjacency_list.hpp>
-#include <boost/graph/properties.hpp>
 namespace ue2 {
@@ -139,7 +138,7 @@ struct RoseSuffixInfo {
 /** \brief Properties attached to each Rose graph vertex. */
 struct RoseVertexProps {
     /** \brief Unique dense vertex index. Used for BGL algorithms. */
-    size_t idx = ~size_t{0};
+    size_t index = ~size_t{0};
     /** \brief IDs of literals in the Rose literal map. */
     flat_set<u32> literals;
@@ -183,6 +182,9 @@ struct RoseVertexProps {
 /** \brief Properties attached to each Rose graph edge. */
 /* bounds are distance from end of prev to start of the next */
 struct RoseEdgeProps {
+    /** \brief Unique dense edge index. Used for BGL algorithms. */
+    size_t index = ~size_t{0};
+
     /**
      * \brief Minimum distance from the end of the source role's match to the
      * start of the target role's match.
@@ -215,18 +217,10 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
 /**
  * \brief Core Rose graph structure.
- *
- * Note that we use the list selector for the edge and vertex lists: we depend
- * on insertion order for determinism, so we must use these containers.
 */
-using RoseGraph = boost::adjacency_list<boost::listS, boost::listS,
-                                        boost::bidirectionalS,
-                                        RoseVertexProps, RoseEdgeProps>;
-
+struct RoseGraph : public ue2_graph<RoseGraph, RoseVertexProps,
+                                    RoseEdgeProps> {
+    friend class RoseBuildImpl; /* to allow index renumbering */
+};
 using RoseVertex = RoseGraph::vertex_descriptor;
 using RoseEdge = RoseGraph::edge_descriptor;
diff --git a/src/rose/rose_in_dump.cpp b/src/rose/rose_in_dump.cpp
index fbd6858b..172b58e8 100644
--- a/src/rose/rose_in_dump.cpp
+++ b/src/rose/rose_in_dump.cpp
@@ -122,7 +122,7 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
         ostringstream name;
         name << grey.dumpPath << "pre_rose_" << id << ".dot";
-        dumpGraph(name.str().c_str(), h->g);
+        dumpGraph(name.str().c_str(), *h);
         assert(allMatchStatesHaveReports(*h));
     }
diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h
index 14d4d9b2..0e218576 100644
--- a/src/rose/rose_in_graph.h
+++ b/src/rose/rose_in_graph.h
@@ -46,13 +46,11 @@
 #include "ue2common.h"
 #include "rose/rose_common.h"
 #include "util/ue2_containers.h"
+#include "util/ue2_graph.h"
 #include "util/ue2string.h"
 #include <memory>
-#include <boost/graph/adjacency_list.hpp>
-#include <boost/graph/properties.hpp>
-
 namespace ue2 {
 class NGHolder;
@@ -128,6 +126,7 @@ public:
     flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
     u32 min_offset; /**< Minimum offset at which this vertex can match. */
     u32 max_offset; /**< Maximum offset at which this vertex can match. */
+    size_t index = 0;
 };
 struct RoseInEdgeProps {
@@ -174,11 +173,12 @@ struct RoseInEdgeProps {
     std::shared_ptr<raw_som_dfa> haig;
     u32 graph_lag;
+    size_t index = 0;
 };
-typedef boost::adjacency_list<boost::listS, boost::listS,
-                              boost::bidirectionalS, RoseInVertexProps,
-                              RoseInEdgeProps> RoseInGraph;
+struct RoseInGraph
+    : public ue2_graph<RoseInGraph, RoseInVertexProps, RoseInEdgeProps> {
+};
 typedef RoseInGraph::vertex_descriptor RoseInVertex;
 typedef RoseInGraph::edge_descriptor RoseInEdge;
diff --git a/src/rose/rose_in_util.cpp b/src/rose/rose_in_util.cpp
index cce6ff35..3b31b38e 100644
--- a/src/rose/rose_in_util.cpp
+++ b/src/rose/rose_in_util.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -48,27 +48,15 @@ using namespace std;
 namespace ue2 {
-static
-void populateIndexMap(const RoseInGraph &in,
-                      map<RoseInVertex, size_t> *index_map) {
-    size_t i = 0;
-    for (auto v : vertices_range(in)) {
-        (*index_map)[v] = i++;
-    }
-}
-
 /* Returns a topological ordering of the vertices in g. That is, the starts
  * are at the front and all the predecessors of a vertex occur earlier in the
  * list than the vertex.
 */
vector<RoseInVertex> topo_order(const RoseInGraph &g) {
-    map<RoseInVertex, size_t> index_map;
-    populateIndexMap(g, &index_map);
-
+    assert(hasCorrectlyNumberedVertices(g));
     vector<RoseInVertex> v_order;
-    v_order.reserve(index_map.size());
+    v_order.reserve(num_vertices(g));
-    topological_sort(g, back_inserter(v_order),
-        vertex_index_map(boost::make_assoc_property_map(index_map)));
+    boost::topological_sort(g, back_inserter(v_order));
     reverse(v_order.begin(), v_order.end()); /* put starts at the front */
@@ -105,6 +93,7 @@ private:
 }
 unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
+    assert(hasCorrectlyNumberedVertices(ig));
     unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>();
     unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
@@ -120,12 +109,8 @@ unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
         }
     }
-    map<RoseInVertex, size_t> index_map;
-    populateIndexMap(ig, &index_map);
-
     copy_graph(ig, *out,
-               boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))
-                   .vertex_index_map(boost::make_assoc_property_map(index_map)));
+               boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map)));
     return out;
 }
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h
index 51913984..411ce03f 100644
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -217,8 +217,8 @@ struct RoseStateOffsets {
     /** Size of packed Rose groups value, in bytes. */
     u32 groups_size;
-    /** State for floating literal matcher (managed by HWLM). */
-    u32 floatingMatcherState;
+    /** State for long literal support. */
+    u32 longLitState;
     /** Packed SOM location slots. */
     u32 somLocation;
@@ -309,9 +311,11 @@ struct RoseEngine {
     u32 historyRequired; /**< max amount of history required for streaming */
     u32 ekeyCount; /**< number of exhaustion keys */
     u32 dkeyCount; /**< number of dedupe keys */
+    u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
     u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
                         * report ids */
     u32 somLocationCount; /**< number of som locations required */
+    u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */
     u32 rolesWithStateCount; // number of roles with entries in state bitset
     u32 stateSize; /* size of the state bitset
                     * WARNING: not the size of the rose state */
@@ -325,6 +327,7 @@ struct RoseEngine {
     u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
     u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
     u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
+    u32 longLitTableOffset; // offset of the long literal table
     u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
                            * involved with the anchored table to produce a full
                            * match. */
@@ -369,14 +372,18 @@ struct RoseEngine {
     u32 activeArrayCount; //number of nfas tracked in the active array
     u32 activeLeftCount; //number of nfas tracked in the active rose array
     u32 queueCount; /**< number of nfa queues */
+    u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes)
     u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
                           * none */
     /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
-     * programs. Used to size the handled_roles fatbit in scratch. */
+     * programs. */
     u32 handledKeyCount;
+
+    /** \brief Size of the handled keys fatbit in scratch (bytes).
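
The rewritten topo_order() above can call boost::topological_sort directly because ue2_graph maintains a dense interior index property on vertices; the old listS-based adjacency_list had no vertex_index at all, which is why an associative property map had to be built first. A rough standalone illustration of the same effect using a plain BGL graph, where the vecS selector provides an implicit vertex_index playing the role of ue2_graph's index:

    #include <boost/graph/adjacency_list.hpp>
    #include <boost/graph/topological_sort.hpp>
    #include <iostream>
    #include <vector>

    int main() {
        using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                            boost::directedS>;
        Graph g(4);
        boost::add_edge(0, 1, g);
        boost::add_edge(1, 2, g);
        boost::add_edge(0, 3, g);

        // With a built-in vertex_index, no vertex_index_map is needed.
        std::vector<Graph::vertex_descriptor> order;
        boost::topological_sort(g, std::back_inserter(order));

        // topological_sort emits vertices in reverse topological order.
        for (auto it = order.rbegin(); it != order.rend(); ++it) {
            std::cout << *it << "\n";
        }
        return 0;
    }
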
*/ + u32 handledKeyFatbitSize; + u32 leftOffset; u32 roseCount; u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values) @@ -411,9 +418,11 @@ struct RoseEngine { rose_group floating_group_mask; /* groups that are used by the ftable */ u32 size; // (bytes) u32 delay_count; /* number of delayed literal ids. */ + u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) u32 delay_base_id; /* literal id of the first delayed literal. * delayed literal ids are contiguous */ u32 anchored_count; /* number of anchored literal ids */ + u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) u32 anchored_base_id; /* literal id of the first literal in the A table. * anchored literal ids are contiguous */ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can @@ -434,7 +443,7 @@ struct RoseEngine { u32 ematcherRegionSize; /* max region size to pass to ematcher */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 floatingStreamState; // size in bytes + u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; }; @@ -445,6 +454,72 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; +/** + * \brief Long literal subtable for a particular mode (caseful or nocase). + */ +struct RoseLongLitSubtable { + /** + * \brief Offset of the hash table (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffset; + + /** + * \brief Offset of the bloom filter (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 bloomOffset; + + /** \brief lg2 of the size of the hash table. */ + u8 hashBits; + + /** \brief Size of the bloom filter in bits. */ + u8 bloomBits; + + /** \brief Number of bits of packed stream state used. */ + u8 streamStateBits; +}; + +/** + * \brief Long literal table header. + */ +struct RoseLongLitTable { + /** + * \brief Total size of the whole table (including strings, bloom filters, + * hash tables). + */ + u32 size; + + /** \brief Caseful sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable caseful; + + /** \brief Caseless sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable nocase; + + /** \brief Total size of packed stream state in bytes. */ + u8 streamStateBytes; + + /** \brief Max length of literal prefixes. */ + u8 maxLen; +}; + +/** + * \brief One of these structures per hash table entry in our long literal + * table. + */ +struct RoseLongLitHashEntry { + /** + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. Zero if this bucket is empty. + */ + u32 str_offset; + + /** \brief Length of the literal string. */ + u32 str_len; +}; + static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 545e190f..ed913316 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -42,16 +42,22 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { + ROSE_INSTR_END, //!< End of program. ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_INSTR_CHECK_LIT_MASK, //!< Check and/cmp mask. ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. 
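
All of the offsets in the long literal structures above (hashOffset, bloomOffset, str_offset) are relative to the start of the RoseLongLitTable, so resolving them is plain pointer arithmetic on the table base. A hedged sketch, assuming only the RoseLongLitTable/RoseLongLitSubtable/RoseLongLitHashEntry declarations above and the ue2 integer typedefs; getBucket and getLiteral are illustrative helpers, not part of Hyperscan:

    // Resolve a hash bucket of a caseful/caseless subtable (sketch only).
    static const struct RoseLongLitHashEntry *
    getBucket(const struct RoseLongLitTable *table,
              const struct RoseLongLitSubtable *sub, u32 hash) {
        const char *base = (const char *)table; // offsets are table-relative
        const struct RoseLongLitHashEntry *tab =
            (const struct RoseLongLitHashEntry *)(base + sub->hashOffset);
        u32 mask = (1U << sub->hashBits) - 1; // 1 << hashBits entries
        return &tab[hash & mask];
    }

    static const char *getLiteral(const struct RoseLongLitTable *table,
                                  const struct RoseLongLitHashEntry *ent) {
        // str_offset == 0 marks an empty bucket; otherwise it points at the
        // literal string stored inside the table allocation.
        return ent->str_offset ? (const char *)table + ent->str_offset
                               : nullptr;
    }
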
 ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
 ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
 ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
+    ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check.
 ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
 ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check.
+    ROSE_INSTR_CHECK_MASK_32, //!< 32-byte and/cmp/neg mask check.
 ROSE_INSTR_CHECK_BYTE, //!< Single Byte check.
+    ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti.
+    ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti.
 ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
 ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
 ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
@@ -99,6 +105,7 @@ enum RoseInstructionCode {
 ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
 ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
 ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
+    ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator.
 /** \brief Check outfixes and suffixes for EOD and fire reports if so. */
 ROSE_INSTR_ENGINES_EOD,
@@ -110,7 +117,23 @@ enum RoseInstructionCode {
 /** \brief Run the EOD-anchored HWLM literal matcher. */
 ROSE_INSTR_MATCHER_EOD,
-    ROSE_INSTR_END //!< End of program.
+    /**
+     * \brief Confirm a case-sensitive literal at the current offset. In
+     * streaming mode, this makes use of the long literal table.
+     */
+    ROSE_INSTR_CHECK_LONG_LIT,
+
+    /**
+     * \brief Confirm a case-insensitive literal at the current offset. In
+     * streaming mode, this makes use of the long literal table.
+     */
+    ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
+};
+
+struct ROSE_STRUCT_END {
+    u8 code; //!< From enum RoseInstructionCode.
 };
 struct ROSE_STRUCT_ANCHORED_DELAY {
@@ -119,18 +142,6 @@ struct ROSE_STRUCT_ANCHORED_DELAY {
     u8 code; //!< From enum RoseInstructionCode.
     u32 done_jump; //!< Jump forward this many bytes if successful.
 };
-union RoseLiteralMask {
-    u64a a64[MAX_MASK2_WIDTH / sizeof(u64a)];
-    u8 a8[MAX_MASK2_WIDTH];
-};
-
-/** Note: check failure will halt program. */
-struct ROSE_STRUCT_CHECK_LIT_MASK {
-    u8 code; //!< From enum RoseInstructionCode.
-    union RoseLiteralMask and_mask;
-    union RoseLiteralMask cmp_mask;
-};
-
 /** Note: check failure will halt program. */
 struct ROSE_STRUCT_CHECK_LIT_EARLY {
     u8 code; //!< From enum RoseInstructionCode.
@@ -161,6 +172,13 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
     u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
 };
+struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
+    u8 code; //!< From enum RoseInstructionCode.
+    s8 offset; //!< The offset of the byte to examine.
+    u32 reach_index; //!< The index of the reach table entry to use.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_CHECK_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
     u32 index;
@@ -170,9 +188,18 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND {
 struct ROSE_STRUCT_CHECK_MASK {
     u8 code; //!< From enum roseInstructionCode.
-    u64a and_mask; //!< 64-bits and mask.
-    u64a cmp_mask; //!< 64-bits cmp mask.
+    u64a neg_mask; //!< 8-byte negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MASK_32 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 and_mask[32]; //!< 32-byte and mask.
+    u8 cmp_mask[32]; //!< 32-byte cmp mask.
+    u32 neg_mask; //!< 32-bit negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
@@ -186,6 +213,48 @@ struct ROSE_STRUCT_CHECK_BYTE {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
+// Since m128 and m256 could be misaligned in the bytecode,
+// we'll use u8[16] and u8[32] instead in all rose_check_shufti structures.
+struct ROSE_STRUCT_CHECK_SHUFTI_16x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 nib_mask[32]; //!< High 16 and low 16 bytes of nibble mask in shufti.
+    u8 bucket_select_mask[16]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< Negation mask in low 16 bits.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_32x8 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[16]; //!< High nibble mask in shufti.
+    u8 lo_mask[16]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[32]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< 32-bit negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_16x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[32]; //!< High nibble mask in shufti.
+    u8 lo_mask[32]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask[32]; //!< Mask for bucket assignment.
+    u32 neg_mask; //!< Negation mask in low 16 bits.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_SHUFTI_32x16 {
+    u8 code; //!< From enum RoseInstructionCode.
+    u8 hi_mask[32]; //!< High nibble mask in shufti.
+    u8 lo_mask[32]; //!< Low nibble mask in shufti.
+    u8 bucket_select_mask_hi[32]; //!< Bucket mask for high 8 buckets.
+    u8 bucket_select_mask_lo[32]; //!< Bucket mask for low 8 buckets.
+    u32 neg_mask; //!< 32-bit negation mask.
+    s32 offset; //!< Relative offset of the first byte.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_CHECK_INFIX {
     u8 code; //!< From enum RoseInstructionCode.
     u32 queue; //!< Queue of leftfix to check.
@@ -389,6 +458,12 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
+struct ROSE_STRUCT_SPARSE_ITER_ANY {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_ENGINES_EOD {
     u8 code; //!< From enum RoseInstructionCode.
     u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
@@ -402,8 +477,18 @@ struct ROSE_STRUCT_MATCHER_EOD {
     u8 code; //!< From enum RoseInstructionCode.
 };
-struct ROSE_STRUCT_END {
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LONG_LIT {
     u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string.
+    u32 lit_length; //!< Length of literal string.
+};
+
+/** Note: check failure will halt program.
*/ +struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. }; #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/rose/runtime.h b/src/rose/runtime.h index 60c7d34b..d2a4b5d7 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -97,8 +97,8 @@ void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { } static really_inline -u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.floatingMatcherState); +u8 *getLongLitState(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.longLitState); } static really_inline diff --git a/src/rose/stream.c b/src/rose/stream.c index b934f98f..9599612f 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -33,6 +33,8 @@ #include "miracle.h" #include "program_runtime.h" #include "rose.h" +#include "rose_internal.h" +#include "stream_long_lit.h" #include "hwlm/hwlm.h" #include "nfa/mcclellan.h" #include "nfa/nfa_api.h" @@ -406,6 +408,7 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, roseFlushLastByteHistory(t, scratch, offset + length); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); + storeLongLiteralState(t, state, scratch); } static really_inline @@ -548,6 +551,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; + DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); @@ -576,6 +580,12 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { const struct HWLM *ftable = getFLiteralMatcher(t); if (ftable) { + // Load in long literal table state and set up "fake history" buffers + // (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this + // must be done here in order to ensure that it happens before any path + // that leads to storeLongLiteralState(), which relies on these buffers. + loadLongLiteralState(t, state, scratch); + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); goto flush_delay_and_exit; @@ -621,17 +631,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { } DEBUG_PRINTF("start=%zu\n", start); - u8 *stream_state; - if (t->floatingStreamState) { - stream_state = getFloatingMatcherState(t, state); - } else { - stream_state = NULL; - } - DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback, - scratch, tctxt->groups & t->floating_group_mask, - stream_state); + scratch, tctxt->groups & t->floating_group_mask); } flush_delay_and_exit: diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h new file mode 100644 index 00000000..d78e2863 --- /dev/null +++ b/src/rose/stream_long_lit.h @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
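
The CHECK_SHUFTI instructions above are executed with vpshufb in validate_shufti.h (further below), but the underlying per-byte predicate is easy to state in scalar terms: each nibble of a byte indexes a 16-entry table of 8-bit bucket sets, and the byte hits a bucket only when both nibble lookups agree. A scalar model of the 8-bucket case (the neg_mask and valid-data handling of the real instructions is omitted here):

    #include <cstdint>
    #include <cstdio>

    // Scalar model of an 8-bucket shufti lookup: hi_mask/lo_mask are
    // 16-entry nibble tables; bucket_select picks the buckets a position
    // must hit.
    static bool byteInBuckets(uint8_t c, const uint8_t hi_mask[16],
                              const uint8_t lo_mask[16],
                              uint8_t bucket_select) {
        uint8_t lo = lo_mask[c & 0xf]; // buckets allowed by the low nibble
        uint8_t hi = hi_mask[c >> 4];  // buckets allowed by the high nibble
        return (lo & hi & bucket_select) != 0; // both must agree
    }

    int main() {
        // Tables where bucket 0 (bit 0) matches exactly the byte 'a' (0x61).
        uint8_t hi_mask[16] = {0}, lo_mask[16] = {0};
        hi_mask[0x6] = 0x01;
        lo_mask[0x1] = 0x01;

        printf("'a' -> %d\n", byteInBuckets('a', hi_mask, lo_mask, 0x01)); // 1
        printf("'b' -> %d\n", byteInBuckets('b', hi_mask, lo_mask, 0x01)); // 0
        return 0;
    }
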
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ *   may be used to endorse or promote products derived from this software
+ *   without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STREAM_LONG_LIT_H
+#define STREAM_LONG_LIT_H
+
+#include "rose.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "stream_long_lit_hash.h"
+#include "util/copybytes.h"
+
+static really_inline
+const struct RoseLongLitHashEntry *
+getHashTableBase(const struct RoseLongLitTable *ll_table,
+                 const struct RoseLongLitSubtable *ll_sub) {
+    assert(ll_sub->hashOffset);
+    return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
+                                                 ll_sub->hashOffset);
+}
+
+// Reads from stream state and unpacks values into stream state table.
+static really_inline
+void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
+                            const u8 *ll_state, u32 *state_case,
+                            u32 *state_nocase) {
+    assert(ll_table);
+    assert(ll_state);
+    assert(state_case && state_nocase);
+
+    u8 ss_bytes = ll_table->streamStateBytes;
+    u8 ssb = ll_table->caseful.streamStateBits;
+    UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
+    assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
+
+#if defined(ARCH_32_BIT)
+    // On 32-bit hosts, we may be able to avoid having to do any u64a
+    // manipulation at all.
+    if (ss_bytes <= 4) {
+        u32 ssb_mask = (1U << ssb) - 1;
+        u32 streamVal = partial_load_u32(ll_state, ss_bytes);
+        *state_case = (u32)(streamVal & ssb_mask);
+        *state_nocase = (u32)(streamVal >> ssb);
+        return;
+    }
+#endif
+
+    u64a ssb_mask = (1ULL << ssb) - 1;
+    u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
+    *state_case = (u32)(streamVal & ssb_mask);
+    *state_nocase = (u32)(streamVal >> ssb);
+}
+
+static rose_inline
+void loadLongLiteralStateMode(struct hs_scratch *scratch,
+                              const struct RoseLongLitTable *ll_table,
+                              const struct RoseLongLitSubtable *ll_sub,
+                              const u32 state, const char nocase) {
+    if (!state) {
+        DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
+        return;
+    }
+
+    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
+    const struct RoseLongLitHashEntry *ent = tab + state - 1;
+
+    assert(ent->str_offset + ent->str_len <= ll_table->size);
+    const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
+    size_t found_sz = ent->str_len;
+
+    struct RoseContext *tctxt = &scratch->tctxt;
+    if (nocase) {
+        tctxt->ll_buf_nocase = found_buf;
+        tctxt->ll_len_nocase = found_sz;
+    } else {
+        tctxt->ll_buf = found_buf;
+        tctxt->ll_len = found_sz;
+    }
+}
+
+static rose_inline
+void loadLongLiteralState(const struct RoseEngine *t, char *state,
+                          struct hs_scratch *scratch) {
+    if (!t->longLitTableOffset) {
+        return;
+    }
+
+    // If we don't have any long literals in play, these values must point to
+    // the real history buffer so that CHECK_LONG_LIT instructions examine the
+    // history buffer.
+    scratch->tctxt.ll_buf = scratch->core_info.hbuf;
+    scratch->tctxt.ll_len = scratch->core_info.hlen;
+    scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
+    scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
+
+    if (!scratch->core_info.hlen) {
+        return;
+    }
+
+    const struct RoseLongLitTable *ll_table =
+        getByOffset(t, t->longLitTableOffset);
+    const u8 *ll_state = getLongLitState(t, state);
+
+    u32 state_case;
+    u32 state_nocase;
+    loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
+
+    DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);
+
+    loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
+                             state_case, 0);
+    loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
+                             state_nocase, 1);
+}
+
+static rose_inline
+char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
+                        const struct hs_scratch *scratch,
+                        const struct RoseLongLitHashEntry *ent,
+                        const char nocase) {
+    assert(ent->str_offset + ent->str_len <= ll_table->size);
+    const u8 *s = (const u8 *)ll_table + ent->str_offset;
+    size_t len = ent->str_len;
+    const u8 *buf = scratch->core_info.buf;
+    const size_t buf_len = scratch->core_info.len;
+
+    if (len > buf_len) {
+        const struct RoseContext *tctxt = &scratch->tctxt;
+        const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
+        size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
+
+        if (len > buf_len + hist_len) {
+            return 0; // Break out - not enough total history
+        }
+
+        size_t overhang = len - buf_len;
+        assert(overhang <= hist_len);
+
+        if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
+            return 0;
+        }
+        s += overhang;
+        len -= overhang;
+    }
+
+    // if we got here, we don't need history or we compared ok out of history
+    assert(len <= buf_len);
+
+    if (cmpForward(buf + buf_len - len, s, len, nocase)) {
+        return 0;
+    }
+
+    return 1;
+}
+
+static rose_inline
+const u8 *prepScanBuffer(const struct core_info *ci,
+                         const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
+    const u8 hash_len = ll_table->maxLen;
+    assert(hash_len >= LONG_LIT_HASH_LEN);
+
+    // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
+    // location (end of buffer - hash_len). If this block can be satisfied
+    // entirely from either the current buffer or the history buffer, we pass
+    // in the pointer directly; otherwise we must make a copy.
+
+    const u8 *base;
+
+    if (hash_len > ci->len) {
+        size_t overhang = hash_len - ci->len;
+        if (overhang >= LONG_LIT_HASH_LEN) {
+            // Can read enough to hash from inside the history buffer.
+ assert(overhang <= ci->hlen); + base = ci->hbuf + ci->hlen - overhang; + } else { + // Copy: first chunk from history buffer. + assert(overhang <= ci->hlen); + copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang, + overhang); + // Copy: second chunk from current buffer. + size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang; + assert(copy_buf_len <= ci->len); + copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len); + // Read from our temporary buffer for the hash. + base = tempbuf; + } + } else { + // Can read enough to hash from inside the current buffer. + base = ci->buf + ci->len - hash_len; + } + + return base; +} + +#ifndef NDEBUG +// Defensive checking (used in assert) that these table values don't overflow +// the range available. +static really_inline +char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb, + u8 ssb_nc) { + u32 ssb_mask = (1ULL << (ssb)) - 1; + if (state_case & ~ssb_mask) { + return 1; + } + u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; + if (state_nocase & ~ssb_nc_mask) { + return 1; + } + return 0; +} +#endif + +// Reads from stream state table and packs values into stream state. +static rose_inline +void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, + u8 *ll_state, u32 state_case, u32 state_nocase) { + assert(ll_table); + assert(ll_state); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; + assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); + assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. + if (ss_bytes <= 4) { + u32 stagingStreamState = state_case; + stagingStreamState |= (state_nocase << ssb); + partial_store_u32(ll_state, stagingStreamState, ss_bytes); + return; + } +#endif + + u64a stagingStreamState = (u64a)state_case; + stagingStreamState |= (u64a)state_nocase << ssb; + partial_store_u64a(ll_state, stagingStreamState, ss_bytes); +} + +static really_inline +char has_bit(const u8 *data, u32 bit) { + return (data[bit / 8] >> (bit % 8)) & 1; +} + +static rose_inline +char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) { + return has_bit(bloom, hash & bloom_mask); +} + +static rose_inline +char checkBloomFilter(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, + const u8 *scan_buf, char nocase) { + assert(ll_sub->bloomBits); + + const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset; + const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1; + + char v = 1; + v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase)); + return v; +} + +/** + * \brief Look for a hit in the hash table. + * + * Returns zero if not found, otherwise returns (bucket + 1). 
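
Both confirmLongLiteral() and prepScanBuffer() above handle the same "overhang" situation: when the region of interest extends len - buf_len bytes past the start of the current buffer, that leading portion is taken from the end of the history buffer instead. The comparison half of that logic in standalone form, with memcmp standing in for cmpForward and the caseful path only:

    #include <cstring>
    #include <cstdio>

    // Sketch: match `lit` (length len) ending exactly at the end of `buf`,
    // borrowing the missing prefix bytes from the tail of `hist`.
    static bool confirmAtEnd(const char *lit, size_t len, const char *hist,
                             size_t hist_len, const char *buf,
                             size_t buf_len) {
        if (len > buf_len) {
            size_t overhang = len - buf_len;
            if (overhang > hist_len) {
                return false; // not enough total data
            }
            // Prefix of the literal vs tail of history.
            if (memcmp(hist + hist_len - overhang, lit, overhang) != 0) {
                return false;
            }
            lit += overhang;
            len -= overhang;
        }
        // Remainder of the literal vs tail of the current buffer.
        return memcmp(buf + buf_len - len, lit, len) == 0;
    }

    int main() {
        const char *hist = "0123456789hello_";
        const char *buf = "world";
        printf("%d\n", confirmAtEnd("hello_world", 11, hist, strlen(hist),
                                    buf, strlen(buf))); // prints 1
        return 0;
    }
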
+ */ +static rose_inline +u32 checkHashTable(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf, + const struct hs_scratch *scratch, char nocase) { + const u32 nbits = ll_sub->hashBits; + assert(nbits && nbits < 32); + const u32 num_entries = 1U << nbits; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + + u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase); + u32 bucket = hash & ((1U << nbits) - 1); + + while (tab[bucket].str_offset != 0) { + DEBUG_PRINTF("checking bucket %u\n", bucket); + if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) { + DEBUG_PRINTF("found hit for bucket %u\n", bucket); + return bucket + 1; + } + + if (++bucket == num_entries) { + bucket = 0; + } + } + + return 0; +} + +static rose_inline +void storeLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + DEBUG_PRINTF("no table\n"); + return; + } + + struct core_info *ci = &scratch->core_info; + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + assert(ll_table->maxLen); + + DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len, + ci->hlen); + + u32 state_case = 0; + u32 state_nocase = 0; + + // If we don't have enough history, we don't need to do anything. + if (ll_table->maxLen <= ci->len + ci->hlen) { + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf); + + if (ll_table->caseful.hashBits && + checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) { + state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf, + scratch, 0); + } + + if (ll_table->nocase.hashBits && + checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) { + state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf, + scratch, 1); + } + } else { + DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen); + } + + DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); + + u8 *ll_state = getLongLitState(t, state); + storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase); +} + +#endif // STREAM_LONG_LIT_H diff --git a/src/rose/stream_long_lit_hash.h b/src/rose/stream_long_lit_hash.h new file mode 100644 index 00000000..041f05e6 --- /dev/null +++ b/src/rose/stream_long_lit_hash.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
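
checkHashTable() below is a textbook open-addressed hash table probe with linear probing: str_offset == 0 marks an empty bucket, so the probe loop stops at the first empty slot on a miss, and a hit is reported as bucket + 1 so that zero can mean "no state" once the value is packed into stream state. The same loop over a toy table:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Entry {
        std::string key; // empty string plays the role of str_offset == 0
    };

    // Returns bucket + 1 on a hit, 0 on a miss -- the same encoding used by
    // checkHashTable() so that "no entry" packs into stream state as zero.
    static uint32_t probe(const std::vector<Entry> &tab, uint32_t hash,
                          const std::string &key) {
        uint32_t num_entries = (uint32_t)tab.size();
        uint32_t bucket = hash & (num_entries - 1); // size is a power of two
        while (!tab[bucket].key.empty()) {
            if (tab[bucket].key == key) {
                return bucket + 1;
            }
            if (++bucket == num_entries) {
                bucket = 0; // wrap around
            }
        }
        return 0;
    }

    int main() {
        std::vector<Entry> tab(8);
        tab[3].key = "foo";
        tab[4].key = "bar"; // collided with "foo", shifted by linear probing
        printf("%u %u %u\n", probe(tab, 3, "foo"), probe(tab, 3, "bar"),
               probe(tab, 3, "baz")); // prints 4 5 0
        return 0;
    }
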
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STREAM_LONG_LIT_HASH_H
+#define STREAM_LONG_LIT_HASH_H
+
+#include "ue2common.h"
+#include "util/bitutils.h"
+#include "util/unaligned.h"
+
+/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
+#define LONG_LIT_HASH_LEN 24
+
+/** \brief Multiplier used by all the hash functions below. */
+#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL
+
+/** \brief Hash function used for the long literal table in streaming mode. */
+static really_inline
+u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
+    // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
+    // hash are for strings longer than this.
+    assert(len >= LONG_LIT_HASH_LEN);
+
+    u64a v1 = unaligned_load_u64a(ptr);
+    u64a v2 = unaligned_load_u64a(ptr + 8);
+    u64a v3 = unaligned_load_u64a(ptr + 16);
+    if (nocase) {
+        v1 &= OCTO_CASE_CLEAR;
+        v2 &= OCTO_CASE_CLEAR;
+        v3 &= OCTO_CASE_CLEAR;
+    }
+    v1 *= HASH_MULTIPLIER;
+    v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER;
+    v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
+    v1 >>= 32;
+    v2 >>= 32;
+    v3 >>= 32;
+    return v1 ^ v2 ^ v3;
+}
+
+/**
+ * \brief Internal, used by the bloom filter hash functions below. Hashes 8
+ * bytes beginning at (ptr + offset).
+ */
+static really_inline
+u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) {
+    assert(offset + 8 <= LONG_LIT_HASH_LEN);
+
+    u64a v = unaligned_load_u64a(ptr + offset);
+    if (nocase) {
+        v &= OCTO_CASE_CLEAR;
+    }
+    v *= multiplier;
+    return v >> 32;
+}
+
+/*
+ * We ensure that we see every byte of the first LONG_LIT_HASH_LEN bytes of
+ * input data (using at least one of the following functions).
+ */
+
+static really_inline
+u32 bloomHash_1(const u8 *ptr, char nocase) {
+    const u64a multiplier = HASH_MULTIPLIER;
+    return bloomHash_i(ptr, 0, multiplier, nocase);
+}
+
+static really_inline
+u32 bloomHash_2(const u8 *ptr, char nocase) {
+    const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER;
+    return bloomHash_i(ptr, 4, multiplier, nocase);
+}
+
+static really_inline
+u32 bloomHash_3(const u8 *ptr, char nocase) {
+    const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
+    return bloomHash_i(ptr, 8, multiplier, nocase);
+}
+
+#endif // STREAM_LONG_LIT_HASH_H
diff --git a/src/rose/validate_mask.h b/src/rose/validate_mask.h
index b2c2f5d6..ac8cc312 100644
--- a/src/rose/validate_mask.h
+++ b/src/rose/validate_mask.h
@@ -26,7 +26,22 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */
+#ifndef VALIDATE_MASK_H
+#define VALIDATE_MASK_H
+
 #include "ue2common.h"
+#include "util/simd_utils.h"
+
+#if defined(DEBUG)
+static
+void validateMask32Print(const u8 *mask) {
+    int i;
+    for (i = 0; i < 32; i++) {
+        printf("%02x", mask[i]);
+    }
+    printf("\n");
+}
+#endif
 // check positive bytes in cmp_result.
 // return one if the check passed, zero otherwise.
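
All of the hashes in the header above are multiplicative: load some (case-normalised) bytes, multiply by a large odd 64-bit constant, and keep bits 32..63 of the product. The three bloom filter hashes are derived simply by using successive powers of HASH_MULTIPLIER over overlapping 8-byte windows at offsets 0, 4 and 8, and filter membership requires all three bits to be set. A condensed model, with case normalisation omitted:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static const uint64_t MULT = 0x0b4e0ef37bc32127ULL;

    // Multiplicative hash of 8 bytes: multiply, keep bits 32..63.
    static uint32_t hash8(const uint8_t *p, uint64_t multiplier) {
        uint64_t v;
        memcpy(&v, p, 8);
        return (uint32_t)((v * multiplier) >> 32);
    }

    // k = 3 bloom filter over a bit array of (mask + 1) bits, using powers
    // of the multiplier over windows at offsets 0, 4 and 8 as above.
    static void bloomSet(uint8_t *bloom, uint32_t mask, const uint8_t *p) {
        uint64_t m = MULT;
        for (int i = 0; i < 3; i++, m *= MULT) {
            uint32_t h = hash8(p + 4 * i, m) & mask;
            bloom[h / 8] |= 1 << (h % 8);
        }
    }

    static bool bloomHas(const uint8_t *bloom, uint32_t mask,
                         const uint8_t *p) {
        uint64_t m = MULT;
        for (int i = 0; i < 3; i++, m *= MULT) {
            uint32_t h = hash8(p + 4 * i, m) & mask;
            if (!(bloom[h / 8] & (1 << (h % 8)))) {
                return false; // definitely not present
            }
        }
        return true; // possibly present
    }

    int main() {
        uint8_t bloom[32] = {0}; // 256-bit filter
        const uint8_t *key = (const uint8_t *)"abcdefghijklmnop";
        bloomSet(bloom, 255, key);
        // Prints 1, then (almost certainly) 0.
        printf("%d %d\n", bloomHas(bloom, 255, key),
               bloomHas(bloom, 255, (const uint8_t *)"ABCDEFGHIJKLMNOP"));
        return 0;
    }
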
@@ -75,3 +90,29 @@ int validateMask(u64a data, u64a valid_data_mask, u64a and_mask, return 0; } } + +static really_inline +int validateMask32(const m256 data, const u32 valid_data_mask, + const m256 and_mask, const m256 cmp_mask, + const u32 neg_mask) { + m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask); + u32 cmp_result = ~movemask256(cmp_result_256); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + validateMask32Print((const u8 *)&data); + DEBUG_PRINTF("cmp_result\n"); + validateMask32Print((const u8 *)&cmp_result_256); +#endif + DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask); + DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask); + + if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) { + DEBUG_PRINTF("checkCompareResult32 passed\n"); + return 1; + } else { + DEBUG_PRINTF("checkCompareResult32 failed\n"); + return 0; + } +} + +#endif diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h new file mode 100644 index 00000000..49d2c2fe --- /dev/null +++ b/src/rose/validate_shufti.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
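
validateMask32() above widens the 8-byte validateMask() to 32 bytes, but the predicate is the same: mask the data with and_mask, compare with cmp_mask, collapse the per-byte comparison into a bitmask, and require that bitmask to equal neg_mask on the valid bytes; neg_mask is what lets an instruction insist that particular bytes do not match. A scalar equivalent at width 8:

    #include <cstdint>
    #include <cstdio>

    // Scalar model: bit i of `mismatch` is set when byte i of
    // (data & and_mask) differs from cmp_mask. The check passes when that
    // bitmask equals neg_mask over the valid bytes.
    static bool validateMask8(const uint8_t data[8],
                              const uint8_t and_mask[8],
                              const uint8_t cmp_mask[8], uint8_t neg_mask,
                              uint8_t valid_data_mask) {
        uint8_t mismatch = 0;
        for (int i = 0; i < 8; i++) {
            if ((uint8_t)(data[i] & and_mask[i]) != cmp_mask[i]) {
                mismatch |= 1 << i;
            }
        }
        return (mismatch & valid_data_mask) == (neg_mask & valid_data_mask);
    }

    int main() {
        // Require: byte 0 == 'a' case-insensitively (and-mask 0xdf), byte 1
        // != 'b' (via neg_mask), other bytes unconstrained (and-mask 0).
        uint8_t and_mask[8] = {0xdf, 0xff, 0, 0, 0, 0, 0, 0};
        uint8_t cmp_mask[8] = {'A', 'b', 0, 0, 0, 0, 0, 0};
        const uint8_t *s = (const uint8_t *)"Axzzzzzz";
        printf("%d\n", validateMask8(s, and_mask, cmp_mask, /*neg_mask=*/0x02,
                                     /*valid=*/0xff)); // prints 1
        return 0;
    }
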
+ */ + +#ifndef VALIDATE_SHUFTI_H +#define VALIDATE_SHUFTI_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#if defined(DEBUG) +static +void dumpMask(const void *mask, int len) { + const u8 *c = (const u8 *)mask; + for (int i = 0; i < len; i++) { + printf("%02x", c[i]); + } + printf("\n"); +} +#endif + +static really_inline +int validateShuftiMask16x16(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u16 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = vpshufb(lo_mask, and256(data, low4bits)); + m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("and_mask\n"); + dumpMask(&and_mask, 32); + DEBUG_PRINTF("nresult %x\n", nresult); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (((nresult >> 16) & nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask16x8(const m128 data, const m256 nib_mask, + const m128 and_mask, const u32 neg_mask, + const u16 valid_data_mask) { + m256 data_m256 = combine2x128(rshift64_m128(data, 4), data); + m256 low4bits = set32x8(0xf); + m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits)); + m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); + m128 nresult = eq128(and128(t, and_mask), zeroes128()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data_m256, 32); + DEBUG_PRINTF("nib_mask\n"); + dumpMask(&nib_mask, 32); + DEBUG_PRINTF("c_nib\n"); + dumpMask(&c_nib, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 16); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask128(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x8(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = vpshufb(lo_mask, and256(data, low4bits)); + m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + m256 nresult = eq256(and256(t, and_mask), zeroes256()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask256(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x16(const m256 data, + const m256 hi_mask_1, const m256 hi_mask_2, + const m256 lo_mask_1, const m256 lo_mask_2, + const m256 bucket_mask_hi, + const m256 bucket_mask_lo, const u32 neg_mask, + const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo_1 = vpshufb(lo_mask_1, data_lo); + m256 c_lo_2 = vpshufb(lo_mask_2, data_lo); + m256 
c_hi_1 = vpshufb(hi_mask_1, data_hi); + m256 c_hi_2 = vpshufb(hi_mask_2, data_hi); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi)); + u32 nresult = movemask256(eq256(result, zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("data_lo\n"); + dumpMask(&data_lo, 32); + DEBUG_PRINTF("data_hi\n"); + dumpMask(&data_hi, 32); + DEBUG_PRINTF("hi_mask_1\n"); + dumpMask(&hi_mask_1, 16); + DEBUG_PRINTF("hi_mask_2\n"); + dumpMask(&hi_mask_2, 16); + DEBUG_PRINTF("lo_mask_1\n"); + dumpMask(&lo_mask_1, 16); + DEBUG_PRINTF("lo_mask_2\n"); + dumpMask(&lo_mask_2, 16); + DEBUG_PRINTF("c_lo_1\n"); + dumpMask(&c_lo_1, 32); + DEBUG_PRINTF("c_lo_2\n"); + dumpMask(&c_lo_2, 32); + DEBUG_PRINTF("c_hi_1\n"); + dumpMask(&c_hi_1, 32); + DEBUG_PRINTF("c_hi_2\n"); + dumpMask(&c_hi_2, 32); + DEBUG_PRINTF("result\n"); + dumpMask(&result, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask; + return !cmp_result; +} +#endif diff --git a/src/runtime.c b/src/runtime.c index e761acc2..88e866dc 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -55,7 +55,6 @@ #include "state.h" #include "ue2common.h" #include "util/exhaust.h" -#include "util/fatbit.h" #include "util/multibit.h" static really_inline @@ -291,12 +290,12 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); - } else if (nfa->type == MCCLELLAN_NFA_16){ + } else if (nfa->type == MCCLELLAN_NFA_16) { nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); } else { - nfaExecSheng0_B(nfa, smwr->start_offset, local_buffer, - local_alen, roseReportAdaptor, scratch); + nfaExecSheng_B(nfa, smwr->start_offset, local_buffer, + local_alen, roseReportAdaptor, scratch); } } @@ -736,20 +735,11 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, assert(scratch); assert(!can_stop_matching(scratch)); - char *state = getMultiState(stream_state); - const struct RoseEngine *rose = stream_state->rose; const struct HWLM *ftable = getFLiteralMatcher(rose); size_t len2 = scratch->core_info.len; - u8 *hwlm_stream_state; - if (rose->floatingStreamState) { - hwlm_stream_state = getFloatingMatcherState(rose, state); - } else { - hwlm_stream_state = NULL; - } - DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); @@ -761,8 +751,8 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, // start the match region at zero. const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, - scratch, rose->initialGroups, hwlm_stream_state); + hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, + rose->initialGroups); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.c b/src/scratch.c index dae2c672..8cbe9760 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -43,17 +43,19 @@ #include "nfa/nfa_api_queue.h" #include "rose/rose_internal.h" #include "util/fatbit.h" -#include "util/multibit.h" /** * Determine the space required for a correctly aligned array of fatbit * structure, laid out as: * * - an array of num_entries pointers, each to a fatbit. 
- * - an array of fatbit structures, each of size fatbit_size(num_keys). + * - an array of fatbit structures, each of size fatbit_len. + * + * fatbit_len should have been determined at compile time, via the + * fatbit_size() call. */ static -size_t fatbit_array_size(u32 num_entries, u32 num_keys) { +size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) { size_t len = 0; // Array of pointers to each fatbit entry. @@ -61,7 +63,7 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) { // Fatbit entries themselves. len = ROUNDUP_N(len, alignof(struct fatbit)); - len += (size_t)fatbit_size(num_keys) * num_entries; + len += (size_t)fatbit_len * num_entries; return ROUNDUP_N(len, 8); // Round up for potential padding. } @@ -71,17 +73,19 @@ size_t fatbit_array_size(u32 num_entries, u32 num_keys) { static hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { u32 queueCount = proto->queueCount; - u32 deduperCount = proto->deduper.log_size; + u32 activeQueueArraySize = proto->activeQueueArraySize; + u32 deduperCount = proto->deduper.dkey_count; + u32 deduperLogSize = proto->deduper.log_size; u32 bStateSize = proto->bStateSize; u32 tStateSize = proto->tStateSize; u32 fullStateSize = proto->fullStateSize; u32 anchored_literal_region_len = proto->anchored_literal_region_len; - u32 anchored_literal_region_width = proto->anchored_literal_count; + u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size; u32 som_store_size = proto->som_store_count * sizeof(u64a); u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a); - u32 som_now_size = fatbit_size(proto->som_store_count); - u32 som_attempted_size = fatbit_size(proto->som_store_count); + u32 som_now_size = proto->som_fatbit_size; + u32 som_attempted_size = proto->som_fatbit_size; struct hs_scratch *s; struct hs_scratch *s_tmp; @@ -91,18 +95,18 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum)); size_t anchored_literal_region_size = fatbit_array_size( - anchored_literal_region_len, anchored_literal_region_width); + anchored_literal_region_len, proto->anchored_literal_fatbit_size); size_t delay_region_size = - fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_count); + fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size); // the size is all the allocated stuff, not including the struct itself size_t size = queue_size + 63 + bStateSize + tStateSize + fullStateSize + 63 /* cacheline padding */ - + fatbit_size(proto->handledKeyCount) /* handled roles */ - + fatbit_size(queueCount) /* active queue array */ - + 2 * fatbit_size(deduperCount) /* need odd and even logs */ - + 2 * fatbit_size(deduperCount) /* ditto som logs */ + + proto->handledKeyFatbitSize /* handled roles */ + + activeQueueArraySize /* active queue array */ + + 2 * deduperLogSize /* need odd and even logs */ + + 2 * deduperLogSize /* ditto som logs */ + 2 * sizeof(u64a) * deduperCount /* start offsets for som */ + anchored_literal_region_size + qmpq_size + delay_region_size @@ -157,7 +161,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) { s->delay_slots[i] = (struct fatbit *)current; assert(ISALIGNED(s->delay_slots[i])); - current += fatbit_size(proto->delay_count); + current += proto->delay_fatbit_size; } current = ROUNDUP_PTR(current, alignof(struct fatbit *)); @@ -167,7 +171,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { 
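
fatbit_array_size() above describes a common two-part layout: an array of num_entries pointers, then num_entries equally-sized payloads, with alignment rounding between the sections and at the end; the carving loops in alloc_scratch() then walk exactly the same layout. A standalone sketch of the pattern with plain types:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Round x up to a multiple of n (n a power of two), as ROUNDUP_N does.
    static size_t roundup(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

    int main() {
        const size_t num_entries = 4, payload_len = 24;

        // Size the block: pointer array, then the payloads.
        size_t len = num_entries * sizeof(char *);
        len = roundup(len, alignof(uint64_t)); // payload alignment
        len += payload_len * num_entries;
        len = roundup(len, 8); // trailing padding

        // Carve it up the same way.
        char *block = (char *)calloc(1, len);
        char **ptrs = (char **)block;
        char *payload = block + roundup(num_entries * sizeof(char *),
                                        alignof(uint64_t));
        for (size_t i = 0; i < num_entries; i++) {
            ptrs[i] = payload + i * payload_len; // one payload per entry
        }

        printf("total %zu bytes, entry 2 at offset %zu\n", len,
               (size_t)(ptrs[2] - block));
        free(block);
        return 0;
    }
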
for (u32 i = 0; i < anchored_literal_region_len; i++) { s->al_log[i] = (struct fatbit *)current; assert(ISALIGNED(s->al_log[i])); - current += fatbit_size(anchored_literal_region_width); + current += anchored_literal_fatbit_size; } current = ROUNDUP_PTR(current, 8); @@ -193,22 +197,22 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { assert(ISALIGNED_N(current, 8)); s->aqa = (struct fatbit *)current; - current += fatbit_size(queueCount); + current += activeQueueArraySize; s->handled_roles = (struct fatbit *)current; - current += fatbit_size(proto->handledKeyCount); + current += proto->handledKeyFatbitSize; s->deduper.log[0] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->deduper.log[1] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->deduper.som_log[0] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->deduper.som_log[1] = (struct fatbit *)current; - current += fatbit_size(deduperCount); + current += deduperLogSize; s->som_set_now = (struct fatbit *)current; current += som_now_size; @@ -293,19 +297,19 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->anchored_literal_region_len = rose->anchoredDistance; } - if (rose->anchored_count > proto->anchored_literal_count) { + if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) { resize = 1; - proto->anchored_literal_count = rose->anchored_count; + proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size; } - if (rose->delay_count > proto->delay_count) { + if (rose->delay_fatbit_size > proto->delay_fatbit_size) { resize = 1; - proto->delay_count = rose->delay_count; + proto->delay_fatbit_size = rose->delay_fatbit_size; } - if (rose->handledKeyCount > proto->handledKeyCount) { + if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) { resize = 1; - proto->handledKeyCount = rose->handledKeyCount; + proto->handledKeyFatbitSize = rose->handledKeyFatbitSize; } if (rose->tStateSize > proto->tStateSize) { @@ -319,12 +323,22 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->som_store_count = som_store_count; } + if (rose->somLocationFatbitSize > proto->som_fatbit_size) { + resize = 1; + proto->som_fatbit_size = rose->somLocationFatbitSize; + } + u32 queueCount = rose->queueCount; if (queueCount > proto->queueCount) { resize = 1; proto->queueCount = queueCount; } + if (rose->activeQueueArraySize > proto->activeQueueArraySize) { + resize = 1; + proto->activeQueueArraySize = rose->activeQueueArraySize; + } + u32 bStateSize = 0; if (rose->mode == HS_MODE_BLOCK) { bStateSize = rose->stateOffsets.end; @@ -344,9 +358,10 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { proto->fullStateSize = fullStateSize; } - if (rose->dkeyCount > proto->deduper.log_size) { + if (rose->dkeyCount > proto->deduper.dkey_count) { resize = 1; - proto->deduper.log_size = rose->dkeyCount; + proto->deduper.dkey_count = rose->dkeyCount; + proto->deduper.log_size = rose->dkeyLogSize; } if (resize) { diff --git a/src/scratch.h b/src/scratch.h index a2f02503..b59dc8d4 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -45,7 +45,7 @@ extern "C" #endif UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259; -#define FDR_TEMP_BUF_SIZE 220 +#define FDR_TEMP_BUF_SIZE 222 struct fatbit; struct hs_scratch; @@ -122,12 +122,33 @@ struct RoseContext { u32 filledDelayedSlots; u32 
diff --git a/src/scratch.h b/src/scratch.h
index a2f02503..b59dc8d4 100644
--- a/src/scratch.h
+++ b/src/scratch.h
@@ -45,7 +45,7 @@ extern "C"
 #endif
 
 UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259;
-#define FDR_TEMP_BUF_SIZE 220
+#define FDR_TEMP_BUF_SIZE 222
 
 struct fatbit;
 struct hs_scratch;
@@ -122,12 +122,33 @@ struct RoseContext {
     u32 filledDelayedSlots;
     u32 curr_qi;    /**< currently executing main queue index during
                      * \ref nfaQueueExec */
+
+    /**
+     * \brief Buffer for caseful long literal support, used in streaming mode
+     * only.
+     *
+     * If a long literal prefix was at the end of the buffer at the end of a
+     * stream write, then the long lit table hashes it and stores the result in
+     * stream state. At the start of the next write, this value is used to set
+     * this buffer to the matching prefix string (stored in the bytecode).
+     */
+    const u8 *ll_buf;
+
+    /** \brief Length in bytes of the string pointed to by ll_buf. */
+    size_t ll_len;
+
+    /** \brief Caseless version of ll_buf. */
+    const u8 *ll_buf_nocase;
+
+    /** \brief Length in bytes of the string pointed to by ll_buf_nocase. */
+    size_t ll_len_nocase;
 };
 
 struct match_deduper {
     struct fatbit *log[2]; /**< even, odd logs */
     struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
     u64a *som_start_log[2]; /**< even, odd start offset logs for som */
+    u32 dkey_count;
     u32 log_size;
     u64a current_report_offset;
     u8 som_log_dirty;
@@ -142,6 +163,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u32 magic;
     u8 in_use; /**< non-zero when being used by an API call. */
     u32 queueCount;
+    u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */
     u32 bStateSize; /**< sizeof block mode states */
     u32 tStateSize; /**< sizeof transient rose states */
     u32 fullStateSize; /**< size of uncompressed nfa state */
@@ -159,7 +181,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     struct core_info core_info;
     struct match_deduper deduper;
     u32 anchored_literal_region_len;
-    u32 anchored_literal_count;
+    u32 anchored_literal_fatbit_size; /**< size of each anch fatbit in bytes */
     struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
                                    * handled by this literal */
     u64a *som_store; /**< array of som locations */
@@ -171,8 +193,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
                          * location had been writable */
     u64a som_set_now_offset; /**< offset at which som_set_now represents */
     u32 som_store_count;
-    u32 handledKeyCount;
-    u32 delay_count;
+    u32 som_fatbit_size; /**< size of som location fatbit structures in bytes */
+    u32 handledKeyFatbitSize; /**< size of handled_keys fatbit in bytes */
+    u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */
     u32 scratchSize;
     char *scratch_alloc; /* user allocated scratch object */
     u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
diff --git a/src/scratch_dump.cpp b/src/scratch_dump.cpp
index 78a854bb..47c93c37 100644
--- a/src/scratch_dump.cpp
+++ b/src/scratch_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
 #include "scratch_dump.h"
 #include "hs_internal.h"
 #include "ue2common.h"
-#include "util/multibit_internal.h"
+#include "util/multibit_build.h"
 
 #include "nfa/nfa_api_queue.h"
 #include "rose/rose_internal.h"
@@ -54,12 +54,11 @@ void dumpScratch(const struct hs_scratch *s, FILE *f) {
     fprintf(f, " queues : %zu bytes\n",
             s->queueCount * sizeof(struct mq));
     fprintf(f, " bStateSize : %u bytes\n", s->bStateSize);
-    fprintf(f, " active queue array : %u bytes\n",
-            mmbit_size(s->queueCount));
+    fprintf(f, " active queue array : %u bytes\n", s->activeQueueArraySize);
     fprintf(f, " qmpq : %zu bytes\n",
             s->queueCount * sizeof(struct queue_match));
     fprintf(f, " delay info : %u bytes\n",
-            mmbit_size(s->delay_count) * DELAY_SLOT_COUNT);
+            s->delay_fatbit_size * DELAY_SLOT_COUNT);
 }
 
 } // namespace ue2
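The hand-off described in the new RoseContext comment can be pictured as follows. This is an illustrative sketch only: long_lit_table, tail_prefix_id() and restore_ll_buf() are invented names, and the real implementation stores a hash of the trailing bytes in stream state rather than rescanning the stored prefixes.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    struct long_lit_table {
        std::vector<std::string> prefixes; // copies stored in the "bytecode"
    };

    // End of a write: record which stored prefix (all assumed non-empty) the
    // buffer ends with, or 0 for none.
    static uint32_t tail_prefix_id(const long_lit_table &tab,
                                   const uint8_t *buf, size_t len) {
        for (uint32_t i = 0; i < tab.prefixes.size(); i++) {
            const std::string &p = tab.prefixes[i];
            if (len >= p.size() &&
                memcmp(buf + len - p.size(), p.data(), p.size()) == 0) {
                return i + 1;
            }
        }
        return 0;
    }

    // Start of the next write: point ll_buf at the bytecode's copy of the
    // prefix, so matching continues as if those bytes were still in scope.
    static void restore_ll_buf(const long_lit_table &tab, uint32_t prefix_id,
                               const uint8_t **ll_buf, size_t *ll_len) {
        if (prefix_id == 0) {
            *ll_buf = nullptr;
            *ll_len = 0;
            return;
        }
        const std::string &p = tab.prefixes[prefix_id - 1];
        *ll_buf = reinterpret_cast<const uint8_t *>(p.data());
        *ll_len = p.size();
    }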
diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index 90770ba5..108bca8a 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -30,16 +30,18 @@
 
 #include "grey.h"
 #include "ue2common.h"
+#include "nfa/dfa_min.h"
 #include "nfa/mcclellancompile.h"
 #include "nfa/mcclellancompile_util.h"
 #include "nfa/nfa_internal.h"
 #include "nfa/rdfa_merge.h"
 #include "nfa/shengcompile.h"
 #include "nfagraph/ng.h"
+#include "nfagraph/ng_depth.h"
 #include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_prune.h"
 #include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
 #include "smallwrite/smallwrite_internal.h"
 #include "util/alloc.h"
 #include "util/charreach.h"
@@ -101,6 +103,74 @@ SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns,
                       || num_patterns > cc.grey.smallWriteMaxPatterns) {
 }
 
+/**
+ * \brief Remove any reports from the given vertex that cannot match within
+ * max_depth due to their constraints.
+ */
+static
+bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth,
+                          const ReportManager &rm) {
+    assert(!g[v].reports.empty());
+
+    vector<ReportID> bad_reports;
+
+    for (ReportID id : g[v].reports) {
+        const auto &report = rm.getReport(id);
+        if (report.minOffset > max_depth) {
+            bad_reports.push_back(id);
+        }
+    }
+
+    for (ReportID id : bad_reports) {
+        g[v].reports.erase(id);
+    }
+
+    if (g[v].reports.empty()) {
+        DEBUG_PRINTF("none of vertex %zu's reports can match, cut accepts\n",
+                     g[v].index);
+        remove_edge(v, g.accept, g);
+        remove_edge(v, g.acceptEod, g);
+    }
+
+    return !bad_reports.empty();
+}
+
+/**
+ * \brief Prune vertices and reports from the graph that cannot match within
+ * max_depth.
+ */
+static
+bool pruneOverlong(NGHolder &g, const depth &max_depth,
+                   const ReportManager &rm) {
+    bool modified = false;
+    std::vector<NFAVertexDepth> depths;
+    calcDepths(g, depths);
+
+    for (auto v : vertices_range(g)) {
+        if (is_special(v, g)) {
+            continue;
+        }
+        const auto &d = depths.at(g[v].index);
+        depth min_depth = min(d.fromStart.min, d.fromStartDotStar.min);
+        if (min_depth > max_depth) {
+            clear_vertex(v, g);
+            modified = true;
+            continue;
+        }
+
+        if (is_match_vertex(v, g)) {
+            modified |= pruneOverlongReports(v, g, max_depth, rm);
+        }
+    }
+
+    if (modified) {
+        pruneUseless(g);
+        DEBUG_PRINTF("pruned graph down to %zu vertices\n", num_vertices(g));
+    }
+
+    return modified;
+}
+
 void SmallWriteBuildImpl::add(const NGWrapper &w) {
     // If the graph is poisoned (i.e. we can't build a SmallWrite version),
     // we don't even try.
@@ -118,13 +188,12 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) {
     // make a copy of the graph so that we can modify it for our purposes
     unique_ptr<NGHolder> h = cloneHolder(w);
 
+    pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
+
     reduceGraph(*h, SOM_NONE, w.utf8, cc);
 
-    // If the earliest match location is outside the small write region,
-    // then we don't need to build a SmallWrite version.
-    // However, we don't poison this case either, since it is simply a case,
-    // where we know the resulting graph won't match.
-    if (findMinWidth(*h) > depth(cc.grey.smallWriteLargestBuffer)) {
+    if (can_never_match(*h)) {
+        DEBUG_PRINTF("graph can never match in small block\n");
         return;
     }
@@ -140,7 +209,9 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) {
         return;
     }
 
-    prune_overlong(*r, cc.grey.smallWriteLargestBuffer);
+    if (prune_overlong(*r, cc.grey.smallWriteLargestBuffer)) {
+        minimize_hopcroft(*r, cc.grey);
+    }
 
     if (rdfa) {
         // do a merge of the new dfa with the existing dfa
@@ -350,6 +421,7 @@ aligned_unique_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
             return nullptr;
         }
         if (prune_overlong(rdfa, *small_region - *start_offset)) {
+            minimize_hopcroft(rdfa, cc.grey);
             if (rdfa.start_anchored == DEAD_STATE) {
                 DEBUG_PRINTF("all patterns pruned out\n");
                 return nullptr;
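The pruning pass above has two layers: a vertex whose minimum distance from start already exceeds the small-write buffer can never match and is removed outright, while a reachable vertex may still lose individual reports whose minOffset constraint cannot be satisfied. A minimal sketch of the two predicates, with plain integers standing in for the depth type and the report manager:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct VertexInfo {
        // min(fromStart.min, fromStartDotStar.min) in the real pass.
        uint32_t min_depth_from_start;
        std::vector<uint32_t> report_min_offsets;
    };

    // Layer 1: the whole vertex is dead if it cannot be reached in time.
    static bool vertex_overlong(const VertexInfo &v, uint32_t max_depth) {
        return v.min_depth_from_start > max_depth;
    }

    // Layer 2: otherwise, drop only the reports that cannot fire in time.
    static void prune_reports(VertexInfo &v, uint32_t max_depth) {
        auto &r = v.report_min_offsets;
        r.erase(std::remove_if(r.begin(), r.end(),
                               [&](uint32_t m) { return m > max_depth; }),
                r.end());
    }

If a vertex loses all of its reports, the real pass also cuts its edges to accept and acceptEod, which is what makes the subsequent pruneUseless() sweep effective.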
diff --git a/src/smallwrite/smallwrite_dump.cpp b/src/smallwrite/smallwrite_dump.cpp
index 0db97df5..bdf55c30 100644
--- a/src/smallwrite/smallwrite_dump.cpp
+++ b/src/smallwrite/smallwrite_dump.cpp
@@ -70,18 +70,11 @@ void smwrDumpNFA(const SmallWriteEngine *smwr, bool dump_raw,
     }
 
     const struct NFA *n = getSmwrNfa(smwr);
-    FILE *f;
 
-    f = fopen((base + "smallwrite_nfa.dot").c_str(), "w");
-    nfaDumpDot(n, f, base);
-    fclose(f);
-
-    f = fopen((base + "smallwrite_nfa.txt").c_str(), "w");
-    nfaDumpText(n, f);
-    fclose(f);
+    nfaGenerateDumpFiles(n, base + "smallwrite_nfa");
 
     if (dump_raw) {
-        f = fopen((base + "smallwrite_nfa.raw").c_str(), "w");
+        FILE *f = fopen((base + "smallwrite_nfa.raw").c_str(), "w");
         fwrite(n, 1, n->length, f);
         fclose(f);
     }
diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h
index 9de78f44..971ea362 100644
--- a/src/som/slot_manager.h
+++ b/src/som/slot_manager.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,7 +34,7 @@
 #define SLOT_MANAGER_H
 
 #include "ue2common.h"
-#include "nfagraph/ng_graph.h"
+#include "nfagraph/ng_holder.h"
 #include "util/alloc.h"
 #include "util/ue2_containers.h"
diff --git a/src/util/bitutils.h b/src/util/bitutils.h
index 6f1bcd09..d144e879 100644
--- a/src/util/bitutils.h
+++ b/src/util/bitutils.h
@@ -70,6 +70,7 @@
 #define CASE_BIT          0x20
 #define CASE_CLEAR        0xdf
 #define DOUBLE_CASE_CLEAR 0xdfdf
+#define OCTO_CASE_CLEAR   0xdfdfdfdfdfdfdfdfULL
 
 static really_inline
 u32 clz32(u32 x) {
@@ -470,4 +471,55 @@ u32 rank_in_mask64(u64a mask, u32 bit) {
     return popcount64(mask);
 }
 
+#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
+#define HAVE_PEXT
+#endif
+
+static really_inline
+u32 pext32(u32 x, u32 mask) {
+#if defined(HAVE_PEXT)
+    // Intel BMI2 can do this operation in one instruction.
+    return _pext_u32(x, mask);
+#else
+
+    u32 result = 0, num = 1;
+    while (mask != 0) {
+        u32 bit = findAndClearLSB_32(&mask);
+        if (x & (1U << bit)) {
+            assert(num != 0); // more than 32 bits!
+            result |= num;
+        }
+        num <<= 1;
+    }
+    return result;
+#endif
+}
+
+static really_inline
+u64a pext64(u64a x, u64a mask) {
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+    // Intel BMI2 can do this operation in one instruction.
+    return _pext_u64(x, mask);
+#else
+
+    u64a result = 0, num = 1; // 64-bit: a u32 here would drop selected bits
+    while (mask != 0) {
+        u32 bit = findAndClearLSB_64(&mask);
+        if (x & (1ULL << bit)) {
+            assert(num != 0); // more than 64 bits!
+ result |= num; + } + num <<= 1; + } + return result; +#endif +} + +#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +static really_inline +u64a pdep64(u64a x, u64a mask) { + return _pdep_u64(x, mask); +} +#endif + #endif // BITUTILS_H diff --git a/src/util/clique.cpp b/src/util/clique.cpp index ea22779c..79f06932 100644 --- a/src/util/clique.cpp +++ b/src/util/clique.cpp @@ -103,7 +103,7 @@ bool graph_empty(const Graph &g) { } vector> removeClique(CliqueGraph &cg) { - DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); + DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); vector> cliquesVec = {findCliqueGroup(cg)}; while (!graph_empty(cg)) { const vector &c = cliquesVec.back(); diff --git a/src/util/container.h b/src/util/container.h index 63e27743..e2cfb485 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ #include #include #include +#include namespace ue2 { @@ -78,7 +79,9 @@ void insert(C *container, typename C::iterator pos, const D &donor) { } /** - * \brief Constructs a vector from a range bounded by the given pair of iterators. */ + * \brief Constructs a vector from a range bounded by the given pair of + * iterators. + */ template auto make_vector_from(const std::pair &range) -> std::vector { diff --git a/src/util/copybytes.h b/src/util/copybytes.h new file mode 100644 index 00000000..872b8d28 --- /dev/null +++ b/src/util/copybytes.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef COPY_BYTES_H +#define COPY_BYTES_H + +#include "unaligned.h" +#include "simd_utils.h" + +static really_inline +void copy_upto_32_bytes(u8 *dst, const u8 *src, unsigned int len) { + switch (len) { + case 0: + break; + case 1: + *dst = *src; + break; + case 2: + unaligned_store_u16(dst, unaligned_load_u16(src)); + break; + case 3: + unaligned_store_u16(dst, unaligned_load_u16(src)); + dst[2] = src[2]; + break; + case 4: + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 5: + case 6: + case 7: + unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 8: + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 16: + storeu128(dst, loadu128(src)); + break; + case 32: + storeu256(dst, loadu256(src)); + break; + default: + assert(len < 32); + storeu128(dst + len - 16, loadu128(src + len - 16)); + storeu128(dst, loadu128(src)); + break; + } +} + +#endif diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index 9a8bd922..dba147ee 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -40,12 +40,14 @@ #define SSSE3 (1 << 9) #define SSE4_1 (1 << 19) #define SSE4_2 (1 << 20) +#define POPCNT (1 << 23) #define XSAVE (1 << 27) #define AVX (1 << 28) // EDX +#define FXSAVE (1 << 24) #define SSE (1 << 25) -#define SSE2 (1 << 25) +#define SSE2 (1 << 26) #define HTT (1 << 28) // Structured Extended Feature Flags Enumeration Leaf ECX values @@ -87,7 +89,6 @@ u64a xgetbv(u32 op) { #endif } -static int check_avx2(void) { #if defined(__INTEL_COMPILER) return _may_i_use_cpu_feature(_FEATURE_AVX2); @@ -137,6 +138,24 @@ u64a cpuid_flags(void) { return cap; } +int check_ssse3(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSSE3); +} + +int check_sse42(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & SSE4_2); +} + +int check_popcnt(void) { + unsigned int eax, ebx, ecx, edx; + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + return !!(ecx & POPCNT); +} + struct family_id { u32 full_family; u32 full_model; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 2df97ab5..8b23d495 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -41,6 +41,11 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx2(void); +int check_ssse3(void); +int check_sse42(void); +int check_popcnt(void); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp index 74b45414..4c159ec2 100644 --- a/src/util/dump_charclass.cpp +++ b/src/util/dump_charclass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -249,6 +249,15 @@ string describeClass(const CharReach &cr, size_t maxLength, return oss.str(); } +string describeClasses(const std::vector &v, size_t maxClassLength, + enum cc_output_t out_type) { + std::ostringstream oss; + for (const auto &cr : v) { + describeClass(oss, cr, maxClassLength, out_type); + } + return oss.str(); +} + // C stdio wrapper void describeClass(FILE *f, const 
CharReach &cr, size_t maxLength, enum cc_output_t out_type) { diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index 9c3362bc..45b707f1 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -38,6 +38,7 @@ #include #include #include +#include namespace ue2 { @@ -54,6 +55,10 @@ void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT); +std::string describeClasses(const std::vector &v, + size_t maxClassLength = 16, + enum cc_output_t out_type = CC_OUT_TEXT); + void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type); diff --git a/src/util/dump_util.cpp b/src/util/dump_util.cpp new file mode 100644 index 00000000..5b961367 --- /dev/null +++ b/src/util/dump_util.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "dump_util.h" + +#include +#include + +using namespace std; + +FILE *fopen_or_throw(const char *path, const char *mode) { + FILE *f = fopen(path, mode); + if (!f) { + throw runtime_error(string("Unable to open file: ") + path); + } + return f; +} diff --git a/src/util/dump_util.h b/src/util/dump_util.h new file mode 100644 index 00000000..487d2e7c --- /dev/null +++ b/src/util/dump_util.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DUMP_UTIL +#define DUMP_UTIL + +#include + +/** + * Same as fopen(), but on error throws an exception rather than returning NULL. + */ +FILE *fopen_or_throw(const char *path, const char *mode); + +#endif diff --git a/src/util/fatbit.h b/src/util/fatbit.h index ad607638..3c65db1a 100644 --- a/src/util/fatbit.h +++ b/src/util/fatbit.h @@ -40,6 +40,10 @@ #include "multibit.h" #include "ue2common.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MIN_FAT_SIZE 32 struct fatbit { @@ -82,11 +86,8 @@ u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) { return mmbit_iterate(bits->fb_int.raw, total_bits, it_in); } -/** \brief Return the size in bytes of a fatbit that can store the given - * number of bits. - * - * Not for use in performance-critical code, implementation is in fatbit.c. - */ -u32 fatbit_size(u32 total_bits); +#ifdef __cplusplus +} // extern "C" +#endif #endif diff --git a/src/util/fatbit_build.cpp b/src/util/fatbit_build.cpp new file mode 100644 index 00000000..77f4b550 --- /dev/null +++ b/src/util/fatbit_build.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "fatbit_build.h" + +#include "fatbit.h" +#include "multibit_build.h" + +#include + +using namespace std; + +namespace ue2 { + +u32 fatbit_size(u32 total_bits) { + return max(u32{sizeof(struct fatbit)}, mmbit_size(total_bits)); +} + +} // namespace ue2 diff --git a/src/util/fatbit_build.h b/src/util/fatbit_build.h new file mode 100644 index 00000000..d7611657 --- /dev/null +++ b/src/util/fatbit_build.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Fatbit: build code + */ + +#ifndef FATBIT_BUILD_H +#define FATBIT_BUILD_H + +#include "ue2common.h" + +namespace ue2 { + +/** + * \brief Return the size in bytes of a fatbit that can store the given + * number of bits. + */ +u32 fatbit_size(u32 total_bits); + +} // namespace ue2 + +#endif // FATBIT_BUILD_H diff --git a/src/util/graph.h b/src/util/graph.h index 90589f14..4c2876f1 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,71 +38,22 @@ #include "util/graph_range.h" #include "util/ue2_containers.h" -#include -#include #include -#include +#include +#include + +#include +#include +#include +#include +#include namespace ue2 { /** \brief True if the given vertex has no out-edges. 
*/ template bool isLeafNode(const typename Graph::vertex_descriptor& v, const Graph& g) { - typename Graph::adjacency_iterator ai, ae; - std::tie(ai, ae) = adjacent_vertices(v, g); - return ai == ae; // no out edges -} - -/** \brief True if the out-degree of vertex \a v is greater than the given - * limit. */ -template -bool hasGreaterOutDegree(size_t limit, - const typename Graph::vertex_descriptor& v, - const Graph& g) { - typename Graph::out_edge_iterator ei, ee; - for (std::tie(ei, ee) = out_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - return false; -} - -/** \brief Returns true if the in-degree of vertex \a v is greater than the - * given limit. */ -template -bool hasGreaterInDegree(size_t limit, - const typename Graph::vertex_descriptor& v, - const Graph& g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - return false; -} - -/** - * \brief True if the degree of vertex \a v is greater than the given limit. - */ -template -bool has_greater_degree(size_t limit, - const typename Graph::vertex_descriptor &v, - const Graph &g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g); ei != ee; ++ei) { - if (limit-- == 0) { - return true; - } - } - typename Graph::out_edge_iterator oi, oe; - for (std::tie(oi, oe) = out_edges(v, g); oi != oe; ++oi) { - if (limit-- == 0) { - return true; - } - } - return false; + return out_degree(v, g) == 0; } /** \brief True if vertex \a v has an edge to itself. */ @@ -137,48 +88,10 @@ size_t proper_in_degree(const typename Graph::vertex_descriptor &v, return in_degree(v, g) - (edge(v, v, g).second ? 1 : 0); } -/** \brief Returns true iff the in-degree of vertex \a v is \a expected */ -template -bool in_degree_equal_to(const typename Graph::vertex_descriptor &v, - const Graph &g, size_t expected) { - size_t seen = 0; - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(v, g);; ++ei, seen++) { - if (seen == expected) { - return ei == ee; - } - if (ei == ee) { - return false; - } - } -} - -/** \brief same as edge(s, t, g) by finds edge by inspecting in-edges of target. - * Should be used when it is known that t has a small in-degree and when s - * may have a large out-degree. - */ -template -std::pair -edge_by_target(const typename Graph::vertex_descriptor &s, - const typename Graph::vertex_descriptor &t, const Graph &g) { - typename Graph::in_edge_iterator ei, ee; - for (std::tie(ei, ee) = in_edges(t, g); ei != ee; ++ei) { - if (source(*ei, g) == s) { - return std::make_pair(*ei, true); - } - } - - return std::make_pair(typename Graph::edge_descriptor(), false); -} - - /** \brief True if vertex \a v has at least one successor. */ template bool has_successor(const typename Graph::vertex_descriptor &v, const Graph &g) { - typename Graph::adjacency_iterator ai, ae; - std::tie(ai, ae) = adjacent_vertices(v, g); - - return ai != ae; + return out_degree(v, g) > 0; } /** \brief True if vertex \a v has at least one successor other than itself. */ @@ -197,26 +110,6 @@ bool has_proper_successor(const typename Graph::vertex_descriptor &v, return ai != ae; } -/** \brief A version of clear_vertex that explicitly removes in- and out-edges - * for vertex \a v. For many graphs, this is faster than the BGL clear_vertex - * function, which walks the graph's full edge list. 
*/ -template -void clear_vertex_faster(typename Graph::vertex_descriptor v, Graph &g) { - typename Graph::in_edge_iterator ei, ee; - tie(ei, ee) = in_edges(v, g); - while (ei != ee) { - remove_edge(*ei++, g); - } - - typename Graph::out_edge_iterator oi, oe; - tie(oi, oe) = out_edges(v, g); - while (oi != oe) { - // NOTE: version that takes out_edge_iterator is faster according to - // the BGL docs. - remove_edge(oi++, g); - } -} - /** \brief Find the set of vertices that are reachable from the vertices in \a * sources. */ template @@ -251,6 +144,41 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) { } } +template +ue2::flat_set +find_vertices_in_cycles(const Graph &g) { + using vertex_descriptor = typename Graph::vertex_descriptor; + + std::map comp_map; + + boost::strong_components(g, boost::make_assoc_property_map(comp_map)); + + std::map> comps; + + for (const auto &e : comp_map) { + comps[e.second].push_back(e.first); + } + + ue2::flat_set rv; + + for (const auto &comp : comps | boost::adaptors::map_values) { + /* every vertex in a strongly connected component is reachable from + * every other vertex in the component. A vertex is involved in a cycle + * therefore if it is in a strongly connected component with more than + * one vertex or if it is the only vertex and it has a self loop. */ + assert(!comp.empty()); + if (comp.size() > 1) { + insert(&rv, comp); + } + vertex_descriptor v = *comp.begin(); + if (hasSelfLoop(v, g)) { + rv.insert(v); + } + } + + return rv; +} + template bool has_parallel_edge(const Graph &g) { using vertex_descriptor = typename Graph::vertex_descriptor; @@ -291,6 +219,22 @@ bool is_dag(const Graph &g, bool ignore_self_loops = false) { return true; } +template +class vertex_recorder : public boost::default_dfs_visitor { +public: + explicit vertex_recorder(Cont &o) : out(o) {} + template + void discover_vertex(typename Cont::value_type v, const G &) { + out.insert(v); + } + Cont &out; +}; + +template +vertex_recorder make_vertex_recorder(Cont &o) { + return vertex_recorder(o); +} + template std::pair add_edge_if_not_present(typename Graph::vertex_descriptor u, @@ -313,6 +257,40 @@ std::pair add_edge_if_not_present( return e; } +#ifndef NDEBUG + +template +bool hasCorrectlyNumberedVertices(const Graph &g) { + auto count = num_vertices(g); + std::vector ids(count, false); + for (auto v : vertices_range(g)) { + auto id = g[v].index; + if (id >= count || ids[id]) { + return false; // duplicate + } + ids[id] = true; + } + return std::find(ids.begin(), ids.end(), false) == ids.end() + && count == vertex_index_upper_bound(g); +} + +template +bool hasCorrectlyNumberedEdges(const Graph &g) { + auto count = num_edges(g); + std::vector ids(count, false); + for (const auto &e : edges_range(g)) { + auto id = g[e].index; + if (id >= count || ids[id]) { + return false; // duplicate + } + ids[id] = true; + } + return std::find(ids.begin(), ids.end(), false) == ids.end() + && count == edge_index_upper_bound(g); +} + +#endif + } // namespace ue2 #endif // UTIL_GRAPH_H diff --git a/src/util/graph_range.h b/src/util/graph_range.h index 82814695..3df06911 100644 --- a/src/util/graph_range.h +++ b/src/util/graph_range.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,7 +51,6 @@ #ifndef UTIL_GRAPH_RANGE_H #define UTIL_GRAPH_RANGE_H 
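The new hasCorrectlyNumberedVertices()/hasCorrectlyNumberedEdges() debug checks in graph.h enforce a simple invariant: the index properties must form exactly the set {0, ..., n-1}, with no gaps and no duplicates. The same check, sketched against a plain vector of indices rather than a BGL graph:

    #include <cstddef>
    #include <vector>

    static bool correctly_numbered(const std::vector<size_t> &indices) {
        const size_t count = indices.size();
        std::vector<bool> seen(count, false);
        for (size_t id : indices) {
            if (id >= count || seen[id]) {
                return false; // out of range, or a duplicate
            }
            seen[id] = true;
        }
        return true; // every value in [0, count) present exactly once
    }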
-#include #include namespace ue2 { diff --git a/src/util/hash.h b/src/util/hash.h new file mode 100644 index 00000000..0b571772 --- /dev/null +++ b/src/util/hash.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Hashing utility functions. + */ + +#ifndef UTIL_HASH_H +#define UTIL_HASH_H + +#include + +namespace ue2 { + +namespace hash_detail { + +template +void hash_build(size_t &v, const T &obj) { + boost::hash_combine(v, obj); +} + +template +void hash_build(size_t &v, const T &obj, Args&&... args) { + hash_build(v, obj); + hash_build(v, args...); // recursive +} + +} // namespace hash_detail + +/** + * \brief Computes the combined hash of all its arguments. + * + * Simply use: + * + * size_t hash = hash_all(a, b, c, d); + * + * Where a, b, c and d are hashable. + */ +template +size_t hash_all(Args&&... 
args) { + size_t v = 0; + hash_detail::hash_build(v, args...); + return v; +} + +} // namespace ue2 + +#endif // UTIL_HASH_H diff --git a/src/util/masked_move.c b/src/util/masked_move.c index 71406308..ec788db7 100644 --- a/src/util/masked_move.c +++ b/src/util/masked_move.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ /* masks for masked moves */ /* magic mask for maskload (vmmaskmovq) - described in UE-2424 */ -const u32 mm_mask_mask[16] ALIGN_CL_DIRECTIVE = { +const ALIGN_CL_DIRECTIVE u32 mm_mask_mask[16] = { 0x00000000U, 0x00000000U, 0x00000000U, diff --git a/src/util/multibit.c b/src/util/multibit.c index c22b73ff..de192d7d 100644 --- a/src/util/multibit.c +++ b/src/util/multibit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -138,62 +138,3 @@ const u32 mmbit_root_offset_from_level[7] = { 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4), 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4) + (1 << MMB_KEY_SHIFT * 5), }; - -u32 mmbit_size(u32 total_bits) { - MDEBUG_PRINTF("%u\n", total_bits); - - // Flat model multibit structures are just stored as a bit vector. - if (total_bits <= MMB_FLAT_MAX_BITS) { - return ROUNDUP_N(total_bits, 8) / 8; - } - - u64a current_level = 1; // Number of blocks on current level. - u64a total = 0; // Total number of blocks. - while (current_level * MMB_KEY_BITS < total_bits) { - total += current_level; - current_level <<= MMB_KEY_SHIFT; - } - - // Last level is a one-for-one bit vector. It needs room for total_bits - // elements, rounded up to the nearest block. - u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; - total += last_level; - - assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); - return (u32)(total * sizeof(MMB_TYPE)); -} - -#ifdef DUMP_SUPPORT - -#include -#include - -/** \brief Dump a sparse iterator's keys to stdout. */ -void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it, - u32 total_bits) { - // Expediency and future-proofing: create a temporary multibit of the right - // size with all the bits on, then walk it with this sparse iterator. - size_t bytes = mmbit_size(total_bits); - u8 *bits = malloc(bytes); - if (!bits) { - printf("Failed to alloc %zu bytes for temp multibit", bytes); - return; - } - for (u32 i = 0; i < total_bits; i++) { - mmbit_set_i(bits, total_bits, i); - } - - struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; - u32 idx = 0; - for (u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, s); - i != MMB_INVALID; - i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, s)) { - printf("%u ", i); - } - - printf("(%u keys)", idx + 1); - - free(bits); -} - -#endif // DUMP_SUPPORT diff --git a/src/util/multibit.h b/src/util/multibit.h index ddc8bbdd..4df8733a 100644 --- a/src/util/multibit.h +++ b/src/util/multibit.h @@ -162,7 +162,7 @@ u32 mmb_popcount(MMB_TYPE val) { } #ifndef MMMB_DEBUG -#define MDEBUG_PRINTF(x, ...) do { } while(0); +#define MDEBUG_PRINTF(x, ...) 
do { } while(0) #else #define MDEBUG_PRINTF DEBUG_PRINTF #endif diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index 2a402d8c..5fe2d617 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "scatter.h" #include "ue2common.h" #include "rose/rose_build_scatter.h" +#include "util/compile_error.h" #include #include // for memset @@ -45,6 +46,32 @@ using namespace std; namespace ue2 { +u32 mmbit_size(u32 total_bits) { + if (total_bits > MMB_MAX_BITS) { + throw ResourceLimitError(); + } + + // Flat model multibit structures are just stored as a bit vector. + if (total_bits <= MMB_FLAT_MAX_BITS) { + return ROUNDUP_N(total_bits, 8) / 8; + } + + u64a current_level = 1; // Number of blocks on current level. + u64a total = 0; // Total number of blocks. + while (current_level * MMB_KEY_BITS < total_bits) { + total += current_level; + current_level <<= MMB_KEY_SHIFT; + } + + // Last level is a one-for-one bit vector. It needs room for total_bits + // elements, rounded up to the nearest block. + u64a last_level = ((u64a)total_bits + MMB_KEY_BITS - 1) / MMB_KEY_BITS; + total += last_level; + + assert(total * sizeof(MMB_TYPE) <= UINT32_MAX); + return (u32)(total * sizeof(MMB_TYPE)); +} + namespace { struct TreeNode { MMB_TYPE mask = 0; @@ -133,6 +160,7 @@ void mmbBuildSparseIterator(vector &out, assert(out.empty()); assert(!bits.empty()); assert(total_bits > 0); + assert(total_bits <= MMB_MAX_BITS); DEBUG_PRINTF("building sparse iter for %zu of %u bits\n", bits.size(), total_bits); diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index ac263552..951f1fb4 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,20 +34,31 @@ #define MULTIBIT_BUILD_H #include "multibit_internal.h" +#include "hash.h" #include -/** \brief Comparator for \ref mmbit_sparse_iter structures. */ -static inline -bool operator<(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) { - if (a.mask != b.mask) { - return a.mask < b.mask; - } - return a.val < b.val; +inline +bool operator==(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) { + return a.mask == b.mask && a.val == b.val; +} + +inline +size_t hash_value(const mmbit_sparse_iter &iter) { + return ue2::hash_all(iter.mask, iter.val); } namespace ue2 { +/** + * \brief Return the size in bytes of a multibit that can store the given + * number of bits. + * + * This will throw a resource limit assertion if the requested mmbit is too + * large. + */ +u32 mmbit_size(u32 total_bits); + /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. 
*/ void mmbBuildSparseIterator(std::vector &out, diff --git a/src/util/multibit_internal.h b/src/util/multibit_internal.h index de87fe2a..350f3bfd 100644 --- a/src/util/multibit_internal.h +++ b/src/util/multibit_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,9 @@ extern "C" { typedef u64a MMB_TYPE; /**< Basic block type for mmbit operations. */ #define MMB_MAX_LEVEL 6 /**< Maximum level in the mmbit pyramid. */ +/** \brief Maximum number of keys (bits) in a multibit. */ +#define MMB_MAX_BITS (1U << 31) + /** \brief Sparse iterator record type. * * A sparse iterator is a tree of these records, where val identifies the @@ -71,13 +74,6 @@ struct mmbit_sparse_state { /** \brief Maximum number of \ref mmbit_sparse_state that could be needed. */ #define MAX_SPARSE_ITER_STATES (6 + 1) -/** \brief Return the size in bytes of a multibit that can store the given - * number of bits. - * - * Not for use in performance-critical code, implementation is in multibit.c. - */ -u32 mmbit_size(u32 total_bits); - #ifdef __cplusplus } // extern "C" #endif diff --git a/src/util/simd_types.h b/src/util/simd_types.h index e4541411..d6e5d6a3 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -61,7 +61,12 @@ #error no intrinsics! #endif +#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) typedef __m128i m128; +#else +typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; +#endif + #if defined(__AVX2__) typedef __m256i m256; #else diff --git a/src/util/simd_utils.c b/src/util/simd_utils.c index a86c568d..54b5b4ba 100644 --- a/src/util/simd_utils.c +++ b/src/util/simd_utils.c @@ -32,7 +32,7 @@ #include "simd_utils.h" -const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { +ALIGN_CL_DIRECTIVE const char vbs_mask_data[] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, @@ -48,7 +48,7 @@ const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { #define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8 /** \brief LUT for the mask1bit functions. */ -const u8 simd_onebit_masks[] ALIGN_CL_DIRECTIVE = { +ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { ZEROES_31, 0x01, ZEROES_32, ZEROES_31, 0x02, ZEROES_32, ZEROES_31, 0x04, ZEROES_32, diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 3544629f..e8676249 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -71,6 +71,7 @@ #include "ue2common.h" #include "simd_types.h" +#include "unaligned.h" // Define a common assume_aligned using an appropriate compiler built-in, if // it's available. Note that we need to handle C or C++ compilation. 
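The simd_utils.h hunks that follow extend the pre-AVX2 path, where an m256 is emulated as a struct of two m128 halves; movemask256, for example, stitches the two 16-bit byte masks together. A standalone sketch of that stitching using standard SSE2 intrinsics (m256_stub is a stand-in for the emulated m256 type):

    #include <cstdint>
    #include <emmintrin.h> // SSE2

    struct m256_stub {
        __m128i lo;
        __m128i hi;
    };

    static inline uint32_t movemask256_sketch(m256_stub a) {
        // _mm_movemask_epi8 yields one bit per byte of a 128-bit lane.
        uint32_t lo_mask = (uint32_t)_mm_movemask_epi8(a.lo); // bits 0..15
        uint32_t hi_mask = (uint32_t)_mm_movemask_epi8(a.hi); // bits 16..31
        return lo_mask | (hi_mask << 16);
    }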
@@ -158,6 +159,10 @@ static really_inline m128 set16x8(u8 c) { return _mm_set1_epi8(c); } +static really_inline m128 set4x32(u32 c) { + return _mm_set1_epi32(c); +} + static really_inline u32 movd(const m128 in) { return _mm_cvtsi128_si32(in); } @@ -172,6 +177,20 @@ static really_inline u64a movq(const m128 in) { #endif } +/* another form of movq */ +static really_inline +m128 load_m128_from_u64a(const u64a *p) { +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + /* unfortunately _mm_loadl_epi64() is best avoided as it seems to cause + * trouble on some older compilers, possibly because it is misdefined to + * take an m128 as its parameter */ + return _mm_set_epi64((__m64)0ULL, (__m64)*p); +#else + /* ICC doesn't like casting to __m64 */ + return _mm_loadl_epi64((const m128 *)p); +#endif +} + #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) @@ -245,7 +264,13 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } +#ifdef __cplusplus +extern "C" { +#endif extern const u8 simd_onebit_masks[]; +#ifdef __cplusplus +} +#endif static really_inline m128 mask1bit128(unsigned int n) { @@ -269,12 +294,12 @@ void clearbit128(m128 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit128(const m128 *ptr, unsigned int n) { +char testbit128(m128 val, unsigned int n) { const m128 mask = mask1bit128(n); #if defined(__SSE4_1__) - return !_mm_testz_si128(mask, *ptr); + return !_mm_testz_si128(mask, val); #else - return isnonzero128(and128(mask, *ptr)); + return isnonzero128(and128(mask, val)); #endif } @@ -307,6 +332,25 @@ m128 variable_byte_shift_m128(m128 in, s32 amount) { return pshufb(in, shift_mask); } +static really_inline +m128 max_u8_m128(m128 a, m128 b) { + return _mm_max_epu8(a, b); +} + +static really_inline +m128 min_u8_m128(m128 a, m128 b) { + return _mm_min_epu8(a, b); +} + +static really_inline +m128 sadd_u8_m128(m128 a, m128 b) { + return _mm_adds_epu8(a, b); +} + +static really_inline +m128 sub_u8_m128(m128 a, m128 b) { + return _mm_sub_epi8(a, b); +} /**** **** 256-bit Primitives @@ -354,6 +398,26 @@ m256 set32x8(u32 in) { return rv; } +static really_inline +m256 eq256(m256 a, m256 b) { + m256 rv; + rv.lo = eq128(a.lo, b.lo); + rv.hi = eq128(a.hi, b.hi); + return rv; +} + +static really_inline +u32 movemask256(m256 a) { + u32 lo_mask = movemask128(a.lo); + u32 hi_mask = movemask128(a.hi); + return lo_mask | (hi_mask << 16); +} + +static really_inline +m256 set2x128(m128 a) { + m256 rv = {a, a}; + return rv; +} #endif static really_inline m256 zeroes256(void) { @@ -504,6 +568,10 @@ static really_inline m256 load2x128(const void *ptr) { #endif } +static really_inline m256 loadu2x128(const void *ptr) { + return set2x128(loadu128(ptr)); +} + // aligned store static really_inline void store256(void *ptr, m256 a) { assert(ISALIGNED_N(ptr, alignof(m256))); @@ -525,6 +593,16 @@ static really_inline m256 loadu256(const void *ptr) { #endif } +// unaligned store +static really_inline void storeu256(void *ptr, m256 a) { +#if defined(__AVX2__) + _mm256_storeu_si256((m256 *)ptr, a); +#else + storeu128(ptr, a.lo); + storeu128((char *)ptr + 16, a.hi); +#endif +} + // packed unaligned store of first N bytes static really_inline void storebytes256(void *ptr, m256 a, unsigned int n) { @@ -580,18 +658,34 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. 
static really_inline -char testbit256(const m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit256(m256 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 128; } return testbit128(sub, n); } +static really_really_inline +m128 movdq_hi(m256 x) { + return x.hi; +} + +static really_really_inline +m128 movdq_lo(m256 x) { + return x.lo; +} + +static really_inline +m256 combine2x128(m128 hi, m128 lo) { + m256 rv = {lo, hi}; + return rv; +} + #else // AVX2 // switches on bit N in the given vector. @@ -607,9 +701,9 @@ void clearbit256(m256 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit256(const m256 *ptr, unsigned int n) { +char testbit256(m256 val, unsigned int n) { const m256 mask = mask1bit256(n); - return !_mm256_testz_si256(mask, *ptr); + return !_mm256_testz_si256(mask, val); } static really_really_inline @@ -636,6 +730,14 @@ m128 movdq_lo(m256 x) { #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b); #define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) +static really_inline +m256 combine2x128(m128 hi, m128 lo) { +#if defined(_mm256_set_m128i) + return _mm256_set_m128i(hi, lo); +#else + return insert128to256(cast128to256(lo), hi, 1); +#endif +} #endif //AVX2 /**** @@ -801,15 +903,15 @@ void clearbit384(m384 *ptr, unsigned int n) { // tests bit N in the given vector. static really_inline -char testbit384(const m384 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const m128 *sub; +char testbit384(m384 val, unsigned int n) { + assert(n < sizeof(val) * 8); + m128 sub; if (n < 128) { - sub = &ptr->lo; + sub = val.lo; } else if (n < 256) { - sub = &ptr->mid; + sub = val.mid; } else { - sub = &ptr->hi; + sub = val.hi; } return testbit128(sub, n % 128); } @@ -1014,26 +1116,26 @@ void clearbit512(m512 *ptr, unsigned int n) { // tests bit N in the given vector. 
static really_inline -char testbit512(const m512 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); +char testbit512(m512 val, unsigned int n) { + assert(n < sizeof(val) * 8); #if !defined(__AVX2__) - const m128 *sub; + m128 sub; if (n < 128) { - sub = &ptr->lo.lo; + sub = val.lo.lo; } else if (n < 256) { - sub = &ptr->lo.hi; + sub = val.lo.hi; } else if (n < 384) { - sub = &ptr->hi.lo; + sub = val.hi.lo; } else { - sub = &ptr->hi.hi; + sub = val.hi.hi; } return testbit128(sub, n % 128); #else - const m256 *sub; + m256 sub; if (n < 256) { - sub = &ptr->lo; + sub = val.lo; } else { - sub = &ptr->hi; + sub = val.hi; n -= 256; } return testbit256(sub, n); diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 217d08ea..5bbf4cfe 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -207,6 +207,10 @@ public: return std::make_pair(iterator(it), false); } + iterator insert(UNUSED const_iterator hint, const value_type &value) { + return insert(value).first; + } + std::pair insert(value_type &&value) { auto it = std::lower_bound(data.begin(), data.end(), value, comp); if (it == data.end() || comp(value, *it)) { @@ -216,6 +220,10 @@ public: return std::make_pair(iterator(it), false); } + iterator insert(UNUSED const_iterator hint, value_type &&value) { + return insert(value).first; + } + template void insert(InputIt first, InputIt second) { for (; first != second; ++first) { diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h new file mode 100644 index 00000000..9634b032 --- /dev/null +++ b/src/util/ue2_graph.h @@ -0,0 +1,1304 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef UE2_GRAPH_H +#define UE2_GRAPH_H + +#include "ue2common.h" +#include "util/graph_range.h" + +#include +#include +#include /* vertex_index_t, ... */ +#include /* no_property */ +#include +#include +#include +#include + +#include /* tie */ +#include /* pair, declval */ + +/* + * Basic design of ue2_graph: + * + * Fairly standard adjacency list type graph structure. The main internal + * structures are vertex_node and edge_node. + * + * Each vertex_node maintains lists of incoming and outgoing edge_nodes, a + * serial number and the vertex properties. + * + * Each edge_node contains pointers to the source and target vertex as well as + * the serial number and edge properties. + * + * Every time an edge_node or vertex_node is created in the graph, it is given a + * unique serial number by increasing a private counter in the graph. + * + * The main thing to note is that the in and out edge lists are intrusive lists + * with the edge_node containing the necessary hooks. This means that we can + * easily convert the edge_node to iterators of the in_edge_list and + * out_edge_list and remove them from the lists. + * + * vertex_descriptor and edge_descriptor structures both just wrap pointers to + * the relevant node structure along with the serial number. operator<() for the + * descriptors is overridden to look at the serial member of the node. + * We do not use: + * - the address of the node structure as this would lead to an unstable + * ordering of vertices between runs. + * - the index field as this would mean that the generation of new index + * values (during say renumbering of vertex nodes after removing some + * vertices) would potentially reorder vertices and corrupt containers + * such as std::set<>. + * The serial number is copied into the descriptors so that we can still have + * descriptors in a container (such as set or unordered_set) after removing the + * underlying node. + * + * Hashing of descriptors is based on the serial field for similar reasons. + * + * + * + * Main differences from boost::adjacency_list<> with listS: + * + * (1) Deterministic ordering for vertices and edges + * boost::adjacency_list<> uses pointer ordering for vertex_descriptors. As + * a result, ordering of vertices and edges between runs is + * non-deterministic unless containers, etc use custom comparators. + * + * (2) Proper types for descriptors, etc. + * No more void * for vertex_descriptors and trying to use it for the wrong + * graph type. + * + * (3) Constant time num_edges(), num_vertices(), degree(), in_degree() and + * out_degree() + * std::list is meant to have constant time in C++11 ::size(), but this is + * not always implemented as people want to keep ABI compatibility with + * existing C++98 standard libraries (gcc 4.8). As ue2_graph_h uses + * intrusive lists rather than std::list this is not an issue for us. + * + * (4) Constant time remove_edge(e, g) + * ue2_graph uses boost::intrusive_lists internally so we can easily unlink + * an edge from the in and out edgelist of its source and target. + * + * (5) More efficient edge(u, v, g) and remove_edge(u, v, g) + * ue2_graph will check which of u and v has the smallest relevant degree + * and use that to search for the edge(s). + * + * (6) Automatically populate the index field of vertex and edge bundles. + * Saves us from doing it manually. Naturally there is nothing to prevent + * the user from stuffing up the index properties later. 
+ * + * (7) Different edge iteration order + * ue2_graph does not maintain an explicit global edge list, so the + * edge_iterator is constructed out of vertex_iterator and + * out_edge_iterators by iterating the out_edges of each vertices. This + * means that edge iteration order is not insertion order like for + * adjacency_list. + * + * (8) null_edge() + * Because why not? + * + * (9) vertex and edge properties must have an index field. + * We generally need them so the effort has not been put into specialising + * for when they are not present. + * + * + * + * Possible Future Work: + * + * (1) Improve edge(u, v, g) performance + * This function sees a fair amount of use and is O(n) in the smallest of + * the source out_degree or target in_degree. This could be improved by + * changes on of the edge containers to be something similar to a multiset. + * + * (2) 'Lie' about the number of edges / vertices + * + * One of the main uses of num_edges() and num_vertices() is to allocate a + * vector, etc so that it can be indexed by edge or vertex index. If + * num_edges() and num_vertices() returned the appropriate size for such a + * vector (at least one more than the largest index), we would be able to + * avoid some renumbering operations. Functions would have to be provided to + * get the real number of vertices and edges. Having num_vertices() and + * num_edges() return an over-estimate is not without precedence in the BGL + * - the filtered_graph adaptor does the same thing and is compatible with + * various (all?) BGL algorithms. It is not clear that this was done + * deliberately for the same reason or because it is difficult for + * filtered_graph to get the true counts. + * + * (3) Investigate slab/pooled allocation schemes for nodes. + */ + +namespace ue2 { + +namespace graph_detail { + +class graph_base : boost::noncopyable { +}; + +struct default_edge_property { + size_t index; +}; + +struct default_vertex_property { + size_t index; +}; + +} + +template +class ue2_graph : graph_detail::graph_base { +private: + struct in_edge_tag { }; + struct out_edge_tag { }; + + struct vertex_node; + + using out_edge_hook + = boost::intrusive::list_base_hook >; + + /* in_edge_hook does not use safe mode as during graph destruction we do not + * maintain the in edge lists */ + using in_edge_hook + = boost::intrusive::list_base_hook, + boost::intrusive::link_mode >; + + struct edge_node : public out_edge_hook, public in_edge_hook { + explicit edge_node(u64a serial_in) : serial(serial_in) { } + + vertex_node *source = nullptr; + vertex_node *target = nullptr; + const u64a serial; /*< used to order edges. We do not use props.index so + * that there is no danger of invalidating sets or + * other containers by changing the index due to + * renumbering */ + EdgePropertyType props; + }; + + template using vertex_edge_list + = boost::intrusive::list >; + + struct vertex_node : public boost::intrusive::list_base_hook<> { + explicit vertex_node(u64a serial_in) : serial(serial_in) { } + + VertexPropertyType props; + const u64a serial; /*< used to order vertices. 
We do not use props.index + * so that there is no danger of invalidating sets or + * other containers by changing the index due to + * renumbering */ + + /* The incoming edges are not considered owned by the vertex */ + vertex_edge_list in_edge_list; + + /* The out going edges are considered owned by the vertex and + * need to be freed when the graph is begin destroyed */ + vertex_edge_list out_edge_list; + + /* The destructor only frees memory owned by the vertex and will leave + * the neighbour's edges in a bad state. If a vertex is being removed + * (rather than the graph being destroyed), then the more gentle clean + * up of clear_vertex() is required to be called first */ + ~vertex_node() { + out_edge_list.clear_and_dispose(delete_disposer()); + } + }; + + struct delete_disposer { + template void operator()(const T *d) const { delete d; } + }; + + struct in_edge_disposer { + void operator()(edge_node *e) const { + /* remove from source's out edge list before deleting */ + vertex_node *u = e->source; + u->out_edge_list.erase(u->out_edge_list.iterator_to(*e)); + delete e; + } + }; + + struct out_edge_disposer { + void operator()(edge_node *e) const { + /* remove from target's in edge list before deleting */ + vertex_node *v = e->target; + v->in_edge_list.erase(v->in_edge_list.iterator_to(*e)); + delete e; + } + }; + + using vertices_list_type + = boost::intrusive::list > >; + + vertices_list_type vertices_list; + +protected: /* to allow renumbering */ + static const size_t N_SPECIAL_VERTICES = 0; /* override in derived class */ + size_t next_vertex_index = 0; + size_t next_edge_index = 0; + +private: + size_t graph_edge_count = 0; /* maintained explicitly as we have no global + edge list */ + + u64a next_serial = 0; + u64a new_serial() { + u64a serial = next_serial++; + if (!next_serial) { + /* if we have created enough graph edges/vertices to overflow a u64a + * we must have spent close to an eternity adding to this graph so + * something must have gone very wrong and we will not be producing + * a final bytecode in a reasonable amount of time. Or, more likely, + * the next_serial value has become corrupt. 
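The in- and out-edge lists above are Boost.Intrusive lists, which is what makes O(1) unlink and ownership-aware disposal possible. A self-contained sketch of that technique, assuming a simplified `edge` element (the real hooks are tagged and use different link modes for the in and out lists):

```cpp
#include <boost/intrusive/list.hpp>

struct edge : boost::intrusive::list_base_hook<> {
    explicit edge(int i) : index(i) {}
    int index;
};

struct deleter {
    void operator()(edge *e) const { delete e; }
};

int main() {
    boost::intrusive::list<edge> out_edges; // holds hooks, not copies
    out_edges.push_back(*new edge(0));
    out_edges.push_back(*new edge(1));

    // O(1) unlink given only a reference to the element itself:
    edge &e = out_edges.front();
    out_edges.erase(out_edges.iterator_to(e));
    delete &e;

    // dispose of the remaining "owned" elements, as the out-edge list
    // does when the graph is destroyed
    out_edges.clear_and_dispose(deleter());
    return 0;
}
```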
*/ + throw std::overflow_error("too many graph edges/vertices created"); + } + return serial; + } +public: + using vertices_size_type = typename vertices_list_type::size_type; + using degree_size_type + = typename vertex_edge_list::size_type; + using edges_size_type = size_t; + + using vertex_property_type = VertexPropertyType; + using edge_property_type = EdgePropertyType; + + using graph_bundled = boost::no_property; + using vertex_bundled = VertexPropertyType; + using edge_bundled = EdgePropertyType; + + class vertex_descriptor : boost::totally_ordered { + public: + vertex_descriptor() : p(nullptr), serial(0) { } + explicit vertex_descriptor(vertex_node *pp) + : p(pp), serial(pp->serial) { } + + operator bool() const { return p; } + bool operator<(const vertex_descriptor b) const { + if (p && b.p) { + /* no vertices in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const vertex_descriptor b) const { + return p == b.p; + } + + friend size_t hash_value(vertex_descriptor v) { + using boost::hash_value; + return hash_value(v.serial); + } + + private: + vertex_node *raw(void) { return p; } + vertex_node *p; + u64a serial; + friend ue2_graph; + }; + + class edge_descriptor : boost::totally_ordered { + public: + edge_descriptor() : p(nullptr), serial(0) { } + explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { } + + /* Convenice ctor to allow us to directly get an edge_descriptor from + * edge() and add_edge(). As we have null_edges and we always allow + * parallel edges, the bool component of the return from these functions + * is not required. */ + edge_descriptor(const std::pair &tup) + : p(tup.first.p), serial(tup.first.serial) { + assert(tup.second == (bool)tup.first); + } + + operator bool() const { return p; } + bool operator<(const edge_descriptor b) const { + if (p && b.p) { + /* no edges in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const edge_descriptor b) const { + return p == b.p; + } + + friend size_t hash_value(edge_descriptor e) { + using boost::hash_value; + return hash_value(e.serial); + } + + private: + edge_node *raw(void) { return p; } + edge_node *p; + u64a serial; + friend ue2_graph; + }; + +private: + /* Note: apparently, nested class templates cannot be fully specialised but + * they can be partially specialised. Sigh, ... 
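The workaround alluded to in that note deserves a standalone example: an explicit (full) specialisation of a member class template is not permitted inside the enclosing class, but adding a dummy trailing parameter turns each case into a legal partial specialisation. A reduced sketch with hypothetical names:

```cpp
#include <type_traits>

template<typename V, typename E>
struct outer {
    template<typename T, typename = void>
    struct key { };

    // OK: the dummy second parameter keeps these *partial* specialisations
    template<typename D> struct key<V, D> { using type = int; };
    template<typename D> struct key<E, D> { using type = long; };

    // A full specialisation here would be ill-formed:
    // template<> struct key<V, void> { /* ... */ };
};

static_assert(
    std::is_same<outer<char, float>::key<char>::type, int>::value,
    "char selects the V branch");
```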
*/ + template + struct bundle_key_type { + }; + + template + struct bundle_key_type { + using type = vertex_descriptor; + }; + + template + struct bundle_key_type { + using type = edge_descriptor; + }; + +public: + class out_edge_iterator : public boost::iterator_adaptor< + out_edge_iterator, + typename vertex_edge_list::const_iterator, + edge_descriptor, + boost::bidirectional_traversal_tag, + edge_descriptor> { + using super = typename out_edge_iterator::iterator_adaptor_; + public: + out_edge_iterator() : super() { } + explicit out_edge_iterator( + typename vertex_edge_list::const_iterator it) + : super(it) { } + edge_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return edge_descriptor(const_cast(&*super::base())); + } + }; + + class in_edge_iterator : public boost::iterator_adaptor< + in_edge_iterator, + typename vertex_edge_list::const_iterator, + edge_descriptor, + boost::bidirectional_traversal_tag, + edge_descriptor> { + using super = typename in_edge_iterator::iterator_adaptor_; + public: + in_edge_iterator() : super() { } + explicit in_edge_iterator( + typename vertex_edge_list::const_iterator it) + : super(it) { } + edge_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return edge_descriptor(const_cast(&*super::base())); + } + }; + + class adjacency_iterator : public boost::iterator_adaptor< + adjacency_iterator, + out_edge_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename adjacency_iterator::iterator_adaptor_; + public: + adjacency_iterator(out_edge_iterator a) : super(std::move(a)) { } + adjacency_iterator() { } + + vertex_descriptor dereference() const { + return vertex_descriptor(super::base()->p->target); + } + }; + + class inv_adjacency_iterator : public boost::iterator_adaptor< + inv_adjacency_iterator, + in_edge_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename inv_adjacency_iterator::iterator_adaptor_; + public: + inv_adjacency_iterator(in_edge_iterator a) : super(std::move(a)) { } + inv_adjacency_iterator() { } + + vertex_descriptor dereference() const { + return vertex_descriptor(super::base()->p->source); + } + }; + + class vertex_iterator : public boost::iterator_adaptor< + vertex_iterator, + typename vertices_list_type::const_iterator, + vertex_descriptor, + boost::bidirectional_traversal_tag, + vertex_descriptor> { + using super = typename vertex_iterator::iterator_adaptor_; + public: + vertex_iterator() : super() { } + explicit vertex_iterator(typename vertices_list_type::const_iterator it) + : super(it) { } + vertex_descriptor dereference() const { + /* :( const_cast makes me sad but constness is defined by the graph + * parameter of bgl api calls */ + return vertex_descriptor( + const_cast(&*super::base())); + } + }; + + class edge_iterator : public boost::iterator_facade< + edge_iterator, + edge_descriptor, + boost::forward_traversal_tag, /* TODO: make bidi */ + edge_descriptor> { + public: + using main_base_iter_type = vertex_iterator; + using aux_base_iter_type = out_edge_iterator; + + edge_iterator(main_base_iter_type b, main_base_iter_type e) + : main(std::move(b)), main_end(std::move(e)) { + if (main == main_end) { + return; + } + std::tie(aux, aux_end) = out_edges_impl(*main); + while (aux == aux_end) { + ++main; + if (main == main_end) { + 
break; + } + std::tie(aux, aux_end) = out_edges_impl(*main); + } + } + edge_iterator() { } + + friend class boost::iterator_core_access; + void increment() { + ++aux; + while (aux == aux_end) { + ++main; + if (main == main_end) { + break; + } + std::tie(aux, aux_end) = out_edges_impl(*main); + } + } + bool equal(const edge_iterator &other) const { + return main == other.main && (main == main_end || aux == other.aux); + } + edge_descriptor dereference() const { + return *aux; + } + + main_base_iter_type main; + main_base_iter_type main_end; + aux_base_iter_type aux; + aux_base_iter_type aux_end; + }; + +public: + static + vertex_descriptor null_vertex() { return vertex_descriptor(); } + + vertex_descriptor add_vertex_impl() { + vertex_node *v = new vertex_node(new_serial()); + v->props.index = next_vertex_index++; + vertices_list.push_back(*v); + return vertex_descriptor(v); + } + + void remove_vertex_impl(vertex_descriptor v) { + vertex_node *vv = v.raw(); + assert(vv->in_edge_list.empty()); + assert(vv->out_edge_list.empty()); + vertices_list.erase_and_dispose(vertices_list.iterator_to(*vv), + delete_disposer()); + } + + void clear_in_edges_impl(vertex_descriptor v) { + graph_edge_count -= v.raw()->in_edge_list.size(); + v.raw()->in_edge_list.clear_and_dispose(in_edge_disposer()); + } + + void clear_out_edges_impl(vertex_descriptor v) { + graph_edge_count -= v.raw()->out_edge_list.size(); + v.raw()->out_edge_list.clear_and_dispose(out_edge_disposer()); + } + + /* IncidenceGraph concept functions */ + + static + vertex_descriptor source_impl(edge_descriptor e) { + return vertex_descriptor(e.raw()->source); + } + + static + vertex_descriptor target_impl(edge_descriptor e) { + return vertex_descriptor(e.raw()->target); + } + + static + degree_size_type out_degree_impl(vertex_descriptor v) { + return v.raw()->out_edge_list.size(); + } + + static + std::pair + out_edges_impl(vertex_descriptor v) { + return {out_edge_iterator(v.raw()->out_edge_list.begin()), + out_edge_iterator(v.raw()->out_edge_list.end())}; + } + + /* BidirectionalGraph concept functions */ + + static + degree_size_type in_degree_impl(vertex_descriptor v) { + return v.raw()->in_edge_list.size(); + } + + static + std::pair + in_edges_impl(vertex_descriptor v) { + return {in_edge_iterator(v.raw()->in_edge_list.begin()), + in_edge_iterator(v.raw()->in_edge_list.end())}; + } + + /* Note: this is defined so that self loops are counted twice - which may or + * may not be what you want. Actually, you probably don't want this at + * all. */ + static + degree_size_type degree_impl(vertex_descriptor v) { + return in_degree_impl(v) + out_degree_impl(v); + } + + /* AdjacencyList concept functions */ + + static + std::pair + adjacent_vertices_impl(vertex_descriptor v) { + auto out_edge_its = out_edges_impl(v); + return {adjacency_iterator(out_edge_its.first), + adjacency_iterator(out_edge_its.second)}; + } + + /* AdjacencyMatrix concept functions + * (Note: complexity guarantee is not met) */ + + std::pair edge_impl(vertex_descriptor u, + vertex_descriptor v) const { + if (in_degree_impl(v) < out_degree_impl(u)) { + for (const edge_descriptor &e : in_edges_range(v, *this)) { + if (source_impl(e) == u) { + return {e, true}; + } + } + } else { + for (const edge_descriptor &e : out_edges_range(u, *this)) { + if (target_impl(e) == v) { + return {e, true}; + } + } + } + + return {edge_descriptor(), false}; + } + + /* Misc functions that don't actually seem to belong to a formal BGL + concept. 
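The `edge_impl()` just above is the point (5) optimisation from the header comment: probe whichever endpoint has the smaller relevant degree. In plain STL terms (with a hypothetical adjacency representation), the strategy is:

```cpp
#include <algorithm>
#include <vector>

struct mini_vertex {
    std::vector<int> out; // targets of out-edges
    std::vector<int> in;  // sources of in-edges
};

// cost is O(min(out_degree(u), in_degree(v))) rather than O(out_degree(u))
bool edge_exists(const std::vector<mini_vertex> &g, int u, int v) {
    if (g[v].in.size() < g[u].out.size()) {
        return std::find(g[v].in.begin(), g[v].in.end(), u) != g[v].in.end();
    }
    return std::find(g[u].out.begin(), g[u].out.end(), v) != g[u].out.end();
}
```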
*/ + static + edge_descriptor null_edge() { return edge_descriptor(); } + + static + std::pair + inv_adjacent_vertices_impl(vertex_descriptor v) { + auto in_edge_its = in_edges_impl(v); + return {inv_adjacency_iterator(in_edge_its.first), + inv_adjacency_iterator(in_edge_its.second)}; + } + + /* MutableGraph concept functions */ + + std::pair + add_edge_impl(vertex_descriptor u, vertex_descriptor v) { + bool added = true; /* we always allow parallel edges */ + edge_node *e = new edge_node(new_serial()); + e->source = u.raw(); + e->target = v.raw(); + e->props.index = next_edge_index++; + + u.raw()->out_edge_list.push_back(*e); + v.raw()->in_edge_list.push_back(*e); + + graph_edge_count++; + return {edge_descriptor(e), added}; + } + + void remove_edge_impl(edge_descriptor e) { + graph_edge_count--; + + vertex_node *u = e.raw()->source; + vertex_node *v = e.raw()->target; + + v->in_edge_list.erase(v->in_edge_list.iterator_to(*e.raw())); + u->out_edge_list.erase(u->out_edge_list.iterator_to(*e.raw())); + + delete e.raw(); + } + + template + void remove_out_edge_if_impl(vertex_descriptor v, Predicate pred) { + out_edge_iterator it, ite; + std::tie(it, ite) = out_edges_impl(v); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + this->remove_edge_impl(*jt); + } + } + } + + template + void remove_in_edge_if_impl(vertex_descriptor v, Predicate pred) { + in_edge_iterator it, ite; + std::tie(it, ite) = in_edges_impl(v); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + remove_edge_impl(*jt); + } + } + } + + template + void remove_edge_if_impl(Predicate pred) { + edge_iterator it, ite; + std::tie(it, ite) = edges_impl(); + while (it != ite) { + auto jt = it; + ++it; + if (pred(*jt)) { + remove_edge_impl(*jt); + } + } + } + +private: + /* GCC 4.8 has bugs with lambdas in templated friend functions, so: */ + struct source_match { + explicit source_match(const vertex_descriptor &uu) : u(uu) { } + bool operator()(edge_descriptor e) const { return source_impl(e) == u; } + const vertex_descriptor &u; + }; + + struct target_match { + explicit target_match(const vertex_descriptor &vv) : v(vv) { } + bool operator()(edge_descriptor e) const { return target_impl(e) == v; } + const vertex_descriptor &v; + }; +public: + /* Note: (u,v) variant needs to remove all (parallel) edges between (u,v). + * + * The edge_descriptor version should be strongly preferred if the + * edge_descriptor is available. 
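All three `remove_*_if_impl()` variants above rely on the same iterator discipline: take a copy, advance past the current element, and only then erase through the copy, so removal never invalidates the iterator still in use. A minimal sketch with `std::list`, which shares the per-element iterator stability of an intrusive list:

```cpp
#include <list>

template<typename Pred>
void remove_if_stable(std::list<int> &l, Pred pred) {
    auto it = l.begin(), ite = l.end();
    while (it != ite) {
        auto jt = it; // keep a handle on the current element
        ++it;         // step off it before any erase happens
        if (pred(*jt)) {
            l.erase(jt);
        }
    }
}

int main() {
    std::list<int> l{1, 2, 3, 4};
    remove_if_stable(l, [](int x) { return x % 2 == 0; });
    return l.size() == 2 ? 0 : 1;
}
```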
+ */ + void remove_edge_impl(const vertex_descriptor &u, + const vertex_descriptor &v) { + if (in_degree_impl(v) < out_degree_impl(u)) { + remove_in_edge_if_impl(v, source_match(u)); + } else { + remove_out_edge_if_impl(u, target_match(v)); + } + } + + /* VertexListGraph concept functions */ + vertices_size_type num_vertices_impl() const { + return vertices_list.size(); + } + + std::pair vertices_impl() const { + return {vertex_iterator(vertices_list.begin()), + vertex_iterator(vertices_list.end())}; + } + + /* EdgeListGraph concept functions (aside from those in IncidenceGraph) */ + + edges_size_type num_edges_impl() const { + return graph_edge_count; + } + + std::pair edges_impl() const { + vertex_iterator vi, ve; + std::tie(vi, ve) = vertices_impl(); + + return {edge_iterator(vi, ve), edge_iterator(ve, ve)}; + } + + /* bundled properties functions */ + + vertex_property_type &operator[](vertex_descriptor v) { + return v.raw()->props; + } + + const vertex_property_type &operator[](vertex_descriptor v) const { + return v.raw()->props; + } + + edge_property_type &operator[](edge_descriptor e) { + return e.raw()->props; + } + + const edge_property_type &operator[](edge_descriptor e) const { + return e.raw()->props; + } + + /* PropertyGraph concept functions & helpers */ + + template + struct prop_map : public boost::put_get_helper > { + using value_type = typename std::decay::type; + using reference = R; + using key_type = typename bundle_key_type::type; + + typedef typename boost::lvalue_property_map_tag category; + + prop_map(value_type P_of::*m_in) : member(m_in) { } + + reference operator[](key_type k) const { + return k.raw()->props.*member; + } + reference operator()(key_type k) const { return (*this)[k]; } + + private: + value_type P_of::*member; + }; + + template + struct prop_map_all : public boost::put_get_helper > { + using value_type = typename std::decay::type; + using reference = R; + using key_type = typename bundle_key_type::type; + + typedef typename boost::lvalue_property_map_tag category; + + reference operator[](key_type k) const { + return k.raw()->props; + } + reference operator()(key_type k) const { return (*this)[k]; } + }; + + template + friend + prop_map get(P_type P_of::*t, Graph &) { + return prop_map(t); + } + + template + friend + prop_map get(P_type P_of::*t, const Graph &) { + return prop_map(t); + } + + /* We can't seem to use auto/decltype returns here as it seems that the + * templated member functions are not yet visible when the compile is + * evaluating the decltype for the return value. We could probably work + * around it by making this a dummy templated function. 
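The `prop_map` machinery that follows boils down to a property map built from a pointer-to-member. Stripped of the BGL scaffolding, and with hypothetical `props`/`desc` types, the core mechanism is:

```cpp
#include <cstddef>

struct props { std::size_t index; };
struct desc { props *p; }; // stand-in for a vertex/edge descriptor

template<typename T>
struct member_map {
    T props::*member;
    // reading or writing through the map dereferences the member pointer
    // on the descriptor's bundled properties
    T &operator[](desc d) const { return d.p->*member; }
};

template<typename T>
member_map<T> make_map(T props::*m) { return member_map<T>{m}; }

int main() {
    props pr{7};
    desc d{&pr};
    auto pm = make_map(&props::index); // analogous to get(&Props::index, g)
    pm[d] = 42;                        // writes pr.index
    return pr.index == 42 ? 0 : 1;
}
```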
*/ + friend + prop_map + get(boost::vertex_index_t, Graph &g) { + return get(&VertexPropertyType::index, g); + } + + friend + prop_map + get(boost::vertex_index_t, const Graph &g) { + return get(&VertexPropertyType::index, g); + } + + friend + prop_map + get(boost::edge_index_t, Graph &g) { + return get(&EdgePropertyType::index, g); + } + + friend + prop_map + get(boost::edge_index_t, const Graph &g) { + return get(&EdgePropertyType::index, g); + } + + friend + prop_map_all get(boost::vertex_all_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_all_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_all_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_all_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_bundle_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::vertex_bundle_t, + const Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_bundle_t, Graph &) { + return {}; + } + + friend + prop_map_all get(boost::edge_bundle_t, + const Graph &) { + return {}; + } + + template + friend + auto get(Prop p, Graph &g, K key) -> decltype(get(p, g)[key]) { + return get(p, g)[key]; + } + + template + friend + auto get(Prop p, const Graph &g, K key) -> decltype(get(p, g)[key]) { + return get(p, g)[key]; + } + + template + friend + void put(Prop p, Graph &g, K key, const V &value) { + get(p, g)[key] = value; + } + + /* MutablePropertyGraph concept functions */ + + /* Note: add_vertex(g, vp) allocates a next index value for the vertex + * rather than using the index in vp. i.e., except for in rare coincidences: + * g[add_vertex(g, vp)].index != vp.index + */ + vertex_descriptor add_vertex_impl(const VertexPropertyType &vp) { + vertex_descriptor v = add_vertex_impl(); + auto i = (*this)[v].index; + (*this)[v] = vp; + (*this)[v].index = i; + + return v; + } + + /* Note: add_edge(u, v, g, vp) allocates a next index value for the edge + * rather than using the index in ep. i.e., except for in rare coincidences: + * g[add_edge(u, v, g, ep)].index != ep.index + */ + std::pair + add_edge_impl(vertex_descriptor u, vertex_descriptor v, + const EdgePropertyType &ep) { + auto e = add_edge_impl(u, v); + auto i = (*this)[e.first].index; + (*this)[e.first] = ep; + (*this)[e.first].index = i; + + return e; + } + + /* End MutablePropertyGraph */ + + /** Pack the edge index into a contiguous range [ 0, num_edges(g) ). */ + void renumber_edges_impl() { + next_edge_index = 0; + edge_iterator it; + edge_iterator ite; + for (std::tie(it, ite) = edges_impl(); it != ite; ++it) { + (*this)[*it].index = next_edge_index++; + } + } + + /** Pack the vertex index into a contiguous range [ 0, num_vertices(g) ). + * Vertices with indices less than N_SPECIAL_VERTICES are not renumbered. + */ + void renumber_vertices_impl() { + DEBUG_PRINTF("renumbering above %zu\n", Graph::N_SPECIAL_VERTICES); + next_vertex_index = Graph::N_SPECIAL_VERTICES; + vertex_iterator it; + vertex_iterator ite; + for (std::tie(it, ite) = vertices_impl(); it != ite; ++it) { + if ((*this)[*it].index < Graph::N_SPECIAL_VERTICES) { + continue; + } + + (*this)[*it].index = next_vertex_index++; + } + } + + /** Returns what the next allocated vertex index will be. This is an upper + * on the values of index for vertices (vertex removal means that there may + * be gaps). */ + vertices_size_type vertex_index_upper_bound_impl() const { + return next_vertex_index; + } + + /** Returns what the next allocated edge index will be. 
This is an upper on + * the values of index for edges (edge removal means that there may be + * gaps). */ + vertices_size_type edge_index_upper_bound_impl() const { + return next_edge_index; + } + + using directed_category = boost::directed_tag; + using edge_parallel_category = boost::allow_parallel_edge_tag; + struct traversal_category : + public virtual boost::bidirectional_graph_tag, + public virtual boost::adjacency_graph_tag, + public virtual boost::vertex_list_graph_tag, + public virtual boost::edge_list_graph_tag { }; + + ue2_graph() = default; + + ue2_graph(ue2_graph &&old) + : next_vertex_index(old.next_vertex_index), + next_edge_index(old.next_edge_index), + graph_edge_count(old.graph_edge_count), + next_serial(old.next_serial) { + using std::swap; + swap(vertices_list, old.vertices_list); + } + + ue2_graph &operator=(ue2_graph &&old) { + next_vertex_index = old.next_vertex_index; + next_edge_index = old.next_edge_index; + graph_edge_count = old.graph_edge_count; + next_serial = old.next_serial; + using std::swap; + swap(vertices_list, old.vertices_list); + return *this; + } + + ~ue2_graph() { + vertices_list.clear_and_dispose(delete_disposer()); + } +}; + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +add_vertex(Graph &g) { + return g.add_vertex_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_vertex(typename Graph::vertex_descriptor v, Graph &g) { + g.remove_vertex_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_in_edges(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_in_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_out_edges(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +clear_vertex(typename Graph::vertex_descriptor v, Graph &g) { + g.clear_in_edges_impl(v); + g.clear_out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +source(typename Graph::edge_descriptor e, const Graph &) { + return Graph::source_impl(e); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +target(typename Graph::edge_descriptor e, const Graph &) { + return Graph::target_impl(e); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +out_degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::out_degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +out_edges(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::out_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +in_degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::in_degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +in_edges(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::in_edges_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::degree_size_type>::type +degree(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::degree_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +adjacent_vertices(typename 
Graph::vertex_descriptor v, const Graph &) { + return Graph::adjacent_vertices_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +edge(typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v, + const Graph &g) { + return g.edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +inv_adjacent_vertices(typename Graph::vertex_descriptor v, const Graph &) { + return Graph::inv_adjacent_vertices_impl(v); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +add_edge(typename Graph::vertex_descriptor u, + typename Graph::vertex_descriptor v, Graph &g) { + return g.add_edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge(typename Graph::edge_descriptor e, Graph &g) { + g.remove_edge_impl(e); +} + +template +typename std::enable_if< + !std::is_convertible::value + && std::is_base_of::value>::type +remove_edge(Iter it, Graph &g) { + g.remove_edge_impl(*it); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_out_edge_if(typename Graph::vertex_descriptor v, Predicate pred, + Graph &g) { + g.remove_out_edge_if_impl(v, pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_in_edge_if(typename Graph::vertex_descriptor v, Predicate pred, + Graph &g) { + g.remove_in_edge_if_impl(v, pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge_if(Predicate pred, Graph &g) { + g.remove_edge_if_impl(pred); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +remove_edge(const typename Graph::vertex_descriptor &u, + const typename Graph::vertex_descriptor &v, Graph &g) { + g.remove_edge_impl(u, v); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertices_size_type>::type +num_vertices(const Graph &g) { + return g.num_vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +vertices(const Graph &g) { + return g.vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::edges_size_type>::type +num_edges(const Graph &g) { + return g.num_edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +edges(const Graph &g) { + return g.edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertex_descriptor>::type +add_vertex(const typename Graph::vertex_property_type &vp, Graph &g) { + return g.add_vertex_impl(vp); +} + +template +typename std::enable_if< + std::is_base_of::value, + std::pair>::type +add_edge(typename Graph::vertex_descriptor u, + typename Graph::vertex_descriptor v, + const typename Graph::edge_property_type &ep, Graph &g) { + return g.add_edge_impl(u, v, ep); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +renumber_edges(Graph &g) { + g.renumber_edges_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value>::type +renumber_vertices(Graph &g) { + g.renumber_vertices_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::vertices_size_type>::type +vertex_index_upper_bound(const Graph &g) { + return g.vertex_index_upper_bound_impl(); +} + +template +typename std::enable_if< + std::is_base_of::value, + typename Graph::edges_size_type>::type +edge_index_upper_bound(const Graph &g) { + return 
g.edge_index_upper_bound_impl(); +} + +using boost::vertex_index; +using boost::edge_index; + +} + +namespace boost { + +/* Install partial specialisation of property_map - this is required for + * adaptors (like filtered_graph) to know the type of the property maps */ +template +struct property_map::value + >::type > { + typedef decltype(get(std::declval(), + std::declval())) type; + typedef decltype(get(std::declval(), + std::declval())) const_type; +}; + +} +#endif diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 3c7be473..08b6a544 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -55,6 +55,29 @@ size_t maxStringSelfOverlap(const std::string &a, bool nocase); /// Compares two strings, returns non-zero if they're different. u32 cmp(const char *a, const char *b, size_t len, bool nocase); +/** + * \brief String type that also records whether the whole string is caseful or + * caseless. + * + * You should use \ref ue2_literal if you need to represent a mixed-case + * literal. + */ +struct ue2_case_string { + ue2_case_string(std::string s_in, bool nocase_in) + : s(std::move(s_in)), nocase(nocase_in) { + if (nocase) { + upperString(s); + } + } + + bool operator==(const ue2_case_string &other) const { + return s == other.s && nocase == other.nocase; + } + + std::string s; + bool nocase; +}; + struct ue2_literal { public: /// Single element proxy, pointed to by our const_iterator. diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h index 0619c7e4..3385e441 100644 --- a/src/util/uniform_ops.h +++ b/src/util/uniform_ops.h @@ -180,44 +180,52 @@ #define partial_load_m384(ptr, sz) loadbytes384(ptr, sz) #define partial_load_m512(ptr, sz) loadbytes512(ptr, sz) -#define store_compressed_u32(ptr, x, m) storecompressed32(ptr, x, m) -#define store_compressed_u64a(ptr, x, m) storecompressed64(ptr, x, m) -#define store_compressed_m128(ptr, x, m) storecompressed128(ptr, x, m) -#define store_compressed_m256(ptr, x, m) storecompressed256(ptr, x, m) -#define store_compressed_m384(ptr, x, m) storecompressed384(ptr, x, m) -#define store_compressed_m512(ptr, x, m) storecompressed512(ptr, x, m) +#define store_compressed_u32(ptr, x, m, len) storecompressed32(ptr, x, m, len) +#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len) +#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len) +#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len) +#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len) +#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len) -#define load_compressed_u32(x, ptr, m) loadcompressed32(x, ptr, m) -#define load_compressed_u64a(x, ptr, m) loadcompressed64(x, ptr, m) -#define load_compressed_m128(x, ptr, m) loadcompressed128(x, ptr, m) -#define load_compressed_m256(x, ptr, m) loadcompressed256(x, ptr, m) -#define load_compressed_m384(x, ptr, m) loadcompressed384(x, ptr, m) -#define load_compressed_m512(x, ptr, m) loadcompressed512(x, ptr, m) +#define load_compressed_u32(x, ptr, m, len) loadcompressed32(x, ptr, m, len) +#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len) +#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len) +#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len) +#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, len) +#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len) -static really_inline void 
clearbit_u32(u32 *p, u32 n) { +static really_inline +void clearbit_u32(u32 *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1U << n); } -static really_inline void clearbit_u64a(u64a *p, u32 n) { + +static really_inline +void clearbit_u64a(u64a *p, u32 n) { assert(n < sizeof(*p) * 8); *p &= ~(1ULL << n); } + #define clearbit_m128(ptr, n) (clearbit128(ptr, n)) #define clearbit_m256(ptr, n) (clearbit256(ptr, n)) #define clearbit_m384(ptr, n) (clearbit384(ptr, n)) #define clearbit_m512(ptr, n) (clearbit512(ptr, n)) -static really_inline char testbit_u32(const u32 *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1U << n)); +static really_inline +char testbit_u32(u32 val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1U << n)); } -static really_inline char testbit_u64a(const u64a *p, u32 n) { - assert(n < sizeof(*p) * 8); - return !!(*p & (1ULL << n)); + +static really_inline +char testbit_u64a(u64a val, u32 n) { + assert(n < sizeof(val) * 8); + return !!(val & (1ULL << n)); } -#define testbit_m128(ptr, n) (testbit128(ptr, n)) -#define testbit_m256(ptr, n) (testbit256(ptr, n)) -#define testbit_m384(ptr, n) (testbit384(ptr, n)) -#define testbit_m512(ptr, n) (testbit512(ptr, n)) + +#define testbit_m128(val, n) (testbit128(val, n)) +#define testbit_m256(val, n) (testbit256(val, n)) +#define testbit_m384(val, n) (testbit384(val, n)) +#define testbit_m512(val, n) (testbit512(val, n)) #endif diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 00000000..049fd368 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,19 @@ +find_package(Threads) + +# remove some warnings +if(CMAKE_CXX_FLAGS MATCHES "-Wmissing-declarations" ) + string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + +include_directories(${PROJECT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) +include_directories(${PROJECT_SOURCE_DIR}/util) + +# add any subdir with a cmake file +file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *) +foreach(e ${dirents}) + if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${e} AND + EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt) + add_subdirectory(${e}) + endif () +endforeach () diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt new file mode 100644 index 00000000..25a833d0 --- /dev/null +++ b/tools/hsbench/CMakeLists.txt @@ -0,0 +1,36 @@ +include (${CMAKE_MODULE_PATH}/sqlite3.cmake) + +if (NOT XCODE) + include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS}) +else() + # cmake doesn't think Xcode supports isystem + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${SQLITE3_INCLUDE_DIRS}") +endif() + +CHECK_FUNCTION_EXISTS(malloc_info HAVE_MALLOC_INFO) +CHECK_FUNCTION_EXISTS(shmget HAVE_SHMGET) +set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()") + +# only set these after all tests are done +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") + + +SET(hsbench_SOURCES + common.h + data_corpus.cpp + data_corpus.h + engine_hyperscan.cpp + engine_hyperscan.h + heapstats.cpp + heapstats.h + huge.cpp + huge.h + main.cpp + thread_barrier.h + timer.h +) + +add_executable(hsbench ${hsbench_SOURCES}) +target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS} + ${CMAKE_THREAD_LIBS_INIT}) diff --git a/tools/hsbench/README.md b/tools/hsbench/README.md new file mode 100644 index 00000000..344a6c00 --- /dev/null +++ b/tools/hsbench/README.md @@ -0,0 +1,8 @@ +Hyperscan Benchmarker: hsbench 
+============================== + +The `hsbench` tool provides an easy way to measure Hyperscan's performance +for a particular set of patterns and corpus of data to be scanned. + +Documentation describing its operation is available in the Tools section of the +[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/). diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h new file mode 100644 index 00000000..a4d60021 --- /dev/null +++ b/tools/hsbench/common.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef COMMON_H +#define COMMON_H + +#include + +enum class ScanMode { BLOCK, STREAMING, VECTORED }; + +extern bool echo_matches; +extern bool saveDatabases; +extern bool loadDatabases; +extern std::string serializePath; +extern unsigned int somPrecisionMode; + +#endif // COMMON_H diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp new file mode 100644 index 00000000..55bfe93a --- /dev/null +++ b/tools/hsbench/data_corpus.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "data_corpus.h" + +#include "util/container.h" +#include "ue2common.h" + +#include +#include +#include +#include +#include + +#include + +using namespace std; +using namespace ue2; + +static +void readRow(sqlite3_stmt *statement, vector &blocks, + map &stream_indices) { + unsigned int id = sqlite3_column_int(statement, 0); + unsigned int stream_id = sqlite3_column_int(statement, 1); + const char *blob = (const char *)sqlite3_column_blob(statement, 2); + unsigned int bytes = sqlite3_column_bytes(statement, 2); + + if (!contains(stream_indices, stream_id)) { + unsigned int internal_stream_index = stream_indices.size(); + stream_indices[stream_id] = internal_stream_index; + } + auto internal_stream_index = stream_indices[stream_id]; + + assert(blob || bytes > 0); + blocks.emplace_back(id, stream_id, internal_stream_index, + string(blob, blob + bytes)); +} + +vector readCorpus(const string &filename) { + int status; + sqlite3 *db = nullptr; + + status = sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READONLY, + nullptr); + + assert(db); + if (status != SQLITE_OK) { + ostringstream err; + err << "Unable to open database '" << filename << "': " + << sqlite3_errmsg(db); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + throw DataCorpusError(err.str()); + } + + static const string query("SELECT id, stream_id, data " + "FROM chunk ORDER BY id;"); + + sqlite3_stmt *statement = nullptr; + + status = sqlite3_prepare_v2(db, query.c_str(), query.size(), &statement, + nullptr); + if (status != SQLITE_OK) { + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + ostringstream oss; + oss << "Query failed: " << query; + throw DataCorpusError(oss.str()); + } + + vector blocks; + map stream_indices; + + status = sqlite3_step(statement); + while (status == SQLITE_ROW) { + readRow(statement, blocks, stream_indices); + status = sqlite3_step(statement); + } + + if (status != SQLITE_DONE) { + ostringstream oss; + oss << "Error retrieving blocks from corpus: " + << sqlite3_errstr(status); + + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + throw DataCorpusError(oss.str()); + } + + status = sqlite3_finalize(statement); + assert(status == SQLITE_OK); + status = sqlite3_close(db); + assert(status == SQLITE_OK); + + if (blocks.empty()) { + throw DataCorpusError("Database contains no blocks."); + } + + return blocks; +} diff --git a/tools/hsbench/data_corpus.h b/tools/hsbench/data_corpus.h new file mode 100644 index 00000000..91a87acc --- /dev/null +++ b/tools/hsbench/data_corpus.h @@ -0,0 +1,63 @@ +/* + * 
Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DATACORPUS_H +#define DATACORPUS_H + +#include +#include + +class DataBlock { +public: + DataBlock(unsigned int in_id, unsigned int in_stream, + unsigned int int_stream_index_in, std::string in_data) + : id(in_id), stream_id(in_stream), + internal_stream_index(int_stream_index_in), + payload(std::move(in_data)) {} + + unsigned int id; // unique block identifier + unsigned int stream_id; // unique stream identifier (from corpus file) + unsigned int internal_stream_index; /* dense index for this stream + * (allocated by hsbench) */ + std::string payload; // actual block payload +}; + +/** Exception thrown if an error occurs. */ +class DataCorpusError { +public: + explicit DataCorpusError(std::string msg_in) : msg(std::move(msg_in)) {} + std::string msg; +}; + +/** + * Interface to a corpus database. Any error will produce a DataCorpusError + * and should be considered fatal. + */ +std::vector readCorpus(const std::string &filename); + +#endif // DATACORPUS_H diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp new file mode 100644 index 00000000..f5abb9fa --- /dev/null +++ b/tools/hsbench/engine_hyperscan.cpp @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "ExpressionParser.h" +#include "common.h" +#include "engine_hyperscan.h" +#include "expressions.h" +#include "heapstats.h" +#include "huge.h" +#include "timer.h" + +#include "crc32.h" +#include "database.h" +#include "hs_compile.h" +#include "hs_internal.h" +#include "hs_runtime.h" +#include "util/database_util.h" +#include "util/make_unique.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +EngineContext::EngineContext(const hs_database_t *db) { + hs_alloc_scratch(db, &scratch); + assert(scratch); +} + +EngineContext::~EngineContext() { + hs_free_scratch(scratch); +} + +namespace /* anonymous */ { + +/** Scan context structure passed to the onMatch callback function. */ +struct ScanContext { + ScanContext(unsigned id_in, ResultEntry &result_in, + const EngineStream *stream_in) + : id(id_in), result(result_in), stream(stream_in) {} + unsigned id; + ResultEntry &result; + const EngineStream *stream; // nullptr except in streaming mode. +}; + +} // namespace + +/** + * Callback function called for every match that Hyperscan produces, used when + * "echo matches" is off. + */ +static +int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int, + void *ctx) { + ScanContext *sc = static_cast(ctx); + assert(sc); + sc->result.matches++; + + return 0; +} + +/** + * Callback function called for every match that Hyperscan produces when "echo + * matches" is enabled. + */ +static +int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to, + unsigned int, void *ctx) { + ScanContext *sc = static_cast(ctx); + assert(sc); + sc->result.matches++; + + if (sc->stream) { + printf("Match @%u:%u:%llu for %u\n", sc->stream->sn, sc->id, to, id); + } else { + printf("Match @%u:%llu for %u\n", sc->id, to, id); + } + + return 0; +} + +EngineHyperscan::EngineHyperscan(hs_database_t *db_in) : db(db_in) { + assert(db); +} + +EngineHyperscan::~EngineHyperscan() { + release_huge(db); +} + +unique_ptr EngineHyperscan::makeContext() const { + return ue2::make_unique(db); +} + +void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id, + ResultEntry &result, EngineContext &ctx) const { + assert(data); + + ScanContext sc(id, result, nullptr); + auto callback = echo_matches ? 
onMatchEcho : onMatch; + hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan returned error %d\n", rv); + abort(); + } +} + +void EngineHyperscan::scan_vectored(const char *const *data, + const unsigned int *len, unsigned int count, + unsigned streamId, ResultEntry &result, + EngineContext &ctx) const { + assert(data); + assert(len); + + ScanContext sc(streamId, result, nullptr); + auto callback = echo_matches ? onMatchEcho : onMatch; + hs_error_t rv = + hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan_vector returned error %d\n", rv); + abort(); + } +} + +unique_ptr EngineHyperscan::streamOpen(EngineContext &ctx, + unsigned streamId) const { + auto stream = ue2::make_unique(); + stream->ctx = &ctx; + + hs_open_stream(db, 0, &stream->id); + if (!stream->id) { + // an error occurred, propagate to caller + return nullptr; + } + stream->sn = streamId; + return stream; +} + +void EngineHyperscan::streamClose(unique_ptr stream, + ResultEntry &result) const { + assert(stream); + + auto &s = static_cast(*stream); + EngineContext &ctx = *s.ctx; + + ScanContext sc(0, result, &s); + auto callback = echo_matches ? onMatchEcho : onMatch; + + assert(s.id); + hs_close_stream(s.id, ctx.scratch, callback, &sc); + s.id = nullptr; +} + +void EngineHyperscan::streamScan(EngineStream &stream, const char *data, + unsigned len, unsigned id, + ResultEntry &result) const { + assert(data); + + auto &s = static_cast(stream); + EngineContext &ctx = *s.ctx; + + ScanContext sc(id, result, &s); + auto callback = echo_matches ? onMatchEcho : onMatch; + hs_error_t rv = + hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc); + + if (rv != HS_SUCCESS) { + printf("Fatal error: hs_scan_stream returned error %d\n", rv); + abort(); + } +} + +static +unsigned makeModeFlags(ScanMode scan_mode) { + switch (scan_mode) { + case ScanMode::BLOCK: + return HS_MODE_BLOCK; + case ScanMode::STREAMING: + return HS_MODE_STREAM; + case ScanMode::VECTORED: + return HS_MODE_VECTORED; + } + assert(0); + return HS_MODE_STREAM; +} + +/** + * Hash the settings used to compile a database, returning a string that can be + * used as a filename. 
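For context, the `streamOpen`/`streamScan`/`streamClose` wrappers above follow the standard Hyperscan streaming lifecycle. A minimal end-to-end example using only the public API (error handling trimmed; the pattern and input are purely illustrative):

```cpp
#include <hs.h>
#include <cstdio>

static int on_match(unsigned id, unsigned long long, unsigned long long to,
                    unsigned, void *) {
    printf("match for %u ending at %llu\n", id, to);
    return 0; // non-zero would halt scanning
}

int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *err = nullptr;
    if (hs_compile("foo.*bar", 0, HS_MODE_STREAM, nullptr, &db, &err)
        != HS_SUCCESS) {
        hs_free_compile_error(err);
        return 1;
    }
    hs_scratch_t *scratch = nullptr;
    hs_alloc_scratch(db, &scratch);

    hs_stream_t *stream = nullptr;
    hs_open_stream(db, 0, &stream);
    hs_scan_stream(stream, "foo", 3, 0, scratch, on_match, nullptr);
    // the match straddles the two blocks; stream state carries it across
    hs_scan_stream(stream, "obar", 4, 0, scratch, on_match, nullptr);
    hs_close_stream(stream, scratch, on_match, nullptr);

    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}
```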
+ */ +static +string dbSettingsHash(const string &filename, u32 mode) { + ostringstream info_oss; + + info_oss << filename.c_str() << ' '; + info_oss << mode << ' '; + + string info = info_oss.str(); + + u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size()); + + // return STL string with printable version of digest + ostringstream oss; + oss << hex << setw(8) << setfill('0') << crc << dec; + + return oss.str(); +} + +static +string dbFilename(const std::string &name, unsigned mode) { + ostringstream oss; + oss << serializePath << '/' << dbSettingsHash(name, mode) << ".db"; + return oss.str(); +} + +std::unique_ptr +buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, + const std::string &name, UNUSED const ue2::Grey &grey) { + if (expressions.empty()) { + assert(0); + return nullptr; + } + + long double compileSecs = 0.0; + size_t compiledSize = 0.0; + size_t streamSize = 0; + size_t scratchSize = 0; + unsigned int peakMemorySize = 0; + unsigned int crc = 0; + std::string db_info; + + unsigned int mode = makeModeFlags(scan_mode); + + hs_database_t *db; + hs_error_t err; + + if (loadDatabases) { + db = loadDatabase(dbFilename(name, mode).c_str()); + if (!db) { + return nullptr; + } + } else { + const unsigned int count = expressions.size(); + + vector exprs; + vector flags, ids; + vector ext; + + for (const auto &m : expressions) { + string expr; + unsigned int f = 0; + hs_expr_ext extparam; + extparam.flags = 0; + if (!readExpression(m.second, expr, &f, &extparam)) { + printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(), + m.first); + return nullptr; + } + + exprs.push_back(expr); + ids.push_back(m.first); + flags.push_back(f); + ext.push_back(extparam); + } + + unsigned full_mode = mode; + if (mode == HS_MODE_STREAM) { + full_mode |= somPrecisionMode; + } + + // Our compiler takes an array of plain ol' C strings. + vector patterns(count); + for (unsigned int i = 0; i < count; i++) { + patterns[i] = exprs[i].c_str(); + } + + // Extended parameters are passed as pointers to hs_expr_ext structures. 
+ vector ext_ptr(count); + for (unsigned int i = 0; i < count; i++) { + ext_ptr[i] = &ext[i]; + } + + Timer timer; + timer.start(); + + hs_compile_error_t *compile_err; + +#ifndef RELEASE_BUILD + err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(), + ext_ptr.data(), count, full_mode, nullptr, + &db, &compile_err, grey); +#else + err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(), + ext_ptr.data(), count, full_mode, nullptr, + &db, &compile_err); +#endif + + timer.complete(); + compileSecs = timer.seconds(); + peakMemorySize = getPeakHeap(); + + if (err == HS_COMPILER_ERROR) { + if (compile_err->expression >= 0) { + printf("Compile error for signature #%u: %s\n", + compile_err->expression, compile_err->message); + } else { + printf("Compile error: %s\n", compile_err->message); + } + hs_free_compile_error(compile_err); + return nullptr; + } + } + + // copy the db into huge pages (where available) to reduce TLB pressure + db = get_huge(db); + if (!db) { + return nullptr; + } + + err = hs_database_size(db, &compiledSize); + if (err != HS_SUCCESS) { + return nullptr; + } + assert(compiledSize > 0); + + crc = db->crc32; + + if (saveDatabases) { + saveDatabase(db, dbFilename(name, mode).c_str()); + } + + if (mode & HS_MODE_STREAM) { + err = hs_stream_size(db, &streamSize); + if (err != HS_SUCCESS) { + return nullptr; + } + } else { + streamSize = 0; + } + + char *info; + err = hs_database_info(db, &info); + if (err != HS_SUCCESS) { + return nullptr; + } else { + db_info = string(info); + free(info); + } + + // Allocate scratch temporarily to find its size: this is a good test + // anyway. + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + if (err != HS_SUCCESS) { + return nullptr; + } + + err = hs_scratch_size(scratch, &scratchSize); + if (err != HS_SUCCESS) { + return nullptr; + } + hs_free_scratch(scratch); + + // Output summary information. + printf("Signatures: %s\n", name.c_str()); + printf("Hyperscan info: %s\n", db_info.c_str()); + printf("Expression count: %'zu\n", expressions.size()); + printf("Bytecode size: %'zu bytes\n", compiledSize); + printf("Database CRC: 0x%x\n", crc); + if (mode & HS_MODE_STREAM) { + printf("Stream state size: %'zu bytes\n", streamSize); + } + printf("Scratch size: %'zu bytes\n", scratchSize); + printf("Compile time: %'0.3Lf seconds\n", compileSecs); + printf("Peak heap usage: %'u bytes\n", peakMemorySize); + + return ue2::make_unique(db); +} diff --git a/tools/hsbench/engine_hyperscan.h b/tools/hsbench/engine_hyperscan.h new file mode 100644 index 00000000..7875decc --- /dev/null +++ b/tools/hsbench/engine_hyperscan.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ENGINEHYPERSCAN_H
+#define ENGINEHYPERSCAN_H
+
+#include "expressions.h"
+#include "common.h"
+#include "hs_runtime.h"
+
+#include <memory>
+
+/** Structure for the result of a single complete scan. */
+struct ResultEntry {
+    double seconds = 0;       //!< Time taken for scan.
+    unsigned int matches = 0; //!< Count of matches found.
+};
+
+/** Engine context which is allocated on a per-thread basis. */
+class EngineContext {
+public:
+    explicit EngineContext(const hs_database_t *db);
+    ~EngineContext();
+
+    hs_scratch_t *scratch = nullptr;
+};
+
+/** Streaming mode scans have persistent stream state associated with them. */
+class EngineStream {
+public:
+    hs_stream_t *id;
+    unsigned int sn;
+    EngineContext *ctx;
+};
+
+/** Hyperscan Engine for scanning data. */
+class EngineHyperscan {
+public:
+    explicit EngineHyperscan(hs_database_t *db);
+    ~EngineHyperscan();
+
+    std::unique_ptr<EngineContext> makeContext() const;
+
+    void scan(const char *data, unsigned int len, unsigned int id,
+              ResultEntry &result, EngineContext &ctx) const;
+
+    void scan_vectored(const char *const *data, const unsigned int *len,
+                       unsigned int count, unsigned int streamId,
+                       ResultEntry &result, EngineContext &ctx) const;
+
+    std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
+                                             unsigned id) const;
+
+    void streamClose(std::unique_ptr<EngineStream> stream,
+                     ResultEntry &result) const;
+
+    void streamScan(EngineStream &stream, const char *data, unsigned int len,
+                    unsigned int id, ResultEntry &result) const;
+
+private:
+    hs_database_t *db;
+};
+
+namespace ue2 {
+struct Grey;
+}
+
+std::unique_ptr<EngineHyperscan>
+buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
+                     const std::string &name, const ue2::Grey &grey);
+
+#endif // ENGINEHYPERSCAN_H
diff --git a/tools/hsbench/heapstats.cpp b/tools/hsbench/heapstats.cpp
new file mode 100644
index 00000000..d0dffdb3
--- /dev/null
+++ b/tools/hsbench/heapstats.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Peak heap usage code.
+ *
+ * At present, we only have an implementation for modern glibc systems, using
+ * the malloc_info() call. We return zero elsewhere.
+ */
+
+#include "config.h"
+
+#include "heapstats.h"
+
+#if defined HAVE_MALLOC_INFO
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include <malloc.h>
+
+size_t getPeakHeap(void) {
+    FILE *tmpf = tmpfile();
+    if (!tmpf) {
+        return 0;
+    }
+
+    int rv = malloc_info(0, tmpf);
+    if (rv != 0) {
+        fclose(tmpf);
+        return 0;
+    }
+
+    rewind(tmpf);
+
+    // We don't want to depend on a real XML parser. This is ugly and brittle
+    // and hopefully good enough for the time being. We look for the last
+    // system tag with type max, which should be the malloc-wide one.
+
+    static const char begin[] = "<system type=\"max\" size=\"";
+
+    // Scan every line, remembering the value from the last matching tag.
+    size_t maxheap = 0;
+    char line[1024];
+    while (fgets(line, sizeof(line), tmpf)) {
+        const char *p = strstr(line, begin);
+        if (p) {
+            maxheap = strtoull(p + strlen(begin), nullptr, 10);
+        }
+    }
+
+    fclose(tmpf);
+    return maxheap;
+}
+
+#elif defined(__linux__)
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fstream>
+#include <sstream>
+
+using namespace std;
+
+size_t getPeakHeap(void) {
+    // Modern Linux kernels write a 'VmPeak' value into /proc/$PID/status. This
+    // is a reasonable approximation, though it likely includes shared libs and
+    // the like as well...
+    ostringstream path;
+    path << "/proc/" << getpid() << "/status";
+
+    ifstream f(path.str().c_str());
+    if (!f.good()) {
+        return 0;
+    }
+
+    const string vmpeak("VmPeak:");
+
+    string line;
+    while (getline(f, line)) {
+        istringstream iss(line, istringstream::in);
+        string word;
+        iss >> word;
+        if (word != vmpeak) {
+            continue;
+        }
+
+        // Skip spaces
+        while (iss.good() && !isdigit(iss.peek())) {
+            iss.ignore();
+        }
+
+        size_t num = 0;
+        iss >> num;
+        return num * 1024;
+    }
+
+    f.close();
+    return 0;
+}
+
+#else
+
+// Stub.
+size_t getPeakHeap(void) {
+    return 0;
+}
+
+#endif
diff --git a/src/util/fatbit.c b/tools/hsbench/heapstats.h
similarity index 91%
rename from src/util/fatbit.c
rename to tools/hsbench/heapstats.h
index a80c3165..c2c37998 100644
--- a/src/util/fatbit.c
+++ b/tools/hsbench/heapstats.h
@@ -26,9 +26,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "fatbit.h"
-#include "multibit.h"
+#ifndef HEAPSTATS_H
+#define HEAPSTATS_H
 
-u32 fatbit_size(u32 total_bits) {
-    return MAX(sizeof(struct fatbit), mmbit_size(total_bits));
-}
+#include <stddef.h> // for size_t
+
+size_t getPeakHeap(void);
+
+#endif
diff --git a/tools/hsbench/huge.cpp b/tools/hsbench/huge.cpp
new file mode 100644
index 00000000..dbb453b2
--- /dev/null
+++ b/tools/hsbench/huge.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "hs.h"
+#include "ue2common.h"
+
+#include "common.h"
+#include "huge.h"
+
+#ifndef _WIN32
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#if defined(HAVE_SHMGET)
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
+UNUSED static int hsdb_shmid;
+
+using namespace std;
+
+long gethugepagesize(void);
+
+hs_database_t *get_huge(hs_database_t *db) {
+#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
+    /* move the database to huge pages where possible, but fail politely */
+    hs_error_t err;
+    size_t len;
+    char *bytes;
+
+    long hpage_size = gethugepagesize();
+    if (hpage_size < 0) {
+        printf("Couldn't determine huge page size\n");
+        hsdb_shmid = -1;
+        return db;
+    }
+
+    err = hs_serialize_database(db, &bytes, &len);
+    if (err != HS_SUCCESS) {
+        printf("Failed to serialize database for copy: %d\n", err);
+        // this is weird - don't fail gracefully this time
+        return nullptr;
+    }
+
+    size_t size;
+    err = hs_serialized_database_size(bytes, len, &size);
+    if (err != HS_SUCCESS) {
+        printf("Failed to get database size: %d\n", err);
+        // this is weird - don't fail gracefully this time
+        return nullptr;
+    }
+
+    void *shmaddr;
+    if ((hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, gethugepagesize()),
+                             SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
+        // This could fail if the user doesn't have permission to shmget(),
+        // which is OK.
+        goto fini;
+    }
+
+    shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND);
+    if (shmaddr == (char *)-1) {
+        perror("Shared memory attach failure");
+        goto fini;
+    }
+
+    // Mark this segment to be destroyed after this process detaches.
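+    // (IPC_RMID only marks a System V segment for removal: the mapping stays
+    // valid until the final shmdt(), so the database remains usable here and
+    // the kernel reclaims it automatically even on an unclean exit.)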
+    shmctl(hsdb_shmid, IPC_RMID, nullptr);
+
+    err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr);
+    if (err != HS_SUCCESS) {
+        printf("Failed to deserialize database into shm: %d\n", err);
+        shmdt((const void *)shmaddr);
+        goto fini;
+    }
+
+    free(bytes);
+    hs_free_database(db);
+    return (hs_database_t *)shmaddr;
+
+fini:
+    free(bytes);
+    hsdb_shmid = -1;
+    return db;
+#else
+    return db;
+#endif
+}
+
+void release_huge(hs_database_t *db) {
+#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
+    if (hsdb_shmid != -1) {
+        if (shmdt((const void *)db) != 0) {
+            perror("Detach failure");
+        }
+    } else {
+        // fallback
+        hs_free_database(db);
+    }
+#else
+    hs_free_database(db);
+#endif
+}
+
+#define BUF_SIZE 4096
+static long read_meminfo(const char *tag) {
+    int fd;
+    char buf[BUF_SIZE];
+    int len;
+    char *p, *q;
+    long val;
+
+    fd = open("/proc/meminfo", O_RDONLY);
+    if (fd < 0) {
+        perror("Couldn't open /proc/meminfo");
+        return -1;
+    }
+
+    len = read(fd, buf, sizeof(buf));
+    close(fd);
+    if (len < 0) {
+        perror("Error reading /proc/meminfo");
+        return -1;
+    }
+    if (len == sizeof(buf)) {
+        printf("/proc/meminfo is too large\n");
+        return -1;
+    }
+    buf[len] = '\0';
+
+    p = strstr(buf, tag);
+    if (!p) {
+        return -1;
+    }
+
+    p += strlen(tag);
+    val = strtol(p, &q, 0);
+    if (!isspace(*q)) {
+        printf("Couldn't parse /proc/meminfo value\n");
+        return -1;
+    }
+
+    return val;
+}
+
+long gethugepagesize(void) {
+    long hpage_size;
+    int hpage_kb;
+
+    hpage_kb = read_meminfo("Hugepagesize:");
+    if (hpage_kb < 0) {
+        hpage_size = -1;
+    } else {
+        /* convert from kb to bytes */
+        hpage_size = 1024 * hpage_kb;
+    }
+
+    return hpage_size;
+}
+
+#else
+
+/* No huge page support on WIN32. */
+
+hs_database_t *get_huge(hs_database_t *db) { return db; }
+
+void release_huge(hs_database_t *db) { hs_free_database(db); }
+
+#endif
diff --git a/tools/hsbench/huge.h b/tools/hsbench/huge.h
new file mode 100644
index 00000000..da539bd6
--- /dev/null
+++ b/tools/hsbench/huge.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HUGE_H
+#define HUGE_H
+
+#include "hs.h"
+
+hs_database_t *get_huge(hs_database_t *db);
+void release_huge(hs_database_t *db);
+
+#endif /* HUGE_H */
diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp
new file mode 100644
index 00000000..4298963b
--- /dev/null
+++ b/tools/hsbench/main.cpp
@@ -0,0 +1,780 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "common.h"
+#include "data_corpus.h"
+#include "engine_hyperscan.h"
+#include "expressions.h"
+#include "thread_barrier.h"
+#include "timer.h"
+#include "util/expression_path.h"
+#include "util/string_util.h"
+
+#include "grey.h"
+#include "hs.h"
+#include "ue2common.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <clocale>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <getopt.h>
+#ifndef _WIN32
+#include <pthread.h>
+#include <unistd.h>
+#endif
+
+#include <boost/core/noncopyable.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using namespace ue2;
+using boost::adaptors::map_keys;
+
+// Globals common to all files.
+bool echo_matches = false;
+bool saveDatabases = false;
+bool loadDatabases = false;
+string serializePath("");
+unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
+
+namespace /* anonymous */ {
+
+// Globals local to this file.
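+// (Configuration below is filled in once by processArgs(); the global timer
+// and totals are only ever written by the first benchmark thread.)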
+bool display_per_scan = false;
+ScanMode scan_mode = ScanMode::STREAMING;
+unsigned repeats = 20;
+string exprPath("");
+string corpusFile("");
+vector<unsigned int> threadCores;
+Timer totalTimer;
+double totalSecs = 0;
+
+typedef void (*thread_func_t)(void *context);
+
+class ThreadContext : boost::noncopyable {
+public:
+    ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
+                  thread_barrier &tb_in, thread_func_t function_in,
+                  vector<DataBlock> corpus_data_in)
+        : num(num_in), results(repeats), engine(db_in),
+          enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)),
+          tb(tb_in), function(function_in) {}
+
+    // Start the thread.
+    bool start(int cpu) {
+        thr = thread(function, this);
+
+        // affine if it's asked for
+        if (cpu >= 0) {
+            return affine(cpu);
+        }
+        return true;
+    }
+
+    // Wait for the thread to exit.
+    void join() {
+        thr.join();
+    }
+
+    // Serialise all threads on a global barrier.
+    void barrier() {
+        tb.wait();
+    }
+
+    // Apply processor affinity (if available) to this thread.
+    bool affine(UNUSED int cpu) {
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+        cpu_set_t cpuset;
+        CPU_ZERO(&cpuset);
+        assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+        // The 'clang' compiler complains about an unused result here, so we
+        // silence it.
+        (void)CPU_SET(cpu, &cpuset);
+
+        int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset),
+                                        &cpuset);
+        return (rv == 0);
+#endif
+        return false; // not available
+    }
+
+    unsigned num;
+    Timer timer;
+    vector<ResultEntry> results;
+    const EngineHyperscan &engine;
+    unique_ptr<EngineContext> enginectx;
+    vector<DataBlock> corpus_data;
+
+protected:
+    thread_barrier &tb; // shared barrier for time sync
+    thread_func_t function;
+    thread thr;
+};
+
+/** Display usage information, with an optional error. */
+static
+void usage(const char *error) {
+    printf("Usage: hsbench [OPTIONS...]\n\n");
+    printf("Options:\n\n");
+    printf("  -h              Display help and exit.\n");
+    printf("  -G OVERRIDES    Overrides for the grey box.\n");
+    printf("  -e PATH         Path to expression directory.\n");
+    printf("  -s FILE         Signature file to use.\n");
+    printf("  -z NUM          Signature ID to use.\n");
+    printf("  -c FILE         File to use as corpus.\n");
+    printf("  -n NUMBER       Repeat scan NUMBER times (default 20).\n");
+    printf("  -N              Benchmark in block mode"
+           " (default: streaming).\n");
+    printf("  -V              Benchmark in vectored mode"
+           " (default: streaming).\n");
+    printf("  -T CPU,CPU,...  Benchmark with threads on these CPUs.\n");
+    printf("  -i DIR          Don't compile, load from files in DIR"
+           " instead.\n");
+    printf("  -w DIR          After compiling, save to files in DIR.\n");
+    printf("  -d NUMBER       Set SOM precision mode (default: 8 (large)).\n");
+    printf("\n");
+    printf("  --per-scan      Display per-scan Mbit/sec results.\n");
+    printf("  --echo-matches  Display all matches that occur during scan.\n");
+    printf("\n\n");
+
+    if (error) {
+        printf("Error: %s\n", error);
+    }
+}
+
+/** Wraps up a name and the set of signature IDs it refers to. */
+struct BenchmarkSigs {
+    BenchmarkSigs(string name_in, SignatureSet sigs_in)
+        : name(move(name_in)), sigs(move(sigs_in)) {}
+    string name;
+    SignatureSet sigs;
+};
+
+/**
+ * Process command-line arguments. Prints usage and exits on error.
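+ *
+ * Signature files supplied with -s are collected while parsing and read once
+ * parsing completes, each becoming its own BenchmarkSigs set; a bare -z ID
+ * forms a single-signature set.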
+ */
+static
+void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
+                 UNUSED Grey &grey) {
+    const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
+    int in_sigfile = 0;
+    int do_per_scan = 0;
+    int do_echo_matches = 0;
+    vector<string> sigFiles;
+
+    static struct option longopts[] = {
+        {"per-scan", 0, &do_per_scan, 1},
+        {"echo-matches", 0, &do_echo_matches, 1},
+        {nullptr, 0, nullptr, 0}
+    };
+
+    for (;;) {
+        int c = getopt_long(argc, argv, options, longopts, nullptr);
+        if (c < 0) {
+            break;
+        }
+        switch (c) {
+        case 'c':
+            corpusFile.assign(optarg);
+            break;
+        case 'd': {
+            unsigned dist;
+            if (!fromString(optarg, dist)) {
+                usage("Must provide an integer argument to '-d' flag");
+                exit(1);
+            }
+            switch (dist) {
+            case 2:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL;
+                break;
+            case 4:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM;
+                break;
+            case 8:
+                somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
+                break;
+            default:
+                usage("SOM precision must be 2, 4 or 8");
+                exit(1);
+            }
+            break;
+        }
+        case 'e':
+            exprPath.assign(optarg);
+            break;
+#ifndef RELEASE_BUILD
+        case 'G':
+            applyGreyOverrides(&grey, string(optarg));
+            break;
+#endif
+        case 'h':
+            usage(nullptr);
+            exit(0);
+            break;
+        case 'n':
+            if (!fromString(optarg, repeats) || repeats == 0) {
+                usage("Couldn't parse argument to -n flag, should be"
+                      " a positive integer.");
+                exit(1);
+            }
+            break;
+        case 's':
+            in_sigfile = 2;
+            break;
+        case 'N':
+            scan_mode = ScanMode::BLOCK;
+            break;
+        case 'V':
+            scan_mode = ScanMode::VECTORED;
+            break;
+        case 'T':
+            if (!strToList(optarg, threadCores)) {
+                usage("Couldn't parse argument to -T flag, should be"
+                      " a list of positive integers.");
+                exit(1);
+            }
+            break;
+        case 'z': {
+            unsigned int sinumber;
+            if (!fromString(optarg, sinumber)) {
+                usage("Argument to '-z' flag must be an integer");
+                exit(1);
+            }
+            SignatureSet sigs = {sinumber};
+            sigSets.emplace_back(string("-z ") + optarg, sigs);
+            break;
+        }
+        case 'i':
+            loadDatabases = true;
+            serializePath = optarg;
+            break;
+        case 'w':
+            saveDatabases = true;
+            serializePath = optarg;
+            break;
+        case 1:
+            if (in_sigfile) {
+                sigFiles.push_back(optarg);
+                in_sigfile = 2;
+                break;
+            }
+        case 0:
+            break;
+        default:
+            usage("Unrecognised command line argument.");
+            exit(1);
+        }
+
+        if (in_sigfile) {
+            in_sigfile--;
+        }
+    }
+
+    if (do_echo_matches) {
+        echo_matches = true;
+    }
+    if (do_per_scan) {
+        display_per_scan = true;
+    }
+
+    if (exprPath.empty() && !sigFiles.empty()) {
+        /* attempt to infer an expression directory */
+        auto si = sigFiles.begin();
+        exprPath = inferExpressionPath(*si);
+        for (++si; si != sigFiles.end(); ++si) {
+            if (exprPath != inferExpressionPath(*si)) {
+                usage("Unable to infer consistent expression directory");
+                exit(1);
+            }
+        }
+    }
+
+    // Must have a valid expression path
+    if (exprPath.empty()) {
+        usage("Must specify an expression path with the -e option.");
+        exit(1);
+    }
+
+    // Must have valid database to scan
+    if (corpusFile.empty()) {
+        usage("Must specify a corpus file with the -c option.");
+        exit(1);
+    }
+
+    // Cannot ask for both loading and saving
+    if (loadDatabases && saveDatabases) {
+        usage("You cannot both load and save databases.");
+        exit(1);
+    }
+
+    // Read in any -s signature sets.
+    for (const auto &file : sigFiles) {
+        SignatureSet sigs;
+        loadSignatureList(file, sigs);
+        sigSets.emplace_back(file, move(sigs));
+    }
+}
+
+/**
+ * Start the global timer.
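+ *
+ * Only thread 0 touches the global timer; the barrier taken in each bench
+ * function ensures every worker is ready before timing begins.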
+ */
+static
+void startTotalTimer(ThreadContext *ctx) {
+    if (ctx->num != 0) {
+        return; // only runs in the first thread
+    }
+    totalTimer.start();
+}
+
+/** Stop the global timer and calculate totals. */
+static
+void stopTotalTimer(ThreadContext *ctx) {
+    if (ctx->num != 0) {
+        return; // only runs in the first thread
+    }
+    totalTimer.complete();
+    totalSecs = totalTimer.seconds();
+}
+
+/** Run a benchmark over a given engine and corpus in block mode. */
+static
+void benchBlock(void *context) {
+    ThreadContext *ctx = (ThreadContext *)context;
+
+    // Synchronization point
+    ctx->barrier();
+
+    startTotalTimer(ctx);
+
+    for (ResultEntry &r : ctx->results) {
+        ctx->timer.start();
+
+        for (const DataBlock &block : ctx->corpus_data) {
+            ctx->engine.scan(block.payload.c_str(), block.payload.size(),
+                             block.id, r, *ctx->enginectx);
+        }
+
+        ctx->timer.complete();
+        r.seconds = ctx->timer.seconds();
+    }
+
+    // Synchronization point
+    ctx->barrier();
+
+    // Now that all threads are finished, we can stop the clock.
+    stopTotalTimer(ctx);
+}
+
+/** Structure used to represent a stream. */
+struct StreamInfo {
+    unsigned int stream_id = ~0U;
+    unsigned int first_block_id = ~0U;
+    unsigned int last_block_id = 0;
+    unique_ptr<EngineStream> eng_handle;
+};
+
+static
+u64a count_streams(const vector<DataBlock> &corpus_blocks) {
+    set<unsigned int> streams;
+    for (const DataBlock &block : corpus_blocks) {
+        streams.insert(block.stream_id);
+    }
+
+    return (u64a)streams.size();
+}
+
+/**
+ * Take a ThreadContext and prepare a vector<StreamInfo> for streaming mode
+ * scanning from it.
+ */
+static
+vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
+    vector<StreamInfo> info(count_streams(ctx->corpus_data));
+    for (const DataBlock &block : ctx->corpus_data) {
+        assert(block.internal_stream_index < info.size());
+        StreamInfo &si = info[block.internal_stream_index];
+
+        /* check if this is the first time we have encountered this stream */
+        if (si.first_block_id > si.last_block_id) {
+            si.stream_id = block.stream_id;
+            si.first_block_id = block.id;
+            si.last_block_id = block.id;
+        } else {
+            assert(block.stream_id == si.stream_id);
+            assert(block.id > si.last_block_id);
+            assert(block.id > si.first_block_id);
+            si.last_block_id = block.id;
+        }
+    }
+    return info;
+}
+
+static
+void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
+    assert(ctx);
+    const EngineHyperscan &e = ctx->engine;
+    const vector<DataBlock> &blocks = ctx->corpus_data;
+
+    for (ResultEntry &r : ctx->results) {
+        ctx->timer.start();
+
+        for (const auto &b : blocks) {
+            StreamInfo &stream = streams[b.internal_stream_index];
+            assert(stream.stream_id == b.stream_id);
+
+            // If this is the first block in the stream, open the stream
+            // handle.
+            if (b.id == stream.first_block_id) {
+                assert(!stream.eng_handle);
+                stream.eng_handle = e.streamOpen(*ctx->enginectx, b.stream_id);
+                if (!stream.eng_handle) {
+                    printf("Fatal error: stream open failed!\n");
+                    exit(1);
+                }
+            }
+
+            assert(stream.eng_handle);
+
+            e.streamScan(*stream.eng_handle, b.payload.c_str(),
+                         b.payload.size(), b.id, r);
+
+            // if this was the last block in the stream, close the stream handle
+            if (b.id == stream.last_block_id) {
+                e.streamClose(move(stream.eng_handle), r);
+                stream.eng_handle = nullptr;
+            }
+        }
+
+        ctx->timer.complete();
+        r.seconds = ctx->timer.seconds();
+    }
+}
+
+/**
+ * Run a benchmark over a given engine and corpus in streaming mode.
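+ *
+ * Stream handles are opened lazily when a stream's first block is seen and
+ * closed again after its last block, so the number of concurrently open
+ * streams matches what the corpus interleaving requires.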
+ */
+static
+void benchStreaming(void *context) {
+    ThreadContext *ctx = (ThreadContext *)context;
+    vector<StreamInfo> streams = prepStreamingData(ctx);
+
+    // Synchronization point
+    ctx->barrier();
+
+    startTotalTimer(ctx);
+
+    benchStreamingInternal(ctx, streams);
+
+    // Synchronization point
+    ctx->barrier();
+
+    // Now that all threads are finished, we can stop the clock.
+    stopTotalTimer(ctx);
+}
+
+/** In-memory structure for a data block to be scanned in vectored mode. */
+struct VectoredInfo {
+    vector<const char *> data;
+    vector<unsigned int> len;
+    unsigned int stream_id;
+};
+
+/**
+ * Take a ThreadContext and prepare a vector<VectoredInfo> for vectored mode
+ * scanning from it.
+ */
+static
+vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
+    vector<VectoredInfo> out(count_streams(ctx->corpus_data));
+    for (const DataBlock &block : ctx->corpus_data) {
+        VectoredInfo &vi = out[block.internal_stream_index];
+        if (vi.data.empty()) {
+            vi.stream_id = block.stream_id;
+        } else {
+            assert(vi.stream_id == block.stream_id);
+        }
+        vi.data.push_back(block.payload.c_str());
+        vi.len.push_back(block.payload.size());
+    }
+
+    return out;
+}
+
+/** Run a benchmark over a given engine and corpus in vectored mode. */
+static
+void benchVectored(void *context) {
+    ThreadContext *ctx = (ThreadContext *)context;
+
+    vector<VectoredInfo> v_plans = prepVectorData(ctx);
+
+    // Synchronization point
+    ctx->barrier();
+
+    startTotalTimer(ctx);
+
+    for (ResultEntry &r : ctx->results) {
+        ctx->timer.start();
+
+        for (const VectoredInfo &v_plan : v_plans) {
+            ctx->engine.scan_vectored(&v_plan.data[0], &v_plan.len[0],
+                                      v_plan.data.size(), v_plan.stream_id, r,
+                                      *ctx->enginectx);
+        }
+
+        ctx->timer.complete();
+        r.seconds = ctx->timer.seconds();
+    }
+
+    // Synchronization point
+    ctx->barrier();
+
+    // Now that all threads are finished, we can stop the clock.
+    stopTotalTimer(ctx);
+}
+
+/** Given a time and a size, compute the throughput in megabits/sec. */
+static
+long double calc_mbps(double seconds, u64a bytes) {
+    assert(seconds > 0);
+    return (long double)bytes / ((long double)seconds * 125000);
+}
+
+/** Dump per-scan throughput data to screen. */
+static
+void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
+                           u64a bytesPerRun) {
+    for (const auto &t : threads) {
+        const auto &results = t->results;
+        for (size_t j = 0; j != results.size(); j++) {
+            const auto &r = results[j];
+            double mbps = calc_mbps(r.seconds, bytesPerRun);
+            printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", t->num, j, mbps);
+        }
+    }
+    printf("\n");
+}
+
+static
+u64a byte_size(const vector<DataBlock> &corpus_blocks) {
+    u64a total = 0;
+    for (const DataBlock &block : corpus_blocks) {
+        total += block.payload.size();
+    }
+
+    return total;
+}
+
+/** Dump benchmark results to screen. */
+static
+void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
+                    const vector<DataBlock> &corpus_blocks) {
+    u64a bytesPerRun = byte_size(corpus_blocks);
+    u64a matchesPerRun = threads[0]->results[0].matches;
+
+    // Sanity check: all of our results should have the same match count.
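+    // Each thread scans its own copy of the same corpus with the same
+    // database, so every repeat should see an identical total; any
+    // divergence points at nondeterminism somewhere in the scan.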
+    for (const auto &t : threads) {
+        if (!all_of(begin(t->results), end(t->results),
+                    [&matchesPerRun](const ResultEntry &e) {
+                        return e.matches == matchesPerRun;
+                    })) {
+            printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
+            break;
+        }
+    }
+
+    printf("Time spent scanning: %'0.3f seconds\n", totalSecs);
+    printf("Corpus size: %'llu bytes ", bytesPerRun);
+    switch (scan_mode) {
+    case ScanMode::STREAMING:
+        printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(),
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::VECTORED:
+        printf("(%'zu blocks in %'llu vectors)\n", corpus_blocks.size(),
+               count_streams(corpus_blocks));
+        break;
+    case ScanMode::BLOCK:
+        printf("(%'zu blocks)\n", corpus_blocks.size());
+        break;
+    }
+
+    u64a totalBytes = bytesPerRun * repeats * threads.size();
+    u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
+
+    double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
+    printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
+           matchesPerRun, matchRate);
+
+    double blockRate = (double)totalBlocks / (double)totalSecs;
+    printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
+    printf("Overall throughput: %'0.2Lf Mbit/sec\n",
+           calc_mbps(totalSecs, totalBytes));
+    printf("\n");
+
+    if (display_per_scan) {
+        displayPerScanResults(threads, bytesPerRun);
+    }
+}
+
+/**
+ * Construct a thread context for this scanning mode.
+ *
+ * Note: does not take blocks by reference. This is to give every thread its
+ * own copy of the data. It would be unrealistic for every thread to be
+ * scanning the same copy of the data.
+ */
+static
+unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
+                                            const vector<DataBlock> &blocks,
+                                            unsigned id,
+                                            thread_barrier &sync_barrier) {
+    thread_func_t fn = nullptr;
+    switch (scan_mode) {
+    case ScanMode::STREAMING:
+        fn = benchStreaming;
+        break;
+    case ScanMode::VECTORED:
+        fn = benchVectored;
+        break;
+    case ScanMode::BLOCK:
+        fn = benchBlock;
+        break;
+    }
+    assert(fn);
+
+    return ue2::make_unique<ThreadContext>(id, db, sync_barrier, fn, blocks);
+}
+
+/** Run the given benchmark. */
+static
+void runBenchmark(const EngineHyperscan &db,
+                  const vector<DataBlock> &corpus_blocks) {
+    size_t numThreads;
+    bool useAffinity = false;
+
+    if (threadCores.empty()) {
+        numThreads = 1;
+    } else {
+        numThreads = threadCores.size();
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+        useAffinity = true;
+#else
+        useAffinity = false;
+#endif
+    }
+
+    // Initialise a barrier that will let us sync threads before/after scanning
+    // for timer measurements.
+    thread_barrier sync_barrier(numThreads);
+
+    vector<unique_ptr<ThreadContext>> threads;
+
+    for (unsigned i = 0; i < numThreads; i++) {
+        auto t = makeThreadContext(db, corpus_blocks, i, sync_barrier);
+        int core = useAffinity ? (int)threadCores[i] : -1;
+        if (!t->start(core)) {
+            printf("Unable to start processing thread %u\n", i);
+            exit(1);
+        }
+        threads.push_back(move(t));
+    }
+
+    // Reap threads.
+    for (auto &t : threads) {
+        t->join();
+    }
+
+    // Display global results.
+    displayResults(threads, corpus_blocks);
+}
+
+} // namespace
+
+/**
+ * Main driver.
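+ *
+ * Parses arguments, loads expressions and the corpus, then compiles and
+ * benchmarks each signature set in turn.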
+ */
+int main(int argc, char *argv[]) {
+    Grey grey;
+
+    setlocale(LC_ALL, ""); // use the user's locale
+
+#ifndef NDEBUG
+    printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
+#endif
+
+    vector<BenchmarkSigs> sigSets;
+    processArgs(argc, argv, sigSets, grey);
+
+    // read in and process our expressions
+    ExpressionMap exprMapTemplate;
+    loadExpressions(exprPath, exprMapTemplate);
+
+    // If we have no signature sets, the user wants us to benchmark all the
+    // known expressions together.
+    if (sigSets.empty()) {
+        SignatureSet sigs;
+        for (auto i : exprMapTemplate | map_keys) {
+            sigs.push_back(i);
+        }
+        sigSets.emplace_back(exprPath, move(sigs));
+    }
+
+    // read in and process our corpus
+    vector<DataBlock> corpus_blocks;
+    try {
+        corpus_blocks = readCorpus(corpusFile);
+    } catch (const DataCorpusError &e) {
+        printf("Corpus data error: %s\n", e.msg.c_str());
+        return 1;
+    }
+
+    for (const auto &s : sigSets) {
+        ExpressionMap exprMap = exprMapTemplate; // copy
+
+        limitBySignature(exprMap, s.sigs);
+        if (exprMap.empty()) {
+            continue;
+        }
+
+        auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey);
+        if (!engine) {
+            printf("Error: expressions failed to compile.\n");
+            exit(1);
+        }
+
+        printf("\n");
+
+        runBenchmark(*engine, corpus_blocks);
+    }
+
+    return 0;
+}
diff --git a/tools/hsbench/scripts/CorpusBuilder.py b/tools/hsbench/scripts/CorpusBuilder.py
new file mode 100755
index 00000000..5baed2bd
--- /dev/null
+++ b/tools/hsbench/scripts/CorpusBuilder.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+'''
+A module to construct corpora databases for the Hyperscan benchmarker
+(hsbench).
+
+After construction, simply add blocks with the add_chunk() method, then call
+finish() when you're done.
+'''
+
+import os.path
+
+try:
+    from sqlite3 import dbapi2 as sqlite
+except:
+    from pysqlite2 import dbapi2 as sqlite
+
+class CorpusBuilder:
+    SCHEMA = '''
+CREATE TABLE chunk (
+    id integer primary key,
+    stream_id integer not null,
+    data blob
+);
+'''
+
+    def __init__(self, outfile):
+        if os.path.exists(outfile):
+            raise RuntimeError("Database '%s' already exists" % outfile)
+        self.outfile = outfile
+        self.db = sqlite.connect(self.outfile)
+        self.db.executescript(CorpusBuilder.SCHEMA)
+        self.current_chunk_id = 0
+
+    def add_chunk(self, stream_id, data):
+        chunk_id = self.current_chunk_id
+        c = self.db.cursor()
+        q = 'insert into chunk (id, stream_id, data) values (?, ?, ?)'
+        c.execute(q, (chunk_id, stream_id, sqlite.Binary(data)))
+        self.current_chunk_id += 1
+        return chunk_id
+
+    def finish(self):
+        self.db.commit()
+
+        c = self.db.cursor()
+        q = 'create index chunk_stream_id_idx on chunk(stream_id)'
+        c.execute(q)
+
+        c = self.db.cursor()
+        q = 'vacuum'
+        c.execute(q)
+
+        c = self.db.cursor()
+        q = 'analyze'
+        c.execute(q)
+
+        self.db.commit()
diff --git a/tools/hsbench/scripts/gutenbergCorpus.py b/tools/hsbench/scripts/gutenbergCorpus.py
new file mode 100755
index 00000000..fa1b1570
--- /dev/null
+++ b/tools/hsbench/scripts/gutenbergCorpus.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+
+'''
+This script creates a Hyperscan benchmarking corpus database from a supplied
+group of Project Gutenberg texts.
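+
+Texts are fetched by ID with the gutenberg package, split into blocks of the
+requested block size, and grouped into streams of roughly the requested
+stream size.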
+'''
+
+import sys, getopt, os.path
+import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
+from CorpusBuilder import CorpusBuilder
+
+stream_id = 0
+stream_bytes = 0
+
+def addBlocks(builder, block_size, stream_size, text_id, text):
+    global stream_id
+    global stream_bytes
+
+    print "text", text_id, "len", len(text)
+    i = 0
+    while i < len(text):
+        chunk = text[i:min(len(text), i + block_size)]
+        builder.add_chunk(stream_id, chunk)
+        i += block_size
+        stream_bytes += len(chunk)
+        if stream_bytes >= stream_size:
+            stream_id += 1
+            stream_bytes = 0
+    print "Text", text_id, ": added", i/block_size, "blocks of", block_size, "bytes."
+
+def buildCorpus(outFN, block_size, stream_size, text_ids):
+    if len(text_ids) == 0:
+        print >>sys.stderr, "Must provide at least one input ID"
+        sys.exit(0)
+
+    builder = CorpusBuilder(outFN)
+
+    total_bytes = 0
+    stream_id = 0
+    stream_bytes = 0
+
+    for text_id in text_ids:
+        text_id = int(text_id)
+        text = gutenberg.acquire.load_etext(text_id)
+        text = gutenberg.cleanup.strip_headers(text).strip()
+        addBlocks(builder, block_size, stream_size, text_id, text)
+        total_bytes += len(text)
+
+    builder.finish()
+
+    print "Total:", total_bytes, "bytes."
+
+def usage(exeName):
+    errmsg = "Usage: %s -o <output file> -b <block size> -s <stream size> <text id>..."
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+if __name__ == '__main__':
+    opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
+    opts = dict(opts)
+
+    requiredKeys = [ '-o', '-b', '-s' ]
+    for k in requiredKeys:
+        if not opts.has_key(k):
+            usage(os.path.basename(sys.argv[0]))
+
+    buildCorpus(opts['-o'], int(opts['-b']), int(opts['-s']), args)
diff --git a/tools/hsbench/scripts/linebasedCorpus.py b/tools/hsbench/scripts/linebasedCorpus.py
new file mode 100755
index 00000000..bde20e39
--- /dev/null
+++ b/tools/hsbench/scripts/linebasedCorpus.py
@@ -0,0 +1,53 @@
+#!/usr/bin/python
+
+'''
+Simple script to take a file full of lines of text and push them into a
+Hyperscan benchmarking corpus database, one block per line.
+'''
+
+import sys, getopt, os.path
+from CorpusBuilder import CorpusBuilder
+
+def lineCorpus(inFN, outFN):
+    '''
+    Read lines from file name @inFN and write them as blocks to a new db with
+    name @outFN.
+    '''
+
+    if not os.path.exists(inFN):
+        print >> sys.stderr, "Input file '%s' does not exist. Exiting." % inFN
+        sys.exit(-1)
+
+    lines = open(inFN).readlines()
+
+    if len(lines) == 0:
+        print >> sys.stderr, "Input file contained no lines. Exiting."
+        sys.exit(0)
+
+    builder = CorpusBuilder(outFN)
+
+    # write a single stream to contain everything
+    streamId = 0
+
+    for l in lines:
+        builder.add_chunk(streamId, l.rstrip())
+
+    builder.finish()
+
+def usage(exeName):
+    errmsg = "Usage: %s -i <input file> -o <output file>"
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+if __name__ == '__main__':
+    args = getopt.getopt(sys.argv[1:], 'i:o:c:')
+    args = dict(args[0])
+
+    requiredKeys = [ '-i', '-o' ]
+    for k in requiredKeys:
+        if not args.has_key(k):
+            usage(os.path.basename(sys.argv[0]))
+
+    fnArgs = tuple([args[k] for k in requiredKeys])
+    lineCorpus(*fnArgs)
diff --git a/tools/hsbench/scripts/pcapCorpus.py b/tools/hsbench/scripts/pcapCorpus.py
new file mode 100755
index 00000000..c10bfef3
--- /dev/null
+++ b/tools/hsbench/scripts/pcapCorpus.py
@@ -0,0 +1,301 @@
+#!/usr/bin/env python
+
+'''
+Script to convert a pcap file containing UDP and TCP packets to a corpus file.
+'''
+
+import sys, getopt, pprint, os
+from sqlite3 import dbapi2 as sqlite
+import pcap
+from optparse import OptionParser
+from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
+import struct
+from CorpusBuilder import CorpusBuilder
+
+ETHERTYPE_IP = 0x0800      # IP protocol
+ETHERTYPE_ARP = 0x0806     # Addr. resolution protocol
+ETHERTYPE_REVARP = 0x8035  # reverse Addr. resolution protocol
+ETHERTYPE_VLAN = 0x8100    # IEEE 802.1Q VLAN tagging
+ETHERTYPE_IPV6 = 0x86dd    # IPv6
+
+#
+# A dictionary of active TCP streams
+#
+tcp_streams = {}
+
+#
+# A dictionary of UDP streams
+#
+udp_streams = {}
+
+#
+# Current stream id
+cur_stream_id = 0
+
+def usage(exeName) :
+    errmsg = "Usage: %s -i <pcap file> -o <sqlite file>"
+    errmsg = errmsg % exeName
+    print >> sys.stderr, errmsg
+    sys.exit(-1)
+
+class FiveTuple(object):
+    def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
+        self.protocol = protocol
+        self.src_addr = src_addr
+        self.src_port = src_port
+        self.dst_addr = dst_addr
+        self.dst_port = dst_port
+
+    def __str__(self):
+        return "%d,%s,%d,%s,%d" % (self.protocol, self.src_addr, self.src_port, self.dst_addr, self.dst_port)
+
+class UdpSegment:
+    """Definition of a UDP segment
+    """
+    def __init__(self, five_tuple, header, payload):
+        self.five_tuple = five_tuple
+        self.udp_header = header
+        self.udp_payload = payload
+
+class TcpSegment:
+    """Definition of a TCP segment
+    """
+    def __init__(self, five_tuple, header, payload):
+        self.five_tuple = five_tuple
+        self.tcp_header = header
+        self.tcp_payload = payload
+        self.tcp_sequence_number, self.tcp_acknowledgement_number = struct.unpack('!LL', header[4:12])
+
+    def opt_isset_FIN(self):
+        opts = ord(self.tcp_header[13]) & 0x3F
+        return (opts & 0x01)
+
+    def opt_isset_SYN(self):
+        opts = ord(self.tcp_header[13]) & 0x3F
+        return (opts & 0x02)
+
+    def get_sequence_number(self):
+        return self.tcp_sequence_number
+
+    def __cmp__(self, other):
+        return cmp(self.tcp_sequence_number, other.tcp_sequence_number)
+
+class TcpStream:
+    """Definition of a TCP stream.
+    """
+    TCP_STREAM_ACTIVE = 0x1
+    TCP_STREAM_CLOSED = 0x02
+
+    def __init__(self, five_tuple):
+        self.five_tuple = five_tuple
+        self.initial_sequence_number = 0
+        self.segments = []
+
+    def reset_stream(self):
+        self.segments = []
+        self.initial_sequence_number = 0
+
+    def set_initial_sequence_number(self, sequence_number):
+        self.initial_sequence_number = sequence_number
+
+    def append_segment(self, tcp_segment):
+        if len(self.segments) == 0:
+            self.set_initial_sequence_number(tcp_segment.get_sequence_number())
+        self.segments.append(tcp_segment)
+
+    def get_segments_sorted(self):
+        return sorted(self.segments)
+
+class UdpStream:
+    """A container for UDP packets that share the same 5-tuple
+    """
+    def __init__(self, five_tuple):
+        self.five_tuple = five_tuple
+        self.segments = []
+
+    def append_segment(self, udp_segment):
+        self.segments.append(udp_segment)
+
+
+def newStream(five_tuple):
+    '''
+    Create a new stream using the arguments passed-in and return its ID.
+    '''
+    global cur_stream_id
+    stream_id = cur_stream_id
+    cur_stream_id += 1
+    return stream_id
+
+def process_tcp_segment(builder, segment):
+    """Process a tcp segment. It checks for SYN and FIN segments and,
+    if set, modifies the associated stream.
+ """ + segment_id = str(segment.five_tuple) + if segment_id in tcp_streams: + m_tcp_stream = tcp_streams[segment_id] + m_tcp_stream.append_segment(segment) + else: + m_tcp_stream = TcpStream(segment.five_tuple) + m_tcp_stream.append_segment(segment) + tcp_streams[segment_id] = m_tcp_stream + + + if segment.opt_isset_SYN(): + m_tcp_stream.segments = [] + + if segment.opt_isset_FIN(): + # + # Finished with the stream - add the segments in the + # stream to db allowing the stream to be reused. + # + db_add_tcp_stream_segments(builder, m_tcp_stream) + del tcp_streams[segment_id] + +def process_udp_segment(builder, segment): + """ Process a UDP segment. Given the connectionless nature of the UDP + protocol we simple accumulate the segment for later processing + when all the packets have been read + """ + segment_id = str(segment.five_tuple) + if segment_id in udp_streams: + m_udp_stream = udp_streams[segment_id] + m_udp_stream.append_segment(segment) + else: + m_udp_stream = UdpStream(segment.five_tuple) + m_udp_stream.append_segment(segment) + udp_streams[segment_id] = m_udp_stream + + +def db_add_tcp_stream_segments(builder, tcp_stream): + """Add the contents of a tcp stream to the database + """ + tcp_segments = tcp_stream.get_segments_sorted() + last_sequence_num = 0 + streamID = None + + for tcp_segment in tcp_segments: + if (len(tcp_segment.tcp_payload) > 0) and (tcp_segment.tcp_sequence_number > last_sequence_num): + # + # Segment with an actual payload - add it to the stream's + # list of chunks. + # + # Note: delay creating the stream until we have a via chunk to + # commit to it + # + if streamID == None: + streamID = newStream(tcp_stream.five_tuple) + builder.add_chunk(streamID, tcp_segment.tcp_payload) + last_sequence_num = tcp_segment.tcp_sequence_number + + +def db_add_udp_stream_segments(builder, udp_stream): + """Add the contents of a UDP stream to the database. Since UDP is + connection-less, a UDP stream object is really just an accumulation + of all the packets associated with a given 5-tuple. + """ + udp_segments = udp_stream.segments + streamID = None + for udp_segment in udp_segments: + if len(udp_segment.udp_payload) > 0: + if streamID == None: + streamID = newStream(udp_stream.five_tuple) + builder.add_chunk(streamID, udp_segment.udp_payload) + +def enchunk_pcap(pcapFN, sqliteFN): + """Read the contents of a pcap file with name @pcapFN and produce + a sqlite db with name @sqliteFN. It will contain chunks of data + from TCP and UDP streams, + """ + + if not os.path.exists(pcapFN): + print >> sys.stderr, "Input file '%s' does not exist. Exiting." 
% pcapFN + sys.exit(-1) + + builder = CorpusBuilder(sqliteFN) + + # + # Read in the contents of the pcap file, adding stream segments as found + # + pkt_cnt = 0; + ip_pkt_cnt = 0; + unsupported_ip_protocol_cnt = 0 + pcap_ref = pcap.pcap(pcapFN) + done = False + + while not done: + try: + ts, packet = pcap_ref.next() + except: + break + + pkt_cnt += 1 + + linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0] + if linkLayerType != ETHERTYPE_IP: + # + # We're only interested in IP packets + # + continue + + ip_pkt_cnt += 1 + + ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0] + ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len] + pkt_protocol = struct.unpack('B', ip_pkt[9])[0] + + if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP): + # + # we're only interested in UDP and TCP packets at the moment + # + continue + + pkt_src_addr = inet_ntoa(ip_pkt[12:16]) + pkt_dst_addr = inet_ntoa(ip_pkt[16:20]) + + ip_hdr_len_offset = (ord(ip_pkt[0]) & 0x0f) * 4 + ip_payload = ip_pkt[ip_hdr_len_offset:len(ip_pkt)] + + pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4]) + five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port) + five_tuple_id = str(five_tuple) + + if pkt_protocol == IPPROTO_UDP: + udp_payload_len = struct.unpack('!H', ip_payload[4:6])[0] - 8 + udp_header = ip_payload[0:8] + udp_payload = ip_payload[8:len(ip_payload)] + udp_segment = UdpSegment(five_tuple, udp_header, udp_payload) + process_udp_segment(builder, udp_segment) + elif pkt_protocol == IPPROTO_TCP: + tcp_hdr_len = (ord(ip_payload[12]) >> 4) * 4 + tcp_header = ip_payload[0:tcp_hdr_len] + tcp_payload = ip_payload[tcp_hdr_len:len(ip_payload)] + segment = TcpSegment(five_tuple, tcp_header, tcp_payload) + process_tcp_segment(builder, segment) + + # + # Having read the contents of the pcap, we fill the database with any + # remaining TCP and UDP segments + # + for tcp_stream in tcp_streams.itervalues(): + db_add_tcp_stream_segments(builder, tcp_stream) + + for udp_stream in udp_streams.itervalues(): + db_add_udp_stream_segments(builder, udp_stream) + + # + # We've finished with the database + # + builder.finish() + +if __name__ == '__main__' : + + args = getopt.getopt(sys.argv[1:], 'i:o:') + args = dict(args[0]) + + requiredKeys = [ '-i', '-o'] + for k in requiredKeys : + if not args.has_key(k) : + usage(os.path.basename(sys.argv[0])) + + fnArgs = tuple([ args[k] for k in requiredKeys ]) + enchunk_pcap(*fnArgs) diff --git a/tools/hsbench/thread_barrier.h b/tools/hsbench/thread_barrier.h new file mode 100644 index 00000000..1c3a53e7 --- /dev/null +++ b/tools/hsbench/thread_barrier.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Simple thread barrier.
+ */
+
+#ifndef TOOLS_THREAD_BARRIER_H
+#define TOOLS_THREAD_BARRIER_H
+
+#include <condition_variable>
+#include <mutex>
+#include <stdexcept>
+
+/**
+ * \brief Simple thread barrier class.
+ *
+ * Blocks until wait() has been called N times.
+ */
+class thread_barrier {
+public:
+    explicit thread_barrier(unsigned int n) : max(n) {
+        if (max == 0) {
+            throw std::runtime_error("invalid barrier");
+        }
+    }
+
+    void wait() {
+        std::unique_lock<std::mutex> lock(mtx);
+        count++;
+        if (count >= max) {
+            count = 0;
+            condvar.notify_all();
+        } else {
+            condvar.wait(lock);
+        }
+    }
+
+private:
+    std::mutex mtx;
+    std::condition_variable condvar;
+    unsigned int count = 0;
+    unsigned int max;
+};
+
+#endif // TOOLS_THREAD_BARRIER_H
diff --git a/tools/hsbench/timer.h b/tools/hsbench/timer.h
new file mode 100644
index 00000000..85bd294c
--- /dev/null
+++ b/tools/hsbench/timer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include "ue2common.h"
+
+#include <chrono>
+
+class Timer {
+public:
+    Timer() = default;
+
+    void start() {
+        clock_start = Clock::now();
+    }
+
+    void complete() {
+        clock_end = Clock::now();
+    }
+
+    double seconds() const {
+        std::chrono::duration<double> secs = clock_end - clock_start;
+        return secs.count();
+    }
+
+protected:
+    using Clock = std::chrono::steady_clock;
+    std::chrono::time_point<Clock> clock_start;
+    std::chrono::time_point<Clock> clock_end;
+};
+
+#endif // TIMER_H
diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt
index 63f3a9ac..8b494444 100644
--- a/unit/CMakeLists.txt
+++ b/unit/CMakeLists.txt
@@ -34,7 +34,7 @@ add_library(gtest STATIC ${gtest_SOURCES})
 add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR})
 
-if (NOT RELEASE_BUILD)
+if (NOT (RELEASE_BUILD OR FAT_RUNTIME))
 set(unit_internal_SOURCES
     internal/bitfield.cpp
     internal/bitutils.cpp
@@ -71,6 +71,7 @@ set(unit_internal_SOURCES
     internal/repeat.cpp
     internal/rose_build_merge.cpp
     internal/rose_mask.cpp
+    internal/rose_mask_32.cpp
     internal/rvermicelli.cpp
     internal/simd_utils.cpp
     internal/shuffle.cpp
@@ -88,7 +89,7 @@ set(unit_internal_SOURCES
 add_executable(unit-internal ${unit_internal_SOURCES})
 target_link_libraries(unit-internal hs gtest corpusomatic)
-endif(NOT RELEASE_BUILD)
+endif(NOT (RELEASE_BUILD OR FAT_RUNTIME))
 
 set(unit_hyperscan_SOURCES
     hyperscan/allocators.cpp
diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp
index d277a26b..8e86cc64 100644
--- a/unit/hyperscan/arg_checks.cpp
+++ b/unit/hyperscan/arg_checks.cpp
@@ -84,6 +84,12 @@ void breakDatabaseBytecode(hs_database *db) {
     *bytecode += 3;
 }
 
+// Check that hs_valid_platform says we can run here
+TEST(HyperscanArgChecks, ValidPlatform) {
+    hs_error_t error = hs_valid_platform();
+    ASSERT_EQ(HS_SUCCESS, error) << "hs_valid_platform should return zero";
+}
+
 // Check that hs_version gives us a reasonable string back
 TEST(HyperscanArgChecks, Version) {
     const char *version = hs_version();
diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp
index 4d476932..31aaf17f 100644
--- a/unit/internal/bitutils.cpp
+++ b/unit/internal/bitutils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -436,3 +436,16 @@ TEST(BitUtils, rank_in_mask64) {
     ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31));
     ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63));
 }
+
+#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
+TEST(BitUtils, pdep64) {
+    u64a data = 0xF123456789ABCDEF;
+    ASSERT_EQ(0xfULL, pdep64(data, 0xf));
+    ASSERT_EQ(0xefULL, pdep64(data, 0xff));
+    ASSERT_EQ(0xf0ULL, pdep64(data, 0xf0));
+    ASSERT_EQ(0xfULL, pdep64(data, 0xf));
+    ASSERT_EQ(0xef0ULL, pdep64(data, 0xff0));
+    ASSERT_EQ(0xef00ULL, pdep64(data, 0xff00));
+    ASSERT_EQ(0xd0e0f00ULL, pdep64(data, 0xf0f0f00));
+}
+#endif
diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp
index c66ab4c5..6116bfdb 100644
--- a/unit/internal/fdr.cpp
+++ b/unit/internal/fdr.cpp
@@ -337,8 +337,8 @@ TEST_P(FDRp, NoRepeat3) {
 static
 hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
                                const u8 *buf, size_t len, size_t start,
-                               HWLMCallback cb, void *ctxt, hwlm_group_t groups,
-                               u8 *stream_state) {
+                               HWLMCallback cb, void *ctxt,
+                               hwlm_group_t groups) {
     array<u8, 16> wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7', '8',
                                      '9', 'a', 'b', 'c', 'd', 'e', 'f'}};
     if (hlen < 16) {
@@ -346,8 +346,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
         memcpy(new_hbuf, hbuf, hlen);
         hbuf = new_hbuf;
     }
-    return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups,
-                            stream_state);
+    return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups);
 }
 
 TEST_P(FDRp, SmallStreaming) {
@@ -366,7 +365,7 @@ TEST_P(FDRp, SmallStreaming) {
     expected.push_back(match(2, 2, 1));
 
     safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0,
-                      decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+                      decentCallback, &matches, HWLM_ALL_GROUPS);
     for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
         EXPECT_EQ(expected[i], matches[i]);
     }
@@ -378,7 +377,7 @@ TEST_P(FDRp, SmallStreaming) {
     expected.push_back(match(1, 8, 10));
 
     safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5,
-                      0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+                      0, decentCallback, &matches, HWLM_ALL_GROUPS);
 
     for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
         EXPECT_EQ(expected[i], matches[i] + 4);
@@ -407,7 +406,7 @@ TEST_P(FDRp, SmallStreaming2) {
 
     safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6,
                       (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches,
-                      HWLM_ALL_GROUPS, nullptr);
+                      HWLM_ALL_GROUPS);
 
     for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
         EXPECT_EQ(expected[i], matches[i] + 6);
@@ -445,44 +444,6 @@ TEST_P(FDRp, LongLiteral) {
     EXPECT_EQ(0U, count);
 }
 
-TEST_P(FDRp, VeryLongLiteral) {
-    const u32 hint = GetParam();
-    SCOPED_TRACE(hint);
-    vector<hwlmLiteral> lits;
-
-    string s1000;
-    for(int i = 0; i < 1000; i++) {
-        s1000 += char('A' + i % 10);
-    }
-
-    string s66k;
-    for(int i = 0; i < 66; i++) {
-        s66k += s1000;
-    }
-
-    string corpus = s66k + s66k;
-    lits.push_back(hwlmLiteral(s66k.c_str(), 0, 10));
-
-    auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
-    CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
-
-    vector<match> matches;
-    u32 rv = fdrExec(fdr.get(), (const u8 *)s66k.c_str(), s66k.size(), 0,
-                     decentCallback, &matches, HWLM_ALL_GROUPS);
-    EXPECT_EQ(0U, rv);
-    ASSERT_EQ(1U, matches.size());
-    ASSERT_EQ(match(0, 65999, 10), matches[0]);
-
-    matches.clear();
-    rv = fdrExec(fdr.get(), (const u8 *)corpus.c_str(), corpus.size(), 0,
-                 decentCallback, &matches, HWLM_ALL_GROUPS);
-    EXPECT_EQ(0U, rv);
-    for (u32 i = 0; i < matches.size(); i++) {
-        ASSERT_EQ(match(10 * i, 65999 + 10 * i, 10), matches[i]);
-    }
-    EXPECT_EQ(6601U, matches.size());
-}
-
 TEST_P(FDRp, moveByteStream) {
     const u32 hint = GetParam();
     SCOPED_TRACE(hint);
@@ -538,9 +499,9 @@ TEST_P(FDRp, Stream1) {
 
     // check matches
     vector<match> matches;
-    fdrStatus = safeExecStreaming(
-        fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
-        0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+    fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
+                                  (const u8 *)data2, data_len2, 0,
+                                  decentCallback, &matches, HWLM_ALL_GROUPS);
     ASSERT_EQ(0, fdrStatus);
 
     ASSERT_EQ(4U, matches.size());
@@ -783,9 +744,9 @@ TEST(FDR, FDRTermS) {
 
     // check matches
     vector<match> matches;
-    fdrStatus = safeExecStreaming(
-        fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
-        0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr);
+    fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
+                                  (const u8 *)data2, data_len2, 0,
+                                  decentCallbackT, &matches, HWLM_ALL_GROUPS);
     ASSERT_EQ(HWLM_TERMINATED, fdrStatus);
 
     ASSERT_EQ(1U, matches.size());
@@ -812,30 +773,3 @@ TEST(FDR, FDRTermB) {
 
     ASSERT_EQ(1U, matches.size());
 }
-
-TEST(FDR, ManyLengths) {
-    // UE-2400: we had a crash due to div by zero in the compiler when given a
-    // set of literals with precisely 512 different lengths.
-    const u32 num = 512;
-    vector<hwlmLiteral> lits;
-    char c = 0;
-    string s;
-    for (u32 i = 0; i < num; i++) {
-        s.push_back(c++);
-        lits.push_back(hwlmLiteral(s, false, i + 1));
-    }
-
-    auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey());
-    ASSERT_TRUE(fdr != nullptr);
-
-    // Confirm that we can scan against this FDR table as well.
-
-    vector<match> matches;
-
-    hwlm_error_t fdrStatus =
-        fdrExec(fdr.get(), (const u8 *)s.c_str(), s.size(), 0, decentCallback,
-                &matches, HWLM_ALL_GROUPS);
-    ASSERT_EQ(HWLM_SUCCESS, fdrStatus);
-
-    ASSERT_EQ(768U, matches.size());
-}
diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp
index 68d8f632..7b00ac4c 100644
--- a/unit/internal/fdr_flood.cpp
+++ b/unit/internal/fdr_flood.cpp
@@ -495,7 +495,7 @@ TEST_P(FDRFloodp, StreamingMask) {
         const u8 *fhist = fake_history.data() + fake_history_size;
         fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0,
                                      countCallback, &matchesCounts,
-                                     HWLM_ALL_GROUPS, nullptr);
+                                     HWLM_ALL_GROUPS);
         ASSERT_EQ(0, fdrStatus);
         for (u32 j = streamChunk; j < dataSize; j += streamChunk) {
             if (j < 16) {
@@ -506,12 +506,12 @@ TEST_P(FDRFloodp, StreamingMask) {
                 fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j,
                                              streamChunk, 0, countCallback,
                                              &matchesCounts,
-                                             HWLM_ALL_GROUPS, nullptr);
+                                             HWLM_ALL_GROUPS);
             } else {
                 fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j,
                                              streamChunk, 0, countCallback,
                                              &matchesCounts,
-                                             HWLM_ALL_GROUPS, nullptr);
+                                             HWLM_ALL_GROUPS);
             }
             ASSERT_EQ(0, fdrStatus);
         }
diff --git a/unit/internal/graph.cpp b/unit/internal/graph.cpp
index 3ab3326d..b7ec7b03 100644
--- a/unit/internal/graph.cpp
+++ b/unit/internal/graph.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -29,10 +29,14 @@
 #include "config.h"
 #include "gtest/gtest.h"
 #include "util/graph.h"
+#include "util/ue2_graph.h"
 
 #include 
 #include 
 #include 
+#include 
+
+#include 
 
 using namespace boost;
 using namespace std;
@@ -167,107 +171,1617 @@ TEST(graph_util, degrees) {
     ASSERT_TRUE( has_proper_successor(d, g));
     ASSERT_FALSE(has_proper_successor(e, g));
     ASSERT_TRUE( has_proper_successor(f, g));
-
-    ASSERT_TRUE( hasGreaterInDegree(0, a, g));
-    ASSERT_FALSE(hasGreaterInDegree(1, a, g));
-    ASSERT_TRUE( hasGreaterInDegree(2, b, g));
-    ASSERT_FALSE(hasGreaterInDegree(3, b, g));
-    ASSERT_TRUE( hasGreaterInDegree(1, c, g));
-    ASSERT_FALSE(hasGreaterInDegree(2, c, g));
-    ASSERT_FALSE(hasGreaterInDegree(0, d, g));
-    ASSERT_TRUE( hasGreaterInDegree(1, e, g));
-    ASSERT_FALSE(hasGreaterInDegree(2, e, g));
-    ASSERT_FALSE(hasGreaterInDegree(0, f, g));
-
-    ASSERT_TRUE( hasGreaterOutDegree(0, a, g));
-    ASSERT_FALSE(hasGreaterOutDegree(1, a, g));
-    ASSERT_TRUE( hasGreaterOutDegree(1, b, g));
-    ASSERT_FALSE(hasGreaterOutDegree(2, b, g));
-    ASSERT_FALSE(hasGreaterOutDegree(0, c, g));
-    ASSERT_TRUE( hasGreaterOutDegree(0, d, g));
-    ASSERT_FALSE(hasGreaterOutDegree(1, d, g));
-    ASSERT_TRUE( hasGreaterOutDegree(0, e, g));
-    ASSERT_FALSE(hasGreaterOutDegree(1, e, g));
-    ASSERT_TRUE( hasGreaterOutDegree(2, f, g));
-    ASSERT_FALSE(hasGreaterOutDegree(3, f, g));
 }
 
-TEST(graph_util, in_degree_equal_to_1) {
-    unit_graph g;
+struct SimpleV {
+    size_t index;
+    string test_v = "SimpleV";
+};
 
-    unit_vertex a = add_vertex(g);
-    unit_vertex b = add_vertex(g);
-    unit_vertex c = add_vertex(g);
-    unit_vertex d = add_vertex(g);
+struct SimpleE {
+    size_t index;
+    string test_e = "SimpleE";
+};
 
-    ASSERT_TRUE(in_degree_equal_to(a, g, 0));
-    ASSERT_FALSE(in_degree_equal_to(a, g, 1));
-    ASSERT_FALSE(in_degree_equal_to(a, g, 2));
+struct SimpleG : public ue2_graph<SimpleG, SimpleV, SimpleE> {
+};
+
+TEST(ue2_graph, graph_concept) {
+    static_assert(std::is_same<SimpleG::vertex_descriptor,
+                      graph_traits<SimpleG>::vertex_descriptor>::value,
+                  "vertex_descriptor");
+    static_assert(std::is_same<SimpleG::edge_descriptor,
+                      graph_traits<SimpleG>::edge_descriptor>::value,
+                  "edge_descriptor");
+    static_assert(std::is_same<SimpleG::directed_category,
+                      graph_traits<SimpleG>::directed_category>::value,
+                  "directed_category");
+    static_assert(std::is_same<SimpleG::edge_parallel_category,
+                      graph_traits<SimpleG>::edge_parallel_category>::value,
+                  "edge_parallel_category");
+    static_assert(std::is_same<SimpleG::traversal_category,
+                      graph_traits<SimpleG>::traversal_category>::value,
+                  "traversal_category");
+
+    UNUSED SimpleG::vertex_descriptor n = SimpleG::null_vertex();
+
+    BOOST_CONCEPT_ASSERT((GraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, vertex_list_concept) {
+    BOOST_CONCEPT_ASSERT((VertexListGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, edge_list_concept) {
+    BOOST_CONCEPT_ASSERT((EdgeListGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, incidence_concept) {
+    BOOST_CONCEPT_ASSERT((IncidenceGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, bidi_concept) {
+    BOOST_CONCEPT_ASSERT((BidirectionalGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, mutable_concept) {
+    BOOST_CONCEPT_ASSERT((MutableGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, property_concept) {
+    static_assert(std::is_same<SimpleG::vertex_property_type, SimpleV>::value,
+                  "vertex_property_type");
+    static_assert(std::is_same<SimpleG::edge_property_type, SimpleE>::value,
+                  "edge_property_type");
+
+    /* Although documented as part of the MutablePropertyGraph concept,
+     * (vertex|edge)_property_type don't appear to exist in the traits for any
+     * existing graph types and the typedefs are not installed by default */
+
+    // static_assert(std::is_same<
+    //                   typename graph_traits<SimpleG>::vertex_property_type,
+    //                   SimpleV>::value,
+    //               "vertex_property_type");
+    // static_assert(std::is_same<
+    //                   typename graph_traits<SimpleG>::edge_property_type,
+    //                   SimpleE>::value,
+    //               "edge_property_type");
+
+    /* However, there does seem to be an undocumented templated structure
+     * paralleling the main graph_traits */
+    static_assert(std::is_same<
+                      typename vertex_property_type<SimpleG>::type,
+                      SimpleV>::value,
+                  "vertex_property_type");
+    static_assert(std::is_same<
+                      typename edge_property_type<SimpleG>::type,
+                      SimpleE>::value,
+                  "edge_property_type");
+
+    BOOST_CONCEPT_ASSERT((VertexMutablePropertyGraphConcept<SimpleG>));
+    BOOST_CONCEPT_ASSERT((EdgeMutablePropertyGraphConcept<SimpleG>));
+}
+
+TEST(ue2_graph, add_vertex) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+}
+
+TEST(ue2_graph, add_and_remove_vertex) {
+    SimpleG g;
+    ASSERT_EQ(0U, num_vertices(g));
+
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_EQ(1U, num_vertices(g));
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    auto p = vertices(g);
+    ASSERT_NE(p.first, p.second);
+    ASSERT_EQ(a, *p.first);
+    ++p.first;
+    ASSERT_EQ(p.first, p.second);
+
+    remove_vertex(a, g);
+    ASSERT_EQ(0U, num_vertices(g));
+    auto q = vertices(g);
+    ASSERT_EQ(q.first, q.second);
+}
+
+TEST(ue2_graph, add_edge) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    auto q = edge(a, b, g);
+    ASSERT_TRUE(q.second);
+    ASSERT_EQ(p.first, q.first);
+}
+
+TEST(ue2_graph, add_remove_edge1) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    remove_edge(p.first, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_remove_edge2) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    remove_edge(a, b, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_edge_clear1) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    clear_vertex(a, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_edge_clear2) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    clear_vertex(b, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_edge_clear_out) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    clear_out_edges(a, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_edge_clear_in) {
+    SimpleG g;
+    SimpleG::vertex_descriptor a = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), a);
+    SimpleG::vertex_descriptor b = add_vertex(g);
+    ASSERT_NE(SimpleG::null_vertex(), b);
+    ASSERT_NE(a, b);
+    auto p = add_edge(a, b, g);
+    ASSERT_TRUE(p.second);
+    ASSERT_EQ(1U, num_edges(g));
+
+    ASSERT_EQ(a, source(p.first, g));
+    ASSERT_EQ(b, target(p.first, g));
+
+    clear_in_edges(b, g);
+    auto q = edge(a, b, g);
+    ASSERT_FALSE(q.second);
+    ASSERT_EQ(q.first, SimpleG::null_edge());
+    ASSERT_EQ(0U, num_edges(g));
+}
+
+TEST(ue2_graph, add_remove_edge_iter) {
+
SimpleG g; + SimpleG::vertex_descriptor a = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), a); + SimpleG::vertex_descriptor b = add_vertex(g); + ASSERT_NE(SimpleG::null_vertex(), b); + ASSERT_NE(a, b); + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + + ASSERT_EQ(a, source(p.first, g)); + ASSERT_EQ(b, target(p.first, g)); + + remove_edge(edges(g).first, g); + auto q = edge(a, b, g); + ASSERT_FALSE(q.second); + ASSERT_EQ(q.first, SimpleG::null_edge()); + ASSERT_EQ(0U, num_edges(g)); +} + +TEST(ue2_graph, vertices_0) { + SimpleG g; + auto p = vertices(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, vertices_1) { + SimpleG g; + SimpleG::vertex_iterator vi; + SimpleG::vertex_iterator ve; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(vi, ve); + + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + ASSERT_EQ(4U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(c, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(c, g); + + ASSERT_EQ(3U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(a, *vi++); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(a, g); + + ASSERT_EQ(2U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + auto e = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(e, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(e, g); + + ASSERT_EQ(2U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(b, *vi++); + ASSERT_EQ(d, *vi++); + ASSERT_EQ(vi, ve); + + remove_vertex(b, g); + remove_vertex(d, g); + + ASSERT_EQ(0U, num_vertices(g)); + tie(vi, ve) = vertices(g); + ASSERT_EQ(vi, ve); +} + +TEST(ue2_graph, out_edges_1) { + SimpleG g; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, out_edges_2) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, c, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + 
ASSERT_EQ(ei, ee); + + p = add_edge(c, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(b, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(4U, num_edges(g)); + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(a, c, g); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_EQ(4U, num_edges(g)); + ASSERT_TRUE(p.second); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + clear_out_edges(a, g); + ASSERT_EQ(2U, num_edges(g)); + + ASSERT_EQ(0U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, in_edges_1) { + SimpleG g; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, in_degree(a, g)); + + SimpleG::in_edge_iterator ei; + SimpleG::in_edge_iterator ee; + + tie(ei, ee) = in_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, in_edges_2) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + + ASSERT_EQ(3U, num_vertices(g)); + ASSERT_EQ(0U, in_degree(a, g)); + + SimpleG::in_edge_iterator ei; + SimpleG::in_edge_iterator ee; + + tie(ei, ee) = in_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(b, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(c, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(c, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, b, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(4U, num_edges(g)); + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(c, a, g); + ASSERT_EQ(3U, num_edges(g)); + + ASSERT_EQ(1U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_EQ(4U, num_edges(g)); + ASSERT_TRUE(p.second); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + clear_in_edges(a, g); + 
ASSERT_EQ(2U, num_edges(g)); + + ASSERT_EQ(0U, in_degree(a, g)); + tie(ei, ee) = in_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, parallel_1) { + SimpleG g; + SimpleG::vertex_iterator vi; + SimpleG::vertex_iterator ve; + auto a = add_vertex(g); + + ASSERT_EQ(1U, num_vertices(g)); + ASSERT_EQ(0U, out_degree(a, g)); + + SimpleG::out_edge_iterator ei; + SimpleG::out_edge_iterator ee; + + tie(ei, ee) = out_edges(a, g); + ASSERT_TRUE(ei == ee); + + auto p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(1U, num_edges(g)); + SimpleG::edge_descriptor e1 = p.first; + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e2 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(e1, g); + + ASSERT_EQ(1U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ei, ee); + + p = add_edge(a, a, g); + ASSERT_TRUE(p.second); + ASSERT_EQ(2U, num_edges(g)); + SimpleG::edge_descriptor e3 = p.first; + + ASSERT_EQ(2U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(ei, ee); + + remove_edge(a, a, g); + ASSERT_EQ(0U, out_degree(a, g)); + tie(ei, ee) = out_edges(a, g); + ASSERT_EQ(ei, ee); +} + +TEST(ue2_graph, edges_0a) { + SimpleG g; + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_0b) { + SimpleG g; + add_vertex(g); + ASSERT_EQ(1U, num_vertices(g)); + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_0c) { + SimpleG g; + add_vertex(g); + add_vertex(g); + ASSERT_EQ(2U, num_vertices(g)); + auto p = edges(g); + ASSERT_EQ(p.first, p.second); +} + +TEST(ue2_graph, edges_1a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1b) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(u, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1c) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, u, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_1d) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto u = add_vertex(g); + UNUSED auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + UNUSED auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + + 
SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_2a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(v, v, g).first; + auto e2 = add_edge(v, v, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e2, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_2b) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + auto u = add_vertex(g); + auto v = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(u, v, g).first; + auto e2 = add_edge(v, u, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(1U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e2, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_2c) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto s = add_vertex(g); + UNUSED auto t = add_vertex(g); + auto u = add_vertex(g); + UNUSED auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + UNUSED auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + auto e2 = add_edge(u, x, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(2U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + clear_in_edges(x, g); + + ASSERT_EQ(0U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, edges_3a) { + SimpleG g; + ASSERT_EQ(0U, num_edges(g)); + + UNUSED auto s = add_vertex(g); + UNUSED auto t = add_vertex(g); + auto u = add_vertex(g); + auto v = add_vertex(g); + auto w = add_vertex(g); + auto x = add_vertex(g); + UNUSED auto y = add_vertex(g); + auto z = add_vertex(g); + + ASSERT_EQ(0U, num_edges(g)); + auto e1 = add_edge(w, x, g).first; + auto e2 = add_edge(u, v, g).first; + auto e3 = add_edge(u, z, g).first; + + SimpleG::edge_iterator ei, ee; + + ASSERT_EQ(3U, num_edges(g)); + tie(ei, ee) = edges(g); + ASSERT_EQ(e2, *ei++); + ASSERT_EQ(e3, *ei++); + ASSERT_EQ(e1, *ei++); + ASSERT_EQ(ee, ei); + + remove_edge(e1, g); + + ASSERT_EQ(2U, num_edges(g)); + clear_out_edges(u, g); + + ASSERT_EQ(0U, num_edges(g)); + + tie(ei, ee) = edges(g); + ASSERT_EQ(ee, ei); +} + +TEST(ue2_graph, degree) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + add_edge(a, b, g); + add_edge(a, c, g); + add_edge(a, d, g); + + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(1U, degree(b, g)); + ASSERT_EQ(1U, degree(c, g)); + ASSERT_EQ(1U, degree(d, g)); + + add_edge(b, c, g); + + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(2U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(1U, degree(d, g)); + + add_edge(d, d, g); + ASSERT_EQ(3U, degree(a, g)); + ASSERT_EQ(2U, degree(b, 
g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); add_edge(b, a, g); + ASSERT_EQ(4U, degree(a, g)); + ASSERT_EQ(3U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); - ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_TRUE(in_degree_equal_to(a, g, 1)); - ASSERT_FALSE(in_degree_equal_to(a, g, 2)); + add_edge(b, a, g); + ASSERT_EQ(5U, degree(a, g)); + ASSERT_EQ(4U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(3U, degree(d, g)); - add_edge(c, a, g); - - ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_FALSE(in_degree_equal_to(a, g, 1)); - ASSERT_TRUE(in_degree_equal_to(a, g, 2)); - - add_edge(d, a, g); - - ASSERT_FALSE(in_degree_equal_to(a, g, 0)); - ASSERT_FALSE(in_degree_equal_to(a, g, 1)); - ASSERT_FALSE(in_degree_equal_to(a, g, 2)); + add_edge(d, d, g); + ASSERT_EQ(5U, degree(a, g)); + ASSERT_EQ(4U, degree(b, g)); + ASSERT_EQ(2U, degree(c, g)); + ASSERT_EQ(5U, degree(d, g)); } -TEST(graph_util, edge_by_target_1) { - unit_graph g; +TEST(ue2_graph, adj) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); - unit_vertex a = add_vertex(g); - unit_vertex b = add_vertex(g); - unit_vertex c = add_vertex(g); + add_edge(a, b, g); + add_edge(a, c, g); + add_edge(a, d, g); + add_edge(b, a, g); + add_edge(b, b, g); - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_FALSE(edge_by_target(a, b, g).second); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(c, b, g).second); + SimpleG::adjacency_iterator ai, ae; + tie(ai, ae) = adjacent_vertices(a, g); + ASSERT_EQ(b, *ai++); + ASSERT_EQ(c, *ai++); + ASSERT_EQ(d, *ai++); + ASSERT_EQ(ai, ae); - unit_edge ab = add_edge(a, b, g).first; + tie(ai, ae) = adjacent_vertices(b, g); + ASSERT_EQ(a, *ai++); + ASSERT_EQ(b, *ai++); + ASSERT_EQ(ai, ae); - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(b, b, g).second); - ASSERT_FALSE(edge_by_target(c, b, g).second); + tie(ai, ae) = adjacent_vertices(c, g); + ASSERT_EQ(ai, ae); - unit_edge cb = add_edge(c, b, g).first; - - ASSERT_FALSE(edge_by_target(a, a, g).second); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_FALSE(edge_by_target(b, b, g).second); - ASSERT_TRUE(edge_by_target(c, b, g).second); - ASSERT_TRUE(cb == edge_by_target(c, b, g).first); - - unit_edge aa = add_edge(a, a, g).first; - unit_edge bb = add_edge(b, b, g).first; - - ASSERT_TRUE(edge_by_target(a, a, g).second); - ASSERT_TRUE(aa == edge_by_target(a, a, g).first); - ASSERT_TRUE(edge_by_target(a, b, g).second); - ASSERT_TRUE(ab == edge_by_target(a, b, g).first); - ASSERT_FALSE(edge_by_target(a, c, g).second); - ASSERT_FALSE(edge_by_target(b, a, g).second); - ASSERT_TRUE(edge_by_target(b, b, g).second); - ASSERT_TRUE(bb == edge_by_target(b, b, g).first); - ASSERT_TRUE(edge_by_target(c, b, g).second); - ASSERT_TRUE(cb == edge_by_target(c, b, g).first); + tie(ai, ae) = adjacent_vertices(d, g); + ASSERT_EQ(ai, ae); +} + +TEST(ue2_graph, inv_adj) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + auto c = add_vertex(g); + auto d = add_vertex(g); + + add_edge(a, 
b, g);
+    add_edge(a, c, g);
+    add_edge(a, d, g);
+    add_edge(b, a, g);
+    add_edge(b, b, g);
+
+    SimpleG::inv_adjacency_iterator ai, ae;
+    tie(ai, ae) = inv_adjacent_vertices(a, g);
+    ASSERT_EQ(b, *ai++);
+    ASSERT_EQ(ai, ae);
+
+    tie(ai, ae) = inv_adjacent_vertices(b, g);
+    ASSERT_EQ(a, *ai++);
+    ASSERT_EQ(b, *ai++);
+    ASSERT_EQ(ai, ae);
+
+    tie(ai, ae) = inv_adjacent_vertices(c, g);
+    ASSERT_EQ(a, *ai++);
+    ASSERT_EQ(ai, ae);
+
+    tie(ai, ae) = inv_adjacent_vertices(d, g);
+    ASSERT_EQ(a, *ai++);
+    ASSERT_EQ(ai, ae);
+}
+
+TEST(ue2_graph, square_brackets_v) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+    auto c = add_vertex(g);
+    auto d = add_vertex(g);
+
+    ASSERT_EQ(0U, g[a].index);
+    ASSERT_EQ(1U, g[b].index);
+    ASSERT_EQ(2U, g[c].index);
+    ASSERT_EQ(3U, g[d].index);
+
+    ASSERT_EQ("SimpleV", g[a].test_v);
+    ASSERT_EQ("SimpleV", g[b].test_v);
+    ASSERT_EQ("SimpleV", g[c].test_v);
+    ASSERT_EQ("SimpleV", g[d].test_v);
+
+    g[a].test_v = "a";
+    g[b].test_v = "b";
+    g[c].test_v = "c";
+    g[d].test_v = "d";
+
+    ASSERT_EQ("a", g[a].test_v);
+    ASSERT_EQ("b", g[b].test_v);
+    ASSERT_EQ("c", g[c].test_v);
+    ASSERT_EQ("d", g[d].test_v);
+}
+
+TEST(ue2_graph, square_brackets_e) {
+    SimpleG g;
+    auto u = add_vertex(g);
+    auto v = add_vertex(g);
+    auto a = add_edge(u, v, g).first;
+    auto b = add_edge(u, v, g).first;
+    auto c = add_edge(u, u, g).first;
+    auto d = add_edge(v, u, g).first;
+
+    ASSERT_EQ(0U, g[a].index);
+    ASSERT_EQ(1U, g[b].index);
+    ASSERT_EQ(2U, g[c].index);
+    ASSERT_EQ(3U, g[d].index);
+
+    ASSERT_EQ("SimpleE", g[a].test_e);
+    ASSERT_EQ("SimpleE", g[b].test_e);
+    ASSERT_EQ("SimpleE", g[c].test_e);
+    ASSERT_EQ("SimpleE", g[d].test_e);
+
+    g[a].test_e = "a";
+    g[b].test_e = "b";
+    g[c].test_e = "c";
+    g[d].test_e = "d";
+
+    ASSERT_EQ("a", g[a].test_e);
+    ASSERT_EQ("b", g[b].test_e);
+    ASSERT_EQ("c", g[c].test_e);
+    ASSERT_EQ("d", g[d].test_e);
+}
+
+TEST(ue2_graph, vertex_ordering_1) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+    auto c = add_vertex(g);
+    auto d = add_vertex(g);
+
+    ASSERT_LE(a, b);
+    ASSERT_LE(a, c);
+    ASSERT_LE(a, d);
+    ASSERT_LE(b, c);
+    ASSERT_LE(b, d);
+    ASSERT_LE(c, d);
+
+    g[a].index = 5;
+    g[b].index = 0;
+    g[c].index = 3;
+    g[d].index = 1;
+
+    ASSERT_LE(a, b);
+    ASSERT_LE(a, c);
+    ASSERT_LE(a, d);
+    ASSERT_LE(b, c);
+    ASSERT_LE(b, d);
+    ASSERT_LE(c, d);
+}
+
+TEST(ue2_graph, vertex_ordering_2) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+    auto c = add_vertex(g);
+    auto d = add_vertex(g);
+
+    set<SimpleG::vertex_descriptor> s;
+    s.insert(a);
+    s.insert(b);
+    s.insert(c);
+    s.insert(d);
+
+    auto it = s.begin();
+    ASSERT_EQ(a, *it++);
+    ASSERT_EQ(b, *it++);
+    ASSERT_EQ(c, *it++);
+    ASSERT_EQ(d, *it++);
+    ASSERT_EQ(it, s.end());
+
+    g[a].index = 5;
+    g[b].index = 0;
+    g[c].index = 3;
+    g[d].index = 1;
+
+    it = s.begin();
+    ASSERT_EQ(a, *it++);
+    ASSERT_EQ(b, *it++);
+    ASSERT_EQ(c, *it++);
+    ASSERT_EQ(d, *it++);
+    ASSERT_EQ(it, s.end());
+}
+
+TEST(ue2_graph, get_v_2_arg) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+
+    auto pm = get(&SimpleV::test_v, g);
+
+    ASSERT_EQ("SimpleV", pm[a]);
+    ASSERT_EQ("SimpleV", pm[b]);
+
+    pm[a] = "a";
+    pm[b] = "b";
+
+    ASSERT_EQ("a", pm[a]);
+    ASSERT_EQ("b", pm[b]);
+
+    ASSERT_EQ("a", g[a].test_v);
+    ASSERT_EQ("b", g[b].test_v);
+
+    g[a].test_v = "X";
+    g[b].test_v = "Y";
+
+    ASSERT_EQ("X", pm[a]);
+    ASSERT_EQ("Y", pm[b]);
+
+    ASSERT_EQ("X", get(pm, a));
+    ASSERT_EQ("Y", get(pm, b));
+
+    put(pm, a, "A");
+    put(pm, b, "B");
+
+    ASSERT_EQ("A", g[a].test_v);
+    ASSERT_EQ("B",
g[b].test_v); +} + +TEST(ue2_graph, get_v_2_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + auto pm = get(&SimpleV::test_v, gg); + + ASSERT_EQ("SimpleV", pm[a]); + ASSERT_EQ("SimpleV", pm[b]); + + g[a].test_v = "a"; + g[b].test_v = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", get(pm, a)); + ASSERT_EQ("b", get(pm, b)); +} + +TEST(ue2_graph, get_e_2_arg) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + auto pm = get(&SimpleE::test_e, g); + + ASSERT_EQ("SimpleE", pm[a]); + ASSERT_EQ("SimpleE", pm[b]); + + pm[a] = "a"; + pm[b] = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", g[a].test_e); + ASSERT_EQ("b", g[b].test_e); + + g[a].test_e = "X"; + g[b].test_e = "Y"; + + ASSERT_EQ("X", pm[a]); + ASSERT_EQ("Y", pm[b]); + + ASSERT_EQ("X", get(pm, a)); + ASSERT_EQ("Y", get(pm, b)); + + put(pm, a, "A"); + put(pm, b, "B"); + + ASSERT_EQ("A", g[a].test_e); + ASSERT_EQ("B", g[b].test_e); +} + +TEST(ue2_graph, get_e_2_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + auto pm = get(&SimpleE::test_e, gg); + + ASSERT_EQ("SimpleE", pm[a]); + ASSERT_EQ("SimpleE", pm[b]); + + g[a].test_e = "a"; + g[b].test_e = "b"; + + ASSERT_EQ("a", pm[a]); + ASSERT_EQ("b", pm[b]); + + ASSERT_EQ("a", get(pm, a)); + ASSERT_EQ("b", get(pm, b)); +} + +TEST(ue2_graph, get_v_3_arg) { + SimpleG g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, g, a)); + + get(&SimpleV::test_v, g, a) = "a"; + get(&SimpleV::test_v, g, b) = "b"; + + ASSERT_EQ("a", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("b", get(&SimpleV::test_v, g, b)); + + ASSERT_EQ("a", g[a].test_v); + ASSERT_EQ("b", g[b].test_v); + + g[a].test_v = "X"; + g[b].test_v = "Y"; + + ASSERT_EQ("X", get(&SimpleV::test_v, g, a)); + ASSERT_EQ("Y", get(&SimpleV::test_v, g, b)); + + //std::decay::type x = "A"; + + put(&SimpleV::test_v, g, a, "A"); + put(&SimpleV::test_v, g, b, "B"); + + ASSERT_EQ("A", g[a].test_v); + ASSERT_EQ("B", g[b].test_v); +} + +TEST(ue2_graph, get_v_3_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto b = add_vertex(g); + + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, gg, a)); + ASSERT_EQ("SimpleV", get(&SimpleV::test_v, gg, b)); + + g[a].test_v = "a"; + g[b].test_v = "b"; + + ASSERT_EQ("a", get(&SimpleV::test_v, gg, a)); + ASSERT_EQ("b", get(&SimpleV::test_v, gg, b)); +} + +TEST(ue2_graph, get_e_3_arg) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = add_edge(v, u, g).first; + + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, g, b)); + + get(&SimpleE::test_e, g, a) = "a"; + get(&SimpleE::test_e, g, b) = "b"; + + ASSERT_EQ("a", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("b", get(&SimpleE::test_e, g, b)); + + ASSERT_EQ("a", g[a].test_e); + ASSERT_EQ("b", g[b].test_e); + + g[a].test_e = "X"; + g[b].test_e = "Y"; + + ASSERT_EQ("X", get(&SimpleE::test_e, g, a)); + ASSERT_EQ("Y", get(&SimpleE::test_e, g, b)); +} + +TEST(ue2_graph, get_e_3_arg_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto b = 
add_edge(v, u, g).first; + + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, gg, a)); + ASSERT_EQ("SimpleE", get(&SimpleE::test_e, gg, b)); + + g[a].test_e = "a"; + g[b].test_e = "b"; + + ASSERT_EQ("a", get(&SimpleE::test_e, gg, a)); + ASSERT_EQ("b", get(&SimpleE::test_e, gg, b)); +} + +TEST(ue2_graph, get_vertex_index) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_index, g); + ASSERT_EQ(0U, pm(a)); + pm(a) = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_index, g, a)); +} + +TEST(ue2_graph, get_vertex_index_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_index, gg); + ASSERT_EQ(0U, pm(a)); + g[a].index = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, get(vertex_index, gg, a)); +} + +TEST(ue2_graph, get_edge_index) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_index, g); + ASSERT_EQ(0U, pm(a)); + pm(a) = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(edge_index, g, a)); +} + +TEST(ue2_graph, get_edge_index_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_index, gg); + ASSERT_EQ(0U, pm(a)); + g[a].index = 1; + ASSERT_EQ(1U, pm[a]); + ASSERT_EQ(1U, get(edge_index, gg, a)); +} + +TEST(ue2_graph, get_vertex_all) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_all, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_all, g, a).index); + auto &a_all = get(vertex_all, g, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_vertex_all_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_all, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(vertex_all, gg, a).index); + auto &a_all = get(vertex_all, gg, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_vertex_bundle) { + SimpleG g; + auto a = add_vertex(g); + auto pm = get(vertex_bundle, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(vertex_bundle, g, a).index); + auto &a_bundle = get(vertex_bundle, g, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, get_vertex_bundle_const) { + SimpleG g; + const SimpleG &gg = g; + auto a = add_vertex(g); + auto pm = get(vertex_bundle, gg); + ASSERT_EQ(0U, pm(a).index); + g[a].index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, get(vertex_bundle, gg, a).index); + auto &a_bundle = get(vertex_bundle, gg, a); + ASSERT_EQ(1U, a_bundle.index); + g[a].index = 2; + ASSERT_EQ(2U, a_bundle.index); +} + +TEST(ue2_graph, get_edge_all) { + SimpleG g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = add_edge(u, v, g).first; + auto pm = get(edge_all, g); + ASSERT_EQ(0U, pm(a).index); + pm(a).index = 1; + ASSERT_EQ(1U, pm[a].index); + ASSERT_EQ(1U, g[a].index); + ASSERT_EQ(1U, get(edge_all, g, a).index); + auto &a_all = get(edge_all, g, a); + ASSERT_EQ(1U, a_all.index); + g[a].index = 2; + ASSERT_EQ(2U, a_all.index); +} + +TEST(ue2_graph, get_edge_all_const) { + SimpleG g; + const SimpleG &gg = g; + auto u = add_vertex(g); + auto v = add_vertex(g); + auto a = 
add_edge(u, v, g).first;
+    auto pm = get(edge_all, gg);
+    ASSERT_EQ(0U, pm(a).index);
+    g[a].index = 1;
+    ASSERT_EQ(1U, pm[a].index);
+    ASSERT_EQ(1U, get(edge_all, gg, a).index);
+    auto &a_all = get(edge_all, gg, a);
+    ASSERT_EQ(1U, a_all.index);
+    g[a].index = 2;
+    ASSERT_EQ(2U, a_all.index);
+}
+
+TEST(ue2_graph, get_edge_bundle) {
+    SimpleG g;
+    auto u = add_vertex(g);
+    auto v = add_vertex(g);
+    auto a = add_edge(u, v, g).first;
+    auto pm = get(edge_bundle, g);
+    ASSERT_EQ(0U, pm(a).index);
+    pm(a).index = 1;
+    ASSERT_EQ(1U, pm[a].index);
+    ASSERT_EQ(1U, g[a].index);
+    ASSERT_EQ(1U, get(edge_bundle, g, a).index);
+    auto &a_bundle = get(edge_bundle, g, a);
+    ASSERT_EQ(1U, a_bundle.index);
+    g[a].index = 2;
+    ASSERT_EQ(2U, a_bundle.index);
+}
+
+TEST(ue2_graph, get_edge_bundle_const) {
+    SimpleG g;
+    const SimpleG &gg = g;
+    auto u = add_vertex(g);
+    auto v = add_vertex(g);
+    auto a = add_edge(u, v, g).first;
+    auto pm = get(edge_bundle, gg);
+    ASSERT_EQ(0U, pm(a).index);
+    g[a].index = 1;
+    ASSERT_EQ(1U, pm[a].index);
+    ASSERT_EQ(1U, get(edge_bundle, gg, a).index);
+    auto &a_bundle = get(edge_bundle, gg, a);
+    ASSERT_EQ(1U, a_bundle.index);
+    g[a].index = 2;
+    ASSERT_EQ(2U, a_bundle.index);
+}
+
+TEST(ue2_graph, add_vertex_prop) {
+    SimpleG g;
+    SimpleV vp;
+    vp.index = 42;
+    vp.test_v = "prop";
+    auto u = add_vertex(vp, g);
+    auto v = add_vertex(vp, g);
+
+    ASSERT_EQ(0U, g[u].index);
+    ASSERT_EQ(1U, g[v].index);
+
+    ASSERT_EQ("prop", g[u].test_v);
+    ASSERT_EQ("prop", g[v].test_v);
+}
+
+TEST(ue2_graph, add_edge_prop) {
+    SimpleG g;
+    SimpleE ep;
+    ep.index = 42;
+    ep.test_e = "prop";
+    auto u = add_vertex(g);
+    auto v = add_vertex(g);
+
+    auto e = add_edge(u, v, ep, g).first;
+    auto f = add_edge(u, v, ep, g).first;
+
+    ASSERT_EQ(0U, g[e].index);
+    ASSERT_EQ(1U, g[f].index);
+
+    ASSERT_EQ("prop", g[e].test_e);
+    ASSERT_EQ("prop", g[f].test_e);
+}
+
+TEST(ue2_graph, reverse_graph) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+    auto e = add_edge(a, b, g).first;
+    reverse_graph<SimpleG, SimpleG &> rg(g);
+    auto index_map = get(vertex_index, rg);
+
+    ASSERT_EQ(0U, rg[a].index);
+    ASSERT_EQ(1U, rg[b].index);
+    ASSERT_EQ(0U, rg[e].index);
+
+    ASSERT_EQ(0U, get(vertex_index, rg, a));
+    ASSERT_EQ(1U, get(vertex_index, rg, b));
+    ASSERT_EQ(0U, get(edge_index, rg, edge(b, a, rg).first));
+
+    ASSERT_EQ(0U, index_map(a));
+    ASSERT_EQ(1U, index_map(b));
+
+    ASSERT_TRUE(edge(b, a, rg).second);
+    ASSERT_FALSE(edge(a, b, rg).second);
+}
+
+TEST(ue2_graph, reverse_graph_const) {
+    SimpleG g;
+    auto a = add_vertex(g);
+    auto b = add_vertex(g);
+    auto e = add_edge(a, b, g).first;
+    reverse_graph<const SimpleG, const SimpleG &> rg(g);
+    auto index_map = get(&SimpleV::index, rg);
+
+    // Note: reverse_graph fails to make bundles const so things break.
+    // ASSERT_EQ(0U, rg[a].index);
+    // ASSERT_EQ(1U, rg[b].index);
+    // ASSERT_EQ(0U, rg[e].index);
+
+    ASSERT_EQ(0U, get(vertex_index, g, a));
+    ASSERT_EQ(1U, get(vertex_index, g, b));
+    ASSERT_EQ(0U, get(edge_index, g, e));
+
+    ASSERT_EQ(0U, index_map(a));
+    ASSERT_EQ(1U, index_map(b));
+
+    ASSERT_TRUE(edge(b, a, rg).second);
+    ASSERT_FALSE(edge(a, b, rg).second);
+}
+
+TEST(ue2_graph, default_param) {
+    struct TestGraph : ue2_graph<TestGraph> { };
+    TestGraph g;
+
+    auto v = add_vertex(g);
+    auto e = add_edge(v, v, g).first;
+
+    ASSERT_EQ(0U, get(vertex_index, g, v));
+    ASSERT_EQ(0U, get(edge_index, g, e));
+#if !defined(_MSC_VER)
+    /* This makes MSVC up to VS2015 sad in ways that shouldn't happen.
*/ + ASSERT_EQ(0U, get(&ue2::graph_detail::default_edge_property::index, g, e)); +#endif } diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 6bb4fcb9..804fcb1f 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -31,14 +31,12 @@ #include "grey.h" #include "compiler/compiler.h" -#include "nfa/limex_context.h" #include "nfa/limex_internal.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng.h" #include "nfagraph/ng_limex.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/target_info.h" @@ -167,11 +165,10 @@ TEST_P(LimExModelTest, QueueExec) { TEST_P(LimExModelTest, CompressExpand) { ASSERT_TRUE(nfa != nullptr); - // 64-bit NFAs assume during compression that they have >= 5 bytes of - // compressed NFA state, which isn't true for our 8-state test pattern. We - // skip this test for just these models. - if (nfa->scratchStateSize == 8) { - return; + u32 real_state_size = nfa->scratchStateSize; + /* Only look at 8 bytes for limex 64 (rather than the padding) */ + if (nfa->type == LIMEX_NFA_64) { + real_state_size = sizeof(u64a); } initQueue(); @@ -195,8 +192,7 @@ TEST_P(LimExModelTest, CompressExpand) { memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, queue_prev_byte(&q, end)); - ASSERT_TRUE(std::equal(dest, dest + nfa->scratchStateSize, - full_state.get())); + ASSERT_TRUE(std::equal(dest, dest + real_state_size, full_state.get())); } TEST_P(LimExModelTest, InitCompressedState0) { diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 925092b3..38da1d8a 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -30,10 +30,10 @@ #include "gtest/gtest.h" #include "ue2common.h" +#include "util/compile_error.h" #include "util/make_unique.h" #include "util/multibit.h" #include "util/multibit_build.h" -#include "util/target_info.h" #include #include @@ -1303,9 +1303,11 @@ static const MultiBitTestParam multibitTests[] = { { 1U << 29, 24413 }, { 1U << 30, 50377 }, { 1U << 31, 104729 }, - - // { UINT32_MAX, 104729 }, // Very slow }; INSTANTIATE_TEST_CASE_P(MultiBit, MultiBitTest, ValuesIn(multibitTests)); +TEST(MultiBit, SizeTooBig) { + ASSERT_NO_THROW(mmbit_size(MMB_MAX_BITS)); + ASSERT_THROW(mmbit_size(MMB_MAX_BITS + 1), ResourceLimitError); +} diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 3ca1923f..8fda9223 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -84,7 +84,7 @@ TEST(NFAGraph, RemoveEquivalence1) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our vertex - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be two edges from v to nodes with reachability 'b' and 'c' NFAVertex b = NGHolder::null_vertex(); @@ -101,8 +101,8 @@ TEST(NFAGraph, RemoveEquivalence1) { } } // check if we found our vertices - ASSERT_TRUE(b != nullptr); - ASSERT_TRUE(c != nullptr); + ASSERT_TRUE(b != NGHolder::null_vertex()); + ASSERT_TRUE(c != NGHolder::null_vertex()); // both vertices should have an edge to accept ASSERT_TRUE(edge(b, g.accept, g).second); @@ -145,7 +145,7 @@ TEST(NFAGraph, RemoveEquivalence2) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our vertex - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be two edges from v to nodes with reachability 'b' and 
'c' NFAVertex b = NGHolder::null_vertex(); @@ -162,8 +162,8 @@ TEST(NFAGraph, RemoveEquivalence2) { } } // check if we found our vertices - ASSERT_TRUE(b != nullptr); - ASSERT_TRUE(c != nullptr); + ASSERT_TRUE(b != NGHolder::null_vertex()); + ASSERT_TRUE(c != NGHolder::null_vertex()); // both new vertices should have edges from startDs ASSERT_TRUE(edge(g.startDs, b, g).second); @@ -207,7 +207,7 @@ TEST(NFAGraph, RemoveEquivalence3) { ASSERT_TRUE(tmpcr.test('a')); } // check if we found our 'a' - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // There should be an edge from 'a' to '.' ASSERT_EQ(1U, out_degree(a, g)); @@ -234,7 +234,6 @@ TEST(NFAGraph, RemoveEquivalence3) { NFAVertex X = NGHolder::null_vertex(); NFAVertex Y = NGHolder::null_vertex(); for (NFAVertex tmp : adjacent_vertices_range(dot2, g)) { - // we already know about dot1, so skip it if (tmp == dot1) { continue; @@ -251,8 +250,8 @@ TEST(NFAGraph, RemoveEquivalence3) { } } // check if we found both vertices - ASSERT_TRUE(X != nullptr); - ASSERT_TRUE(Y != nullptr); + ASSERT_TRUE(X != NGHolder::null_vertex()); + ASSERT_TRUE(Y != NGHolder::null_vertex()); // finally, check if these two vertices only have edges to accept ASSERT_EQ(1U, out_degree(X, g)); @@ -306,8 +305,8 @@ TEST(NFAGraph, RemoveEquivalence4) { } } // check if we found both vertices - ASSERT_TRUE(X != nullptr); - ASSERT_TRUE(Y != nullptr); + ASSERT_TRUE(X != NGHolder::null_vertex()); + ASSERT_TRUE(Y != NGHolder::null_vertex()); // now, find first dot from X ASSERT_EQ(1U, out_degree(X, g)); @@ -351,7 +350,7 @@ TEST(NFAGraph, RemoveEquivalence4) { } } // make sure we found our 'a' - ASSERT_TRUE(a != nullptr); + ASSERT_TRUE(a != NGHolder::null_vertex()); // now, check if 'a' has an edge to accept ASSERT_EQ(1U, out_degree(a, g)); @@ -396,7 +395,7 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_TRUE(edge(v, v, g).second); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); // now, find the vertex leading to accept NFAVertex v2 = NGHolder::null_vertex(); @@ -414,7 +413,7 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_TRUE(edge(tmp, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v2 != nullptr); + ASSERT_TRUE(v2 != NGHolder::null_vertex()); } // catching UE-2692 @@ -452,7 +451,7 @@ TEST(NFAGraph, RemoveEquivalence6) { ASSERT_TRUE(edge(v, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); } // catching UE-2692 @@ -492,7 +491,7 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_EQ(1U, proper_out_degree(v, g)); } // check if we found our vertex - ASSERT_TRUE(v != nullptr); + ASSERT_TRUE(v != NGHolder::null_vertex()); // find the next vertex and ensure it has an edge to accept NFAVertex v2 = NGHolder::null_vertex(); @@ -511,7 +510,7 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_TRUE(edge(v2, g.accept, g).second); } // check if we found our vertex - ASSERT_TRUE(v2 != nullptr); + ASSERT_TRUE(v2 != NGHolder::null_vertex()); } TEST(NFAGraph, RemoveEquivalence_Reports1) { diff --git a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index acb3cc7b..be9527fd 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -55,13 +55,13 @@ TEST(NFAGraph, RemoveRedundancy1) { unique_ptr graph(constructGraphWithCC("(a|b)c", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); + NGHolder &g = *graph; // Run removeRedundancy - removeRedundancy(*graph, 
SOM_NONE); - NFAGraph &g = graph->g; + removeRedundancy(g, SOM_NONE); // Our graph should only have two non-special nodes - ASSERT_EQ((size_t)N_SPECIALS + 2, num_vertices(*graph)); + ASSERT_EQ((size_t)N_SPECIALS + 2, num_vertices(g)); // Dot-star start state should be connected to itself and a single other // vertex @@ -98,13 +98,13 @@ TEST(NFAGraph, RemoveRedundancy2) { unique_ptr graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph.get() != nullptr); + NGHolder &g = *graph; // Run removeRedundancy - removeRedundancy(*graph, SOM_NONE); - NFAGraph &g = graph->g; + removeRedundancy(g, SOM_NONE); // Our graph should now have only 3 non-special vertices - ASSERT_EQ((size_t)N_SPECIALS + 3, num_vertices(*graph)); + ASSERT_EQ((size_t)N_SPECIALS + 3, num_vertices(g)); // Dot-star start state should be connected to itself and a single other // vertex @@ -156,12 +156,12 @@ TEST(NFAGraph, RemoveRedundancy3) { cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // The '(a|b)?' construction (two states) should have disappeared, leaving // this expr as 'foobar.*teakettle' - ASSERT_EQ(countBefore - 2, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 2, num_vertices(*graph)); } TEST(NFAGraph, RemoveRedundancy4) { @@ -169,11 +169,11 @@ TEST(NFAGraph, RemoveRedundancy4) { unique_ptr graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // We should end up with the alternation collapsing into one state - ASSERT_EQ(countBefore - 3, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 3, num_vertices(*graph)); } TEST(NFAGraph, RemoveRedundancy5) { @@ -182,12 +182,12 @@ TEST(NFAGraph, RemoveRedundancy5) { cc, 0)); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_vertices(graph->g); + unsigned countBefore = num_vertices(*graph); removeRedundancy(*graph, SOM_NONE); // Since we don't return a start offset, the first state ('[0-9]?') is // redundant. - ASSERT_EQ(countBefore - 1, num_vertices(graph->g)); + ASSERT_EQ(countBefore - 1, num_vertices(*graph)); } TEST(NFAGraph, RemoveEdgeRedundancy1) { @@ -196,12 +196,12 @@ TEST(NFAGraph, RemoveEdgeRedundancy1) { auto graph = constructGraphWithCC("A+hatstand", cc, HS_FLAG_DOTALL); ASSERT_TRUE(graph.get() != nullptr); - unsigned countBefore = num_edges(graph->g); + unsigned countBefore = num_edges(*graph); removeEdgeRedundancy(*graph, SOM_NONE, cc); // One edge (the self-loop on the leading A+) should have been removed. - ASSERT_EQ(countBefore - 1, num_edges(graph->g)); + ASSERT_EQ(countBefore - 1, num_edges(*graph)); } TEST(NFAGraph, RemoveEdgeRedundancy2) { @@ -210,12 +210,12 @@ TEST(NFAGraph, RemoveEdgeRedundancy2) { auto graph = constructGraphWithCC("foo.*A*bar", cc, HS_FLAG_DOTALL); ASSERT_TRUE(graph.get() != nullptr); - size_t numEdgesBefore = num_edges(graph->g); - size_t numVertsBefore = num_vertices(graph->g); + size_t numEdgesBefore = num_edges(*graph); + size_t numVertsBefore = num_vertices(*graph); removeEdgeRedundancy(*graph, SOM_NONE, cc); // The .* should swallow up the A* and its self-loop. 
-    ASSERT_EQ(numEdgesBefore - 4, num_edges(graph->g));
-    ASSERT_EQ(numVertsBefore - 1, num_vertices(graph->g));
+    ASSERT_EQ(numEdgesBefore - 4, num_edges(*graph));
+    ASSERT_EQ(numVertsBefore - 1, num_vertices(*graph));
 }
diff --git a/unit/internal/nfagraph_util.cpp b/unit/internal/nfagraph_util.cpp
index 135276dd..b6952f5a 100644
--- a/unit/internal/nfagraph_util.cpp
+++ b/unit/internal/nfagraph_util.cpp
@@ -320,9 +320,9 @@ TEST(NFAGraph, cyclicVerts1) {
     add_edge(a, b, g);
     add_edge(b, a, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts2) {
@@ -341,9 +341,9 @@ TEST(NFAGraph, cyclicVerts2) {
     add_edge(c, d, g);
     add_edge(a, e, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts3) {
@@ -369,9 +369,9 @@ TEST(NFAGraph, cyclicVerts3) {
     add_edge(f, h, g);
     add_edge(h, h, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e, h}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts4) {
@@ -396,9 +396,9 @@ TEST(NFAGraph, cyclicVerts4) {
     add_edge(e, f, g);
     add_edge(f, h, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, a, b, c, d, e}), cyclics);
 }
 
 TEST(NFAGraph, cyclicVerts5) {
@@ -418,7 +418,7 @@ TEST(NFAGraph, cyclicVerts5) {
     add_edge(c, d, g);
     add_edge(e, c, g);
 
-    auto cyclics = findVerticesInCycles(g);
+    auto cyclics = find_vertices_in_cycles(g);
 
-    ASSERT_EQ(set<NFAVertex>({g.startDs, b, c}), cyclics);
+    ASSERT_EQ(flat_set<NFAVertex>({g.startDs, b, c}), cyclics);
 }
diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp
index 3f5a8382..291c241a 100644
--- a/unit/internal/rose_build_merge.cpp
+++ b/unit/internal/rose_build_merge.cpp
@@ -64,7 +64,6 @@ RoseVertex addVertex(RoseBuildImpl &build, RoseVertex parent, u32 lit_id) {
     RoseGraph &g = build.g;
 
     RoseVertex v = add_vertex(g);
-    g[v].idx = build.vertexIndex++;
     g[v].min_offset = 0;
     g[v].max_offset = ROSE_BOUND_INF;
     g[v].literals.insert(lit_id);
diff --git a/unit/internal/rose_mask_32.cpp b/unit/internal/rose_mask_32.cpp
new file mode 100644
index 00000000..732f51a0
--- /dev/null
+++ b/unit/internal/rose_mask_32.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "rose/validate_mask.h" +#include "gtest/gtest.h" + +#define ONES32 0xffffffffu + +union RoseLookaroundMask32 { + m256 a256; + u8 a8[32]; +}; + +struct ValidateMask32TestInfo { + RoseLookaroundMask32 data; + u32 valid_mask; + RoseLookaroundMask32 and_mask; + RoseLookaroundMask32 cmp_mask; + u32 neg_mask; +}; + +struct ValidateMask32InitInfo { + int idx; + u8 data; + u8 and_mask; + u8 cmp_mask; + u8 neg_mask; +}; + + +static const ValidateMask32InitInfo testBasicIdx[][33] = { + { + {1, 0x34, 0xf8, 0x30, 0}, + {2, 0x34, 0xf8, 0x30, 0}, + {8, 0x23, 0xff, 0x23, 0}, + {9, 0x34, 0xf8, 0x30, 0}, + {10, 0x41, 0xdf, 0x41, 0}, + {11, 0x63, 0xdd, 0x41, 0}, + {12, 0x61, 0xdd, 0x41, 0}, + {13, 0x41, 0xdf, 0x41, 0}, + {14, 0x61, 0xdf, 0x41, 0}, + {15, 0x41, 0xdf, 0x41, 0}, + {16, 0x43, 0xdd, 0x41, 0}, + {17, 0x61, 0xdd, 0x41, 0}, + {23, 0x63, 0xdd, 0x41, 0}, + {24, 0x4f, 0xfc, 0x4c, 0}, + {25, 0x4d, 0xfc, 0x4c, 0}, + {26, 0x4d, 0xfc, 0x4c, 0}, + {-1, 0, 0, 0, 0}, + }, + { + {11, 0, 0xff, 0x55, 1}, + {12, 0, 0xff, 0x36, 1}, + {13, 0, 0xfe, 0x34, 1}, + {14, 0x4d, 0xfe, 0x4c, 0}, + {15, 0x41, 0xbf, 0x01, 0}, + {16, 0x53, 0xdf, 0x73, 1}, + {17, 0x4b, 0, 0, 0}, + {18, 0, 0x2c, 0x2c, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {15, 0x46, 0xdf, 0x46, 0}, + {16, 0x4f, 0xdf, 0x46, 1}, + {17, 0x6f, 0xff, 0x6f, 0}, + {18, 0x31, 0xfe, 0x30, 0}, + {19, 0x34, 0xf8, 0x30, 0}, + {20, 0x66, 0xc0, 0x40, 0}, + {21, 0x6f, 0xf0, 0x60, 0}, + {22, 0x6f, 0, 0, 0}, + {23, 0x46, 0xdf, 0x44, 1}, + {24, 0x4f, 0xdf, 0x46, 1}, + {25, 0x6f, 0xff, 0x4f, 1}, + {26, 0x31, 0xfe, 0x30, 0}, + {27, 0x34, 0xf8, 0x34, 1}, + {28, 0x66, 0xc0, 0x60, 1}, + {29, 0x6f, 0xf0, 0x6f, 1}, + {30, 0x6f, 0, 0x60, 1}, + {-1, 0, 0, 0, 0}, + }, + { + {31, 0x4a, 0x80, 0, 0}, + {-1, 0, 0, 0, 1}, + }, + { + {12, 0x2b, 0x3d, 0x2d, 1}, + {13, 0x2b, 0x3d, 0x4c, 1}, + {23, 0x4a, 0x88, 0x0a, 1}, + {-1, 0, 0, 0, 0}, + }, +}; + +static void initTestInfo(ValidateMask32TestInfo &t) { + t.data.a256 = zeroes256(); + t.valid_mask = 0xffffffff; + t.and_mask.a256 = zeroes256(); + t.cmp_mask.a256 = zeroes256(); + t.neg_mask = 0; +}; + + +static +int testBasicInit(ValidateMask32TestInfo *testB) { + int len = 0; + ValidateMask32TestInfo t; + for (size_t i = 0; i < ARRAY_LENGTH(testBasicIdx); i++) { + initTestInfo(t); + for (const auto &line: testBasicIdx[i]) { + if (line.idx < 0) { + break; + } + int index = line.idx; + t.data.a8[index] = line.data; + t.and_mask.a8[index] = line.and_mask; + t.cmp_mask.a8[index] = line.cmp_mask; + t.neg_mask |= line.neg_mask << index; + } + testB[i] = t; + len++; + } + return len; +} + +TEST(ValidateMask32, testMask32_1) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int i = 0; i < 
test_len; i++) { + const auto t = testBasic[i]; + EXPECT_EQ(1, validateMask32(t.data.a256, t.valid_mask, + t.and_mask.a256, t.cmp_mask.a256, + t.neg_mask)); + } +} + +TEST(ValidateMask32, testMask32_2) { + ValidateMask32TestInfo testBasic[20]; + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + bool_result = !(valid_mask & t.neg_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + 0)); + bool_result = (valid_mask & t.neg_mask) == valid_mask; + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + ONES32)); + } + } + } +} + +TEST(ValidateMask32, testMask32_3) { + ValidateMask32TestInfo testBasic[20]; + testing::internal::Random neg_mask_rand(451); + int test_len = testBasicInit(testBasic); + for (int left = 0; left <= 32; left++) { + for (int right = 0; right + left < 32; right++) { + u32 valid_mask = ONES32 << (left + right) >> left; + for (int i = 0; i < test_len; i++) { + const auto &t = testBasic[i]; + int bool_result; + for (int j = 0; j < 5000; j++) { + u32 neg_mask = neg_mask_rand.Generate(1u << 31); + bool_result = (neg_mask & valid_mask) == + (t.neg_mask & valid_mask); + EXPECT_EQ(bool_result, validateMask32(t.data.a256, + valid_mask, + t.and_mask.a256, + t.cmp_mask.a256, + neg_mask)); + } + } + } + } +} diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index 614b641d..a4632c36 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -54,14 +54,14 @@ TEST(Shuffle, PackedExtract32_1) { for (unsigned int i = 0; i < 32; i++) { // shuffle a single 1 bit to the front u32 mask = 1U << i; - EXPECT_EQ(1U, packedExtract32(mask, mask)); - EXPECT_EQ(1U, packedExtract32(~0U, mask)); + EXPECT_EQ(1U, pext32(mask, mask)); + EXPECT_EQ(1U, pext32(~0U, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, packedExtract32(0, mask)); - EXPECT_EQ(0U, packedExtract32(~mask, mask)); + EXPECT_EQ(0U, pext32(0, mask)); + EXPECT_EQ(0U, pext32(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 32); j++) { - EXPECT_EQ(0U, packedExtract32((1U << j), mask)); + EXPECT_EQ(0U, pext32((1U << j), mask)); } } } @@ -69,10 +69,10 @@ TEST(Shuffle, PackedExtract32_1) { TEST(Shuffle, PackedExtract32_2) { // All 32 bits in mask are on u32 mask = ~0U; - EXPECT_EQ(0U, packedExtract32(0, mask)); - EXPECT_EQ(mask, packedExtract32(mask, mask)); + EXPECT_EQ(0U, pext32(0, mask)); + EXPECT_EQ(mask, pext32(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, packedExtract32(1U << i, mask)); + EXPECT_EQ(1U << i, pext32(1U << i, mask)); } } @@ -84,16 +84,16 @@ TEST(Shuffle, PackedExtract32_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask)); - EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask)); - EXPECT_EQ(0U, packedExtract32(~mask, mask)); - EXPECT_EQ(0U, packedExtract32(mask, ~mask)); + EXPECT_EQ((1U << 16) - 1, pext32(mask, mask)); + EXPECT_EQ((1U << 16) - 1, pext32(~mask, ~mask)); + EXPECT_EQ(0U, pext32(~mask, mask)); + EXPECT_EQ(0U, pext32(mask, ~mask)); for (unsigned int i = 0; i < 32; i += 2) { - EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask)); - EXPECT_EQ(0U, packedExtract32(1U << i, ~mask)); 
- EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask)); - EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask)); + EXPECT_EQ(1U << (i/2), pext32(1U << i, mask)); + EXPECT_EQ(0U, pext32(1U << i, ~mask)); + EXPECT_EQ(1U << (i/2), pext32(1U << (i+1), ~mask)); + EXPECT_EQ(0U, pext32(1U << (i+1), mask)); } } @@ -102,14 +102,14 @@ TEST(Shuffle, PackedExtract64_1) { for (unsigned int i = 0; i < 64; i++) { // shuffle a single 1 bit to the front u64a mask = 1ULL << i; - EXPECT_EQ(1U, packedExtract64(mask, mask)); - EXPECT_EQ(1U, packedExtract64(~0ULL, mask)); + EXPECT_EQ(1U, pext64(mask, mask)); + EXPECT_EQ(1U, pext64(~0ULL, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0U, packedExtract64(~mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0U, pext64(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 64); j++) { - EXPECT_EQ(0U, packedExtract64((1ULL << j), mask)); + EXPECT_EQ(0U, pext64((1ULL << j), mask)); } } } @@ -117,26 +117,26 @@ TEST(Shuffle, PackedExtract64_1) { TEST(Shuffle, PackedExtract64_2) { // Fill first half of mask u64a mask = 0x00000000ffffffffULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << i, pext64(1ULL << i, mask)); } // Fill second half of mask mask = 0xffffffff00000000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 32; i < 64; i++) { - EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 32), pext64(1ULL << i, mask)); } // Try one in the middle mask = 0x0000ffffffff0000ULL; - EXPECT_EQ(0U, packedExtract64(0, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0U, pext64(0, mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); for (unsigned int i = 16; i < 48; i++) { - EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 16), pext64(1ULL << i, mask)); } } @@ -148,16 +148,16 @@ TEST(Shuffle, PackedExtract64_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); - EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask)); - EXPECT_EQ(0U, packedExtract64(~mask, mask)); - EXPECT_EQ(0U, packedExtract64(mask, ~mask)); + EXPECT_EQ(0xffffffffU, pext64(mask, mask)); + EXPECT_EQ(0xffffffffU, pext64(~mask, ~mask)); + EXPECT_EQ(0U, pext64(~mask, mask)); + EXPECT_EQ(0U, pext64(mask, ~mask)); for (unsigned int i = 0; i < 64; i += 2) { - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask)); - EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask)); - EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << i, mask)); + EXPECT_EQ(0U, pext64(1ULL << i, ~mask)); + EXPECT_EQ(1U << (i/2), pext64(1ULL << (i+1), ~mask)); + EXPECT_EQ(0U, pext64(1ULL << (i+1), mask)); } } diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index 81495a9c..06407c41 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -47,7 +47,7 @@ TEST(Shufti, BuildMask1) { chars.set('a'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + 
int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -75,7 +75,7 @@ TEST(Shufti, BuildMask2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -96,7 +96,7 @@ TEST(Shufti, BuildMask4) { chars.set('A'); chars.set('b'); - int ret = shuftiBuildMasks(chars, &lomask, &himask); + int ret = shuftiBuildMasks(chars, (u8 *)&lomask, (u8 *)&himask); ASSERT_NE(-1, ret); u8 *lo = (u8 *)&lomask; @@ -113,12 +113,12 @@ TEST(Shufti, ExecNoMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 32; i++) { const u8 *rv = shuftiExec(lo, hi, (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); ASSERT_LE(((size_t)t1 + strlen(t1)) & ~0xf, (size_t)rv); @@ -132,7 +132,7 @@ TEST(Shufti, ExecNoMatch2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -150,7 +150,7 @@ TEST(Shufti, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -168,16 +168,16 @@ TEST(Shufti, ExecMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ - char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; + char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbabbbbbbbbbbbb"; - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 32; i++) { const u8 *rv = shuftiExec(lo, hi, (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); - ASSERT_EQ((size_t)t1 + 17, (size_t)rv); + ASSERT_EQ((size_t)t1 + 33, (size_t)rv); } } @@ -187,7 +187,7 @@ TEST(Shufti, ExecMatch2) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -207,7 +207,7 @@ TEST(Shufti, ExecMatch3) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -229,7 +229,7 @@ TEST(Shufti, ExecMatch4) { chars.set('A'); chars.set('c'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -263,7 +263,7 @@ TEST(Shufti, ExecMatch5) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -283,8 +283,8 @@ TEST(DoubleShufti, BuildMask1) { lits.insert(make_pair('a', 'B')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = 
shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -326,8 +326,8 @@ TEST(DoubleShufti, BuildMask2) { lits.insert(make_pair('a','z')); lits.insert(make_pair('B','z')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -354,8 +354,8 @@ TEST(DoubleShufti, BuildMask4) { lits.insert(make_pair('A','z')); lits.insert(make_pair('b','z')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -383,8 +383,8 @@ TEST(DoubleShufti, BuildMask5) { CharReach bytes; bytes.set('X'); - bool ret = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(bytes, lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -421,8 +421,8 @@ TEST(DoubleShufti, BuildMask6) { lits.insert(make_pair('A','x')); lits.insert(make_pair('b','x')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; @@ -473,8 +473,8 @@ TEST(DoubleShufti, BuildMask7) { lits.insert(make_pair('u','v')); lits.insert(make_pair('w','x')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, - &lo2m, &hi2m); + bool rv = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1m, (u8 *)&hi1m, + (u8 *)&lo2m, (u8 *)&hi2m); ASSERT_FALSE(rv); } @@ -485,8 +485,8 @@ TEST(DoubleShufti, ExecNoMatch1) { lits.insert(make_pair('a','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, - &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -506,7 +506,8 @@ TEST(DoubleShufti, ExecNoMatch1b) { lits.insert(make_pair('b','a')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -527,7 +528,8 @@ TEST(DoubleShufti, ExecNoMatch2) { lits.insert(make_pair('a','b')); lits.insert(make_pair('B','b')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -548,14 +550,15 @@ TEST(DoubleShufti, ExecNoMatch2b) { lits.insert(make_pair('b','a')); lits.insert(make_pair('b','B')); - bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; for (size_t i = 0; i < 16; i++) { - const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, - (u8 *)t1 + i, (u8 *)t1 + strlen(t1)); + const u8 *rv = 
shuftiDoubleExec(lo1, hi1, lo2, hi2, (u8 *)t1 + i,
+                                        (u8 *)t1 + strlen(t1));
 
         ASSERT_EQ((size_t)t1 + i + 15, (size_t)rv);
     }
 }
@@ -568,7 +571,8 @@ TEST(DoubleShufti, ExecNoMatch3) {
 
     lits.insert(make_pair('V','e'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
@@ -588,7 +592,8 @@ TEST(DoubleShufti, ExecNoMatch3b) {
 
     lits.insert(make_pair('e','V'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee";
@@ -601,6 +606,28 @@ TEST(DoubleShufti, ExecNoMatch3b) {
     }
 }
 
+TEST(DoubleShufti, ExecMatchShort1) {
+    m128 lo1, hi1, lo2, hi2;
+
+    flat_set<pair<u8, u8>> lits;
+
+    lits.insert(make_pair('a','b'));
+
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
+    ASSERT_TRUE(ret);
+
+    /* 0123456789012345678901234567890 */
+    char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbb";
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2,
+                                        (u8 *)t1 + i, (u8 *)t1 + strlen(t1));
+
+        ASSERT_EQ((size_t)t1 + 17, (size_t)rv);
+    }
+}
+
 TEST(DoubleShufti, ExecMatch1) {
     m128 lo1, hi1, lo2, hi2;
@@ -608,7 +635,8 @@ TEST(DoubleShufti, ExecMatch1) {
 
     lits.insert(make_pair('a','b'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     /* 0123456789012345678901234567890 */
@@ -629,7 +657,8 @@ TEST(DoubleShufti, ExecMatch2) {
 
     lits.insert(make_pair('a','a'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     /* 0123456789012345678901234567890 */
@@ -651,7 +680,8 @@ TEST(DoubleShufti, ExecMatch3) {
     lits.insert(make_pair('B','a'));
     lits.insert(make_pair('a','a'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     /* 0123456789012345678901234567890 */
@@ -675,7 +705,8 @@ TEST(DoubleShufti, ExecMatch4) {
     lits.insert(make_pair('C','a'));
     lits.insert(make_pair('c','a'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     /* 0123456789012345678901234567890 */
@@ -717,7 +748,8 @@ TEST(DoubleShufti, ExecMatch4b) {
     lits.insert(make_pair('a','C'));
     lits.insert(make_pair('a','c'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     /* 0123456789012345678901234567890 */
@@ -756,7 +788,8 @@ TEST(DoubleShufti, ExecMatch5) {
 
     lits.insert(make_pair('a','A'));
 
-    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2);
+    bool ret = shuftiBuildDoubleMasks(CharReach(), lits, (u8 *)&lo1, (u8 *)&hi1,
+                                      (u8 *)&lo2, (u8 *)&hi2);
     ASSERT_TRUE(ret);
 
     char t1[] =
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -780,7 +813,8 @@ TEST(DoubleShufti, ExecMatchMixed1) { // just one one-byte literal onebyte.set('a'); - bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -804,7 +838,8 @@ TEST(DoubleShufti, ExecMatchMixed2) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -838,7 +873,8 @@ TEST(DoubleShufti, ExecMatchMixed3) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, (u8 *)&lo1, (u8 *)&hi1, + (u8 *)&lo2, (u8 *)&hi2); ASSERT_TRUE(ret); const int len = 420; @@ -871,7 +907,7 @@ TEST(ReverseShufti, ExecNoMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -890,7 +926,7 @@ TEST(ReverseShufti, ExecNoMatch2) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -908,7 +944,7 @@ TEST(ReverseShufti, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -926,7 +962,7 @@ TEST(ReverseShufti, ExecMatch1) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -947,7 +983,7 @@ TEST(ReverseShufti, ExecMatch2) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -969,7 +1005,7 @@ TEST(ReverseShufti, ExecMatch3) { chars.set('a'); chars.set('B'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -1003,7 +1039,7 @@ TEST(ReverseShufti, ExecMatch4) { chars.set('A'); chars.set('c'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); /* 0123456789012345678901234567890 */ @@ -1038,7 +1074,7 @@ TEST(ReverseShufti, ExecMatch5) { CharReach chars; chars.set('a'); - int ret = shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -1058,7 +1094,7 @@ TEST(ReverseShufti, ExecMatch6) { CharReach chars; chars.set('a'); - int ret = 
shuftiBuildMasks(chars, &lo, &hi); + int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); const size_t len = 256; diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 3c07b2b0..7b34d92e 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -110,10 +110,10 @@ void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); } void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); } void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); } void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); } -bool simd_testbit(const m128 *a, unsigned int i) { return testbit128(a, i); } -bool simd_testbit(const m256 *a, unsigned int i) { return testbit256(a, i); } -bool simd_testbit(const m384 *a, unsigned int i) { return testbit384(a, i); } -bool simd_testbit(const m512 *a, unsigned int i) { return testbit512(a, i); } +bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); } +bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); } +bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); } +bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); } u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); } u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); } u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); } @@ -419,15 +419,15 @@ TYPED_TEST(SimdUtilsTest, testbit) { // First, all bits are on in 'ones'. for (unsigned int i = 0; i < total_bits; i++) { - ASSERT_EQ(1, simd_testbit(&ones, i)) << "bit " << i << " is on"; + ASSERT_EQ(1, simd_testbit(ones, i)) << "bit " << i << " is on"; } // Try individual bits; only 'i' should be on. for (unsigned int i = 0; i < total_bits; i++) { TypeParam a = setbit(i); for (unsigned int j = 0; j < total_bits; j++) { - ASSERT_EQ(i == j ? 1 : 0, simd_testbit(&a, j)) << "bit " << i - << " is wrong"; + ASSERT_EQ(i == j ? 
1 : 0, simd_testbit(a, j)) << "bit " << i + << " is wrong"; } } } @@ -470,7 +470,7 @@ TYPED_TEST(SimdUtilsTest, diffrich) { // and nothing is on in zeroes for (unsigned int i = 0; i < total_bits; i++) { - ASSERT_EQ(0, simd_testbit(&zeroes, i)) << "bit " << i << " is off"; + ASSERT_EQ(0, simd_testbit(zeroes, i)) << "bit " << i << " is off"; } // All-zeroes and all-ones differ in all words @@ -614,6 +614,12 @@ TEST(SimdUtilsTest, set16x8) { } } +TEST(SimdUtilsTest, set4x32) { + u32 cmp[4] = { 0x12345678, 0x12345678, 0x12345678, 0x12345678 }; + m128 simd = set4x32(cmp[0]); + ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd))); +} + #if defined(__AVX2__) TEST(SimdUtilsTest, set32x8) { char cmp[sizeof(m256)]; @@ -693,4 +699,50 @@ TEST(SimdUtilsTest, variableByteShift128) { EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, -16))); } +TEST(SimdUtilsTest, max_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = "!!23455889aBCd\xff\xff"; + char expec[] = "0123456889aBCd\xff\xff"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = max_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, min_u8_m128) { + char base1[] = "0123456789ABCDE\xfe"; + char base2[] = "!!23455889aBCd\xff\xff"; + char expec[] = "!!23455789ABCDE\xfe"; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = min_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sadd_u8_m128) { + unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sadd_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + +TEST(SimdUtilsTest, sub_u8_m128) { + unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D', + 'a', 'b', 'c', 'd', '1', '2', '3', '4'}; + unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4', + '1', '2', '3', '4', '1', '2', '3', '4'}; + unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10, + 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0}; + m128 in1 = loadu128(base1); + m128 in2 = loadu128(base2); + m128 result = sub_u8_m128(in1, in2); + EXPECT_TRUE(!diff128(result, loadu128(expec))); +} + } // namespace diff --git a/unit/internal/truffle.cpp b/unit/internal/truffle.cpp index 859c8a08..e9e4f19c 100644 --- a/unit/internal/truffle.cpp +++ b/unit/internal/truffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,9 +45,9 @@ TEST(Truffle, CompileDot) { chars.setall(); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); - CharReach out = truffle2cr(mask1, mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); @@ -64,8 +64,8 @@ TEST(Truffle, CompileChars) { mask2 = zeroes128(); chars.clear(); chars.set((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); 
ASSERT_EQ(out, chars); } @@ -74,8 +74,8 @@ TEST(Truffle, CompileChars) { mask1 = zeroes128(); mask2 = zeroes128(); chars.set((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); } @@ -84,8 +84,8 @@ TEST(Truffle, CompileChars) { mask1 = zeroes128(); mask2 = zeroes128(); chars.clear((u8)c); - truffleBuildMasks(chars, &mask1, &mask2); - CharReach out = truffle2cr(mask1, mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); + CharReach out = truffle2cr((u8 *)&mask1, (u8 *)&mask2); ASSERT_EQ(out, chars); } @@ -100,7 +100,7 @@ TEST(Truffle, ExecNoMatch1) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\xff"; @@ -119,7 +119,7 @@ TEST(Truffle, ExecNoMatch2) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -137,7 +137,7 @@ TEST(Truffle, ExecNoMatch3) { chars.set('V'); /* V = 0x56, e = 0x65 */ - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -154,7 +154,7 @@ TEST(Truffle, ExecMiniMatch0) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "a"; @@ -169,7 +169,7 @@ TEST(Truffle, ExecMiniMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "bbbbbbbabbb"; @@ -184,7 +184,7 @@ TEST(Truffle, ExecMiniMatch2) { CharReach chars; chars.set(0); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "bbbbbbb\0bbb"; @@ -199,7 +199,7 @@ TEST(Truffle, ExecMiniMatch3) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "\0\0\0\0\0\0\0a\0\0\0"; @@ -214,7 +214,7 @@ TEST(Truffle, ExecMatchBig) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); std::array t1; t1.fill('b'); @@ -234,7 +234,7 @@ TEST(Truffle, ExecMatch1) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -253,7 +253,7 @@ TEST(Truffle, ExecMatch2) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -273,7 +273,7 @@ TEST(Truffle, ExecMatch3) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -295,7 +295,7 @@ TEST(Truffle, ExecMatch4) { chars.set('A'); chars.set('c'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = 
"bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -329,7 +329,7 @@ TEST(Truffle, ExecMatch5) { chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -349,7 +349,7 @@ TEST(Truffle, ExecMatch6) { // [0-Z] - includes some graph chars chars.setRange('0', 'Z'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); std::array t1; t1.fill('*'); // it's full of stars! @@ -370,7 +370,7 @@ TEST(Truffle, ExecMatch7) { // hi bits chars.setRange(127, 255); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); std::array t1; t1.fill('*'); // it's full of stars! @@ -389,7 +389,7 @@ TEST(ReverseTruffle, ExecNoMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); @@ -408,7 +408,7 @@ TEST(ReverseTruffle, ExecNoMatch2) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); @@ -425,7 +425,7 @@ TEST(ReverseTruffle, ExecNoMatch3) { CharReach chars; chars.set('V'); /* V = 0x56, e = 0x65 */ - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; size_t len = strlen(t1); @@ -442,7 +442,7 @@ TEST(ReverseTruffle, ExecMiniMatch0) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &lo, &hi); + truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); char t1[] = "a"; @@ -457,7 +457,7 @@ TEST(ReverseTruffle, ExecMiniMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbabbbb"; @@ -475,7 +475,7 @@ TEST(ReverseTruffle, ExecMiniMatch2) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "babbbbbabbbb"; @@ -494,7 +494,7 @@ TEST(ReverseTruffle, ExecMatch1) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbabbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -514,7 +514,7 @@ TEST(ReverseTruffle, ExecMatch2) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbabbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -535,7 +535,7 @@ TEST(ReverseTruffle, ExecMatch3) { chars.set('a'); chars.set('B'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaBbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -568,7 +568,7 @@ TEST(ReverseTruffle, ExecMatch4) { chars.set('A'); chars.set('c'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); /* 
0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaAbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -602,7 +602,7 @@ TEST(ReverseTruffle, ExecMatch5) { CharReach chars; chars.set('a'); - truffleBuildMasks(chars, &mask1, &mask2); + truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; size_t len = strlen(t1); diff --git a/unit/internal/uniform_ops.cpp b/unit/internal/uniform_ops.cpp index 33d7cd30..10defdbd 100644 --- a/unit/internal/uniform_ops.cpp +++ b/unit/internal/uniform_ops.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -156,26 +156,26 @@ TEST(Uniform, loadstore_m512) { TEST(Uniform, testbit_u32) { for (u32 i = 0; i < 32; i++) { u32 v = 0; - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u32(&v, i)); + EXPECT_EQ((char)1, testbit_u32(v, i)); v = ~v; - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u32(&v, i)); + EXPECT_EQ((char)1, testbit_u32(v, i)); } } TEST(Uniform, testbit_u64a) { for (u32 i = 0; i < 64; i++) { u64a v = 0; - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u64a(&v, i)); + EXPECT_EQ((char)1, testbit_u64a(v, i)); v = ~v; - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v |= 1ULL << i; - EXPECT_EQ((char)1, testbit_u64a(&v, i)); + EXPECT_EQ((char)1, testbit_u64a(v, i)); } } @@ -183,7 +183,7 @@ TEST(Uniform, clearbit_u32) { for (u32 i = 0; i < 32; i++) { u32 v = ~0U; clearbit_u32(&v, i); - EXPECT_EQ((char)0, testbit_u32(&v, i)); + EXPECT_EQ((char)0, testbit_u32(v, i)); v = ~v; clearbit_u32(&v, i); EXPECT_EQ(0U, v); @@ -194,7 +194,7 @@ TEST(Uniform, clearbit_u64a) { for (u32 i = 0; i < 64; i++) { u64a v = ~0ULL; clearbit_u64a(&v, i); - EXPECT_EQ((char)0, testbit_u64a(&v, i)); + EXPECT_EQ((char)0, testbit_u64a(v, i)); v = ~v; clearbit_u64a(&v, i); EXPECT_EQ(0ULL, v); diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index 5d66a332..5e4a8253 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -126,27 +126,29 @@ TEST(DoubleVermicelli, ExecNoMatch1) { const u8 *rv = vermicelliDoubleExec('a', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('B', 'b', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); rv = vermicelliDoubleExec('A', 'B', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('b', 'B', 0, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + /* partial match */ rv = vermicelliDoubleExec('B', 'A', 1, (u8 *)t1 + i, (u8 *)t1 + strlen(t1) - j); - ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); + 
ASSERT_EQ(((size_t)t1 + strlen(t1) - j - 1), (size_t)rv); } } } @@ -353,30 +355,32 @@ TEST(DoubleVermicelliMasked, ExecNoMatch1) { t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); - rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR, + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); + + rv = vermicelliDoubleMaskedExec('B', 'B', 0xff, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR, t1_raw + i, t1_raw + t1.length() -i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); - rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff, + /* partial match */ + rv = vermicelliDoubleMaskedExec('B', 'B', CASE_CLEAR, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff, t1_raw + i, t1_raw + t1.length() - i - j); - ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1), (size_t)rv); + ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j), (size_t)rv); } } } diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index dc731322..c0a6bc21 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -1,7 +1,10 @@ # utility libs +CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") -include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} + ${PROJECT_SOURCE_DIR}) set_source_files_properties( ${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp @@ -31,3 +34,14 @@ SET(corpusomatic_SRCS ) add_library(corpusomatic STATIC ${corpusomatic_SRCS}) +set(databaseutil_SRCS + database_util.cpp + database_util.h +) +add_library(databaseutil STATIC ${databaseutil_SRCS}) + +set(crosscompileutil_SRCS + cross_compile.cpp + cross_compile.h + ) +add_library(crosscompileutil STATIC ${crosscompileutil_SRCS}) diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp new file mode 100644 index 00000000..b4d1f5f1 --- /dev/null +++ b/util/cross_compile.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "cross_compile.h"
+#include "src/ue2common.h"
+#include "src/hs_compile.h"
+#include "src/util/make_unique.h"
+
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+struct XcompileMode {
+    const char *name;
+    unsigned long long cpu_features;
+};
+
+static const XcompileMode xcompile_options[] = {
+    { "avx2", HS_CPU_FEATURES_AVX2 },
+    { "base", 0 },
+};
+
+unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
+    hs_platform_info rv;
+    UNUSED hs_error_t err;
+    err = hs_populate_platform(&rv);
+    assert(!err);
+
+    string str(s);
+    string mode = str.substr(0, str.find(":"));
+    string opt = str.substr(str.find(":")+1, str.npos);
+    bool found_mode = false;
+
+    if (!opt.empty()) {
+        const size_t numOpts = ARRAY_LENGTH(xcompile_options);
+        for (size_t i = 0; i < numOpts; i++) {
+            if (opt.compare(xcompile_options[i].name) == 0) {
+                DEBUG_PRINTF("found opt %zu:%llu\n", i,
+                             xcompile_options[i].cpu_features);
+                rv.cpu_features = xcompile_options[i].cpu_features;
+                found_mode = true;
+                break;
+            }
+        }
+    }
+
+    if (!found_mode) {
+        return nullptr;
+    } else {
+        DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
+        return ue2::make_unique<hs_platform_info>(rv);
+    }
+}
+
+string to_string(const hs_platform_info &p) {
+    ostringstream out;
+    if (p.tune) {
+        out << p.tune;
+    }
+
+    if (p.cpu_features) {
+        u64a features = p.cpu_features;
+        if (features & HS_CPU_FEATURES_AVX2) {
+            out << " avx2";
+            features &= ~HS_CPU_FEATURES_AVX2;
+        }
+
+        if (features) {
+            out << " " << "?cpu_features?:" << features;
+        }
+    }
+
+    return out.str();
+}
+
+string xcompileUsage(void) {
+    string variants = "Instruction set options: ";
+    const size_t numOpts = ARRAY_LENGTH(xcompile_options);
+    for (size_t i = 0; i < numOpts; i++) {
+        variants += xcompile_options[i].name;
+        if (i + 1 != numOpts) {
+            variants += ", ";
+        }
+    }
+
+    return variants;
+}
diff --git a/util/cross_compile.h b/util/cross_compile.h
new file mode 100644
index 00000000..ddfc7b10
--- /dev/null
+++ b/util/cross_compile.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CROSS_COMPILE_H
+#define CROSS_COMPILE_H
+
+#include <memory>
+#include <string>
+
+struct hs_platform_info;
+
+std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);
+std::string xcompileUsage(void);
+
+std::string to_string(const hs_platform_info &p);
+
+#endif /* CROSS_COMPILE_H */
diff --git a/util/database_util.cpp b/util/database_util.cpp
new file mode 100644
index 00000000..3df75e2a
--- /dev/null
+++ b/util/database_util.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "database_util.h"
+
+#include "hs_common.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+
+#if defined(HAVE_MMAP)
+#include <sys/mman.h> // for mmap
+#include <unistd.h>   // for close
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+using namespace std;
+
+bool saveDatabase(const hs_database_t *db, const char *filename, bool verbose) {
+    assert(db);
+    assert(filename);
+
+    if (verbose) {
+        cout << "Saving database to: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+    size_t length = 0;
+    hs_error_t err = hs_serialize_database(db, &bytes, &length);
+    if (err != HS_SUCCESS) {
+        return false;
+    }
+
+    assert(bytes);
+    assert(length > 0);
+
+    ofstream out(filename, ios::binary);
+    out.write(bytes, length);
+    out.close();
+
+    ::free(bytes);
+
+    return true;
+}
+
+hs_database_t *loadDatabase(const char *filename, bool verbose) {
+    assert(filename);
+
+    if (verbose) {
+        cout << "Loading database from: " << filename << endl;
+    }
+
+    char *bytes = nullptr;
+
+#if defined(HAVE_MMAP)
+    // Use mmap to read the file
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        return nullptr;
+    }
+    struct stat st;
+    if (fstat(fd, &st) < 0) {
+        close(fd);
+        return nullptr;
+    }
+    size_t len = st.st_size;
+
+    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
+    if (bytes == MAP_FAILED) {
+        cout << "mmap failed" << endl;
+        close(fd);
+        return nullptr;
+    }
+#else
+    // Fall back on stream IO
+    ifstream is;
+    is.open(filename, ios::in | ios::binary);
+    if (!is.is_open()) {
+        return nullptr;
+    }
+    is.seekg(0, ios::end);
+    size_t len = is.tellg();
+    if (verbose) {
+        cout << "Reading " << len << " bytes" << endl;
+    }
+    is.seekg(0, ios::beg);
+    bytes = new char[len];
+    is.read(bytes, len);
+    is.close();
+#endif
+
+    assert(bytes);
+
+    if (verbose) {
+        char *info = nullptr;
+        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
+        if (err) {
+            cout << "Unable to decode serialized database info: " << err
+                 << endl;
+        } else if (info) {
+            cout << "Serialized database info: " << info << endl;
+            std::free(info);
+        } else {
+            cout << "Unable to decode serialized database info." << endl;
+        }
+    }
+
+    hs_database_t *db = nullptr;
+    hs_error_t err = hs_deserialize_database(bytes, len, &db);
+
+#if defined(HAVE_MMAP)
+    munmap(bytes, len);
+    close(fd);
+#else
+    delete [] bytes;
+#endif
+
+    if (err != HS_SUCCESS) {
+        cout << "hs_deserialize_database call failed: " << err << endl;
+        return nullptr;
+    }
+
+    assert(db);
+
+    return db;
+}
diff --git a/util/database_util.h b/util/database_util.h
new file mode 100644
index 00000000..badd036d
--- /dev/null
+++ b/util/database_util.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DATABASE_UTIL_H
+#define DATABASE_UTIL_H
+
+struct hs_database;
+
+bool saveDatabase(const hs_database *db, const char *filename,
+                  bool verbose = false);
+
+hs_database *loadDatabase(const char *filename, bool verbose = false);
+
+#endif /* DATABASE_UTIL_H */
diff --git a/util/expression_path.h b/util/expression_path.h
new file mode 100644
index 00000000..3075b4d4
--- /dev/null
+++ b/util/expression_path.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015-2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EXPRESSION_PATH_H
+#define EXPRESSION_PATH_H
+
+#include "ue2common.h"
+
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <sys/stat.h>
+#if !defined(_WIN32)
+#include <libgen.h>
+#include <unistd.h>
+#endif
+
+//
+// Utility functions
+//
+
+/**
+ * Given a path to a signature file, infer the path of the pcre directory.
+ */
+static inline
+std::string inferExpressionPath(const std::string &sigFile) {
+#ifndef _WIN32
+    // POSIX variant.
+
+    // dirname() may modify its argument, so we must make a copy.
+    std::vector<char> path(sigFile.size() + 1);
+    memcpy(path.data(), sigFile.c_str(), sigFile.size());
+    path[sigFile.size()] = 0; // ensure null termination.
+
+    std::string rv = dirname(path.data());
+#else
+    // Windows variant.
+    if (sigFile.size() >= _MAX_DIR) {
+        return std::string();
+    }
+    char path[_MAX_DIR];
+    _splitpath(sigFile.c_str(), nullptr, path, nullptr, nullptr);
+    std::string rv(path);
+#endif
+
+    rv += "/../pcre";
+    return rv;
+}
+
+#if defined(_WIN32)
+#define stat _stat
+#define S_IFREG _S_IFREG
+#endif
+
+static inline
+bool isDir(const std::string &filename) {
+    struct stat s;
+
+    if (stat(filename.c_str(), &s) == -1) {
+        std::cerr << "stat: " << strerror(errno) << std::endl;
+        return false;
+    }
+
+    return (S_IFDIR & s.st_mode);
+}
+
+static inline
+bool isFile(const std::string &filename) {
+    struct stat s;
+
+    if (stat(filename.c_str(), &s) == -1) {
+        std::cerr << "stat: " << strerror(errno) << std::endl;
+        return false;
+    }
+
+    return (S_IFREG & s.st_mode);
+}
+
+#endif /* EXPRESSION_PATH_H */
diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp
index 9fa6743e..ca7c413a 100644
--- a/util/ng_corpus_generator.cpp
+++ b/util/ng_corpus_generator.cpp
@@ -144,7 +144,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps,
     ue2::unordered_set<NFAVertex> one_way_in;
     for (const auto &v : vertices_range(g)) {
-        if (!hasGreaterInDegree(1, v, g)) {
+        if (in_degree(v, g) <= 1) {
             one_way_in.insert(v);
         }
     }
@@ -155,7 +155,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps,
         ptr_vector<VertexPath>::auto_type p = open.pop_back();
         NFAVertex u = p->back();
 
-        DEBUG_PRINTF("dequeuing path %s, back %u\n",
+        DEBUG_PRINTF("dequeuing path %s, back %zu\n",
                      pathToString(g, *p).c_str(), g[u].index);
 
         NGHolder::adjacency_iterator ai, ae;
@@ -187,7 +187,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps,
                 // Note that vertices that only have one predecessor don't need
                 // their cycle limit checked, as their predecessors will have
                 // the same count.
-                DEBUG_PRINTF("exceeded cycle limit for v=%u, pruning path\n",
+                DEBUG_PRINTF("exceeded cycle limit for v=%zu, pruning path\n",
                              g[v].index);
                 continue;
             }
@@ -301,7 +301,7 @@ void CorpusGeneratorImpl::addRandom(const min_max &mm, string *out) {
 }
 
 unsigned char CorpusGeneratorImpl::getChar(NFAVertex v) {
-    const CharReach &cr = graph.g[v].char_reach;
+    const CharReach &cr = graph[v].char_reach;
 
     switch (cProps.throwDice()) {
     case CorpusProperties::ROLLED_MATCH:
@@ -521,7 +521,7 @@ CorpusGeneratorUtf8::pathToCorpus(const vector &path) {
 }
 
 static
-u32 classify_vertex(const NFAGraph &g, NFAVertex v) {
+u32 classify_vertex(const NGHolder &g, NFAVertex v) {
     const CharReach &cr = g[v].char_reach;
     if (cr.isSubsetOf(UTF_ASCII_CR)) {
         return 1;
@@ -560,7 +560,7 @@ void expandCodePointSet(const CharReach &cr, CodePointSet *out, u32 mask,
 }
 
 static
-void decodePath(const NFAGraph &g, const VertexPath &in,
+void decodePath(const NGHolder &g, const VertexPath &in,
                 vector &out) {
     VertexPath::const_iterator it = in.begin();
     while (it != in.end()) {
@@ -618,7 +618,7 @@ void translatePaths(const NGHolder &graph,
     assert(out);
     for (const auto &path : allPathsTemp) {
         out->push_back(vector());
-        decodePath(graph.g, path, out->back());
+        decodePath(graph, path, out->back());
     }
 }
 
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 60ff0a17..2b337365 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -34,7 +34,7 @@
 
 #include "ng_find_matches.h"
 
-#include "nfagraph/ng_graph.h"
+#include "nfagraph/ng_holder.h"
 #include "nfagraph/ng_util.h"
 #include "parser/position.h"
 #include "util/container.h"
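
Usage notes (sketches for the reader, not part of the patch above):

The new saveDatabase()/loadDatabase() helpers added in util/database_util.cpp give
the tools a simple serialize-to-disk round trip. A minimal sketch of how a tool
might drive them; the pattern, flags, and output path are illustrative
assumptions, and the include paths assume a tool built inside the Hyperscan tree:

    #include "hs.h"
    #include "database_util.h"

    #include <iostream>

    int main(void) {
        hs_database_t *db = nullptr;
        hs_compile_error_t *compile_err = nullptr;

        // Compile a single (illustrative) pattern in block mode.
        if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr,
                       &db, &compile_err) != HS_SUCCESS) {
            std::cerr << "compile failed: " << compile_err->message << std::endl;
            hs_free_compile_error(compile_err);
            return 1;
        }

        // Serialize to disk, then reload; loadDatabase() reads the file via
        // mmap on platforms where HAVE_MMAP is defined.
        if (!saveDatabase(db, "patterns.db", true)) {
            std::cerr << "save failed" << std::endl;
        }
        hs_free_database(db);

        hs_database_t *loaded = loadDatabase("patterns.db", true);
        if (loaded) {
            hs_free_database(loaded);
        }
        return 0;
    }

The unit/internal/shuffle.cpp changes above rename packedExtract32/packedExtract64
to pext32/pext64, matching the parallel-bits-extract operation the tests exercise.
For readers unfamiliar with PEXT, a scalar reference of the semantics those
assertions assume (the library's own helper may use the BMI2 instruction where
available; ref_pext32 is a hypothetical name used only here):

    // Gather the bits of 'x' selected by 'mask' and pack them contiguously
    // into the low end of the result, preserving their order.
    static unsigned ref_pext32(unsigned x, unsigned mask) {
        unsigned out = 0;
        unsigned bit = 0; // next output bit position
        for (unsigned i = 0; i < 32; i++) {
            if (mask & (1U << i)) {
                if (x & (1U << i)) {
                    out |= 1U << bit;
                }
                bit++;
            }
        }
        return out; // e.g. ref_pext32(0x28, 0x38) == 0x5
    }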