Initial commit of Hyperscan

This commit is contained in:
Matthew Barr 2015-10-20 09:13:35 +11:00
commit 904e436f11
610 changed files with 213627 additions and 0 deletions

6
.clang-format Normal file
View File

@ -0,0 +1,6 @@
BasedOnStyle: LLVM
IndentWidth: 4
UseTab: false
AllowShortIfStatementsOnASingleLine: false
IndentCaseLabels: false
AccessModifierOffset: -4

103
.gitignore vendored Normal file
View File

@ -0,0 +1,103 @@
##
## There are some more .gitignore files in subdirs, but this is the main place
## to add new entries. These are mostly for the common case when ue2 is built
## in place
##
# Autogenerated stuff that we don't want to know about
.deps
autom4te.cache
autojunk
.dirstamp
# Temp and swap files
*~
.*.swp
.sw?
# compiler output and binaries
*.a
*.o
*.lo
*.la
*.so
*.pyc
.libs
bin
# Merge files created by git.
*.orig
# sigs dir is handled externally
signatures
# ignore pcre symlink if it exists
pcre
# but not pcre subdirs!
!pcre/
# ignore boost symlink if it exists
include/boost
# ignore sqlite3 symlink if it exists
sqlite3
# Generated files
src/config.h
src/config.h.in
src/hs_version.h
src/fdr/fdr_autogen.c
src/fdr/fdr_autogen_compiler.cpp
src/fdr/teddy_autogen.c
src/fdr/teddy_autogen_compiler.cpp
src/parser/Parser.cpp
# Generated PCRE files
pcre/pcre_chartables.c
pcre/pcregrep
pcre/pcretest
# Autoconf/automake/libtool noise
Makefile
Makefile.in
aclocal.m4
config.cache
config.log
config.status
configure
libhs.pc
libtool
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
src/stamp-h1
# Docs
# doc/dev-reference/Makefile is not generated, so keep it tracked
!doc/dev-reference/Makefile
doc/dev-reference/doxygen_sqlite3.db
doc/dev-reference/doxygen_xml/
doc/dev-reference/_build/
# Autotools noise in pcre
pcre/INSTALL
pcre/Makefile
pcre/Makefile.in
pcre/aclocal.m4
pcre/ar-lib
pcre/compile
pcre/config.*
pcre/configure
pcre/depcomp
pcre/install-sh
pcre/*.pc
pcre/libtool
pcre/ltmain.sh
pcre/missing
pcre/pcre-config
pcre/pcre.h
pcre/pcre_stringpiece.h
pcre/pcrecpparg.h
pcre/stamp-h1
pcre/test-driver

944
CMakeLists.txt Normal file
View File

@ -0,0 +1,944 @@
# Top-level build script for Hyperscan.
#
# 2.8.11 is the oldest CMake release that provides everything this file uses:
# string(TIMESTAMP) first appeared in 2.8.11 and OBJECT libraries in 2.8.8, so
# the previously declared minimum of 2.8 could not actually configure the tree.
cmake_minimum_required (VERSION 2.8.11)
project (Hyperscan C CXX)

# Library version; LIB_VERSION / LIB_SOVERSION further down are derived from it.
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 0)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

# Date of the configure run, formatted as YYYY-MM-DD.
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")

# Project-local CMake modules live in <source>/cmake.
set (CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)

# Stock CMake check modules used by the feature tests below.
include (CheckCCompilerFlag)
include (CheckCXXCompilerFlag)
include (CheckFunctionExists)
include (CheckIncludeFiles)
include (CheckIncludeFileCXX)
include (CheckLibraryExists)
include (CheckSymbolExists)
include (CMakeDependentOption)

# Project modules (ragel.cmake presumably provides ragelmaker(), which is
# called further down -- confirm against cmake/ragel.cmake).
include (${CMAKE_MODULE_PATH}/platform.cmake)
include (${CMAKE_MODULE_PATH}/ragel.cmake)
# pkg-config is optional; PKG_CONFIG_FOUND is consulted later before libhs.pc
# is generated and installed.
find_package(PkgConfig QUIET)

# Normalise the build type to upper case, falling back to RELWITHDEBINFO when
# none was requested.
if (CMAKE_BUILD_TYPE)
    string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
    message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
else()
    message(STATUS "Default build type 'Release with debug info'")
    set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
endif()

# RELEASE_BUILD is TRUE when the build type matches RELEASE or RELWITHDEBINFO
# and FALSE for everything else.
set(RELEASE_BUILD FALSE)
if (CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO)
    set(RELEASE_BUILD TRUE)
endif()
# Collect all build products under bin/ and lib/ inside the build tree.
set(BINDIR ${PROJECT_BINARY_DIR}/bin)
set(LIBDIR ${PROJECT_BINARY_DIR}/lib)

# Generic (no per-configuration suffix) output locations.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LIBDIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LIBDIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})

# Multi-config generators (e.g. msvc) look at the per-configuration variants,
# so point each of those at the same two directories.
foreach (CFG ${CMAKE_CONFIGURATION_TYPES})
    string (TOUPPER ${CFG} CFG)
    set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${CFG} ${LIBDIR})
    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${CFG} ${LIBDIR})
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${CFG} ${BINDIR})
endforeach ()

# Remember whether the Xcode generator is in use; checked again when the Boost
# include path is added.
if (CMAKE_GENERATOR STREQUAL Xcode)
    set(XCODE TRUE)
endif ()
# Header search paths for the targets defined in this directory and below: the
# source tree, the build tree (config.h is generated there) and the in-tree
# "include" directory, which is treated as a system include directory.
include_directories(src .)
include_directories(${CMAKE_BINARY_DIR})
include_directories(SYSTEM include)

# FindBoost switches. CMake variable names are case sensitive and FindBoost
# reads the mixed-case "Boost_" spelling, so the all-caps "BOOST_" variants
# that used to be set here had no effect.
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED OFF)
set(Boost_USE_STATIC_RUNTIME OFF)
set(Boost_NO_BOOST_CMAKE ON)

# Project-level minimum Boost version (not a FindBoost variable).
set(BOOST_MINVERSION 1.57.0)

# First check for Boost installed on the system.
find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
    # We might have Boost in tree, so provide a hint and try again.
    # BOOST_INCLUDEDIR (upper case) is the hint name FindBoost documents.
    message(STATUS "trying include dir for boost")
    set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
    find_package(Boost ${BOOST_MINVERSION})
    if(NOT Boost_FOUND)
        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
    endif()
endif()
# A Python interpreter is mandatory; its path is exported as PYTHON.
# TODO: make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
if(PYTHONINTERP_FOUND)
    set(PYTHON ${PYTHON_EXECUTABLE})
else()
    message(FATAL_ERROR "No python interpreter found")
endif()

# Locate the ragel executable. src/parser/Parser.rl is handed to ragelmaker()
# later in this file, so fail at configure time with a clear message instead
# of letting a RAGEL-NOTFOUND value surface as an obscure build error.
find_program(RAGEL ragel)
if(NOT RAGEL)
    message(FATAL_ERROR "Ragel state machine compiler not found")
endif()
# OPTIMISE selects between the -O3/-O2 and -O0 flag sets chosen in the
# compiler-flag section below. (The help text used to say "Turns off", which
# is the opposite of what the option does.)
option(OPTIMISE "Turns on compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)

# DEBUG_OUTPUT defines DEBUG for every target and forces optimisation off.
option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)
if(DEBUG_OUTPUT)
    add_definitions(-DDEBUG)
    set(OPTIMISE FALSE)
endif()
# Library flavour switches: shared instead of static, or both side by side.
option(BUILD_SHARED_LIBS "Build shared libs instead of static" OFF)
option(BUILD_STATIC_AND_SHARED "Build shared libs as well as static" OFF)

# Any request for shared libraries is rejected on Windows.
if (BUILD_SHARED_LIBS OR BUILD_STATIC_AND_SHARED)
    if (NOT WIN32)
        message(STATUS "Building shared libraries")
    else()
        message(FATAL_ERROR "Windows DLLs currently not supported")
    endif()
endif()
# For config: mirror the OPTIMISE setting into HS_OPTIMIZE (presumably consumed
# by the config.h template -- confirm against cmake/config.h.in). The value
# must be expanded; the bare word OPTIMISE stored the literal string
# "OPTIMISE", which is truthy even when optimisation is switched off.
set(HS_OPTIMIZE "${OPTIMISE}")

# Dump support defaults to ON and is only user-selectable in non-release
# builds; in release builds it is forced OFF.
cmake_dependent_option(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)

# Asserts can be compiled out of DEBUG builds on request by defining NDEBUG.
# Other build types are left untouched here.
option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE)
if (DISABLE_ASSERTS)
    if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
        add_definitions(-DNDEBUG)
    endif()
endif()
# Compiler flag selection.
#
# On MSVC the flags are appended straight to CMAKE_C_FLAGS / CMAKE_CXX_FLAGS.
# Everywhere else they are accumulated in EXTRA_C_FLAGS / EXTRA_CXX_FLAGS,
# which are merged into the CMAKE_*_FLAGS variables later in this file, after
# the feature checks have run.
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)

# TODO: per platform config files?
# TODO: windows generator on cmake always uses msvc, even if we plan to build with icc
if(MSVC OR MSVC_IDE)
    message(STATUS "Building for Windows")
    if (MSVC_VERSION LESS 1700)
        message(FATAL_ERROR "The project requires C++11 features.")
    else()
        if (WINDOWS_ICC)
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
        else()
            #TODO: don't hardcode arch
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
        endif()
        # Drop /RTC1 from the debug flag sets (presumably because it cannot
        # be combined with the /O2 added above -- confirm).
        string(REGEX REPLACE "/RTC1" ""
            CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" )
        string(REGEX REPLACE "/RTC1" ""
            CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" )
    endif()
else()
    # compiler version checks TODO: test more compilers
    if (CMAKE_COMPILER_IS_GNUCXX)
        set (GNUCXX_MINVER "4.8.1")
        # Use the version CMake already detected. The previous approach ran
        # the compiler through the deprecated exec_program() and parsed
        # "--version" with a regex that only accepted single-digit version
        # components, so it could not extract versions such as 10.2.0.
        set (GNUCXX_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
        message(STATUS "g++ version ${GNUCXX_VERSION}")
        if (GNUCXX_VERSION VERSION_LESS ${GNUCXX_MINVER})
            message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
        endif()
    endif()

    # set compiler flags - more are tested and added later
    set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Werror")
    set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Werror -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")

    # Target the host CPU unless the user already put a -march option into
    # the corresponding flags variable.
    if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
        message(STATUS "Building for current host CPU")
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
    endif()
    if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -march=native -mtune=native")
    endif()

    if(CMAKE_COMPILER_IS_GNUCC)
        # spurious warnings?
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
    endif()
    if(CMAKE_COMPILER_IS_GNUCXX)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
    endif()

    # Optimisation level: C gets -O3 and C++ gets -O2 when OPTIMISE is on,
    # both get -O0 otherwise. The level is prepended to the flag string.
    if(OPTIMISE)
        set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
        set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
    else()
        set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
        set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
    endif()

    # Non-release builds keep the frame pointer.
    if(NOT RELEASE_BUILD)
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
    endif()
endif()
# check_c_source_compiles() and check_cxx_source_compiles() are used at the
# end of this section. Include their modules explicitly instead of relying on
# CheckCCompilerFlag / CheckCXXCompilerFlag pulling them in as a side effect.
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)

# Header availability.
check_include_files(unistd.h HAVE_UNISTD_H)
check_include_files(intrin.h HAVE_C_INTRIN_H)
check_include_file_cxx(intrin.h HAVE_CXX_INTRIN_H)
check_include_files(tmmintrin.h HAVE_TMMINTRIN_H)
check_include_files(x86intrin.h HAVE_C_X86INTRIN_H)
check_include_file_cxx(x86intrin.h HAVE_CXX_X86INTRIN_H)

# Aligned allocation functions.
check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
check_function_exists(_aligned_malloc HAVE__ALIGNED_MALLOC)

# these end up in the config file
check_c_compiler_flag(-fvisibility=hidden HAS_C_HIDDEN)
check_cxx_compiler_flag(-fvisibility=hidden HAS_CXX_HIDDEN)

# testing a builtin takes a little more work: compile a tiny program that
# calls __builtin_assume_aligned with each of the two compilers
check_c_source_compiles("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
check_cxx_source_compiles("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
# Probe for optional warning flags (skipped on Windows) and append the
# supported ones to EXTRA_C_FLAGS / EXTRA_CXX_FLAGS.
if (NOT WIN32)

    # Warnings enabled for C when the compiler understands them.
    set(C_FLAGS_TO_CHECK
        # Variable length arrays are way bad, most especially at run time
        "-Wvla"
        # Pointer arithmetic on void pointers is doing it wrong.
        "-Wpointer-arith"
        # Build our C code with -Wstrict-prototypes -Wmissing-prototypes
        "-Wstrict-prototypes"
        "-Wmissing-prototypes"
    )
    foreach (FLAG ${C_FLAGS_TO_CHECK})
        # Turn the flag into a usable result-variable name: every "-"
        # becomes "_", e.g. C_FLAG_Wvla.
        string(REPLACE "-" "_" FNAME C_FLAG${FLAG})
        check_c_compiler_flag("${FLAG}" ${FNAME})
        if (${FNAME})
            set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} ${FLAG}")
        endif ()
    endforeach ()

    # Same procedure for C++, with a shorter list.
    set(CXX_FLAGS_TO_CHECK
        "-Wvla"
        "-Wpointer-arith"
    )
    foreach (FLAG ${CXX_FLAGS_TO_CHECK})
        string(REPLACE "-" "_" FNAME CXX_FLAG${FLAG})
        check_cxx_compiler_flag("${FLAG}" ${FNAME})
        if (${FNAME})
            set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} ${FLAG}")
        endif ()
    endforeach ()

    # self-assign should be thrown away, but clang whinges: where the warning
    # exists, turn it off for both languages
    check_c_compiler_flag("-Wself-assign" CC_SELF_ASSIGN)
    if (CC_SELF_ASSIGN)
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-self-assign")
    endif ()
    check_cxx_compiler_flag("-Wself-assign" CXX_SELF_ASSIGN)
    if (CXX_SELF_ASSIGN)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-self-assign")
    endif ()

    # clang gets up in our face for going paren crazy with macros
    check_c_compiler_flag("-Wparentheses-equality" CC_PAREN_EQUALITY)
    if (CC_PAREN_EQUALITY)
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
    endif ()

    # clang complains about unused const vars in our Ragel-generated code.
    check_cxx_compiler_flag("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
    if (CXX_UNUSED_CONST_VAR)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
    endif ()

    # note this for later
    # g++ doesn't have this flag but clang does
    check_cxx_compiler_flag("-Wweak-vtables" CXX_WEAK_VTABLES)
    if (CXX_WEAK_VTABLES)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wweak-vtables")
    endif ()

    check_cxx_compiler_flag("-Wmissing-declarations" CXX_MISSING_DECLARATIONS)
    if (CXX_MISSING_DECLARATIONS)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wmissing-declarations")
    endif ()

    # gcc5 complains about this; only the probe result is recorded here, no
    # flag is appended
    check_cxx_compiler_flag("-Wunused-variable" CXX_WUNUSED_VARIABLE)

endif ()
# Add the Boost headers as a system include directory.
if (XCODE)
    # cmake doesn't think Xcode supports isystem, so pass the option by hand
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
else ()
    include_directories(SYSTEM ${Boost_INCLUDE_DIR})
endif ()
# Convenience flags naming the host operating system.
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    set(LINUX TRUE)
endif ()

if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
    set(FREEBSD true)
endif ()
# Diagnostic tuning for the Intel compilers on non-Windows hosts. These go
# directly into CMAKE_C_FLAGS / CMAKE_CXX_FLAGS rather than the EXTRA_* sets.
if (NOT WIN32)
    if (CMAKE_C_COMPILER_ID MATCHES "Intel")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
    endif ()

    if (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
    endif ()
endif ()
# Sub-projects. Note that they are entered before EXTRA_C_FLAGS and
# EXTRA_CXX_FLAGS are merged into the CMAKE_*_FLAGS variables of this
# directory, so the order of these statements matters.
add_subdirectory(util)
add_subdirectory(unit)
add_subdirectory(doc/dev-reference)

# The tools directory is only built when it is present in the source tree.
if (EXISTS ${CMAKE_SOURCE_DIR}/tools)
    add_subdirectory(tools)
endif ()
# do substitutions: generate config.h and hs_version.h in the build tree
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h)
configure_file(src/hs_version.h.in hs_version.h)

if (PKG_CONFIG_FOUND)
    # we really only need to do this if we have pkg-config
    configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars
    # Relative destination, like every other install() rule in this file, so
    # the install prefix is applied at install time instead of being baked
    # into the rule as an absolute path at configure time.
    install(FILES ${CMAKE_BINARY_DIR}/libhs.pc
        DESTINATION "lib/pkgconfig")
endif()
# All feature tests are done by now, so it is safe to merge the accumulated
# extra flags into the real compiler flag variables.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
# Pull in the autogen targets and make the files they produce in the build
# tree visible to the compiler.
add_subdirectory(src/fdr)
include_directories(${CMAKE_BINARY_DIR}/src/fdr)

# Outside Windows the Ragel-generated parser is compiled with -Wno-unused;
# on Windows RAGEL_C_FLAGS is left unset by this block.
if (NOT WIN32)
    set(RAGEL_C_FLAGS "-Wno-unused")
endif ()
set_source_files_properties(
    ${CMAKE_BINARY_DIR}/src/parser/Parser.cpp
    PROPERTIES
        COMPILE_FLAGS "${RAGEL_C_FLAGS}")

# Hand the parser grammar to ragelmaker() (not defined in this file).
ragelmaker(src/parser/Parser.rl)
# Public API headers; installed under include/hs and also listed among the
# sources of the libraries below.
set(hs_HEADERS
    src/hs.h
    src/hs_common.h
    src/hs_compile.h
    src/hs_runtime.h
)
install(FILES ${hs_HEADERS} DESTINATION include/hs)

# Targets the runtime object libraries are made to depend on.
set(fdr_autogen_targets
    autogen_runtime
    autogen_teddy_runtime
)
# Sources and headers of the hs_exec object library (and of hs_exec_shared
# when shared libraries are requested).
set(hs_exec_SRCS
    ${hs_HEADERS}
    src/hs_version.h
    src/ue2common.h
    src/alloc.c
    src/allocator.h
    src/runtime.c
    src/fdr/fdr.c
    src/fdr/fdr.h
    src/fdr/fdr_internal.h
    src/fdr/fdr_confirm.h
    src/fdr/fdr_confirm_runtime.h
    src/fdr/fdr_streaming_runtime.h
    src/fdr/flood_runtime.h
    src/fdr/fdr_loadval.h
    src/fdr/teddy.c
    src/fdr/teddy_internal.h
    src/hwlm/hwlm.c
    src/hwlm/hwlm.h
    src/hwlm/hwlm_internal.h
    src/hwlm/noodle_engine.c
    src/hwlm/noodle_engine.h
    src/hwlm/noodle_internal.h
    src/nfa/accel.c
    src/nfa/accel.h
    src/nfa/castle.c
    src/nfa/castle.h
    src/nfa/castle_internal.h
    src/nfa/gough.c
    src/nfa/gough_internal.h
    src/nfa/lbr.c
    src/nfa/lbr.h
    src/nfa/lbr_common_impl.h
    src/nfa/lbr_internal.h
    src/nfa/mcclellan.c
    src/nfa/mcclellan.h
    src/nfa/mcclellan_common_impl.h
    src/nfa/mcclellan_internal.h
    src/nfa/limex_accel.c
    src/nfa/limex_accel.h
    src/nfa/limex_exceptional.h
    src/nfa/limex_native.c
    src/nfa/limex_ring.h
    src/nfa/limex_simd128.c
    src/nfa/limex_simd256.c
    src/nfa/limex_simd384.c
    src/nfa/limex_simd512a.c
    src/nfa/limex_simd512b.c
    src/nfa/limex_simd512c.c
    src/nfa/limex.h
    src/nfa/limex_common_impl.h
    src/nfa/limex_context.h
    src/nfa/limex_internal.h
    src/nfa/limex_runtime.h
    src/nfa/limex_runtime_impl.h
    src/nfa/limex_state_impl.h
    src/nfa/mpv.h
    src/nfa/mpv.c
    src/nfa/mpv_internal.h
    src/nfa/nfa_api.h
    src/nfa/nfa_api_dispatch.c
    src/nfa/nfa_internal.h
    src/nfa/nfa_rev_api.h
    src/nfa/repeat.c
    src/nfa/repeat.h
    src/nfa/repeat_internal.h
    src/nfa/shufti.c
    src/nfa/shufti.h
    src/nfa/truffle.c
    src/nfa/truffle.h
    src/nfa/vermicelli.h
    src/nfa/vermicelli_run.h
    src/nfa/vermicelli_sse.h
    src/sidecar/sidecar.c
    src/sidecar/sidecar.h
    src/sidecar/sidecar_generic.h
    src/sidecar/sidecar_internal.h
    src/sidecar/sidecar_shufti.c
    src/sidecar/sidecar_shufti.h
    src/som/som.h
    src/som/som_runtime.h
    src/som/som_runtime.c
    src/som/som_stream.c
    src/som/som_stream.h
    src/rose/block.c
    src/rose/catchup.h
    src/rose/catchup.c
    src/rose/eod.c
    src/rose/infix.h
    src/rose/init.h
    src/rose/init.c
    src/rose/stream.c
    src/rose/match.h
    src/rose/match.c
    src/rose/miracle.h
    src/rose/runtime.h
    src/rose/rose_sidecar_runtime.h
    src/rose/rose.h
    src/rose/rose_internal.h
    src/rose/rose_types.h
    src/rose/rose_common.h
    src/util/bitutils.h
    src/util/exhaust.h
    src/util/fatbit.h
    src/util/fatbit.c
    src/util/join.h
    src/util/masked_move.c
    src/util/masked_move.h
    src/util/multibit.h
    src/util/multibit_internal.h
    src/util/multibit.c
    src/util/pack_bits.h
    src/util/popcount.h
    src/util/pqueue.h
    src/util/scatter.h
    src/util/scatter_runtime.h
    src/util/shuffle.h
    src/util/shuffle_ssse3.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/state_compress.h
    src/util/state_compress.c
    src/util/unaligned.h
    src/util/uniform_ops.h
    src/scratch.h
    src/scratch.c
    src/crc32.c
    src/crc32.h
    src/database.c
    src/database.h
)
# Sources and headers compiled directly into the hs library (static, and
# shared when requested); the hs_exec objects are added to it separately.
set(hs_SRCS
    ${hs_HEADERS}
    src/crc32.h
    src/database.h
    src/grey.cpp
    src/grey.h
    src/hs.cpp
    src/hs_internal.h
    src/hs_version.c
    src/hs_version.h
    src/scratch.h
    src/state.h
    src/ue2common.h
    src/compiler/asserts.cpp
    src/compiler/asserts.h
    src/compiler/compiler.cpp
    src/compiler/compiler.h
    src/compiler/error.cpp
    src/compiler/error.h
    src/fdr/engine_description.cpp
    src/fdr/engine_description.h
    src/fdr/fdr_compile.cpp
    src/fdr/fdr_compile.h
    src/fdr/fdr_compile_internal.h
    src/fdr/fdr_compile_util.cpp
    src/fdr/fdr_confirm_compile.cpp
    src/fdr/fdr_confirm.h
    src/fdr/fdr_engine_description.cpp
    src/fdr/fdr_engine_description.h
    src/fdr/fdr_internal.h
    src/fdr/fdr_streaming_compile.cpp
    src/fdr/fdr_streaming_internal.h
    src/fdr/flood_compile.cpp
    src/fdr/teddy_compile.cpp
    src/fdr/teddy_compile.h
    src/fdr/teddy_engine_description.cpp
    src/fdr/teddy_engine_description.h
    src/fdr/teddy_internal.h
    src/hwlm/hwlm_build.cpp
    src/hwlm/hwlm_build.h
    src/hwlm/hwlm_internal.h
    src/hwlm/hwlm_literal.cpp
    src/hwlm/hwlm_literal.h
    src/hwlm/noodle_build.cpp
    src/hwlm/noodle_build.h
    src/hwlm/noodle_internal.h
    src/nfa/accel.h
    src/nfa/accelcompile.cpp
    src/nfa/accelcompile.h
    src/nfa/callback.h
    src/nfa/castlecompile.cpp
    src/nfa/castlecompile.h
    src/nfa/dfa_min.cpp
    src/nfa/dfa_min.h
    src/nfa/goughcompile.cpp
    src/nfa/goughcompile.h
    src/nfa/goughcompile_accel.cpp
    src/nfa/goughcompile_internal.h
    src/nfa/goughcompile_reg.cpp
    src/nfa/mcclellan.h
    src/nfa/mcclellan_internal.h
    src/nfa/mcclellancompile.cpp
    src/nfa/mcclellancompile.h
    src/nfa/mcclellancompile_util.cpp
    src/nfa/mcclellancompile_util.h
    src/nfa/limex_compile.cpp
    src/nfa/limex_compile.h
    src/nfa/limex_accel.h
    src/nfa/limex_internal.h
    src/nfa/mpv_internal.h
    src/nfa/mpvcompile.cpp
    src/nfa/mpvcompile.h
    src/nfa/nfa_api.h
    src/nfa/nfa_api_queue.h
    src/nfa/nfa_api_util.h
    src/nfa/nfa_build_util.cpp
    src/nfa/nfa_build_util.h
    src/nfa/nfa_internal.h
    src/nfa/nfa_kind.h
    src/nfa/rdfa.h
    src/nfa/rdfa_merge.cpp
    src/nfa/rdfa_merge.h
    src/nfa/repeat_internal.h
    src/nfa/repeatcompile.cpp
    src/nfa/repeatcompile.h
    src/nfa/shufticompile.cpp
    src/nfa/shufticompile.h
    src/nfa/trufflecompile.cpp
    src/nfa/trufflecompile.h
    src/nfagraph/ng.cpp
    src/nfagraph/ng.h
    src/nfagraph/ng_anchored_acyclic.cpp
    src/nfagraph/ng_anchored_acyclic.h
    src/nfagraph/ng_anchored_dots.cpp
    src/nfagraph/ng_anchored_dots.h
    src/nfagraph/ng_asserts.cpp
    src/nfagraph/ng_asserts.h
    src/nfagraph/ng_builder.cpp
    src/nfagraph/ng_builder.h
    src/nfagraph/ng_calc_components.cpp
    src/nfagraph/ng_calc_components.h
    src/nfagraph/ng_cyclic_redundancy.cpp
    src/nfagraph/ng_cyclic_redundancy.h
    src/nfagraph/ng_depth.cpp
    src/nfagraph/ng_depth.h
    src/nfagraph/ng_dominators.cpp
    src/nfagraph/ng_dominators.h
    src/nfagraph/ng_edge_redundancy.cpp
    src/nfagraph/ng_edge_redundancy.h
    src/nfagraph/ng_equivalence.cpp
    src/nfagraph/ng_equivalence.h
    src/nfagraph/ng_execute.cpp
    src/nfagraph/ng_execute.h
    src/nfagraph/ng_expr_info.cpp
    src/nfagraph/ng_expr_info.h
    src/nfagraph/ng_extparam.cpp
    src/nfagraph/ng_extparam.h
    src/nfagraph/ng_fixed_width.cpp
    src/nfagraph/ng_fixed_width.h
    src/nfagraph/ng_graph.h
    src/nfagraph/ng_haig.cpp
    src/nfagraph/ng_haig.h
    src/nfagraph/ng_holder.cpp
    src/nfagraph/ng_holder.h
    src/nfagraph/ng_is_equal.cpp
    src/nfagraph/ng_is_equal.h
    src/nfagraph/ng_lbr.cpp
    src/nfagraph/ng_lbr.h
    src/nfagraph/ng_literal_analysis.cpp
    src/nfagraph/ng_literal_analysis.h
    src/nfagraph/ng_literal_component.cpp
    src/nfagraph/ng_literal_component.h
    src/nfagraph/ng_literal_decorated.cpp
    src/nfagraph/ng_literal_decorated.h
    src/nfagraph/ng_mcclellan.cpp
    src/nfagraph/ng_mcclellan.h
    src/nfagraph/ng_mcclellan_internal.h
    src/nfagraph/ng_limex.cpp
    src/nfagraph/ng_limex.h
    src/nfagraph/ng_limex_accel.cpp
    src/nfagraph/ng_limex_accel.h
    src/nfagraph/ng_misc_opt.cpp
    src/nfagraph/ng_misc_opt.h
    src/nfagraph/ng_netflow.cpp
    src/nfagraph/ng_netflow.h
    src/nfagraph/ng_prefilter.cpp
    src/nfagraph/ng_prefilter.h
    src/nfagraph/ng_prune.cpp
    src/nfagraph/ng_prune.h
    src/nfagraph/ng_puff.cpp
    src/nfagraph/ng_puff.h
    src/nfagraph/ng_redundancy.cpp
    src/nfagraph/ng_redundancy.h
    src/nfagraph/ng_region.cpp
    src/nfagraph/ng_region.h
    src/nfagraph/ng_region_redundancy.cpp
    src/nfagraph/ng_region_redundancy.h
    src/nfagraph/ng_repeat.cpp
    src/nfagraph/ng_repeat.h
    src/nfagraph/ng_reports.cpp
    src/nfagraph/ng_reports.h
    src/nfagraph/ng_restructuring.cpp
    src/nfagraph/ng_restructuring.h
    src/nfagraph/ng_revacc.cpp
    src/nfagraph/ng_revacc.h
    src/nfagraph/ng_rose.cpp
    src/nfagraph/ng_rose.h
    src/nfagraph/ng_sep.cpp
    src/nfagraph/ng_sep.h
    src/nfagraph/ng_small_literal_set.cpp
    src/nfagraph/ng_small_literal_set.h
    src/nfagraph/ng_som.cpp
    src/nfagraph/ng_som.h
    src/nfagraph/ng_som_add_redundancy.cpp
    src/nfagraph/ng_som_add_redundancy.h
    src/nfagraph/ng_som_util.cpp
    src/nfagraph/ng_som_util.h
    src/nfagraph/ng_split.cpp
    src/nfagraph/ng_split.h
    src/nfagraph/ng_squash.cpp
    src/nfagraph/ng_squash.h
    src/nfagraph/ng_stop.cpp
    src/nfagraph/ng_stop.h
    src/nfagraph/ng_uncalc_components.cpp
    src/nfagraph/ng_uncalc_components.h
    src/nfagraph/ng_undirected.h
    src/nfagraph/ng_utf8.cpp
    src/nfagraph/ng_utf8.h
    src/nfagraph/ng_util.cpp
    src/nfagraph/ng_util.h
    src/nfagraph/ng_vacuous.cpp
    src/nfagraph/ng_vacuous.h
    src/nfagraph/ng_width.cpp
    src/nfagraph/ng_width.h
    src/parser/AsciiComponentClass.cpp
    src/parser/AsciiComponentClass.h
    src/parser/Component.cpp
    src/parser/Component.h
    src/parser/ComponentAlternation.cpp
    src/parser/ComponentAlternation.h
    src/parser/ComponentAssertion.cpp
    src/parser/ComponentAssertion.h
    src/parser/ComponentAtomicGroup.cpp
    src/parser/ComponentAtomicGroup.h
    src/parser/ComponentBackReference.cpp
    src/parser/ComponentBackReference.h
    src/parser/ComponentBoundary.cpp
    src/parser/ComponentBoundary.h
    src/parser/ComponentByte.cpp
    src/parser/ComponentByte.h
    src/parser/ComponentClass.cpp
    src/parser/ComponentClass.h
    src/parser/ComponentCondReference.cpp
    src/parser/ComponentCondReference.h
    src/parser/ComponentEUS.cpp
    src/parser/ComponentEUS.h
    src/parser/ComponentEmpty.cpp
    src/parser/ComponentEmpty.h
    src/parser/ComponentRepeat.cpp
    src/parser/ComponentRepeat.h
    src/parser/ComponentSequence.cpp
    src/parser/ComponentSequence.h
    src/parser/ComponentVisitor.cpp
    src/parser/ComponentVisitor.h
    src/parser/ComponentWordBoundary.cpp
    src/parser/ComponentWordBoundary.h
    src/parser/ConstComponentVisitor.cpp
    src/parser/ConstComponentVisitor.h
    src/parser/Parser.cpp
    src/parser/Parser.h
    src/parser/Utf8ComponentClass.cpp
    src/parser/Utf8ComponentClass.h
    src/parser/buildstate.cpp
    src/parser/buildstate.h
    src/parser/check_refs.cpp
    src/parser/check_refs.h
    src/parser/parse_error.cpp
    src/parser/parse_error.h
    src/parser/parser_util.cpp
    src/parser/position.h
    src/parser/position_info.h
    src/parser/prefilter.cpp
    src/parser/prefilter.h
    src/parser/shortcut_literal.cpp
    src/parser/shortcut_literal.h
    src/parser/ucp_table.cpp
    src/parser/ucp_table.h
    src/parser/unsupported.cpp
    src/parser/unsupported.h
    src/parser/utf8_validate.h
    src/parser/utf8_validate.cpp
    src/sidecar/sidecar_compile.cpp
    src/sidecar/sidecar_compile.h
    src/smallwrite/smallwrite_build.cpp
    src/smallwrite/smallwrite_build.h
    src/smallwrite/smallwrite_internal.h
    src/som/slot_manager.cpp
    src/som/slot_manager.h
    src/som/slot_manager_internal.h
    src/som/som.h
    src/rose/rose_build.h
    src/rose/rose_build_add.cpp
    src/rose/rose_build_add_internal.h
    src/rose/rose_build_add_mask.cpp
    src/rose/rose_build_anchored.cpp
    src/rose/rose_build_anchored.h
    src/rose/rose_build_bytecode.cpp
    src/rose/rose_build_compile.cpp
    src/rose/rose_build_convert.cpp
    src/rose/rose_build_convert.h
    src/rose/rose_build_impl.h
    src/rose/rose_build_infix.cpp
    src/rose/rose_build_infix.h
    src/rose/rose_build_lookaround.cpp
    src/rose/rose_build_lookaround.h
    src/rose/rose_build_merge.cpp
    src/rose/rose_build_merge.h
    src/rose/rose_build_misc.cpp
    src/rose/rose_build_role_aliasing.cpp
    src/rose/rose_build_scatter.cpp
    src/rose/rose_build_scatter.h
    src/rose/rose_build_util.h
    src/rose/rose_build_width.cpp
    src/rose/rose_build_width.h
    src/rose/rose_graph.h
    src/rose/rose_in_graph.h
    src/rose/rose_in_util.cpp
    src/rose/rose_in_util.h
    src/util/alloc.cpp
    src/util/alloc.h
    src/util/bitfield.h
    src/util/boundary_reports.h
    src/util/charreach.cpp
    src/util/charreach.h
    src/util/charreach_util.h
    src/util/compare.h
    src/util/compile_context.cpp
    src/util/compile_context.h
    src/util/compile_error.cpp
    src/util/compile_error.h
    src/util/container.h
    src/util/cpuid_flags.c
    src/util/cpuid_flags.h
    src/util/depth.cpp
    src/util/depth.h
    src/util/determinise.h
    src/util/dump_mask.cpp
    src/util/dump_mask.h
    src/util/graph.h
    src/util/internal_report.h
    src/util/multibit_build.cpp
    src/util/multibit_build.h
    src/util/order_check.h
    src/util/partial_store.h
    src/util/partitioned_set.h
    src/util/popcount.h
    src/util/queue_index_factory.h
    src/util/report.cpp
    src/util/report.h
    src/util/report_manager.cpp
    src/util/report_manager.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/target_info.cpp
    src/util/target_info.h
    src/util/ue2_containers.h
    src/util/ue2string.cpp
    src/util/ue2string.h
    src/util/unaligned.h
    src/util/unicode_def.h
    src/util/unicode_set.h
    src/util/uniform_ops.h
    src/util/verify_types.h
)
# Dump-code sources; they only become part of hs_SRCS when DUMP_SUPPORT is on.
set(hs_dump_SRCS
    src/scratch_dump.cpp
    src/scratch_dump.h
    src/fdr/fdr_dump.cpp
    src/hwlm/hwlm_dump.cpp
    src/hwlm/hwlm_dump.h
    src/nfa/accel_dump.cpp
    src/nfa/accel_dump.h
    src/nfa/castle_dump.cpp
    src/nfa/castle_dump.h
    src/nfagraph/ng_dump.cpp
    src/nfagraph/ng_dump.h
    src/nfa/goughcompile_dump.cpp
    src/nfa/goughcompile_dump.h
    src/nfa/goughdump.cpp
    src/nfa/goughdump.h
    src/nfa/lbr_dump.cpp
    src/nfa/limex_dump.cpp
    src/nfa/mcclellandump.cpp
    src/nfa/mcclellandump.h
    src/nfa/mpv_dump.cpp
    src/nfa/nfa_dump_api.h
    src/nfa/nfa_dump_dispatch.cpp
    src/nfa/nfa_dump_internal.cpp
    src/nfa/nfa_dump_internal.h
    src/parser/dump.cpp
    src/parser/dump.h
    src/parser/position_dump.h
    src/sidecar/sidecar_dump.cpp
    src/sidecar/sidecar_dump.h
    src/smallwrite/smallwrite_dump.cpp
    src/smallwrite/smallwrite_dump.h
    src/som/slot_manager_dump.cpp
    src/som/slot_manager_dump.h
    src/rose/rose_build_dump.cpp
    src/rose/rose_build_dump.h
    src/rose/rose_in_dump.cpp
    src/rose/rose_in_dump.h
    src/rose/rose_dump.cpp
    src/rose/rose_dump.h
    src/util/dump_charclass.cpp
    src/util/dump_charclass.h
)

if (DUMP_SUPPORT)
    list(APPEND hs_SRCS ${hs_dump_SRCS})
endif ()
# Things are grouped into sub-libraries; each is described in a static and a
# shared flavour and the options decide which ones actually get built.
set(LIB_VERSION ${HS_VERSION})
set(LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})

# Runtime objects for the static libraries.
add_library(hs_exec OBJECT ${hs_exec_SRCS})
add_dependencies(hs_exec ${fdr_autogen_targets})

# The same sources again, built as position-independent code for the shared
# libraries.
if (BUILD_SHARED_LIBS OR BUILD_STATIC_AND_SHARED)
    add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
    add_dependencies(hs_exec_shared ${fdr_autogen_targets})
    set_target_properties(hs_exec_shared
        PROPERTIES
            POSITION_INDEPENDENT_CODE TRUE)
endif ()
# Runtime-only library. hs_version.c is listed explicitly because some build
# systems refuse to create a library without any source file of its own
# (I'm looking at you Xcode).
add_library(hs_runtime STATIC
    src/hs_version.c
    $<TARGET_OBJECTS:hs_exec>)
set_target_properties(hs_runtime
    PROPERTIES
        LINKER_LANGUAGE C)

# The static runtime is installed unless only shared libraries were asked for.
if (NOT BUILD_SHARED_LIBS)
    install(TARGETS hs_runtime DESTINATION lib)
endif ()

# Shared flavour, built from the position-independent objects and given the
# same output name as the static library.
if (BUILD_SHARED_LIBS OR BUILD_STATIC_AND_SHARED)
    add_library(hs_runtime_shared SHARED
        src/hs_version.c
        $<TARGET_OBJECTS:hs_exec_shared>)
    set_target_properties(hs_runtime_shared
        PROPERTIES
            VERSION ${LIB_VERSION}
            SOVERSION ${LIB_SOVERSION}
            OUTPUT_NAME hs_runtime
            MACOSX_RPATH ON
            LINKER_LANGUAGE C)
    install(TARGETS hs_runtime_shared DESTINATION lib)
endif ()
# Full library: hs_SRCS plus the runtime objects. The static library is always
# built because we want it for testing.
add_library(hs STATIC
    ${hs_SRCS}
    $<TARGET_OBJECTS:hs_exec>)
# Build-order dependencies on the Ragel parser target and the autogen
# compiler targets.
add_dependencies(hs
    ragel_Parser
    autogen_compiler
    autogen_teddy_compiler)

# Installed unless only shared libraries were asked for.
if (NOT BUILD_SHARED_LIBS)
    install(TARGETS hs DESTINATION lib)
endif ()

# Shared flavour, built from the position-independent runtime objects and
# named "hs" on disk.
if (BUILD_SHARED_LIBS OR BUILD_STATIC_AND_SHARED)
    add_library(hs_shared SHARED
        ${hs_SRCS}
        $<TARGET_OBJECTS:hs_exec_shared>)
    add_dependencies(hs_shared
        ragel_Parser
        autogen_compiler
        autogen_teddy_compiler)
    set_target_properties(hs_shared
        PROPERTIES
            OUTPUT_NAME hs
            VERSION ${LIB_VERSION}
            SOVERSION ${LIB_SOVERSION}
            MACOSX_RPATH ON)
    install(TARGETS hs_shared DESTINATION lib)
endif ()
# The example programs are skipped on Windows.
if (NOT WIN32)
    add_subdirectory(examples)
endif ()

26
COPYING Normal file
View File

@ -0,0 +1,26 @@
Copyright (c) 2015, Intel Corporation
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

118
LICENSE Normal file
View File

@ -0,0 +1,118 @@
Hyperscan is licensed under the BSD License.
Copyright (c) 2015, Intel Corporation
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
This product also contains code from third parties, under the following
licenses:
Intel's Slicing-by-8 CRC32 implementation
-----------------------------------------
Copyright (c) 2004-2006, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Boost C++ Headers Library
-------------------------
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
The Google C++ Testing Framework (Google Test)
----------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
README.md Normal file
View File

@ -0,0 +1,22 @@
# Hyperscan
Hyperscan is a high-performance multiple regex matching library. It follows the
regular expression syntax of the commonly-used libpcre library, but is a
standalone library with its own C API.
Hyperscan uses hybrid automata techniques to allow simultaneous matching of
large numbers (up to tens of thousands) of regular expressions and for the
matching of regular expressions across streams of data.
Hyperscan is typically used in a DPI (deep packet inspection) library stack.
# Documentation
Information on building the Hyperscan library and using its API is available in
the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).
# License
Hyperscan is licensed under the BSD License. See the LICENSE file in the
project repository.

54
cmake/backtrace.cmake Normal file
View File

@ -0,0 +1,54 @@
# Probe for a usable `backtrace' and work out the flags it needs.
#
# The `backtrace' function is available on Linux via glibc, and on FreeBSD if
# the 'libexecinfo' package is installed.
#
# Three probes are tried in order: plain libc, libc plus -lexecinfo, and
# -lexecinfo under /usr/local. The first one that compiles wins.
#
# Results:
#   HAVE_BACKTRACE    - TRUE when one of the probes succeeded (also exported
#                       to the parent scope)
#   BACKTRACE_CFLAGS  - extra compile flags, possibly empty
#   BACKTRACE_LDFLAGS - extra link flags, possibly empty
#
# This file does not include CheckCSourceCompiles / CheckCCompilerFlag itself;
# the including file must already have made those commands available.

CHECK_C_SOURCE_COMPILES(
    "#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
    BACKTRACE_LIBC)

if(BACKTRACE_LIBC)
    set(HAVE_BACKTRACE TRUE)
    set(BACKTRACE_CFLAGS "")
    set(BACKTRACE_LDFLAGS "")
endif()

if(NOT BACKTRACE_LIBC)
    # FreeBSD 10 has backtrace but requires libexecinfo
    list(INSERT CMAKE_REQUIRED_LIBRARIES 0 "-lexecinfo")
    CHECK_C_SOURCE_COMPILES(
        "#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
        BACKTRACE_LIBEXECINFO)
    # undo the temporary addition so later checks are not affected
    list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES "-lexecinfo")

    if(BACKTRACE_LIBEXECINFO)
        set(HAVE_BACKTRACE TRUE)
        set(BACKTRACE_CFLAGS "")
        set(BACKTRACE_LDFLAGS "-lexecinfo")
    else()
        # older FreeBSD requires it from ports
        list(INSERT CMAKE_REQUIRED_INCLUDES 0 "/usr/local/include")
        list(INSERT CMAKE_REQUIRED_LIBRARIES 0 "-L/usr/local/lib -lexecinfo")
        CHECK_C_SOURCE_COMPILES(
            "#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
            BACKTRACE_LIBEXECINFO_LOCAL)
        # REMOVE_ITEM takes values, not an index: only the entries added
        # above are named here, so unrelated elements are left alone.
        list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES "/usr/local/include")
        list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES "-L/usr/local/lib -lexecinfo")

        if(BACKTRACE_LIBEXECINFO_LOCAL)
            set(HAVE_BACKTRACE TRUE)
            set(BACKTRACE_CFLAGS "-I/usr/local/include")
            set(BACKTRACE_LDFLAGS "-L/usr/local/lib -lexecinfo")
        endif()
    endif()
endif()

if(HAVE_BACKTRACE)
    # -rdynamic is prepended to the link flags when the compiler accepts it
    CHECK_C_COMPILER_FLAG(-rdynamic HAS_RDYNAMIC)
    if(HAS_RDYNAMIC)
        list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic)
    endif()
    # cmake scope fun: also publish the result one scope up.
    # NOTE(review): PARENT_SCOPE needs an enclosing scope - confirm how this
    # file is included.
    set(HAVE_BACKTRACE ${HAVE_BACKTRACE} PARENT_SCOPE)
else()
    # no usable backtrace: leave both flag variables defined but empty
    set(BACKTRACE_CFLAGS "")
    set(BACKTRACE_LDFLAGS "")
endif()

101
cmake/config.h.in Normal file
View File

@ -0,0 +1,101 @@
/* used by cmake
 *
 * Template for the generated config.h. When processed by CMake's
 * configure_file(), each "#cmakedefine X" line below becomes "#define X"
 * (with no value) if the CMake variable X is set to a true value, and a
 * commented-out "#undef X" otherwise. Code should therefore test these
 * macros with #ifdef / defined(), not by value. */
/* "Define if the build is 32 bit" */
#cmakedefine ARCH_32_BIT
/* "Define if the build is 64 bit" */
#cmakedefine ARCH_64_BIT
/* "Define if building for IA32" */
#cmakedefine ARCH_IA32
/* "Define if building for EM64T" */
#cmakedefine ARCH_X86_64
/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT
/* Build tools with threading support */
#cmakedefine ENABLE_TOOLS_THREADS
/* Defined if `backtrace' works. */
#cmakedefine HAVE_BACKTRACE
/* C compiler has __builtin_assume_aligned */
#cmakedefine HAVE_CC_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has __builtin_assume_aligned */
#cmakedefine HAVE_CXX_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has x86intrin.h */
#cmakedefine HAVE_CXX_X86INTRIN_H
/* C compiler has x86intrin.h */
#cmakedefine HAVE_C_X86INTRIN_H
/* C++ compiler has intrin.h */
#cmakedefine HAVE_CXX_INTRIN_H
/* C compiler has intrin.h */
#cmakedefine HAVE_C_INTRIN_H
/* Defined if you have the declaration of `pthread_barrier_init'; left
   undefined if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_BARRIER_INIT
/* Defined if you have the declaration of `pthread_setaffinity_np'; left
   undefined if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
/* Defined if you have the `malloc_info' function. */
#cmakedefine HAVE_MALLOC_INFO
/* Defined if you have the `memmem' function. */
#cmakedefine HAVE_MEMMEM
/* Defined if you have a working `mmap' system call. */
#cmakedefine HAVE_MMAP
/* Defined if `posix_memalign' works. */
#cmakedefine HAVE_POSIX_MEMALIGN
/* Defined if you have the <pthread.h> header file. */
#cmakedefine HAVE_PTHREAD_H
/* Defined if you have the `setrlimit' function. */
#cmakedefine HAVE_SETRLIMIT
/* Defined if you have the `shmget' function. */
#cmakedefine HAVE_SHMGET
/* Defined if you have the `sigaction' function. */
#cmakedefine HAVE_SIGACTION
/* Defined if you have the `sigaltstack' function. */
#cmakedefine HAVE_SIGALTSTACK
/* Define if the sqlite3_open_v2 call is available */
#cmakedefine HAVE_SQLITE3_OPEN_V2
/* Defined if you have the <tmmintrin.h> header file. */
#cmakedefine HAVE_TMMINTRIN_H
/* Defined if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H
/* Defined if you have the `_aligned_malloc' function. */
#cmakedefine HAVE__ALIGNED_MALLOC
/* Optimize, inline critical functions */
#cmakedefine HS_OPTIMIZE
/* Version components and build date. These lines carry no value, so they
   expand to bare defines rather than to the version strings themselves.
   NOTE(review): presumably the actual values are provided elsewhere (a
   generated hs_version.h is listed in .gitignore) - confirm. */
#cmakedefine HS_VERSION
#cmakedefine HS_MAJOR_VERSION
#cmakedefine HS_MINOR_VERSION
#cmakedefine HS_PATCH_VERSION
#cmakedefine BUILD_DATE
/* define if this is a release build. */
#cmakedefine RELEASE_BUILD

9
cmake/platform.cmake Normal file
View File

@ -0,0 +1,9 @@
# determine the target arch
# really only interested in the preprocessor here: each probe fails to compile
# (via #error) unless the compiler predefines the matching architecture macro.
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_64_BIT)
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)

# the x86-specific names simply mirror the results of the two probes above
set(ARCH_X86_64 ${ARCH_64_BIT})
set(ARCH_IA32 ${ARCH_32_BIT})

16
cmake/ragel.cmake Normal file
View File

@ -0,0 +1,16 @@
# function for doing all the dirty work in turning a .rl into C++
#
# ragelmaker(<src_rl>)
#
#   src_rl - path of the Ragel source, relative to CMAKE_CURRENT_SOURCE_DIR
#
# Adds a build rule that runs ${RAGEL} on the .rl file and writes
# <name>.cpp into the matching sub-directory of CMAKE_CURRENT_BINARY_DIR,
# plus a custom target "ragel_<name>" that depends on that generated file.
# RAGEL must already hold the ragel executable; it is not looked up here.
#
# Example: ragelmaker(src/parser/Parser.rl) creates target ragel_Parser.
function(ragelmaker src_rl)
    get_filename_component(src_dir ${src_rl} PATH) # old cmake needs PATH
    get_filename_component(src_file ${src_rl} NAME_WE)
    set(rl_out_dir ${CMAKE_CURRENT_BINARY_DIR}/${src_dir})
    set(rl_out ${rl_out_dir}/${src_file}.cpp)
    add_custom_command(
        OUTPUT ${rl_out}
        # the output directory may not exist yet in a fresh build tree
        COMMAND ${CMAKE_COMMAND} -E make_directory ${rl_out_dir}
        COMMAND ${RAGEL} ${CMAKE_CURRENT_SOURCE_DIR}/${src_rl} -o ${rl_out}
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_rl}
        # escape arguments identically on every platform/generator
        VERBATIM
        )
    add_custom_target(ragel_${src_file} DEPENDS ${rl_out})
    set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction()

View File

@ -0,0 +1,35 @@
# Developer reference documentation.
#
# Two optional custom targets are defined here:
#   dev-reference-doxygen - runs Doxygen on the configured doxyfile
#   dev-reference         - runs sphinx-build to produce the HTML reference
# Each target is only created when its tool is found. When both exist,
# dev-reference is ordered after dev-reference-doxygen.

find_program(DOXYGEN doxygen)
if (NOT DOXYGEN)
    message(STATUS "Doxygen not found, unable to generate API reference")
else()
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/hyperscan.doxyfile.in"
                   "${CMAKE_CURRENT_BINARY_DIR}/hyperscan.doxyfile" @ONLY)
    add_custom_target(dev-reference-doxygen
        ${DOXYGEN} ${CMAKE_CURRENT_BINARY_DIR}/hyperscan.doxyfile
        COMMENT "Building doxygen XML for API reference"
        VERBATIM)
endif()

find_program(SPHINX_BUILD sphinx-build)
if (NOT SPHINX_BUILD)
    message(STATUS "Sphinx not found, unable to generate developer reference")
else()
    set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
    set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
    set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
                   "${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
    add_custom_target(dev-reference
        ${SPHINX_BUILD}
        -b html
        -c "${CMAKE_CURRENT_BINARY_DIR}"
        -d "${SPHINX_CACHE_DIR}"
        "${CMAKE_CURRENT_SOURCE_DIR}"
        "${SPHINX_HTML_DIR}"
        COMMENT "Building HTML dev reference with Sphinx"
        VERBATIM)
    # Target-level ordering goes through add_dependencies(); the DEPENDS
    # keyword of add_custom_target() is meant for files. The guard covers the
    # case where Sphinx is present but Doxygen is not, in which case the
    # doxygen target was never created.
    if (TARGET dev-reference-doxygen)
        add_dependencies(dev-reference dev-reference-doxygen)
    endif()
endif()

View File

@ -0,0 +1,4 @@
/* Differentiate the way we display regex fragments: anything carrying the
 * "regexp" class is drawn in dark red. !important makes this colour win over
 * other colour rules that would otherwise apply to the same element.
 * NOTE(review): presumably this class is what the :regexp: role used in the
 * .rst sources renders to - confirm against the generated HTML. */
.regexp {
    color: darkred !important;
}

View File

@ -0,0 +1,53 @@
.. _api_constants:
########################
API Reference: Constants
########################
***********
Error Codes
***********
.. doxygengroup:: HS_ERROR
:content-only:
:no-link:
*****************
hs_expr_ext flags
*****************
.. doxygengroup:: HS_EXT_FLAG
:content-only:
:no-link:
*************
Pattern flags
*************
.. doxygengroup:: HS_PATTERN_FLAG
:content-only:
:no-link:
*************************
CPU feature support flags
*************************
.. doxygengroup:: HS_CPU_FEATURES_FLAG
:content-only:
:no-link:
****************
CPU tuning flags
****************
.. doxygengroup:: HS_TUNE_FLAG
:content-only:
:no-link:
******************
Compile mode flags
******************
.. doxygengroup:: HS_MODE_FLAG
:content-only:
:no-link:

View File

@ -0,0 +1,29 @@
.. _api_files:
####################
API Reference: Files
####################
**********
File: hs.h
**********
.. doxygenfile:: hs.h
*****************
File: hs_common.h
*****************
.. doxygenfile:: hs_common.h
******************
File: hs_compile.h
******************
.. doxygenfile:: hs_compile.h
******************
File: hs_runtime.h
******************
.. doxygenfile:: hs_runtime.h

View File

@ -0,0 +1,365 @@
.. include:: <isonum.txt>
.. _compilation:
##################
Compiling Patterns
##################
*******************
Building a Database
*******************
The Hyperscan compiler API accepts regular expressions and converts them into a
compiled pattern database that can then be used to scan data.
The API provides three functions that compile regular expressions into
databases:
#. :c:func:`hs_compile`: compiles a single expression into a pattern database.
#. :c:func:`hs_compile_multi`: compiles an array of expressions into a pattern
database. All of the supplied patterns will be scanned for concurrently at
scan time, with user-supplied identifiers returned when they match.
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
but allows :ref:`extparam` to be specified for each expression.
Compilation allows the Hyperscan library to analyze the given pattern(s) and
pre-determine how to scan for these patterns in an optimized fashion that would
be far too expensive to compute at run-time.
When compiling expressions, a decision needs to be made whether the resulting
compiled patterns are to be used in a streaming, block or vectored mode:
- **Streaming mode**: the target data to be scanned is a continuous stream, not
all of which is available at once; blocks of data are scanned in sequence and
matches may span multiple blocks in a stream. In streaming mode, each stream
requires a block of memory to store its state between scan calls.
- **Block mode**: the target data is a discrete, contiguous block which can be
scanned in one call and does not require state to be retained.
- **Vectored mode**: the target data consists of a list of non-contiguous
blocks that are available all at once. As for block mode, no retention of
state is required.
To compile patterns to be used in streaming mode, the ``mode`` parameter of
:c:func:`hs_compile` must be set to :c:member:`HS_MODE_STREAM`; similarly,
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
for one mode (streaming, block or vectored) can only be used in that mode. The
version of Hyperscan used to produce a compiled pattern database must match the
version of Hyperscan used to scan with it.
Hyperscan provides support for targeting a database at a particular CPU
platform; see :ref:`instr_specialization` for details.
***************
Pattern Support
***************
Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"),
described at <http://www.pcre.org/>. However, not all constructs available in
libpcre are supported. The use of unsupported constructs will result in
compilation errors.
====================
Supported Constructs
====================
The following regex constructs are supported by Hyperscan:
* Literal characters and strings, with all libpcre quoting and character
escapes.
* Character classes such as :regexp:`.` (dot), :regexp:`[abc]`, and
:regexp:`[^abc]`, as well as the predefined character classes :regexp:`\\s`,
:regexp:`\\d`, :regexp:`\\w`, :regexp:`\\v`, and :regexp:`\\h` and their
negated counterparts (:regexp:`\\S`, :regexp:`\\D`, :regexp:`\\W`,
:regexp:`\\V`, and :regexp:`\\H`).
* The POSIX named character classes :regexp:`[[:xxx:]]` and negated named
character classes :regexp:`[[:^xxx:]]`.
* Unicode character properties, such as :regexp:`\\p{L}`, :regexp:`\\P{Sc}`,
:regexp:`\\p{Greek}`.
* Quantifiers:
* Quantifiers such as :regexp:`?`, :regexp:`*` and :regexp:`+` are supported
when applied to arbitrary supported sub-expressions.
* Bounded repeat quantifiers such as :regexp:`{n}`, :regexp:`{m,n}`,
:regexp:`{n,}` are supported with limitations.
* For arbitrary repeated sub-patterns: *n* and *m* should be either small
or infinite, e.g. :regexp:`(a|b){4}`, :regexp:`(ab?c?d){4,10}` or
:regexp:`(ab(cd)*){6,}`.
* For single-character width sub-patterns such as :regexp:`[^\\a]` or
:regexp:`.` or :regexp:`x`, nearly all repeat counts are supported, except
where repeats are extremely large (maximum bound greater than 32767).
Stream states may be very large for large bounded repeats, e.g.
:regexp:`a.{2000}b`. Note: such sub-patterns may be considerably
cheaper if at the beginning or end of patterns and especially if the
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
:regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all
matches).
* Parenthesization, including the named and unnamed capturing and
non-capturing forms. However, capturing is ignored.
* Alternation with the :regexp:`|` symbol, as in :regexp:`foo|bar`.
* The anchors :regexp:`^`, :regexp:`$`, :regexp:`\\A`, :regexp:`\\Z` and
:regexp:`\\z`.
* Option modifiers for:
* Case-sensitivity: :regexp:`(?i)` and :regexp:`(?-i)`
* Multi-line: :regexp:`(?m)` and :regexp:`(?-m)`
* Dot-all: :regexp:`(?s)` and :regexp:`(?-s)`
* Extended syntax: :regexp:`(?x)` and :regexp:`(?-x)`
* The :regexp:`\\b` and :regexp:`\\B` zero-width assertions (word boundary and
'not word boundary', respectively).
* Comments in :regexp:`(?# comment)` syntax.
* The :regexp:`(*UTF8)` and :regexp:`(*UCP)` control verbs at the beginning of a
pattern, used to enable UTF-8 and UCP mode.
.. note:: Bounded-repeat quantifiers with large repeat counts of arbitrary
expressions (e.g. :regexp:`([a-z]|bc*d|xy?z){1000,5000}`) will result in a
"Pattern too large" error at pattern compile time.
.. note:: At this time, not all patterns can be successfully compiled with the
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
:ref:`som`. The patterns that support this flag are a subset of patterns that
can be successfully compiled with Hyperscan; notably, many bounded repeat
forms that can be compiled with Hyperscan without the Start of Match flag
enabled cannot be compiled with the flag enabled.
======================
Unsupported Constructs
======================
The following regex constructs are not supported by Hyperscan:
* Backreferences and capturing sub-expressions.
* Arbitrary zero-width assertions.
* Subroutine references and recursive patterns.
* Conditional patterns.
* Backtracking control verbs.
* The :regexp:`\\C` "single-byte" directive (which breaks UTF-8 sequences).
* The :regexp:`\\R` newline match.
* The :regexp:`\\K` start of match reset directive.
* Callouts and embedded code.
* Atomic grouping and possessive quantifiers.
*********
Semantics
*********
While Hyperscan follows libpcre syntax, it provides different semantics. The
major departures from libpcre semantics are motivated by the requirements of
streaming and multiple simultaneous pattern matching.
The major departures from libpcre semantics are:
#. **Multiple pattern matching**: Hyperscan allows matches to be reported for
several patterns simultaneously. This is not equivalent to separating the
patterns by :regexp:`|` in libpcre, which evaluates alternations
left-to-right.
#. **Lack of ordering**: the multiple matches that Hyperscan produces are not
guaranteed to be ordered, although they will always fall within the bounds of
the current scan.
#. **End offsets only**: Hyperscan's default behaviour is only to report the end
offset of a match. Reporting of the start offset can be enabled with
per-expression flags at pattern compile time. See :ref:`som` for details.
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
``fooxyzbarbar`` will return two matches from Hyperscan -- at the points
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
libpcre semantics by default would report only one match at ``fooxyzbarbar``
(greedy semantics) or, if non-greedy semantics were switched on, one match at
``fooxyzbar``. This means that switching between greedy and non-greedy
semantics is a no-op in Hyperscan.
To support libpcre quantifier semantics while accurately reporting streaming
matches at the time they occur is impossible. For example, consider the pattern
above, :regexp:`/foo.*bar/`, in streaming mode, against the following
stream (three blocks scanned in sequence):
============= ======= ========
block 1 block 2 block 3
============= ======= ========
``fooxyzbar`` ``baz`` ``qbar``
============= ======= ========
Since the :regexp:`.*` repeat in the pattern is a *greedy* repeat in libpcre, it
must match as much as possible without causing the rest of the pattern to fail.
However, in streaming mode, this would require knowledge of data in the stream
beyond the current block being scanned.
In this example, the match at offset 9 in the first block is only the correct
match (under libpcre semantics) if there is no ``bar`` in a subsequent block --
as in block 3 -- which would constitute a better match for the pattern.
.. _som:
==============
Start of Match
==============
In standard operation, Hyperscan will only provide the end offset of a match
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
is specified for a particular pattern, then the same set of matches is
returned, but each match will also provide the leftmost possible start offset
corresponding to its end offset.
Using the SOM flag entails a number of trade-offs and limitations:
* Reduced pattern support: For many patterns, tracking SOM is complex and can
result in Hyperscan failing to compile a pattern with a "Pattern too
large" error, even if the pattern is supported in normal operation.
* Increased stream state: At scan time, state space is required to track
potential SOM offsets, and this must be stored in persistent stream state in
streaming mode. Accordingly, SOM will generally increase the stream state
required to match a pattern.
* Performance overhead: Similarly, there is generally a performance cost
associated with tracking SOM.
* Incompatible features: Some other Hyperscan pattern flags (such as
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) can not be
used in combination with SOM. Specifying them together with
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
In streaming mode, the amount of precision delivered by SOM can be controlled
with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM
information within a certain distance of the end offset, and return a special
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
small or medium SOM horizon will usually reduce the stream state required for a
given database.
.. note:: In streaming mode, the start offset returned for a match may refer to
a point in the stream *before* the current block being scanned. Hyperscan
provides no facility for accessing earlier blocks; if the calling application
needs to inspect historical data, then it must store it itself.
.. _extparam:
===================
Extended Parameters
===================
In some circumstances, more control over the matching behaviour of a pattern is
required than can be specified easily using regular expression syntax. For
these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function
that allows a set of "extended parameters" to be set on a per-pattern basis.
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
which provides the following fields:
* ``flags``: Flags governing which of the other fields in the structure are
used.
* ``min_offset``: The minimum end offset in the data stream at which this
expression should match successfully.
* ``max_offset``: The maximum end offset in the data stream at which this
expression should match successfully.
* ``min_length``: The minimum match length (from start to end) required to
successfully match this expression.
These parameters allow the set of matches produced by a pattern to be
constrained at compile time, rather than relying on the application to process
unwanted matches at runtime.
For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of 10
and a ``max_offset`` of 15 will not produce matches when scanned against
``foobar`` or ``foo0123456789bar`` but will produce a match against the data
streams ``foo0123bar`` or ``foo0123456bar``.
=================
Prefiltering Mode
=================
Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
be used to implement a prefilter for a pattern that Hyperscan would not
ordinarily support.
This flag instructs Hyperscan to compile an "approximate" version of this
pattern for use in a prefiltering application, even if Hyperscan does not
support the pattern in normal operation.
The set of matches returned when this flag is used is guaranteed to be a
superset of the matches specified by the non-prefiltering expression.
If the pattern contains pattern constructs not supported by Hyperscan (such as
zero-width assertions, back-references or conditional references) these
constructs will be replaced internally with broader constructs that may match
more often.
For example, the pattern :regexp:`/(\\w+) again \\1/` contains the
back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be
approximated by having its back-reference replaced with its referent, forming
:regexp:`/\\w+ again \\w+/`.
Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would
otherwise return a "Pattern too large" error at compile time, or for performance
reasons (subject to the matching guarantee above).
It is generally expected that the application will subsequently confirm
prefilter matches with another regular expression matcher that can provide exact
matches for the pattern.
.. note:: The use of this flag in combination with Start of Match mode (using
the :c:member:`HS_FLAG_SOM_LEFTMOST` flag) is not currently supported and
will result in a pattern compilation error.
.. _instr_specialization:
******************************
Instruction Set Specialization
******************************
Hyperscan is able to make use of several modern instruction set features found
on x86 processors to provide improvements in scanning performance.
Some of these features are selected when the library is built; for example,
Hyperscan will use the native ``POPCNT`` instruction on processors where it is
available and the library has been optimized for the host architecture.
.. note:: By default, the Hyperscan runtime is built with the ``-march=native``
compiler flag and (where possible) will make use of all instructions known by
the host's C compiler.
To use some instruction set features, however, Hyperscan must build a
specialized database to support them. This means that the target platform must
be specified at pattern compile time.
The Hyperscan compiler API functions all accept an optional
:c:type:`hs_platform_info_t` argument, which describes the target platform
for the database to be built. If this argument is NULL, the database will be
targeted at the current host platform.
The :c:type:`hs_platform_info_t` structure has two fields:
#. ``tune``: This allows the application to specify information about the target
platform which may be used to guide the optimisation process of the compile.
Use of this field does not limit the processors that the resulting database
can run on, but may impact the performance of the resulting database.
#. ``cpu_features``: This allows the application to specify a mask of CPU
features that may be used on the target platform. For example,
:c:member:`HS_CPU_FEATURES_AVX2` can be specified for Intel\ |reg| Advanced
Vector Extensions 2 (Intel\ |reg| AVX2) instruction set support. If a flag
for a particular CPU feature is specified, the database will not be usable on
a CPU without that feature.
An :c:type:`hs_platform_info_t` structure targeted at the current host can be
built with the :c:func:`hs_populate_platform` function.
See :ref:`api_constants` for the full list of CPU tuning and feature flags.

View File

@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
#
# Hyperscan documentation build configuration file, created by
# sphinx-quickstart on Tue Sep 29 15:59:19 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------

# Oldest Sphinx release able to build these documents (unset: no minimum).
#needs_sphinx = '1.0'

# Names of the Sphinx extension modules to load, given as strings. These may
# be bundled extensions ('sphinx.ext.*') or third-party/custom ones.
extensions = ["breathe"]

# Where to look for page templates, relative to this directory.
templates_path = ["_templates"]

# Filename suffix that marks a source document.
source_suffix = ".rst"

# Encoding of the source documents.
#source_encoding = 'utf-8-sig'

# Name of the document holding the master toctree.
master_doc = "index"

# Basic facts about the project.
project = u"Hyperscan"
copyright = u"2015, Intel Corporation"

# Version information: replaces |version| and |release| in the documents and
# is used in various other places in the generated output.
# NOTE(review): the @...@ tokens look like CMake configure_file() placeholders
# that are substituted before Sphinx reads this file -- confirm in the build.
#
# Short "X.Y" version.
version = "@HS_MAJOR_VERSION@.@HS_MINOR_VERSION@"
# Full version string, including any alpha/beta/rc tag.
release = "@HS_VERSION@"

# Language used for text that Sphinx generates itself; see the Sphinx
# documentation for the supported values.
#language = None

# |today| can be replaced in one of two ways. Either set ``today`` to any
# non-false value and it is used verbatim:
#today = ''
# or leave it unset, in which case ``today_fmt`` is passed to strftime.
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) naming files and directories
# to skip while collecting source documents.
exclude_patterns = ["_build"]

# Default reST role applied to `text` markup in every document.
#default_role = None

# When true, cross-reference text for :func: and friends gets a trailing '()'.
#add_function_parentheses = True

# When true, description unit titles (e.g. ``.. function::``) are prefixed
# with the current module name.
#add_module_names = True

# When true, sectionauthor/moduleauthor directives appear in the output;
# they are ignored by default.
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = "sphinx"

# Prefixes to ignore when sorting the module index.
#modindex_common_prefix = []

# When true, warnings are kept in the built documents as "system message"
# paragraphs.
#keep_warnings = False
# -- Options for HTML output ----------------------------------------------

# Theme for HTML and HTML Help pages; the Sphinx documentation lists the
# builtin themes.
html_theme = "alabaster"

# Per-theme settings that adjust the theme's look and feel. The available
# keys depend on the theme; consult its documentation.
html_theme_options = {
    # Override some style colours; these are the ones used for admonitions.
    "pink_1": "#e0f8ff",
    "pink_2": "#e0f8ff",
}

# Directories containing custom themes, relative to this directory.
#html_theme_path = []

# Name of this set of documents. None selects the default,
# "<project> v<release> documentation".
#html_title = None

# Abbreviated title for the navigation bar; falls back to html_title.
#html_short_title = None

# Image file (relative to this directory) shown at the top of the sidebar.
#html_logo = None

# Favicon for the docs, located within the static path. It should be a
# Windows icon file (.ico) of 16x16 or 32x32 pixels.
#html_favicon = None

# Directories with custom static files such as style sheets. Their contents
# are copied after the builtin static files, so a file called "default.css"
# replaces the builtin "default.css".
# NOTE(review): @CMAKE_CURRENT_SOURCE_DIR@ looks like a CMake placeholder
# filled in when this template is configured -- confirm in the build.
html_static_path = ["@CMAKE_CURRENT_SOURCE_DIR@/_static"]

# Directories with extra files (robots.txt, .htaccess, ...), relative to
# this directory; they are copied straight into the documentation root.
#html_extra_path = []

# When not '', every page carries a 'Last updated on:' timestamp at the
# bottom, rendered with this strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# When true, SmartyPants converts quotes and dashes into typographically
# correct entities.
#html_use_smartypants = True

# Sidebar templates to use, keyed by document name pattern.
html_sidebars = {
    "**": ["globaltoc.html", "searchbox.html"],
}

# Extra templates to render as pages, keyed by page name.
#html_additional_pages = {}

# When false, the module index is not generated.
#html_domain_indices = True

# When false, the general index is not generated.
#html_use_index = True

# When true, the index is split into one page per letter.
#html_split_index = False

# When true, pages get links to their reST sources.
html_show_sourcelink = False

# When true, the HTML footer says "Created using Sphinx". Default is True.
#html_show_sphinx = True

# When true, the HTML footer shows "(C) Copyright ...". Default is True.
#html_show_copyright = True

# When set, an OpenSearch description file is written and each page links to
# it via a <link> tag. The value must be the base URL the finished HTML is
# served from.
#html_use_opensearch = ''

# Filename suffix of generated HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Base name of the output file produced by the HTML help builder.
htmlhelp_basename = "Hyperscandoc"
# -- Options for LaTeX output ---------------------------------------------

# LaTeX builder settings; every entry is currently left at its default.
latex_elements = {
    # Paper size: 'letterpaper' or 'a4paper'.
    #'papersize': 'letterpaper',

    # Font size: '10pt', '11pt' or '12pt'.
    #'pointsize': '10pt',

    # Extra material for the LaTeX preamble.
    #'preamble': '',
}

# How the document tree maps onto LaTeX files. Each entry is a tuple of
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        "index",
        "Hyperscan.tex",
        u"Hyperscan Documentation",
        u"Intel Corporation",
        "manual",
    ),
]

# Image file (relative to this directory) placed at the top of the title
# page.
#latex_logo = None

# In "manual" documents, a true value makes top-level headings parts rather
# than chapters.
#latex_use_parts = False

# When true, internal links are followed by page references.
#latex_show_pagerefs = False

# When true, external links are followed by their URL.
#latex_show_urls = False

# Documents appended to every manual as appendices.
#latex_appendices = []

# When false, the module index is not generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------

# Manual pages to generate, one tuple each:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        "index",
        "hyperscan",
        u"Hyperscan Documentation",
        [u"Intel Corporation"],
        1,
    ),
]

# When true, external links are followed by their URL.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------

# How the document tree maps onto Texinfo files. Each entry is a tuple of
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (
        "index",
        "Hyperscan",
        u"Hyperscan Documentation",
        u"Intel Corporation",
        "Hyperscan",
        "High-performance regular expression matcher.",
        "Miscellaneous",
    ),
]

# Documents appended to every manual as appendices.
#texinfo_appendices = []

# When false, the module index is not generated.
#texinfo_domain_indices = True

# Presentation of URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# When true, no @detailmenu is emitted in the "Top" node's menu.
#texinfo_no_detailmenu = False
# -- Options for Breathe doxygen import -----------------------------------

# Breathe projects, keyed by project name; the value is the path handed to
# Breathe for that project.
breathe_projects = {
    "hyperscan": "doxygen_xml",
}

# Project used when a Breathe directive does not name one.
breathe_default_project = "hyperscan"

# Maps a file extension to a Sphinx domain: "h" files use the "c" domain.
breathe_domain_by_extension = {
    "h": "c",
}
# -- Add some customisation -----------------------------------------------
def setup(app):
app.add_stylesheet("hyperscan.css") # Custom stylesheet for e.g. :regex:

View File

@ -0,0 +1,33 @@
.. include:: <isonum.txt>
#########
Copyright
#########
No license (express or implied, by estoppel or otherwise) to any intellectual
property rights is granted by this document.
Intel disclaims all express and implied warranties, including without
limitation, the implied warranties of merchantability, fitness for a particular
purpose, and non-infringement, as well as any warranty arising from course of
performance, course of dealing, or usage in trade.
This document contains information on products, services and/or processes in
development. All information provided here is subject to change without
notice. Contact your Intel representative to obtain the latest forecast,
schedule, specifications and roadmaps.
The products and services described may contain defects or errors known as
errata which may cause deviations from published specifications. Current
characterized errata are available on request.
Copies of documents which have an order number and are referenced in this
document, or other Intel literature, may be obtained by calling 1-800-548-4725,
or go to: <http://www.intel.com/design/literature.htm>.
Intel, and the Intel logo, are trademarks of Intel Corporation in the U.S.
and/or other countries.
\*Other names and brands may be claimed as the property of others.
Copyright |copy| 2015, Intel Corporation. All rights reserved.

View File

@ -0,0 +1,211 @@
.. include:: <isonum.txt>
###############
Getting Started
###############
Very Quick Start
****************
#. Clone Hyperscan ::
cd <where-you-want-hyperscan-source>
git clone git://github.com/01org/hyperscan
#. Configure Hyperscan
Ensure that you have the correct :ref:`dependencies <software>` present,
and then:
::
cd <where-you-want-to-build-hyperscan>
mkdir <build-dir>
cd <build-dir>
cmake [-G <generator>] [options] <hyperscan-source-path>
Known working generators:
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
Generators that might work include:
* ``Xcode`` --- OS X Xcode projects.
* ``Visual Studio`` --- Visual Studio projects - very experimental
#. Build Hyperscan
Depending on the generator used:
* ``cmake --build .`` --- will build everything
* ``make -j<jobs>`` --- use makefiles in parallel
* ``ninja`` --- use Ninja build
* etc.
#. Check Hyperscan
Run the Hyperscan unit tests: ::
bin/unit-hyperscan
Requirements
************
Hardware
========
Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
32-bit (IA-32 Architecture) modes.
Hyperscan is a high performance software library that takes advantage of recent
Intel architecture advances. At a minimum, support for Supplemental Streaming
SIMD Extensions 3 (SSSE3) is required, which should be available on any modern
x86 processor.
Additionally, Hyperscan can make use of:
* Intel Streaming SIMD Extensions 4.2 (SSE4.2)
* the POPCNT instruction
* Bit Manipulation Instructions (BMI, BMI2)
* Intel Advanced Vector Extensions 2 (Intel AVX2)
if present.
These can be determined at library compile time, see :ref:`target_arch`.
.. _software:
Software
========
As a software library, Hyperscan doesn't impose any particular runtime
software requirements, however to build the Hyperscan library we require a
modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11
compiler support. The supported compilers are:
* GCC, v4.8.1 or higher
* Clang, v3.4 or higher (with libstdc++ or libc++)
* Intel C++ Compiler v15 or higher
Examples of operating systems that Hyperscan is known to work on include:
Linux:
* Ubuntu 14.04 LTS or newer
* RedHat/CentOS 7 or newer
FreeBSD:
* 10.0 or newer
Mac OS X:
* 10.8 or newer, using Xcode/Clang
Hyperscan *may* compile and run on other platforms, but there is no guarantee.
We currently have experimental support for Windows using Intel C++ Compiler
or Visual Studio 2015.
In addition, the following software is required for compiling the Hyperscan library:
======================================================= =========== ======================================
Dependency Version Notes
======================================================= =========== ======================================
`CMake <http://www.cmake.org/>`_ >=2.8.11
`Ragel <http://www.colm.net/open-source/ragel/>`_ 6.9
`Python <http://www.python.org/>`_ 2.7
`Boost <http://boost.org/>`_ >=1.57 Boost headers required
`Pcap <http://tcpdump.org>`_ >=0.8 Optional: needed for example code only
======================================================= =========== ======================================
Most of these dependencies can be provided by the package manager on the build
system (e.g. Debian/Ubuntu/RedHat packages, FreeBSD ports, etc). However,
ensure that the correct version is present.
Boost Headers
-------------
Compiling Hyperscan depends on a recent version of the Boost C++ header
library. If the Boost libraries are installed on the build machine in the
usual paths, CMake will find them. An alternative is to put a copy of (or a
symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
For example: for the Boost-1.59.0 release: ::
ln -s boost_1_59_0/boost <hyperscan-source-path>/include/boost
As Hyperscan uses the header-only parts of Boost, it is not necessary to
compile the Boost libraries.
CMake Configuration
===================
When CMake is invoked, it generates build files using the given options.
Options are passed to CMake in the form ``-D<variable name>=<value>``.
Common options for CMake include:
+------------------------+----------------------------------------------------+
| Variable | Description |
+========================+====================================================+
| CMAKE_C_COMPILER | C compiler to use. Default is /usr/bin/cc. |
+------------------------+----------------------------------------------------+
| CMAKE_CXX_COMPILER | C++ compiler to use. Default is /usr/bin/c++. |
+------------------------+----------------------------------------------------+
| CMAKE_INSTALL_PREFIX | Install directory for ``install`` target |
+------------------------+----------------------------------------------------+
| CMAKE_BUILD_TYPE | Define which kind of build to generate. |
| | Valid options are Debug, Release, RelWithDebInfo, |
| | and MinSizeRel. Default is RelWithDebInfo. |
+------------------------+----------------------------------------------------+
| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of |
| | the default static library. |
+------------------------+----------------------------------------------------+
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
| | Default off. |
+------------------------+----------------------------------------------------+
| DEBUG_OUTPUT | Enable very verbose debug output. Default off. |
+------------------------+----------------------------------------------------+
For example, to generate a ``Debug`` build: ::
cd <build-dir>
cmake -DCMAKE_BUILD_TYPE=Debug <hyperscan-source-path>
Build Type
----------
CMake determines a number of features for a build based on the Build Type.
Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
information". This is a performance optimized build without runtime assertions
but with debug symbols enabled.
The other types of builds are:
* ``Release``: as above, but without debug symbols
* ``MinSizeRel``: a stripped release build
* ``Debug``: used when developing Hyperscan. Includes runtime assertions
(which have a large impact on runtime performance), and will also enable
some other build features like building internal unit
tests.
.. _target_arch:
Target Architecture
-------------------
By default, Hyperscan will be compiled to target the instruction set of the
processor of the machine that is being used for compilation. This is done via
the use of ``-march=native``. The result of this means that a library built on
one machine may not work on a different machine if they differ in supported
instruction subsets.
To override the use of ``-march=native``, set appropriate flags for the
compiler in ``CFLAGS`` and ``CXXFLAGS`` environment variables before invoking
CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. For
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
cmake -DCMAKE_C_FLAGS="-march=corei7" \
-DCMAKE_CXX_FLAGS="-march=corei7" <hyperscan-source-path>
For more information, refer to :ref:`instr_specialization`.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
###############################################
Hyperscan |version| Developer's Reference Guide
###############################################
-------
|today|
-------
.. toctree::
:maxdepth: 2
copyright
preface
intro
getting_started
compilation
runtime
performance
api_constants
api_files

View File

@ -0,0 +1,78 @@
.. include:: <isonum.txt>
.. _intro:
############
Introduction
############
Hyperscan is a software regular expression matching engine designed with
high performance and flexibility in mind. It is implemented as a library that
exposes a straightforward C API.
The Hyperscan API itself is composed of two major components:
***********
Compilation
***********
These functions take a group of regular expressions, along with identifiers and
option flags, and compile them into an immutable database that can be used by
the Hyperscan scanning API. This compilation process performs considerable
analysis and optimization work in order to build a database that will match the
given expressions efficiently.
If a pattern cannot be built into a database for any reason (such as the use of
an unsupported expression construct, or the overflowing of a resource limit),
an error will be returned by the pattern compiler.
Compiled databases can be serialized and relocated, so that they can be stored
to disk or moved between hosts. They can also be targeted to particular
platform features (for example, the use of Intel\ |reg| Advanced Vector Extensions
2 (Intel\ |reg| AVX2) instructions).
See :ref:`compilation` for more detail.
********
Scanning
********
Once a Hyperscan database has been created, it can be used to scan data in
memory. Hyperscan provides several scanning modes, depending on whether the
data to be scanned is available as a single contiguous block, whether it is
distributed amongst several blocks in memory at the same time, or whether it is
to be scanned as a sequence of blocks in a stream.
Matches are delivered to the application via a user-supplied callback function
that is called synchronously for each match.
For a given database, Hyperscan provides several guarantees:
* No memory allocations occur at runtime with the exception of two
fixed-size allocations, both of which should be done ahead of time for
performance-critical applications:
- **Scratch space**: temporary memory used for internal data at scan time.
Structures in scratch space do not persist beyond the end of a single scan
call.
- **Stream state**: in streaming mode only, some state space is required to
store data that persists between scan calls for each stream. This allows
Hyperscan to track matches that span multiple blocks of data.
* The sizes of the scratch space and stream state (in streaming mode) required
for a given database are fixed and determined at database compile time. This
means that the memory requirements of the application are known ahead of
time, and these structures can be pre-allocated if required for performance
reasons.
* Any pattern that has successfully been compiled by the Hyperscan compiler can
be scanned against any input. There are no internal resource limits or other
limitations at runtime that could cause a scan call to return an error.
See :ref:`runtime` for more detail.
************
Example Code
************
Some simple example code demonstrating the use of the Hyperscan API is
available in the ``examples/`` subdirectory of the Hyperscan distribution.

View File

@ -0,0 +1,335 @@
.. _perf:
##########################
Performance Considerations
##########################
Hyperscan supports a wide range of patterns in all three scanning modes. It is
capable of extremely high levels of performance, but certain patterns can
reduce performance markedly.
The following guidelines will help construct patterns and pattern sets that
will perform better:
*****************************
Regular expression constructs
*****************************
.. tip:: Do not hand-optimize regular expression constructs.
Quite a large number of regular expressions can be written in multiple ways.
For example, caseless matching of :regexp:`/abc/` can be written as:
* :regexp:`/[Aa][Bb][Cc]/`
* :regexp:`/(A|a)(B|b)(C|c)/`
* :regexp:`/(?i)abc(?-i)/`
* :regexp:`/abc/i`
Hyperscan is capable of handling all these constructs. Unless there is a
specific reason otherwise, do not rewrite patterns from one form to another.
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
equivalently written as:
* :regexp:`/foobarfrotz|foobazfrotz|foobar|foobaz/`
This change will not improve performance or reduce overheads.
*************
Library usage
*************
.. tip:: Do not hand-optimize library usage.
The Hyperscan library is capable of dealing with small writes, unusually large
and small pattern sets, etc. Unless there is a specific performance problem
with some usage of the library, it is best to use Hyperscan in a simple and
direct fashion. For example, it is unlikely for there to be much benefit in
buffering input to the library into larger blocks unless streaming writes are
tiny (say, 1-2 bytes at a time).
Unlike many other pattern matching products, Hyperscan will run faster with
small numbers of patterns and slower with large numbers of patterns in a smooth
fashion (as opposed to, typically, running at a moderate speed up to some fixed
limit then either breaking or running half as fast).
Hyperscan also provides high-throughput matching with a single thread of
control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a
3000-bit block of data will be scanned in 1 microsecond in a single thread of
control, not that it is required to scan 22 3000-bit blocks of data in 22
microseconds. Thus, it is not usually necessary to buffer data to supply
Hyperscan with available parallelism.
********************
Block-based matching
********************
.. tip:: Prefer block-based matching to streaming matching where possible.
Whenever input data appears in discrete records, or already requires some sort
of transformation (e.g. URI normalization) that requires all the data to be
accumulated before processing, it should be scanned in block rather than in
streaming mode.
Unnecessary use of streaming mode reduces the number of optimizations that can
be applied in Hyperscan and may make some patterns run slower.
If there is a mixture of 'block' and 'streaming' mode patterns, these should be
scanned in separate databases except in the case that the streaming patterns
vastly outnumber the block mode patterns.
*********************
Unnecessary databases
*********************
.. tip:: Avoid unnecessary 'union' databases.
If there are 5 different types of network traffic T1 through T5 that must
be scanned against 5 different signature sets, it will be far more efficient to
construct 5 separate databases and scan traffic against the appropriate one
than it will be to merge all 5 signature sets and remove inappropriate matches
after the fact.
This will be true even in the case where there is substantial overlap among the
signatures. Only if the common subset of the signatures is overwhelmingly large
(say, 90% of the signatures appear in all 5 traffic types) should a database
that merges all 5 signature sets be considered, and only then if there are no
performance issues with specific patterns that appear outside the common
subset.
******************************
Allocate scratch ahead of time
******************************
.. tip:: Do not allocate scratch space for your pattern database just before
calling a scan function. Instead, do it just after the pattern database is
compiled or deserialized.
Scratch allocation is not necessarily a cheap operation. Since it is the first
time (after compilation or deserialization) that a pattern database is used,
Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
must also allocate memory.
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
called in the application's scanning path just before :c:func:`hs_scan` (for
example).
Instead, scratch should be allocated immediately after a pattern database is
compiled or deserialized, then retained for later scanning operations.
***********************************************
Allocate one scratch space per scanning context
***********************************************
.. tip:: A scratch space can be allocated so that it can be used with any one of
a number of databases. Each concurrent scan operation (such as a thread)
needs its own scratch space.
The :c:func:`hs_alloc_scratch` function can accept an existing scratch space and
"grow" it to support scanning with another pattern database. This means that
instead of allocating one scratch space for every database used by an
application, one can call :c:func:`hs_alloc_scratch` with a pointer to the same
:c:type:`hs_scratch_t` and it will be sized appropriately for use with any of
the given databases. For example:
.. code-block:: c
hs_database_t *db1 = buildDatabaseOne();
hs_database_t *db2 = buildDatabaseTwo();
hs_database_t *db3 = buildDatabaseThree();
hs_error_t err;
hs_scratch_t *scratch = NULL;
err = hs_alloc_scratch(db1, &scratch);
if (err != HS_SUCCESS) {
printf("hs_alloc_scratch failed!");
exit(1);
}
err = hs_alloc_scratch(db2, &scratch);
if (err != HS_SUCCESS) {
printf("hs_alloc_scratch failed!");
exit(1);
}
err = hs_alloc_scratch(db3, &scratch);
if (err != HS_SUCCESS) {
printf("hs_alloc_scratch failed!");
exit(1);
}
/* scratch may now be used to scan against any of
the databases db1, db2, db3. */
*****************
Anchored patterns
*****************
.. tip:: If a pattern is meant to appear at the start of data, be sure to
anchor it.
Anchored patterns (:regexp:`/^.../`) are far simpler to match than other
patterns, especially patterns anchored to the start of the buffer (or stream, in
streaming mode). Anchoring patterns to the end of the buffer results in less of
a performance gain, especially in streaming mode.
There are a variety of ways to anchor a pattern to a particular offset:
- The :regexp:`^` and :regexp:`\\A` constructs anchor the pattern to the start
of the buffer. For example, :regexp:`/^foo/` can *only* match at offset 3.
- The :regexp:`$`, :regexp:`\\z` and :regexp:`\\Z` constructs anchor the pattern
to the end of the buffer. For example, :regexp:`/foo\\z/` can only match when
the data buffer being scanned ends in ``foo``. (It should be noted that
:regexp:`$` and :regexp:`\\Z` will also match before a newline at the end of
the buffer, so :regexp:`/foo\\Z/` would match against either ``abc foo`` or
``abc foo\n``.)
- The ``min_offset`` and ``max_offset`` extended parameters may also be used to
constrain where a pattern could match. For example, the pattern
:regexp:`/foo/` with a ``max_offset`` of 10 will only match at offsets less
than or equal to 10 in the buffer. (This pattern could also be written as
:regexp:`/^.{0,7}foo/`, compiled with the :c:member:`HS_FLAG_DOTALL` flag).
*******************
Matching everywhere
*******************
.. tip:: Avoid patterns that match everywhere, and remember that our semantics
are 'match everywhere, end of match only'.
Patterns that match everywhere will run slowly due to the sheer number of
matches that they return.
Patterns like :regexp:`/.*/` in an automata-based matcher will match before and
after every single character position, so a buffer with 100 characters will
return 101 matches. Greedy pattern matchers such as libpcre will return a
single match in this case, but our semantics is to return all matches. This is
likely to be very expensive for our code and for the client code of the
library.
Another result of our semantics ("match everywhere") is that patterns that have
optional start or ending sections -- for example :regexp:`/x?abcd*/` -- may not
perform as expected.
Firstly, the :regexp:`x?` portion of the pattern is unnecessary, as it will not
affect the match results.
Secondly, the above pattern will match 'more' than :regexp:`/abc/` but
:regexp:`/abc/` will always detect any input data that will be matched by
:regexp:`/x?abcd*/` -- it will just produce fewer matches.
For example, input data ``0123abcdddd`` will match :regexp:`/abc/` once but
:regexp:`/abcd*/` five times (at ``abc``, ``abcd``, ``abcdd``, ``abcddd``, and
``abcdddd``).
*********************************
Bounded repeats in streaming mode
*********************************
.. tip:: Bounded repeats are expensive in streaming mode.
A bounded repeat construction such as :regexp:`/X.{1000,1001}abcd/` is extremely
expensive in streaming mode, of necessity. It requires us to take action on
each ``X`` character (itself expensive, relative to searching for longer strings)
and potentially record a history of hundreds of offsets where ``X`` occurred in
case the ``X`` and ``abcd`` characters are separated by a stream boundary.
Heavy and unnecessary use of bounded repeats should be avoided, especially
where other parts of a signature are quite specific. For example, a virus
signature that matches a virus payload may be sufficient without including a
prefix that includes, for example, a 2-character Windows executable prefix and
a bounded repeat beforehand.
***************
Prefer literals
***************
.. tip:: Where possible, prefer patterns which 'require' literals, especially
longer literals, and in streaming mode, prefer signatures that 'require'
literals earlier in the pattern.
Patterns which must match on a literal will run faster than patterns that do
not. For example:
- :regexp:`/\\wab\\d*\\w\\w\\w/` will run faster than
- :regexp:`/\\w\\w\\d*\\w\\w/`, or, for that matter
- :regexp:`/\\w(abc)?\\d*\\w\\w\\w/` (this contains a literal but it need
not appear in the input).
Even implicit literals are better than none: :regexp:`/[0-2][3-5].*\\w\\w/`
still effectively contains 9 2-character literals. No hand-optimization of this
case is required; this pattern will not run faster if rewritten as:
:regexp:`/(03|04|05|13|14|15|23|24|25).*\\w\\w/`.
Under all circumstances it is better to use longer literals than shorter ones.
A database consisting of 100 14-character literals will scan considerably
faster than one consisting of 100 4-character literals and return fewer
positives.
Additionally, in streaming mode, a signature that contains a longer literal
early in the pattern is preferred to one that does not.
For example: :regexp:`/b\\w*foobar/` is not as good a pattern as
:regexp:`/blah\\w*foobar/`.
The disparity between these patterns is much smaller in block mode.
Longer literals anywhere in the pattern are still preferred in streaming mode.
For example, both of the above patterns are stronger and will scan faster than
:regexp:`/b\\w*fo/` even in streaming mode.
**************
"Dot all" mode
**************
.. tip:: Use "dot all" mode where possible.
Not using the :c:member:`HS_FLAG_DOTALL` pattern flag can be expensive, as
implicitly, it means that patterns of the form :regexp:`/A.*B/` become
:regexp:`/A[^\\n]*B/`.
It is likely that scanning tasks without the DOTALL flag are better done 'line
at a time', with the newline sequences marking the beginning and end of each
block.
This will be true in most use-cases (an exception being where the DOTALL flag
is off but the pattern contains either explicit newlines or constructs such as
:regexp:`\\s` that implicitly match a newline character).
*****************
Single-match flag
*****************
.. tip:: Consider using the single-match flag to limit matches to one match per
pattern only if possible.
If only one match per pattern is required, use the flag provided to indicate
this (:c:member:`HS_FLAG_SINGLEMATCH`). This flag can allow a number of
optimizations to be applied, allowing both performance improvements and state
space reductions when streaming.
However, there is some overhead associated with tracking whether each pattern in
the pattern set has matched, and some applications with infrequent matches may
see reduced performance when the single-match flag is used.
********************
Start of Match flag
********************
.. tip:: Do not request Start of Match information if it is not needed.
Start of Match (SOM) information can be expensive to gather and can require
large amounts of stream state to store in streaming mode. As such, SOM
information should only be requested with the :c:member:`HS_FLAG_SOM_LEFTMOST`
flag for patterns that require it.
SOM information is not generally expected to be cheaper (in either performance
terms or in stream state overhead) than the use of bounded repeats.
Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after
the callback is considerably more expensive and general than
:regexp:`/foo.{300}bar/`.
Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be
used to specify a lower bound on the length of the matches for a pattern. Using
this facility may be more lightweight in some circumstances than using the SOM
flag and post-confirming match length in the calling application.

View File

@ -0,0 +1,47 @@
#######
Preface
#######
********
Overview
********
Hyperscan is a regular expression engine designed to offer high performance, the
ability to match multiple expressions simultaneously and flexibility in
scanning operation.
Patterns are provided to a compilation interface which generates an immutable
pattern database. The scan interface then can be used to scan a target data
buffer for the given patterns, returning any matching results from that data
buffer. Hyperscan also provides a streaming mode, in which matches that span
several blocks in a stream are detected.
This document is designed to facilitate code-level integration of the Hyperscan
library with existing or new applications.
:ref:`intro` is a short overview of the Hyperscan library, with more detail on
the Hyperscan API provided in the subsequent sections: :ref:`compilation` and
:ref:`runtime`.
:ref:`perf` provides details on various factors which may impact the
performance of a Hyperscan integration.
:ref:`api_constants` and :ref:`api_files` provide a detailed summary of the
Hyperscan Application Programming Interface (API).
********
Audience
********
This guide is aimed at developers interested in integrating Hyperscan into an
application. For information on building the Hyperscan library, see the Quick
Start Guide.
***********
Conventions
***********
* Text in a ``fixed-width font`` refers to a code element, e.g. type name;
function or method name.
* Text in a :regexp:`coloured fixed-width font` refers to a regular
expression or a part of a regular expression.

View File

@ -0,0 +1,198 @@
.. _runtime:
#####################
Scanning for Patterns
#####################
Hyperscan provides three different scanning modes, each with its own scan
function beginning with ``hs_scan``. In addition, streaming mode has a number
of other API functions for managing stream state.
****************
Handling Matches
****************
All of these functions will call a user-supplied callback function when a match
is found. This function has the following signature:
.. doxygentypedef:: match_event_handler
:outline:
:no-link:
The *id* argument will be set to the identifier for the matching expression
provided at compile time, and the *to* argument will be set to the end-offset
of the match. If SOM was requested for the pattern (see :ref:`som`), the
*from* argument will be set to the leftmost possible start-offset for the match.
The match callback function has the capability to halt scanning
by returning a non-zero value.
See :c:type:`match_event_handler` for more information.
**************
Streaming Mode
**************
The streaming runtime API consists of functions to open, scan, and close
Hyperscan data streams -- these functions being :c:func:`hs_open_stream`,
:c:func:`hs_scan_stream`, and :c:func:`hs_close_stream`. Any matches detected
in the written data are returned to the calling application via a function
pointer callback.
The match callback function has the capability to halt scanning of the current
data stream by returning a non-zero value. In streaming mode, the result of
this is that the stream is then left in a state where no more data can be
scanned, and any subsequent calls to :c:func:`hs_scan_stream` for that stream
will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must
still call :c:func:`hs_close_stream` to complete the clean-up process for that
stream.
Streams exist in the Hyperscan library so that pattern matching state can be
maintained across multiple blocks of target data -- without maintaining this
state, it would not be possible to detect patterns that span these blocks of
data. This, however, does come at the cost of requiring an amount of storage
per-stream (the size of this storage is fixed at compile time), and a slight
performance penalty in some cases to manage the state.
While Hyperscan does always support a strict ordering of multiple matches,
streaming matches will not be delivered at offsets before the current stream
write, with the exception of zero-width asserts, where constructs such as
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
stream write to be delayed until the next stream write or stream close
operation.
=================
Stream Management
=================
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
:c:func:`hs_close_stream`, the Hyperscan API provides a number of other
functions for the management of streams:
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
equivalent to calling :c:func:`hs_close_stream` but will not free the memory
used for stream state.
* :c:func:`hs_copy_stream`: constructs a (newly allocated) duplicate of a
stream.
* :c:func:`hs_reset_and_copy_stream`: constructs a duplicate of a stream into
another, resetting the destination stream first. This call avoids the
allocation done by :c:func:`hs_copy_stream`.
**********
Block Mode
**********
The block mode runtime API consists of a single function: :c:func:`hs_scan`. Using
the compiled patterns this function identifies matches in the target data,
using a function pointer callback to communicate with the application.
This single :c:func:`hs_scan` function is essentially equivalent to calling
:c:func:`hs_open_stream`, making a single call to :c:func:`hs_scan_stream`, and
then :c:func:`hs_close_stream`, except that block mode operation does not
incur all the stream related overhead.
*************
Vectored Mode
*************
The vectored mode runtime API, like the block mode API, consists of a single
function: :c:func:`hs_scan_vector`. This function accepts an array of data
pointers and lengths, facilitating the scanning in sequence of a set of data
blocks that are not contiguous in memory.
From the caller's perspective, this mode will produce the same matches as if
the set of data blocks were (a) scanned in sequence with a series of streaming
mode scans, or (b) copied in sequence into a single block of memory and then
scanned in block mode.
*************
Scratch Space
*************
While scanning data, Hyperscan needs a small amount of temporary memory to store
on-the-fly internal data. This amount is unfortunately too large to fit on the
stack, particularly for embedded applications, and allocating memory dynamically
is too expensive, so a pre-allocated "scratch" space must be provided to the
scanning functions.
The function :c:func:`hs_alloc_scratch` allocates a large enough region of
scratch space to support a given database. If the application uses multiple
databases, only a single scratch region is necessary: in this case, calling
:c:func:`hs_alloc_scratch` on each database (with the same ``scratch`` pointer)
will ensure that the scratch space is large enough to support scanning against
any of the given databases.
Importantly, only one such space is required per thread and can (and indeed
should) be allocated before data scanning is to commence. In a scenario where a
set of expressions are compiled by a single "master" thread and data will be
scanned by multiple "worker" threads, the convenience function
:c:func:`hs_clone_scratch` allows multiple copies of an existing scratch space
to be made for each thread (rather than forcing the caller to pass all the
compiled databases through :c:func:`hs_alloc_scratch` multiple times).
For example:
.. code-block:: c
hs_error_t err;
hs_scratch_t *scratch_prototype = NULL;
err = hs_alloc_scratch(db, &scratch_prototype);
if (err != HS_SUCCESS) {
printf("hs_alloc_scratch failed!");
exit(1);
}
hs_scratch_t *scratch_thread1 = NULL;
hs_scratch_t *scratch_thread2 = NULL;
err = hs_clone_scratch(scratch_prototype, &scratch_thread1);
if (err != HS_SUCCESS) {
printf("hs_clone_scratch failed!");
exit(1);
}
err = hs_clone_scratch(scratch_prototype, &scratch_thread2);
if (err != HS_SUCCESS) {
printf("hs_clone_scratch failed!");
exit(1);
}
hs_free_scratch(scratch_prototype);
/* Now two threads can both scan against database db,
each with its own scratch space. */
While the Hyperscan library is re-entrant, the use of scratch spaces is not.
For example, if by design it is deemed necessary to run recursive or nested
scanning (say, from the match callback function), then an additional scratch
space is required for that context.
The easiest way to achieve this is to build up a single scratch space as a
prototype, then clone it for each context, in the same way as the per-thread
example above.
*****************
Custom Allocators
*****************
By default, structures used by Hyperscan at runtime (scratch space, stream
state, etc) are allocated with the default system allocators, usually
``malloc()`` and ``free()``.
The Hyperscan API provides a facility for changing this behaviour to support
applications that use custom memory allocators.
These functions are:
- :c:func:`hs_set_database_allocator`, which sets the allocate and free functions
used for compiled pattern databases.
- :c:func:`hs_set_scratch_allocator`, which sets the allocate and free
functions used for scratch space.
- :c:func:`hs_set_stream_allocator`, which sets the allocate and free functions
used for stream state in streaming mode.
- :c:func:`hs_set_misc_allocator`, which sets the allocate and free functions
used for miscellaneous data, such as compile error structures and
informational strings.
The :c:func:`hs_set_allocator` function can be used to set all of the custom
allocators to the same allocate/free pair.

24
examples/CMakeLists.txt Normal file
View File

@ -0,0 +1,24 @@
# Example programs demonstrating use of the Hyperscan API.
#
# simplegrep links against Hyperscan only. pcapscan and patbench also link
# against libpcap, so they are only built when libpcap can be located.
find_library(PCAP_LIBRARY pcap)
if (NOT PCAP_LIBRARY)
    message(STATUS "Could not find libpcap - some examples will not be built")
endif()

add_executable(simplegrep simplegrep.c)
set_source_files_properties(simplegrep.c PROPERTIES COMPILE_FLAGS
    "-Wall -Wno-unused-parameter")
target_link_libraries(simplegrep hs)

if (PCAP_LIBRARY)
    # Link against the library file that find_library() actually located
    # rather than the bare name "pcap": the bare name is resolved again by the
    # linker and fails when libpcap lives outside the default search path.
    foreach (pcap_example pcapscan patbench)
        add_executable(${pcap_example} ${pcap_example}.cc)
        set_source_files_properties(${pcap_example}.cc PROPERTIES COMPILE_FLAGS
            "-Wall -Wno-unused-parameter")
        target_link_libraries(${pcap_example} hs "${PCAP_LIBRARY}")
    endforeach()
endif()

155
examples/README.md Normal file
View File

@ -0,0 +1,155 @@
Hyperscan Example Code
======================
Copyright (C) 2015 Intel Corporation. All rights reserved.
The files in this directory contain example code demonstrating the use of the
Hyperscan regular expression matching library. The examples have been
constructed to be useful utility programs, but they have been simplified
somewhat, so generally contain "shortcuts" that one would not take if building
a "real" system.
The examples each contain a short description in a comment at the top of the
file, including build instructions.
---
Example 1: simplegrep
---------------------
The first example program (`simplegrep.c`) is modelled on the ubiquitous grep
tool to search a file for a single regular expression. 'simplegrep' does the
same, but eschews a lot of grep's complexity: it is unable to read data from
`stdin`, and doesn't support grep's plethora of command-line arguments.
This code is intended to be simple portable C99.
simplegrep demonstrates the following Hyperscan concepts:
- Single pattern compilation: As simplegrep can scan for one pattern only, it
uses the `hs_compile` function instead of the multi-pattern variant:
`hs_compile_multi`.
- Block mode pattern-matching: simplegrep will search a single data buffer
for the given pattern, so it has no need to set up and tear down streams.
(See the next section for a streaming mode example)
- Scratch space allocation and use: Hyperscan requires a small amount of
temporary memory that is used in the `hs_scan` call. The caller needs to
guarantee that only one instance of `hs_scan` is using the scratch space at a
time, but there is no requirement that the same scratch area be used on
consecutive calls to `hs_scan`. Given that it is expensive to allocate the
scratch space, one would typically allocate all necessary scratch space at
system startup and reuse it throughout execution of the program.
Example 2: pcapscan
-------------------
The second example program (`pcapscan.cc`) is a very simple packet scanning
benchmark. It scans a given PCAP file full of network traffic against a group
of regular expressions and returns some coarse performance measurements. This
example provides a quick way to examine the performance achievable on a
particular combination of platform, pattern set and input data.
In block mode, pcapscan scans each packet individually against a Hyperscan
database. In streaming mode, pcapscan assigns packets to flows using a
rudimentary connection tracker, then scans the packets in each flow with
Hyperscan's streaming mode interface. This demonstrates the use of streaming
mode operation to detect matches that straddle packet boundaries.
**Note**: the flow assignment implemented here is intended as a simple demo; it
merely ensures that packets with the same 5-tuple are written to the same
stream in the order in which they appear in the PCAP file. No packet
re-ordering or connection state tracking (as you would expect to find in a real
network scanning application) is done.
pcapscan introduces the following Hyperscan concepts:
- Multi-pattern compilation: Unlike simplegrep, pcapscan requires a file of
expressions as input instead of a single pattern. pcapscan will read this
file in, one pattern per line, and use it as input to the `hs_compile_multi`
function. This function generates a pattern database that will match all the
input patterns in parallel.
- Streamed pattern-matching: pcapscan uses the `hs_scan_stream` function
(instead of the block-mode `hs_scan` call) to allow it to identify matches
that occur in a stream of data, even if they straddle the boundaries between blocks.
Streaming mode operation has a number of unique properties:
- Stream state that persists for the lifetime of the stream must be allocated
with the `hs_open_stream` function before scanning can take place.
Similarly, it must be freed with `hs_close_stream` after it is no longer
needed. Each stream being scanned concurrently requires its own stream
state.
- In streaming mode, a non-zero return from the user-specified event-handler
function has consequences for the rest of that stream's lifetime: when a
non-zero return occurs, it signals that no more of the stream should be
scanned. Consequently if the user makes a subsequent call to
`hs_scan_stream` on a stream whose processing was terminated in this way,
`hs_scan_stream` will return `HS_SCAN_TERMINATED`. This case has not been
demonstrated in pcapscan, as its callback always returns 0.
- Match handling during stream shutdown: As matches may occur when the
`hs_close_stream` function is called, it too must be provided with scratch
space in order to perform this match processing. Similarly, the user must
be prepared to be issued match event callbacks during the `hs_close_stream`
call. For this reason, we advise that stream shutdown be an integral part
of the system design.
Example 3: patbench
-------------------
This program allows users to detect which signatures may be the most expensive
in a set of patterns. It is designed for use with small to medium pattern set
sizes (e.g. 5-500). If used with very large pattern sets it may take a very
long time - the number of recompiles done is `g * O(lg2(n))` where `g` is the
number of generations and `n` is the number of patterns (assuming that `n >>
g`).
This utility will return a cumulative series of removed patterns. The first
generation will find and remove a single pattern. The second generation will
begin with the first pattern removed and find another pattern to remove, etc.
So if we have 100 patterns and 15 generations, the final generation's score
will be a run over 85 patterns.
This utility is probabilistic. It is possible that the pattern removed in a
generation is not a particularly expensive pattern. To reduce noise in the
results use 'taskset' and set the number of repeats to a level that still
completes in reasonable time (this will reduce the effect of random measurement
noise).
The criterion for performance can be altered by use of the `-C<x>` flag where
`<x>` can be `t,r,s,c,b`, selecting pattern matching throughput, scratch size,
stream state size (only available in streaming mode), compile time and bytecode
size respectively.
This utility will also not produce good results if all the patterns are roughly
equally expensive.
### Factor Group Size:
If there are multiple expensive patterns that are very similar on the
left-hand-side or identical, this utility will typically not find these groups
unless the `-F` flag is used to search for a group size that is equal to or
larger than the size of the group of similar patterns.
Otherwise, removing a portion of the similar patterns will have no or almost no
effect, and the search procedure used relies on the ability to remove all of
the similar patterns in at least one search case, something which will only
happen if the `factor_group_size` is large enough.
This alters the operation of the tool so that instead of trying to find the
single pattern whose removal has the most effect by binary search (the default
with `factor_group_size == 1`), we attempt to find the N patterns whose removal
has the most effect by searching over `N + 1` evenly sized groups, removing
only `1/(N + 1)` of the search signatures per iteration.
Note that the number of recompiles done greatly increases with increased factor
group size. For example, with `factor_group_size = 1`, we do `g * 2 * lg2(n)`
recompiles, while with `factor_group_size = 4`, we do `g * 4 * log(5/4)(n)`.
Informally, the number of generations we require goes up as we eliminate a
smaller number of signatures, and we have to do more work per generation.

892
examples/patbench.cc Normal file
View File

@ -0,0 +1,892 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Hyperscan pattern benchmarker.
*
* This program allows users to detect which signatures may be the most
* expensive in a set of patterns. It is designed for use with small to medium
* pattern set sizes (e.g. 5-500). If used with very large pattern sets it may
* take a very long time - the number of recompiles done is g * O(lg2(n)) where
* g is the number of generations and n is the number of patterns (assuming
* that n >> g).
*
* This utility will return a cumulative series of removed patterns. The first
* generation will find and remove a single pattern. The second generation will
* begin with the first pattern removed and find another pattern to remove,
* etc. So if we have 100 patterns and 15 generations, the final generation's
* score will be a run over 85 patterns.
*
* This utility is probabilistic. It is possible that the pattern removed in a
* generation is not a particularly expensive pattern. To reduce noise in the
* results use 'taskset' and set the number of repeats to a level that still
* completes in reasonable time (this will reduce the effect of random
* measurement noise).
*
* The criterion for performance can be altered by use of the -C<x> flag where
* <x> can be t,r,s,c,b, selecting pattern matching throughput, scratch size,
* stream state size (only available in streaming mode), compile time and
* bytecode size respectively.
*
* This utility will also not produce good results if all the patterns are
* roughly equally expensive.
*
* Factor Group Size:
*
* If there are multiple expensive patterns that are very similar on the
* left-hand-side or identical, this utility will typically not find these
* groups unless the -F flag is used to search for a group size that is equal
* to or larger than the size of the group of similar patterns.
*
* Otherwise, removing a portion of the similar patterns will have no or almost
* no effect, and the search procedure used relies on the ability to remove all
* of the similar patterns in at least one search case, something which will
* only happen if the factor_group_size is large enough.
*
* This alters the operation of our tool so that instead of trying to find the
* single pattern whose removal has the most effect by binary search (the
* default with factor_group_size == 1), we attempt to find the N patterns
* whose removal has the most effect by searching over N+1 evenly sized groups,
* removing only 1/(N+1) of the search signatures per iteration.
*
* Note that the number of recompiles done greatly increases with increased
* factor group size. For example, with factor_group_size = 1, we do g * 2 *
* lg2(n) recompiles, while with factor_group_size = 4, we do g * 4 *
* log(5/4)(n). Informally the number of generations we require goes up as we
* eliminate a smaller number of signatures and we have to do more work per
* generation.
*
*
* Build instructions:
*
* g++ -o patbench patbench.cc $(pkg-config --cflags --libs libhs) -lpcap
*
* Usage:
*
* ./patbench [ -n repeats] [ -G generations] [ -C criterion ]
* [ -F factor_group_size ] [ -N | -S ] <pattern file> <pcap file>
*
* -n repeats sets the number of times the PCAP is repeatedly scanned
* with the pattern
* -G generations sets the number of generations that the algorithm is
* run for
* -N sets non-streaming mode, -S sets streaming mode (default)
* -F sets the factor group size (must be >0); this allows the detection
* of multiple interacting factors
*
* -C sets the "criterion", which can be either:
* t throughput (the default) - this requires a pcap file
* r scratch size
* s stream state size
* c compile time
* b bytecode size
*
* We recommend the use of a utility like 'taskset' on multiprocessor hosts to
* lock execution to a single processor: this will remove processor migration
* by the scheduler as a source of noise in the results.
*
*/
#include <algorithm>
#include <cstring>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include <unordered_map>
#include <unistd.h>
// We use the BSD primitives throughout as they exist on both BSD and Linux.
#define __FAVOR_BSD
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <net/ethernet.h>
#include <arpa/inet.h>
#include <pcap.h>
#include <hs.h>
using std::cerr;
using std::cout;
using std::endl;
using std::ifstream;
using std::string;
using std::unordered_map;
using std::vector;
using std::set;
using std::min;
using std::max;
using std::copy;
// Metric used to score a pattern set. The enumerator values are spelled out
// explicitly; they are the same values the compiler would assign implicitly.
enum Criterion {
    CRITERION_THROUGHPUT = 0,    // reported in Megabits/s
    CRITERION_BYTECODE_SIZE = 1, // reported in bytes
    CRITERION_COMPILE_TIME = 2,  // reported in seconds
    CRITERION_STREAM_STATE = 3,  // reported in bytes
    CRITERION_SCRATCH_SIZE = 4   // reported in bytes
};
// Returns true when a larger value of the given criterion is the better
// result. Only throughput is treated that way; every other criterion is
// treated as "smaller is better".
static bool higher_is_better(Criterion c) {
    switch (c) {
    case CRITERION_THROUGHPUT:
        return true;
    default:
        return false;
    }
}
// Write a criterion value to stdout followed by its unit. Throughput and
// compile time are printed as fixed-point numbers with three decimals; every
// other criterion is truncated to a whole number of bytes. No newline is
// written.
static void print_criterion(Criterion c, double val) {
    if (c == CRITERION_THROUGHPUT || c == CRITERION_COMPILE_TIME) {
        const char *unit =
            (c == CRITERION_THROUGHPUT) ? " Megabits/s" : " seconds";
        cout << std::fixed << std::setprecision(3) << val << unit;
        return;
    }

    // Bytecode size, stream state, scratch size and any unrecognised value.
    cout << static_cast<size_t>(val) << " bytes";
}
// Key identifying a stream in the pcap input. All five fields are copied from
// the packet headers as-is, with no byte-order conversion.
struct FiveTuple {
    unsigned int protocol;
    unsigned int srcAddr;
    unsigned int srcPort;
    unsigned int dstAddr;
    unsigned int dstPort;

    // Build the key from the IP header of a TCP or UDP packet. The transport
    // header is expected to follow the IP header in the same buffer.
    FiveTuple(const struct ip *iphdr) {
        // ip_hl counts 32-bit words, so the transport header starts
        // ip_hl * 4 bytes after the start of the IP header. The ports are read
        // through the UDP header layout for both TCP and UDP packets.
        const char *transport =
            reinterpret_cast<const char *>(iphdr) + (iphdr->ip_hl * 4);
        const struct udphdr *ports =
            reinterpret_cast<const struct udphdr *>(transport);

        protocol = iphdr->ip_p;
        srcAddr = iphdr->ip_src.s_addr;
        srcPort = ports->uh_sport;
        dstAddr = iphdr->ip_dst.s_addr;
        dstPort = ports->uh_dport;
    }

    // Two keys are equal only when all five fields are equal.
    bool operator==(const FiveTuple &a) const {
        if (protocol != a.protocol) {
            return false;
        }
        if (srcAddr != a.srcAddr || srcPort != a.srcPort) {
            return false;
        }
        return dstAddr == a.dstAddr && dstPort == a.dstPort;
    }
};
// Deliberately trivial hash used for the unordered_map keyed on FiveTuple: the
// five fields are simply XORed together.
struct FiveTupleHash {
    size_t operator()(const FiveTuple &x) const {
        unsigned int mixed = x.srcAddr;
        mixed ^= x.dstAddr;
        mixed ^= x.protocol;
        mixed ^= x.srcPort;
        mixed ^= x.dstPort;
        return mixed;
    }
};
// Helper function, defined at the end of this file.
//
// As used by Benchmark::readStreams(): pkt_data is the start of a captured
// packet; a false return causes the packet to be skipped. On a true return,
// *offset is used as the byte offset of the payload from pkt_data and *length
// as the number of payload bytes.
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
unsigned int *length);
// Match event handler: invoked once for every match Hyperscan reports. The
// id, offsets and flags are ignored; the handler only counts matches.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
            unsigned int flags, void *ctx) {
    // ctx is the address of the size_t match counter supplied by the caller.
    size_t *counter = static_cast<size_t *>(ctx);
    ++*counter;

    // Zero tells the caller to keep matching.
    return 0;
}
// Simple interval timer: call start(), run the work to be measured, call
// stop(), then read the elapsed time with seconds().
//
// Uses std::chrono::steady_clock rather than system_clock. The system clock
// can be adjusted while a measurement is in progress, which would distort the
// interval or even make it negative; the steady clock never goes backwards.
class Clock {
public:
    // Record the beginning of the measured interval.
    void start() {
        time_start = std::chrono::steady_clock::now();
    }

    // Record the end of the measured interval.
    void stop() {
        time_end = std::chrono::steady_clock::now();
    }

    // Time between the last start() and the last stop(), in seconds.
    double seconds() const {
        std::chrono::duration<double> delta = time_end - time_start;
        return delta.count();
    }

private:
    std::chrono::time_point<std::chrono::steady_clock> time_start, time_end;
};
// Class wrapping all state associated with the benchmark: the packet payloads
// read from the pcap file, the compiled database currently under test, and the
// scratch space and stream handles used to scan with it.
class Benchmark {
private:
    // Packet data to be scanned
    vector<string> packets;

    // Stream ID for each packet (used as an index into streams)
    vector<size_t> stream_ids;

    // Map used to construct stream_ids
    unordered_map<FiveTuple, size_t, FiveTupleHash> stream_map;

    // Hyperscan compiled database
    hs_database_t *db = nullptr;

    // Hyperscan temporary scratch space
    hs_scratch_t *scratch = nullptr;

    // Vector of Hyperscan stream state
    vector<hs_stream_t *> streams;

    // Count of matches found while scanning
    size_t matchCount = 0;

public:
    // Releases the scratch space and the database owned by this object.
    ~Benchmark() {
        hs_free_scratch(scratch);
        hs_free_database(db);
    }

    // Initialisation; after this call, Benchmark owns the database and will
    // ensure it is freed. Exits the process if scratch cannot be allocated.
    void setDatabase(hs_database_t *hs_db) {
        hs_free_database(db); // Free previous database.
        db = hs_db;
        // (Re)allocate scratch to ensure that it is large enough to handle the
        // database.
        hs_error_t err = hs_alloc_scratch(db, &scratch);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
            exit(-1);
        }
    }

    // Returns the database currently owned by this object (may be null).
    const hs_database_t *getDatabase() const {
        return db;
    }

    // Returns the scratch size as reported by hs_scratch_size(). Exits the
    // process if the query fails.
    size_t getScratchSize() const {
        size_t scratch_size;
        hs_error_t err = hs_scratch_size(scratch, &scratch_size);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not query scratch space size. Exiting."
                 << endl;
            exit(-1);
        }
        return scratch_size;
    }

    // Read a set of streams from a pcap file. Packets rejected by
    // payloadOffset() are skipped. Returns false if the file cannot be opened
    // or if no packets have been stored.
    bool readStreams(const char *pcapFile) {
        // Open PCAP file for input
        char errbuf[PCAP_ERRBUF_SIZE];
        pcap_t *pcapHandle = pcap_open_offline(pcapFile, errbuf);
        if (pcapHandle == nullptr) {
            cerr << "ERROR: Unable to open pcap file \"" << pcapFile
                 << "\": " << errbuf << endl;
            return false;
        }

        struct pcap_pkthdr pktHeader;
        const unsigned char *pktData;
        while ((pktData = pcap_next(pcapHandle, &pktHeader)) != nullptr) {
            unsigned int offset = 0, length = 0;
            if (!payloadOffset(pktData, &offset, &length)) {
                continue;
            }

            // payloadOffset() is only handed the packet bytes, not the capture
            // header, so the bounds it reports cannot account for a packet
            // that was truncated at capture time. Never read beyond the
            // caplen bytes that pcap actually returned.
            if (offset > pktHeader.caplen) {
                continue;
            }
            if (length > pktHeader.caplen - offset) {
                length = pktHeader.caplen - offset;
            }

            // Valid TCP or UDP packet
            const struct ip *iphdr = (const struct ip *)(pktData
                    + sizeof(struct ether_header));
            const char *payload = (const char *)pktData + offset;

            // A five-tuple seen for the first time is given the next unused
            // stream ID; otherwise insert() leaves the existing entry alone
            // and its ID is reused.
            size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
                                          stream_map.size())).first->second;

            packets.push_back(string(payload, length));
            stream_ids.push_back(id);
        }
        pcap_close(pcapHandle);

        return !packets.empty();
    }

    // Return the number of bytes scanned
    size_t bytes() const {
        size_t sum = 0;
        for (const auto &packet : packets) {
            sum += packet.size();
        }
        return sum;
    }

    // Return the number of matches found.
    size_t matches() const {
        return matchCount;
    }

    // Clear the number of matches found.
    void clearMatches() {
        matchCount = 0;
    }

    // Open a Hyperscan stream for each stream in stream_ids. Exits the
    // process if any stream cannot be opened.
    void openStreams() {
        streams.resize(stream_map.size());
        for (auto &stream : streams) {
            hs_error_t err = hs_open_stream(db, 0, &stream);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to open stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Close all open Hyperscan streams (potentially generating any
    // end-anchored matches). Exits the process on failure.
    void closeStreams() {
        for (auto &stream : streams) {
            hs_error_t err =
                hs_close_stream(stream, scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to close stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the streaming interface. Exits the process on failure.
    void scanStreams() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan_stream(streams[stream_ids[i]],
                                            pkt.c_str(), pkt.length(), 0,
                                            scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the block-mode interface. Exits the process on failure.
    void scanBlock() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan(db, pkt.c_str(), pkt.length(), 0,
                                     scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }
};
// Helper function, defined at the end of this file.
//
// Called by the Sigdata(const char *) constructor with the pattern file name
// and Sigdata's four member vectors. Sigdata afterwards indexes all four with
// the same index, so they are presumably filled with one entry per pattern --
// confirm against the definition.
static void parseFile(const char *filename, vector<string> &patterns,
vector<unsigned> &flags, vector<unsigned> &ids,
vector<string> &originals);
// A set of signatures held as four vectors that are indexed together: entry i
// of flags, ids, patterns and originals all describe the same signature.
class Sigdata {
    vector<unsigned> flags;
    vector<unsigned> ids;
    vector<string> patterns;
    vector<string> originals;

public:
    // Creates an empty signature set.
    Sigdata() {}

    // Creates a signature set by passing the file name and the four member
    // vectors to parseFile().
    Sigdata(const char *filename) {
        parseFile(filename, patterns, flags, ids, originals);
    }

    // Returns the entry of originals at the given index (not range-checked).
    const string &get_original(unsigned index) const {
        return originals[index];
    }

    // Compiles every pattern in the set into one database using the given
    // mode. On success the time spent inside hs_compile_multi() is stored in
    // *compileTime and the database is returned. On failure the compile error
    // is printed to stderr and the process exits.
    hs_database_t *compileDatabase(unsigned mode, double *compileTime) const {
        hs_database_t *compiled = nullptr;
        hs_compile_error_t *error;

        // hs_compile_multi wants an array of C strings; the std::string
        // objects in patterns keep owning the character data.
        vector<const char *> rawPatterns;
        rawPatterns.reserve(patterns.size());
        for (size_t i = 0; i < patterns.size(); ++i) {
            rawPatterns.push_back(patterns[i].c_str());
        }

        // Only the compile call itself is timed.
        Clock timer;
        timer.start();
        const hs_error_t rc =
            hs_compile_multi(rawPatterns.data(), flags.data(), ids.data(),
                             rawPatterns.size(), mode, nullptr, &compiled,
                             &error);
        timer.stop();

        if (rc != HS_SUCCESS) {
            if (error->expression >= 0) {
                cerr << "ERROR: Pattern '"
                     << patterns[error->expression]
                     << "' failed with error '" << error->message << "'"
                     << endl;
            } else {
                // The error does not refer to a particular expression.
                cerr << "ERROR: " << error->message << endl;
            }
            // The error structure is dynamically allocated and only exists on
            // failure, so it is released here before exiting.
            hs_free_compile_error(error);
            exit(-1);
        }

        *compileTime = timer.seconds();
        return compiled;
    }

    // Number of signatures in the set.
    unsigned size() const {
        return patterns.size();
    }

    // Returns a copy of this set without the signatures whose index appears
    // in excludeIndexSet. The relative order of the remaining signatures is
    // preserved.
    Sigdata cloneExclude(const set<unsigned> &excludeIndexSet) const {
        Sigdata kept;
        const unsigned total = size();
        for (unsigned idx = 0; idx < total; ++idx) {
            if (excludeIndexSet.count(idx) != 0) {
                continue;
            }
            kept.flags.push_back(flags[idx]);
            kept.ids.push_back(ids[idx]);
            kept.patterns.push_back(patterns[idx]);
            kept.originals.push_back(originals[idx]);
        }
        return kept;
    }
};
// Print the command-line help text to stderr, one line at a time. The
// program-name argument is accepted for call-site symmetry but is not used.
static
void usage(const char *) {
    static const char *const helpLines[] = {
        "Usage:",
        "",
        " patbench [-n repeats] [ -G generations] [ -C criterion ]",
        " [ -F factor_group_size ] [ -N | -S ] "
        "<pattern file> <pcap file>",
        "",
        " -n repeats sets the number of times the PCAP is repeatedly "
        "scanned",
        " with the pattern.",
        " -G generations sets the number of generations that the "
        "algorithm is",
        " run for.",
        " -N sets non-streaming mode, -S sets streaming mode (default).",
        " -F sets the factor group size (must be >0); this "
        "allows the detection",
        " of multiple interacting factors.",
        "",
        " -C sets the 'criterion', which can be either:",
        " t throughput (the default) - this requires a pcap file",
        " r scratch size",
        " s stream state size",
        " c compile time",
        " b bytecode size",
        "",
        "We recommend the use of a utility like 'taskset' on "
        "multiprocessor hosts to",
        "lock execution to a single processor: this will remove processor "
        "migration",
        "by the scheduler as a source of noise in the results.",
    };

    for (const char *text : helpLines) {
        cerr << text << endl;
    }
}
// Reset the benchmark's match counter, then time repeatCount streaming
// passes. Each pass opens the streams, scans them and closes them again.
// Returns the elapsed wall-clock time in seconds for all passes together.
static
double measure_stream_time(Benchmark &bench, unsigned int repeatCount) {
    bench.clearMatches();

    Clock timer;
    timer.start();
    for (unsigned int pass = 0; pass != repeatCount; ++pass) {
        bench.openStreams();
        bench.scanStreams();
        bench.closeStreams();
    }
    timer.stop();

    return timer.seconds();
}
// Reset the benchmark's match counter, then time repeatCount block-mode
// passes over the packets. Returns the elapsed wall-clock time in seconds
// for all passes together.
static
double measure_block_time(Benchmark &bench, unsigned int repeatCount) {
    bench.clearMatches();

    Clock timer;
    timer.start();
    for (unsigned int pass = 0; pass != repeatCount; ++pass) {
        bench.scanBlock();
    }
    timer.stop();

    return timer.seconds();
}
// Compile the signature set into a database, install it in the benchmark and
// return its score for the requested criterion:
//   bytecode size  - hs_database_size() of the compiled database
//   compile time   - seconds spent in the compile call
//   stream state   - hs_stream_size() of the compiled database
//   scratch size   - bench.getScratchSize()
//   anything else  - scan throughput in megabits per second
// For throughput, a one-line summary is also printed when diagnose is true.
// A failed size query terminates the program.
static
double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
                unsigned repeatCount, Criterion criterion,
                bool diagnose = true) {
    double compileTime = 0;
    bench.setDatabase(sigs.compileDatabase(mode, &compileTime));

    if (criterion == CRITERION_BYTECODE_SIZE) {
        size_t bytecodeBytes;
        if (hs_database_size(bench.getDatabase(), &bytecodeBytes) !=
            HS_SUCCESS) {
            cerr << "ERROR: could not retrieve bytecode size" << endl;
            exit(1);
        }
        return bytecodeBytes;
    }

    if (criterion == CRITERION_COMPILE_TIME) {
        return compileTime;
    }

    if (criterion == CRITERION_STREAM_STATE) {
        size_t stateBytes;
        if (hs_stream_size(bench.getDatabase(), &stateBytes) != HS_SUCCESS) {
            cerr << "ERROR: could not retrieve stream state size" << endl;
            exit(1);
        }
        return stateBytes;
    }

    if (criterion == CRITERION_SCRATCH_SIZE) {
        return bench.getScratchSize();
    }

    // Every remaining criterion value is treated as throughput.
    const double secs = (mode == HS_MODE_NOSTREAM)
                            ? measure_block_time(bench, repeatCount)
                            : measure_stream_time(bench, repeatCount);

    const size_t payloadBytes = bench.bytes();
    const size_t matchTotal = bench.matches();

    // Bits scanned divided by microseconds elapsed, i.e. megabits per second.
    const double mbps = (payloadBytes * 8 * repeatCount) / (secs * 1000000);

    if (diagnose) {
        cout << "Scan time " << std::fixed << std::setprecision(3) << secs
             << " sec, Scanned " << payloadBytes * repeatCount
             << " bytes, Throughput "
             << std::fixed << std::setprecision(3)
             << mbps
             << " Mbps, Matches " << matchTotal << endl;
    }

    return mbps;
}
// Main entry point.
int main(int argc, char **argv) {
unsigned int repeatCount = 1;
unsigned int mode = HS_MODE_STREAM;
Criterion criterion = CRITERION_THROUGHPUT;
unsigned int gen_max = 10;
unsigned int factor_max = 1;
// Process command line arguments.
int opt;
while ((opt = getopt(argc, argv, "SNn:G:F:C:")) != -1) {
switch (opt) {
case 'F':
factor_max = atoi(optarg);
break;
case 'G':
gen_max = atoi(optarg);
break;
case 'S':
mode = HS_MODE_STREAM;
break;
case 'N':
mode = HS_MODE_NOSTREAM;
break;
case 'C':
switch (optarg[0]) {
case 't':
criterion = CRITERION_THROUGHPUT;
break;
case 'b':
criterion = CRITERION_BYTECODE_SIZE;
break;
case 'c':
criterion = CRITERION_COMPILE_TIME;
break;
case 's':
criterion = CRITERION_STREAM_STATE;
break;
case 'r':
criterion = CRITERION_SCRATCH_SIZE;
break;
default:
cerr << "Unrecognised criterion: " << optarg[0] << endl;
usage(argv[0]);
exit(-1);
}
break;
case 'n':
repeatCount = atoi(optarg);
break;
default:
usage(argv[0]);
exit(-1);
}
}
if (argc - optind != ((criterion == CRITERION_THROUGHPUT) ? 2 : 1)) {
usage(argv[0]);
exit(-1);
}
const char *patternFile = argv[optind];
const char *pcapFile = argv[optind + 1];
// Read our input PCAP file in
Benchmark bench;
if (criterion == CRITERION_THROUGHPUT) {
if (!bench.readStreams(pcapFile)) {
cerr << "Unable to read packets from PCAP file. Exiting." << endl;
exit(-1);
}
}
if ((criterion == CRITERION_STREAM_STATE) && (mode != HS_MODE_STREAM)) {
cerr << "Cannot evaluate stream state for block mode compile. Exiting."
<< endl;
exit(-1);
}
cout << "Base signatures: " << patternFile;
if (pcapFile) {
cout << "\tPCAP input file: " << pcapFile
<< "\tRepeat count: " << repeatCount;
}
if (mode == HS_MODE_STREAM) {
cout << "\tMode: streaming";
} else {
cout << "\tMode: block";
}
cout << endl;
Sigdata sigs(patternFile);
// calculate and show a baseline
eval_set(bench, sigs, mode, repeatCount, criterion);
set<unsigned> work_sigs, exclude;
for (unsigned i = 0; i < sigs.size(); ++i) {
work_sigs.insert(i);
}
double score_base =
eval_set(bench, sigs, mode, repeatCount, criterion, false);
bool maximize = higher_is_better(criterion);
cout << "Number of signatures: " << sigs.size() << endl;
cout << "Base performance: ";
print_criterion(criterion, score_base);
cout << endl;
unsigned generations = min(gen_max, (sigs.size() - 1) / factor_max);
cout << "Cutting signatures cumulatively for " << generations
<< " generations" << endl;
for (unsigned gen = 0; gen < generations; ++gen) {
cout << "Generation " << gen << " ";
set<unsigned> s(work_sigs.begin(), work_sigs.end());
double best = maximize ? 0 : 1000000000000.0;
unsigned count = 0;
while (s.size() > factor_max) {
count++;
cout << "." << std::flush;
vector<unsigned> sv(s.begin(), s.end());
random_shuffle(sv.begin(), sv.end());
unsigned groups = factor_max + 1;
for (unsigned current_group = 0; current_group < groups;
current_group++) {
unsigned sz = sv.size();
unsigned lo = (current_group * sz) / groups;
unsigned hi = ((current_group + 1) * sz) / groups;
set<unsigned> s_part1(sv.begin(), sv.begin() + lo);
set<unsigned> s_part2(sv.begin() + hi, sv.end());
set<unsigned> s_tmp = s_part1;
s_tmp.insert(s_part2.begin(), s_part2.end());
set<unsigned> tmp = s_tmp;
tmp.insert(exclude.begin(), exclude.end());
Sigdata sigs_tmp = sigs.cloneExclude(tmp);
double score = eval_set(bench, sigs_tmp, mode, repeatCount,
criterion, false);
if ((current_group == 0) ||
(!maximize ? (score < best) : (score > best))) {
s = s_tmp;
best = score;
}
}
}
for (unsigned i = count; i < 16; i++) {
cout << " ";
}
cout << "Performance: ";
print_criterion(criterion, best);
cout << " (" << std::fixed << std::setprecision(3) << (best / score_base)
<< "x) after cutting:" << endl;
// s now has factor_max signatures
for (const auto &found : s) {
exclude.insert(found);
work_sigs.erase(found);
cout << sigs.get_original(found) << endl;
}
cout << endl;
}
return 0;
}
/**
 * Helper function to locate the offset of the first byte of the payload in the
 * given ethernet frame. Offset into the packet, and the length of the payload
 * are returned in the arguments @a offset and @a length.
 *
 * Returns false (leaving @a offset and @a length untouched or meaningless) for
 * anything that is not an unfragmented IPv4 TCP or UDP packet with a non-empty
 * payload, and for packets whose IP total length is smaller than their own
 * header lengths.
 *
 * NOTE: the captured length of the frame is not available here, so the caller
 * is responsible for making sure the headers and the reported payload range
 * actually lie inside the captured data.
 */
static
bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                   unsigned int *length) {
    const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));
    const tcphdr *th = nullptr;

    // Ignore packets that aren't IPv4
    if (iph->ip_v != 4) {
        return false;
    }

    // Ignore fragmented packets.
    if (iph->ip_off & htons(IP_MF | IP_OFFMASK)) {
        return false;
    }

    // IP header length, and transport header length.
    unsigned int ihlen = iph->ip_hl * 4;
    unsigned int thlen = 0;

    switch (iph->ip_p) {
    case IPPROTO_TCP:
        th = (const tcphdr *)((const char *)iph + ihlen);
        thlen = th->th_off * 4;
        break;
    case IPPROTO_UDP:
        thlen = sizeof(udphdr);
        break;
    default:
        return false;
    }

    // A malformed packet may claim a total length shorter than its headers.
    // Subtracting in unsigned arithmetic would then wrap to a huge payload
    // length, so such packets are rejected.
    const unsigned int hdrlen = ihlen + thlen;
    const unsigned int iplen = ntohs(iph->ip_len);
    if (iplen < hdrlen) {
        return false;
    }

    *offset = sizeof(ether_header) + hdrlen;
    *length = iplen - hdrlen;

    return *length != 0;
}
static unsigned parseFlags(const string &flagsStr) {
unsigned flags = 0;
for (const auto &c : flagsStr) {
switch (c) {
case 'i':
flags |= HS_FLAG_CASELESS; break;
case 'm':
flags |= HS_FLAG_MULTILINE; break;
case 's':
flags |= HS_FLAG_DOTALL; break;
case 'H':
flags |= HS_FLAG_SINGLEMATCH; break;
case 'V':
flags |= HS_FLAG_ALLOWEMPTY; break;
case '8':
flags |= HS_FLAG_UTF8; break;
case 'W':
flags |= HS_FLAG_UCP; break;
default:
cerr << "Unsupported flag \'" << c << "\'" << endl;
exit(-1);
}
}
return flags;
}
static void parseFile(const char *filename, vector<string> &patterns,
vector<unsigned> &flags, vector<unsigned> &ids,
vector<string> &originals) {
ifstream inFile(filename);
if (!inFile.good()) {
cerr << "ERROR: Can't open pattern file \"" << filename << "\"" << endl;
exit(-1);
}
for (unsigned i = 1; !inFile.eof(); ++i) {
string line;
getline(inFile, line);
// if line is empty, or a comment, we can skip it
if (line.empty() || line[0] == '#') {
continue;
}
// otherwise, it should be ID:PCRE, e.g.
// 10001:/foobar/is
size_t colonIdx = line.find_first_of(':');
if (colonIdx == string::npos) {
cerr << "ERROR: Could not parse line " << i << endl;
exit(-1);
}
// we should have an unsigned int as an ID, before the colon
unsigned id = std::stoi(line.substr(0, colonIdx).c_str());
// rest of the expression is the PCRE
const string expr(line.substr(colonIdx + 1));
size_t flagsStart = expr.find_last_of('/');
if (flagsStart == string::npos) {
cerr << "ERROR: no trailing '/' char" << endl;
exit(-1);
}
string pcre(expr.substr(1, flagsStart - 1));
string flagsStr(expr.substr(flagsStart + 1, expr.size() - flagsStart));
unsigned flag = parseFlags(flagsStr);
originals.push_back(line);
patterns.push_back(pcre);
flags.push_back(flag);
ids.push_back(id);
}
}

679
examples/pcapscan.cc Normal file
View File

@ -0,0 +1,679 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Hyperscan example program 2: pcapscan
*
* This example is a very simple packet scanning benchmark. It scans a given
* PCAP file full of network traffic against a group of regular expressions and
* returns some coarse performance measurements. This example provides a quick
* way to examine the performance achievable on a particular combination of
* platform, pattern set and input data.
*
* Build instructions:
*
* g++ -std=c++11 -O2 -o pcapscan pcapscan.cc $(pkg-config --cflags --libs libhs) -lpcap
*
* Usage:
*
* ./pcapscan [-n repeats] <pattern file> <pcap file>
*
* We recommend the use of a utility like 'taskset' on multiprocessor hosts to
* pin execution to a single processor: this will remove processor migration
* by the scheduler as a source of noise in the results.
*
*/
#include <cstring>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <unistd.h>
// We use the BSD primitives throughout as they exist on both BSD and Linux.
#define __FAVOR_BSD
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <net/ethernet.h>
#include <arpa/inet.h>
#include <pcap.h>
#include <hs.h>
using std::cerr;
using std::cout;
using std::endl;
using std::ifstream;
using std::string;
using std::unordered_map;
using std::vector;
// Stream key for the pcap input: IP protocol number, source and destination
// address, and source and destination port, all copied verbatim from the
// packet headers (no byte-order conversion is applied).
struct FiveTuple {
    unsigned int protocol;
    unsigned int srcAddr;
    unsigned int srcPort;
    unsigned int dstAddr;
    unsigned int dstPort;

    // Build the key from the IP header of a TCP or UDP packet. The ports are
    // read through a udphdr view of whatever follows the IP header.
    FiveTuple(const struct ip *iphdr)
        : protocol(iphdr->ip_p), srcAddr(iphdr->ip_src.s_addr), srcPort(0),
          dstAddr(iphdr->ip_dst.s_addr), dstPort(0) {
        const char *transport = ((const char *)iphdr) + (iphdr->ip_hl * 4);
        const struct udphdr *ports = (const struct udphdr *)transport;
        srcPort = ports->uh_sport;
        dstPort = ports->uh_dport;
    }

    // Two keys are equal when all five fields are equal.
    bool operator==(const FiveTuple &a) const {
        if (protocol != a.protocol) {
            return false;
        }
        if (srcAddr != a.srcAddr || srcPort != a.srcPort) {
            return false;
        }
        return dstAddr == a.dstAddr && dstPort == a.dstPort;
    }
};
// Deliberately minimal hash for FiveTuple keys in an unordered_map: the XOR
// of all five fields.
struct FiveTupleHash {
    size_t operator()(const FiveTuple &x) const {
        const unsigned int addrBits = x.srcAddr ^ x.dstAddr;
        const unsigned int portBits = x.srcPort ^ x.dstPort;
        return addrBits ^ x.protocol ^ portBits;
    }
};
// Helper function. See end of file.
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
unsigned int *length);
// Hyperscan match callback. The context pointer is the address of a size_t
// match counter, which is incremented once per call; the match details
// themselves are ignored. Returning 0 tells Hyperscan to keep matching.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
            unsigned int flags, void *ctx) {
    size_t &counter = *static_cast<size_t *>(ctx);
    ++counter;
    return 0;
}
// Simple timing class: call start(), do the work, call stop(), then read the
// elapsed interval with seconds().
//
// std::chrono::steady_clock is used because it is monotonic. The system
// (wall) clock can be stepped backwards or forwards while a measurement is
// running (NTP, manual adjustment), which would corrupt the interval.
class Clock {
public:
    // Record the beginning of the interval.
    void start() {
        time_start = std::chrono::steady_clock::now();
    }

    // Record the end of the interval.
    void stop() {
        time_end = std::chrono::steady_clock::now();
    }

    // Seconds between the most recent start() and stop() calls.
    double seconds() const {
        std::chrono::duration<double> delta = time_end - time_start;
        return delta.count();
    }

private:
    std::chrono::time_point<std::chrono::steady_clock> time_start, time_end;
};
// Class wrapping all state associated with the benchmark: the packet payloads
// read from the PCAP file, the mapping from packets to streams, the two
// compiled databases (borrowed, not owned), the scratch space (owned) and the
// running match count.
class Benchmark {
private:
    // Packet data to be scanned.
    vector<string> packets;

    // The stream ID to which each packet belongs
    vector<size_t> stream_ids;

    // Map used to construct stream_ids
    unordered_map<FiveTuple, size_t, FiveTupleHash> stream_map;

    // Hyperscan compiled database (streaming mode)
    const hs_database_t *db_streaming;

    // Hyperscan compiled database (block mode)
    const hs_database_t *db_block;

    // Hyperscan temporary scratch space (used in both modes)
    hs_scratch_t *scratch;

    // Vector of Hyperscan stream state (used in streaming mode)
    vector<hs_stream_t *> streams;

    // Count of matches found during scanning
    size_t matchCount;

public:
    // Store the two databases and allocate scratch space usable with both.
    // Exits the program if scratch allocation fails.
    Benchmark(const hs_database_t *streaming, const hs_database_t *block)
        : db_streaming(streaming), db_block(block), scratch(nullptr),
          matchCount(0) {
        // Allocate enough scratch space to handle either streaming or block
        // mode, so we only need the one scratch region.
        hs_error_t err = hs_alloc_scratch(db_streaming, &scratch);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
            exit(-1);
        }
        // This second call will increase the scratch size if more is required
        // for block mode.
        err = hs_alloc_scratch(db_block, &scratch);
        if (err != HS_SUCCESS) {
            cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
            exit(-1);
        }
    }

    ~Benchmark() {
        // Free scratch region
        hs_free_scratch(scratch);
    }

    // The destructor frees the scratch region, so a copied object would free
    // it a second time. Copying is therefore disabled.
    Benchmark(const Benchmark &) = delete;
    Benchmark &operator=(const Benchmark &) = delete;

    // Read a set of streams from a pcap file. Returns true when at least one
    // usable packet payload was stored.
    bool readStreams(const char *pcapFile) {
        // Open PCAP file for input
        char errbuf[PCAP_ERRBUF_SIZE];
        pcap_t *pcapHandle = pcap_open_offline(pcapFile, errbuf);
        if (pcapHandle == nullptr) {
            cerr << "ERROR: Unable to open pcap file \"" << pcapFile
                 << "\": " << errbuf << endl;
            return false;
        }

        struct pcap_pkthdr pktHeader;
        const unsigned char *pktData;
        while ((pktData = pcap_next(pcapHandle, &pktHeader)) != nullptr) {
            // Only pktHeader.caplen bytes are valid at pktData. Frames too
            // short to hold an Ethernet header plus a minimal IP header
            // cannot be inspected at all.
            if (pktHeader.caplen <
                sizeof(struct ether_header) + sizeof(struct ip)) {
                continue;
            }

            unsigned int offset = 0, length = 0;
            if (!payloadOffset(pktData, &offset, &length)) {
                continue;
            }

            // The payload range is derived from header fields, which may
            // describe more data than was captured (truncated capture or
            // malformed packet). Never copy beyond the captured bytes.
            if (offset >= pktHeader.caplen) {
                continue;
            }
            if (length > pktHeader.caplen - offset) {
                length = pktHeader.caplen - offset;
            }

            // Valid TCP or UDP packet
            const struct ip *iphdr = (const struct ip *)(pktData
                    + sizeof(struct ether_header));
            const char *payload = (const char *)pktData + offset;

            // insert() leaves an existing entry untouched, so a new stream
            // gets the next free ID and a known stream keeps its old one.
            size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
                                          stream_map.size())).first->second;

            packets.push_back(string(payload, length));
            stream_ids.push_back(id);
        }
        pcap_close(pcapHandle);

        return !packets.empty();
    }

    // Return the number of bytes scanned
    size_t bytes() const {
        size_t sum = 0;
        for (const auto &packet : packets) {
            sum += packet.size();
        }
        return sum;
    }

    // Return the number of matches found.
    size_t matches() const {
        return matchCount;
    }

    // Clear the number of matches found.
    void clearMatches() {
        matchCount = 0;
    }

    // Open a Hyperscan stream for each stream in stream_ids
    void openStreams() {
        streams.resize(stream_map.size());
        for (auto &stream : streams) {
            hs_error_t err = hs_open_stream(db_streaming, 0, &stream);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to open stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Close all open Hyperscan streams (potentially generating any
    // end-anchored matches)
    void closeStreams() {
        for (auto &stream : streams) {
            hs_error_t err = hs_close_stream(stream, scratch, onMatch,
                                             &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to close stream. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the streaming interface.
    void scanStreams() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan_stream(streams[stream_ids[i]],
                                            pkt.c_str(), pkt.length(), 0,
                                            scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Scan each packet (in the ordering given in the PCAP file) through
    // Hyperscan using the block-mode interface.
    void scanBlock() {
        for (size_t i = 0; i != packets.size(); ++i) {
            const std::string &pkt = packets[i];
            hs_error_t err = hs_scan(db_block, pkt.c_str(), pkt.length(), 0,
                                     scratch, onMatch, &matchCount);
            if (err != HS_SUCCESS) {
                cerr << "ERROR: Unable to scan packet. Exiting." << endl;
                exit(-1);
            }
        }
    }

    // Display some information about the compiled database and scanned data.
    void displayStats() {
        size_t numPackets = packets.size();
        size_t numStreams = stream_map.size();
        size_t numBytes = bytes();
        hs_error_t err;

        cout << numPackets << " packets in " << numStreams
             << " streams, totalling " << numBytes << " bytes." << endl;
        // Report an average of 0 rather than dividing by zero when no
        // packets have been loaded.
        cout << "Average packet length: "
             << (numPackets ? numBytes / numPackets : 0) << " bytes."
             << endl;
        cout << "Average stream length: "
             << (numStreams ? numBytes / numStreams : 0) << " bytes."
             << endl;
        cout << endl;

        size_t dbStream_size = 0;
        err = hs_database_size(db_streaming, &dbStream_size);
        if (err == HS_SUCCESS) {
            cout << "Streaming mode Hyperscan database size : "
                 << dbStream_size << " bytes." << endl;
        } else {
            cout << "Error getting streaming mode Hyperscan database size"
                 << endl;
        }

        size_t dbBlock_size = 0;
        err = hs_database_size(db_block, &dbBlock_size);
        if (err == HS_SUCCESS) {
            cout << "Block mode Hyperscan database size : "
                 << dbBlock_size << " bytes." << endl;
        } else {
            cout << "Error getting block mode Hyperscan database size"
                 << endl;
        }

        size_t stream_size = 0;
        err = hs_stream_size(db_streaming, &stream_size);
        if (err == HS_SUCCESS) {
            cout << "Streaming mode Hyperscan stream state size: "
                 << stream_size << " bytes (per stream)." << endl;
        } else {
            cout << "Error getting stream state size" << endl;
        }
    }
};
// helper function - see end of file
static void parseFile(const char *filename, vector<string> &patterns,
vector<unsigned> &flags, vector<unsigned> &ids);
// Compile the given expressions into a single Hyperscan database with
// hs_compile_multi(). The three vectors are parallel: element i of flags and
// ids belongs to expression i. mode is passed straight to the compiler. On
// success the compile time is printed and the database returned; on failure
// the compile error is printed and the program exits.
//
// flags and ids are taken by const reference; they are only read, so there is
// no reason to copy both vectors on every call.
static hs_database_t *buildDatabase(const vector<const char *> &expressions,
                                    const vector<unsigned> &flags,
                                    const vector<unsigned> &ids,
                                    unsigned int mode) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compileErr = nullptr;
    hs_error_t err;

    Clock clock;
    clock.start();

    err = hs_compile_multi(expressions.data(), flags.data(), ids.data(),
                           expressions.size(), mode, nullptr, &db, &compileErr);

    clock.stop();

    if (err != HS_SUCCESS) {
        if (compileErr->expression < 0) {
            // The error does not refer to a particular expression.
            cerr << "ERROR: " << compileErr->message << endl;
        } else {
            cerr << "ERROR: Pattern '" << expressions[compileErr->expression]
                 << "' failed compilation with error: " << compileErr->message
                 << endl;
        }
        // As the compileErr pointer points to dynamically allocated memory, if
        // we get an error, we must be sure to release it. This is not
        // necessary when no error is detected.
        hs_free_compile_error(compileErr);
        exit(-1);
    }

    cout << "Hyperscan " << (mode == HS_MODE_STREAM ? "streaming" : "block")
         << " mode database compiled in " << clock.seconds() << " seconds."
         << endl;

    return db;
}
/**
* This function will read in the file with the specified name, with an
* expression per line, ignoring lines starting with '#' and build a Hyperscan
* database for it.
*/
static void databasesFromFile(const char *filename,
hs_database_t **db_streaming,
hs_database_t **db_block) {
// hs_compile_multi requires three parallel arrays containing the patterns,
// flags and ids that we want to work with. To achieve this we use
// vectors and new entries onto each for each valid line of input from
// the pattern file.
vector<string> patterns;
vector<unsigned> flags;
vector<unsigned> ids;
// do the actual file reading and string handling
parseFile(filename, patterns, flags, ids);
// Turn our vector of strings into a vector of char*'s to pass in to
// hs_compile_multi. (This is just using the vector of strings as dynamic
// storage.)
vector<const char*> cstrPatterns;
for (const auto &pattern : patterns) {
cstrPatterns.push_back(pattern.c_str());
}
cout << "Compiling Hyperscan databases with " << patterns.size()
<< " patterns." << endl;
*db_streaming = buildDatabase(cstrPatterns, flags, ids, HS_MODE_STREAM);
*db_block = buildDatabase(cstrPatterns, flags, ids, HS_MODE_BLOCK);
}
// Print a one-line usage summary for the program named prog to stderr.
static void usage(const char *prog) {
    cerr << "Usage: " << prog;
    cerr << " [-n repeats] <pattern file> <pcap file>" << endl;
}
// Main entry point.
int main(int argc, char **argv) {
unsigned int repeatCount = 1;
// Process command line arguments.
int opt;
while ((opt = getopt(argc, argv, "n:")) != -1) {
switch (opt) {
case 'n':
repeatCount = atoi(optarg);
break;
default:
usage(argv[0]);
exit(-1);
}
}
if (argc - optind != 2) {
usage(argv[0]);
exit(-1);
}
const char *patternFile = argv[optind];
const char *pcapFile = argv[optind + 1];
// Read our pattern set in and build Hyperscan databases from it.
cout << "Pattern file: " << patternFile << endl;
hs_database_t *db_streaming, *db_block;
databasesFromFile(patternFile, &db_streaming, &db_block);
// Read our input PCAP file in
Benchmark bench(db_streaming, db_block);
cout << "PCAP input file: " << pcapFile << endl;
if (!bench.readStreams(pcapFile)) {
cerr << "Unable to read packets from PCAP file. Exiting." << endl;
exit(-1);
}
if (repeatCount != 1) {
cout << "Repeating PCAP scan " << repeatCount << " times." << endl;
}
bench.displayStats();
Clock clock;
// Streaming mode scans.
double secsStreamingScan = 0.0, secsStreamingOpenClose = 0.0;
for (unsigned int i = 0; i < repeatCount; i++) {
// Open streams.
clock.start();
bench.openStreams();
clock.stop();
secsStreamingOpenClose += clock.seconds();
// Scan all our packets in streaming mode.
clock.start();
bench.scanStreams();
clock.stop();
secsStreamingScan += clock.seconds();
// Close streams.
clock.start();
bench.closeStreams();
clock.stop();
secsStreamingOpenClose += clock.seconds();
}
// Collect data from streaming mode scans.
size_t bytes = bench.bytes();
double tputStreamScanning = (bytes * 8 * repeatCount) / secsStreamingScan;
double tputStreamOverhead = (bytes * 8 * repeatCount) / (secsStreamingScan + secsStreamingOpenClose);
size_t matchesStream = bench.matches();
double matchRateStream = matchesStream / ((bytes * repeatCount) / 1024.0); // matches per kilobyte
// Scan all our packets in block mode.
bench.clearMatches();
clock.start();
for (unsigned int i = 0; i < repeatCount; i++) {
bench.scanBlock();
}
clock.stop();
double secsScanBlock = clock.seconds();
// Collect data from block mode scans.
double tputBlockScanning = (bytes * 8 * repeatCount) / secsScanBlock;
size_t matchesBlock = bench.matches();
double matchRateBlock = matchesBlock / ((bytes * repeatCount) / 1024.0); // matches per kilobyte
cout << endl << "Streaming mode:" << endl << endl;
cout << " Total matches: " << matchesStream << endl;
cout << std::fixed << std::setprecision(4);
cout << " Match rate: " << matchRateStream << " matches/kilobyte" << endl;
cout << std::fixed << std::setprecision(2);
cout << " Throughput (with stream overhead): "
<< tputStreamOverhead/1000000 << " megabits/sec" << endl;
cout << " Throughput (no stream overhead): "
<< tputStreamScanning/1000000 << " megabits/sec" << endl;
cout << endl << "Block mode:" << endl << endl;
cout << " Total matches: " << matchesBlock << endl;
cout << std::fixed << std::setprecision(4);
cout << " Match rate: " << matchRateBlock << " matches/kilobyte" << endl;
cout << std::fixed << std::setprecision(2);
cout << " Throughput: "
<< tputBlockScanning/1000000 << " megabits/sec" << endl;
cout << endl;
if (bytes < (2*1024*1024)) {
cout << endl << "WARNING: Input PCAP file is less than 2MB in size." << endl
<< "This test may have been too short to calculate accurate results." << endl;
}
// Close Hyperscan databases
hs_free_database(db_streaming);
hs_free_database(db_block);
return 0;
}
/**
 * Helper function to locate the offset of the first byte of the payload in the
 * given ethernet frame. Offset into the packet, and the length of the payload
 * are returned in the arguments @a offset and @a length.
 *
 * Returns false (leaving @a offset and @a length untouched or meaningless) for
 * anything that is not an unfragmented IPv4 TCP or UDP packet with a non-empty
 * payload, and for packets whose IP total length is smaller than their own
 * header lengths.
 *
 * NOTE: the captured length of the frame is not available here, so the caller
 * is responsible for making sure the headers and the reported payload range
 * actually lie inside the captured data.
 */
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                          unsigned int *length) {
    const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));
    const tcphdr *th = nullptr;

    // Ignore packets that aren't IPv4
    if (iph->ip_v != 4) {
        return false;
    }

    // Ignore fragmented packets.
    if (iph->ip_off & htons(IP_MF|IP_OFFMASK)) {
        return false;
    }

    // IP header length, and transport header length.
    unsigned int ihlen = iph->ip_hl * 4;
    unsigned int thlen = 0;

    switch (iph->ip_p) {
    case IPPROTO_TCP:
        th = (const tcphdr *)((const char *)iph + ihlen);
        thlen = th->th_off * 4;
        break;
    case IPPROTO_UDP:
        thlen = sizeof(udphdr);
        break;
    default:
        return false;
    }

    // A malformed packet may claim a total length shorter than its headers.
    // Subtracting in unsigned arithmetic would then wrap to a huge payload
    // length, so such packets are rejected.
    const unsigned int hdrlen = ihlen + thlen;
    const unsigned int iplen = ntohs(iph->ip_len);
    if (iplen < hdrlen) {
        return false;
    }

    *offset = sizeof(ether_header) + hdrlen;
    *length = iplen - hdrlen;

    return *length != 0;
}
static unsigned parseFlags(const string &flagsStr) {
unsigned flags = 0;
for (const auto &c : flagsStr) {
switch (c) {
case 'i':
flags |= HS_FLAG_CASELESS; break;
case 'm':
flags |= HS_FLAG_MULTILINE; break;
case 's':
flags |= HS_FLAG_DOTALL; break;
case 'H':
flags |= HS_FLAG_SINGLEMATCH; break;
case 'V':
flags |= HS_FLAG_ALLOWEMPTY; break;
case '8':
flags |= HS_FLAG_UTF8; break;
case 'W':
flags |= HS_FLAG_UCP; break;
default:
cerr << "Unsupported flag \'" << c << "\'" << endl;
exit(-1);
}
}
return flags;
}
static void parseFile(const char *filename, vector<string> &patterns,
vector<unsigned> &flags, vector<unsigned> &ids) {
ifstream inFile(filename);
if (!inFile.good()) {
cerr << "ERROR: Can't open pattern file \"" << filename << "\"" << endl;
exit(-1);
}
for (unsigned i = 1; !inFile.eof(); ++i) {
string line;
getline(inFile, line);
// if line is empty, or a comment, we can skip it
if (line.empty() || line[0] == '#') {
continue;
}
// otherwise, it should be ID:PCRE, e.g.
// 10001:/foobar/is
size_t colonIdx = line.find_first_of(':');
if (colonIdx == string::npos) {
cerr << "ERROR: Could not parse line " << i << endl;
exit(-1);
}
// we should have an unsigned int as an ID, before the colon
unsigned id = std::stoi(line.substr(0, colonIdx).c_str());
// rest of the expression is the PCRE
const string expr(line.substr(colonIdx + 1));
size_t flagsStart = expr.find_last_of('/');
if (flagsStart == string::npos) {
cerr << "ERROR: no trailing '/' char" << endl;
exit(-1);
}
string pcre(expr.substr(1, flagsStart - 1));
string flagsStr(expr.substr(flagsStart + 1, expr.size() - flagsStart));
unsigned flag = parseFlags(flagsStr);
patterns.push_back(pcre);
flags.push_back(flag);
ids.push_back(id);
}
}

221
examples/simplegrep.c Normal file
View File

@ -0,0 +1,221 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Hyperscan example program 1: simplegrep
*
* This is a simple example of Hyperscan's most basic functionality: it will
* search a given input file for a pattern supplied as a command-line argument.
* It is intended to demonstrate correct usage of the hs_compile and hs_scan
* functions of Hyperscan.
*
* Patterns are scanned in 'DOTALL' mode, which is equivalent to PCRE's '/s'
* modifier. This behaviour can be changed by modifying the "flags" argument to
* hs_compile.
*
* Build instructions:
*
* gcc -o simplegrep simplegrep.c $(pkg-config --cflags --libs libhs)
*
* Usage:
*
* ./simplegrep <pattern> <input file>
*
* Example:
*
* ./simplegrep int simplegrep.c
*
*/
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <hs.h>
/**
 * Match callback: invoked once for every match that is reported.
 *
 * @a ctx lets the application thread its own state through to the callback.
 * In this example it is the pattern string that was searched for, which is
 * echoed together with the match's end offset (@a to). The @a id, @a from and
 * @a flags arguments are not used here.
 *
 * Always returns 0.
 */
static int eventHandler(unsigned int id, unsigned long long from,
                        unsigned long long to, unsigned int flags, void *ctx) {
    const char *matchedPattern = (const char *)ctx;

    printf("Match for pattern \"%s\" at offset %llu\n", matchedPattern, to);
    return 0;
}
/**
 * Read the contents of the file named @a inputFN into a newly malloc'd buffer.
 *
 * On success the buffer is returned and @a length is set to the number of
 * bytes actually stored in it; the caller owns the buffer and must free() it.
 * On failure (file cannot be opened, sized or read, file is empty, or memory
 * cannot be allocated) a message is written to stderr and NULL is returned.
 *
 * Input larger than UINT_MAX bytes is clipped to UINT_MAX bytes, with a
 * warning, because the length is handed on as an unsigned int.
 */
static char *readInputData(const char *inputFN, unsigned int *length) {
    /* Open in binary mode: the size obtained from ftell() below must agree
     * with the number of bytes fread() delivers, which is not guaranteed for
     * text-mode streams on every platform. */
    FILE *f = fopen(inputFN, "rb");
    if (!f) {
        fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
                strerror(errno));
        return NULL;
    }

    /* We use fseek/ftell to get our data length, in order to keep this example
     * code as portable as possible. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }
    long dataLen = ftell(f);
    if (dataLen < 0) {
        fprintf(stderr, "ERROR: ftell() failed: %s\n", strerror(errno));
        fclose(f);
        return NULL;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }

    /* Hyperscan's hs_scan function accepts length as an unsigned int, so we
     * limit the size of our buffer appropriately. */
    size_t wanted = (size_t)dataLen;
    if ((unsigned long)dataLen > UINT_MAX) {
        wanted = UINT_MAX;
        printf("WARNING: clipping data to %u bytes\n", UINT_MAX);
    } else if (dataLen == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        fclose(f);
        return NULL;
    }

    char *inputData = malloc(wanted);
    if (!inputData) {
        fprintf(stderr, "ERROR: unable to malloc %lu bytes\n",
                (unsigned long)wanted);
        fclose(f);
        return NULL;
    }

    size_t got = 0;
    while (got < wanted) {
        size_t n = fread(inputData + got, 1, wanted - got, f);
        got += n;
        if (ferror(f) != 0) {
            fprintf(stderr, "ERROR: fread() failed\n");
            free(inputData);
            fclose(f);
            return NULL;
        }
        if (n == 0) {
            /* End of file reached before the expected number of bytes (for
             * example, the file shrank after it was sized). Stop here rather
             * than loop forever; only the bytes read so far are reported. */
            break;
        }
    }
    fclose(f);

    if (got == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        free(inputData);
        return NULL;
    }

    *length = (unsigned int)got;
    return inputData;
}
/*
 * Program entry point: compile the pattern given as argv[1], load the file
 * named by argv[2], scan it once in block mode and print every match.
 * Returns 0 on success and -1 on any failure.
 */
int main(int argc, char *argv[]) {
    int status = -1;
    char *pattern;
    char *inputFN;
    char *inputData;
    unsigned int length;
    hs_database_t *database;
    hs_compile_error_t *compile_err;
    hs_scratch_t *scratch = NULL;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <pattern> <input file>\n", argv[0]);
        return status;
    }
    pattern = argv[1];
    inputFN = argv[2];

    /* Step 1: compile the command-line pattern. HS_FLAG_DOTALL is passed so
     * that '.' also matches newline characters. On failure the compiler hands
     * back an error object whose message explains why the pattern was
     * rejected; it is printed and then released. */
    if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database,
                   &compile_err) != HS_SUCCESS) {
        fprintf(stderr, "ERROR: Unable to compile pattern \"%s\": %s\n",
                pattern, compile_err->message);
        hs_free_compile_error(compile_err);
        return status;
    }

    /* Step 2: pull the whole input file into memory. */
    inputData = readInputData(inputFN, &length);
    if (!inputData) {
        goto release_database;
    }

    /* Step 3: allocate scratch space for the database and scan.
     *
     * Scratch would normally be reused across many hs_scan calls; since only
     * one scan happens here it is allocated just before and freed just after.
     *
     * eventHandler is called for each match, and every callback has been
     * issued by the time hs_scan returns. The pattern string is supplied as
     * the context pointer so the callback can print it. */
    if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
        fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n");
        goto release_input;
    }

    printf("Scanning %u bytes with Hyperscan\n", length);
    if (hs_scan(database, inputData, length, 0, scratch, eventHandler,
                pattern) == HS_SUCCESS) {
        status = 0;
    } else {
        fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n");
    }

    /* Step 4: release everything in reverse order of acquisition. */
    hs_free_scratch(scratch);
release_input:
    free(inputData);
release_database:
    hs_free_database(database);
    return status;
}

View File

@ -0,0 +1,501 @@
//=======================================================================
// Copyright (C) 2005-2009 Jongsoo Park <jongsoo.park -at- gmail.com>
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//=======================================================================
#ifndef BOOST_GRAPH_DOMINATOR_HPP
#define BOOST_GRAPH_DOMINATOR_HPP
#include <boost/config.hpp>
#include <deque>
#include <set>
#include <boost/graph/depth_first_search.hpp>
#include <boost/concept/assert.hpp>
// Dominator tree computation
// NOTE: This file contains modifications from the distributed Boost version to
// correctly support supplying a vertex index map to the algorithm. To
// differentiate it, it has been moved into the boost_ue2 namespace.
namespace boost_ue2 {
using namespace boost;
namespace detail {
/**
 * A visitor that wraps a time_stamper and, in addition to stamping a vertex,
 * stores that vertex in a vector slot indexed by the stamper's m_time value.
 */
template<class TimeMap, class VertexVector, class TimeT, class Tag>
class time_stamper_with_vertex_vector
    : public base_visitor<
          time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag> >
{
    typedef typename property_traits<TimeMap>::key_type StampedVertex;

public:
    typedef Tag event_filter;

    time_stamper_with_vertex_vector(TimeMap timeMap, VertexVector& v,
                                    TimeT& t)
        : stamper_(timeMap, t), byTime_(v)
    {
    }

    template<class Graph>
    void operator()(const StampedVertex& v, const Graph& g)
    {
        // Run the wrapped stamper first; m_time is read only afterwards and
        // selects the slot in which v is recorded.
        stamper_(v, g);
        byTime_[stamper_.m_time] = v;
    }

private:
    time_stamper<TimeMap, TimeT, Tag> stamper_;
    VertexVector& byTime_;
};
/**
 * Factory for time_stamper_with_vertex_vector: lets the template arguments be
 * deduced from the call instead of being spelled out.
 */
template<class TimeMap, class VertexVector, class TimeT, class Tag>
time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag>
stamp_times_with_vertex_vector(TimeMap timeMap, VertexVector& v, TimeT& t,
                               Tag)
{
    typedef time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag>
        Stamper;
    return Stamper(timeMap, v, t);
}
/**
 * Per-vertex step of the Lengauer-Tarjan dominator computation.
 *
 * operator() is called once for each vertex; it computes that vertex's
 * semidominator, links the vertex into the spanning forest and resolves (or
 * defers, via samedomMap) the immediate dominators of the vertices waiting in
 * its parent's bucket. All working arrays are sized by num_vertices(g) and
 * addressed through the supplied vertex index map.
 */
template<class Graph, class IndexMap, class TimeMap, class PredMap,
class DomTreePredMap>
class dominator_visitor
{
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
public :
/**
 * @param g [in] the target graph of the dominator tree
 * @param entry [in] the entry node of g
 * @param indexMap [in] the vertex index map for g
 * @param domTreePredMap [out] the immediate dominator map
 * (parent map in dominator tree)
 *
 * ancestor_ starts out filled with null_vertex(); samedom_ and best_ are
 * initialised as copies of ancestor_ and semi_ respectively.
 */
dominator_visitor(const Graph& g, const Vertex& entry,
const IndexMap& indexMap,
DomTreePredMap domTreePredMap)
: semi_(num_vertices(g)),
ancestor_(num_vertices(g), graph_traits<Graph>::null_vertex()),
samedom_(ancestor_),
best_(semi_),
semiMap_(make_iterator_property_map(semi_.begin(),
indexMap)),
ancestorMap_(make_iterator_property_map(ancestor_.begin(),
indexMap)),
bestMap_(make_iterator_property_map(best_.begin(),
indexMap)),
buckets_(num_vertices(g)),
bucketMap_(make_iterator_property_map(buckets_.begin(),
indexMap)),
entry_(entry),
domTreePredMap_(domTreePredMap),
numOfVertices_(num_vertices(g)),
samedomMap(make_iterator_property_map(samedom_.begin(),
indexMap))
{
}
/**
 * Process vertex n.
 *
 * @param n vertex to process; the entry vertex is ignored
 * @param dfnumMap DFS number of each vertex
 * @param parentMap parent of each vertex in the DFS spanning tree
 * @param g the graph being analysed
 */
void
operator()(const Vertex& n, const TimeMap& dfnumMap,
const PredMap& parentMap, const Graph& g)
{
if (n == entry_) return;
const Vertex p(get(parentMap, n));
// s is the running best candidate for semi(n); it starts as n's parent.
Vertex s(p);
// 1. Calculate the semidominator of n,
// based on the semidominator thm.
// * Semidominator thm. : To find the semidominator of a node n,
// consider all predecessors v of n in the CFG (Control Flow Graph).
// - If v is a proper ancestor of n in the spanning tree
// (so dfnum(v) < dfnum(n)), then v is a candidate for semi(n)
// - If v is a non-ancestor of n (so dfnum(v) > dfnum(n))
// then for each u that is an ancestor of v (or u = v),
// Let semi(u) be a candidate for semi(n)
// of all these candidates, the one with lowest dfnum is
// the semidominator of n.
// For each predecessor of n
typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
for (boost::tie(inItr, inEnd) = in_edges(n, g); inItr != inEnd; ++inItr)
{
const Vertex v = source(*inItr, g);
// To deal with unreachable nodes
// NOTE(review): if dfnumMap's value type is unsigned the "< 0" test can
// never be true and only the upper-bound test is effective -- confirm.
if (get(dfnumMap, v) < 0 || get(dfnumMap, v) >= numOfVertices_)
continue;
Vertex s2;
if (get(dfnumMap, v) <= get(dfnumMap, n))
s2 = v;
else
s2 = get(semiMap_, ancestor_with_lowest_semi_(v, dfnumMap));
if (get(dfnumMap, s2) < get(dfnumMap, s))
s = s2;
}
put(semiMap_, n, s);
// 2. Calculation of n's dominator is deferred until
// the path from s to n has been linked into the forest
get(bucketMap_, s).push_back(n);
// Link n into the forest beneath its spanning-tree parent.
get(ancestorMap_, n) = p;
get(bestMap_, n) = n;
// 3. Now that the path from p to v has been linked into
// the spanning forest, these lines calculate the dominator of v,
// based on the dominator thm., or else defer the calculation
// until y's dominator is known
// * Dominator thm. : On the spanning-tree path below semi(n) and
// above or including n, let y be the node
// with the smallest-numbered semidominator. Then,
//
// idom(n) = semi(n) if semi(y)=semi(n) or
// idom(y) if semi(y) != semi(n)
typename std::deque<Vertex>::iterator buckItr;
for (buckItr = get(bucketMap_, p).begin();
buckItr != get(bucketMap_, p).end();
++buckItr)
{
const Vertex v(*buckItr);
const Vertex y(ancestor_with_lowest_semi_(v, dfnumMap));
if (get(semiMap_, y) == get(semiMap_, v))
put(domTreePredMap_, v, p);
else
// Deferred: v's immediate dominator is to be taken from y later.
put(samedomMap, v, y);
}
// Every vertex waiting on p has now been handled.
get(bucketMap_, p).clear();
}
protected :
/**
 * Evaluate function in Tarjan's path compression
 *
 * Follows ancestorMap_ links recursively from v. On the way back it points
 * v's ancestor link at its ancestor's ancestor and replaces bestMap_[v] when
 * the vertex found further up has a semidominator with a lower DFS number.
 * Returns bestMap_[v].
 */
const Vertex
ancestor_with_lowest_semi_(const Vertex& v, const TimeMap& dfnumMap)
{
const Vertex a(get(ancestorMap_, v));
if (get(ancestorMap_, a) != graph_traits<Graph>::null_vertex())
{
const Vertex b(ancestor_with_lowest_semi_(a, dfnumMap));
put(ancestorMap_, v, get(ancestorMap_, a));
if (get(dfnumMap, get(semiMap_, b)) <
get(dfnumMap, get(semiMap_, get(bestMap_, v))))
put(bestMap_, v, b);
}
return get(bestMap_, v);
}
// Backing storage for the property maps below, one slot per vertex.
std::vector<Vertex> semi_, ancestor_, samedom_, best_;
PredMap semiMap_, ancestorMap_, bestMap_;
// buckets_[s] holds the vertices whose semidominator is s and whose
// immediate dominator has not been resolved yet.
std::vector< std::deque<Vertex> > buckets_;
iterator_property_map<typename std::vector<std::deque<Vertex> >::iterator,
IndexMap> bucketMap_;
// NOTE(review): held by reference, so the Vertex passed to the constructor
// must outlive this visitor.
const Vertex& entry_;
DomTreePredMap domTreePredMap_;
const VerticesSizeType numOfVertices_;
public :
// Deferred results: samedomMap[v] is the vertex y recorded in step 3 of
// operator(); entries never written remain null_vertex().
PredMap samedomMap;
};
} // namespace detail
/**
* @brief Build dominator tree using Lengauer-Tarjan algorithm.
* It takes O((V+E)log(V+E)) time.
*
* @pre dfnumMap, parentMap and verticesByDFNum have dfs results corresponding
* indexMap.
* If dfs has already run before,
* this function would be good for saving computations.
* @pre Unreachable nodes must be masked as
* graph_traits<Graph>::null_vertex in parentMap.
* @pre Unreachable nodes must be masked as
* (std::numeric_limits<VerticesSizeType>::max)() in dfnumMap.
*
* @param domTreePredMap [out] : immediate dominator map (parent map
* in dom. tree)
*
* @note reference Appel. p. 452~453. algorithm 19.9, 19.10.
*
* @todo : Optimization in Finding Dominators in Practice, Loukas Georgiadis
*/
template<class Graph, class IndexMap, class TimeMap, class PredMap,
class VertexVector, class DomTreePredMap>
void
lengauer_tarjan_dominator_tree_without_dfs
(const Graph& g,
const typename graph_traits<Graph>::vertex_descriptor& entry,
const IndexMap& indexMap,
TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
DomTreePredMap domTreePredMap)
{
// Typedefs and concept check
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
const VerticesSizeType numOfVertices = num_vertices(g);
if (numOfVertices == 0) return;
// 1. Visit each vertex in reverse post order and calculate sdom.
detail::dominator_visitor<Graph, IndexMap, TimeMap, PredMap, DomTreePredMap>
visitor(g, entry, indexMap, domTreePredMap);
VerticesSizeType i;
for (i = 0; i < numOfVertices; ++i)
{
const Vertex u(verticesByDFNum[numOfVertices - 1 - i]);
if (u != graph_traits<Graph>::null_vertex())
visitor(u, dfnumMap, parentMap, g);
}
// 2. Now all the deferred dominator calculations,
// based on the second clause of the dominator thm., are performed
for (i = 0; i < numOfVertices; ++i)
{
const Vertex n(verticesByDFNum[i]);
if (n == entry || n == graph_traits<Graph>::null_vertex())
continue;
Vertex u = get(visitor.samedomMap, n);
if (u != graph_traits<Graph>::null_vertex())
{
put(domTreePredMap, n, get(domTreePredMap, u));
}
}
}
/**
* Unlike lengauer_tarjan_dominator_tree_without_dfs,
* dfs is run in this function and
* the result is written to dfnumMap, parentMap, vertices.
*
* If the result of dfs required after this algorithm,
* this function can eliminate the need of rerunning dfs.
*/
template<class Graph, class IndexMap, class TimeMap, class PredMap,
class VertexVector, class DomTreePredMap>
void
lengauer_tarjan_dominator_tree
(const Graph& g,
const typename graph_traits<Graph>::vertex_descriptor& entry,
const IndexMap& indexMap,
TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
DomTreePredMap domTreePredMap)
{
// Typedefs and concept check
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
// 1. Depth first visit
const VerticesSizeType numOfVertices = num_vertices(g);
if (numOfVertices == 0) return;
VerticesSizeType time =
(std::numeric_limits<VerticesSizeType>::max)();
std::vector<default_color_type>
colors(numOfVertices, color_traits<default_color_type>::white());
depth_first_visit
(g, entry,
make_dfs_visitor
(make_pair(record_predecessors(parentMap, on_tree_edge()),
detail::stamp_times_with_vertex_vector
(dfnumMap, verticesByDFNum, time, on_discover_vertex()))),
make_iterator_property_map(colors.begin(), indexMap));
// 2. Run main algorithm.
lengauer_tarjan_dominator_tree_without_dfs(g, entry, indexMap, dfnumMap,
parentMap, verticesByDFNum,
domTreePredMap);
}
/**
* Use vertex_index as IndexMap and make dfnumMap, parentMap, verticesByDFNum
* internally.
* If we don't need the result of dfs (dfnumMap, parentMap, verticesByDFNum),
* this function would be more convenient one.
*/
template<class Graph, class DomTreePredMap>
void
lengauer_tarjan_dominator_tree
(const Graph& g,
const typename graph_traits<Graph>::vertex_descriptor& entry,
DomTreePredMap domTreePredMap)
{
// typedefs
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
typedef typename property_map<Graph, vertex_index_t>::const_type IndexMap;
typedef
iterator_property_map<typename std::vector<VerticesSizeType>::iterator,
IndexMap> TimeMap;
typedef
iterator_property_map<typename std::vector<Vertex>::iterator, IndexMap>
PredMap;
// Make property maps
const VerticesSizeType numOfVertices = num_vertices(g);
if (numOfVertices == 0) return;
const IndexMap indexMap = get(vertex_index, g);
std::vector<VerticesSizeType> dfnum(numOfVertices, 0);
TimeMap dfnumMap(make_iterator_property_map(dfnum.begin(), indexMap));
std::vector<Vertex> parent(numOfVertices,
graph_traits<Graph>::null_vertex());
PredMap parentMap(make_iterator_property_map(parent.begin(), indexMap));
std::vector<Vertex> verticesByDFNum(parent);
// Run main algorithm
lengauer_tarjan_dominator_tree(g, entry,
indexMap, dfnumMap, parentMap,
verticesByDFNum, domTreePredMap);
}
/**
 * Muchnick. p. 182, 184
 *
 * Computes immediate dominators using iterative bit vector analysis: the
 * dominator set of every vertex is refined until nothing changes, and the
 * immediate dominators are then extracted from those sets.
 *
 * @param g [in] the graph to analyse
 * @param entry [in] the entry vertex of g
 * @param indexMap [in] the vertex index map for g
 * @param domTreePredMap [out] immediate dominator map; written only for
 *        non-entry vertices that end up with exactly one candidate
 */
template<class Graph, class IndexMap, class DomTreePredMap>
void
iterative_bit_vector_dominator_tree
  (const Graph& g,
   const typename graph_traits<Graph>::vertex_descriptor& entry,
   const IndexMap& indexMap,
   DomTreePredMap domTreePredMap)
{
    typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
    typedef typename graph_traits<Graph>::vertex_iterator vertexItr;
    typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
    typedef
      iterator_property_map<typename std::vector< std::set<Vertex> >::iterator,
                            IndexMap> vertexSetMap;

    BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));

    // 1. Finding dominator
    // 1.1. Initialize
    const VerticesSizeType numOfVertices = num_vertices(g);
    if (numOfVertices == 0) return;

    vertexItr vi, viend;
    boost::tie(vi, viend) = vertices(g);
    const std::set<Vertex> N(vi, viend);

    bool change = true;

    // Every vertex starts with the full vertex set as its dominator set,
    // except the entry, which is dominated only by itself.
    std::vector< std::set<Vertex> > dom(numOfVertices, N);
    vertexSetMap domMap(make_iterator_property_map(dom.begin(), indexMap));
    get(domMap, entry).clear();
    get(domMap, entry).insert(entry);

    // 1.2. Iterate to a fixed point:
    //      dom(v) = {v} + intersection of dom(p) over all predecessors p.
    while (change)
    {
        change = false;
        for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
        {
            if (*vi == entry) continue;

            std::set<Vertex> T(N);

            typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
            for (boost::tie(inItr, inEnd) = in_edges(*vi, g); inItr != inEnd; ++inItr)
            {
                const Vertex p = source(*inItr, g);

                std::set<Vertex> tempSet;
                std::set_intersection(T.begin(), T.end(),
                                      get(domMap, p).begin(),
                                      get(domMap, p).end(),
                                      std::inserter(tempSet, tempSet.begin()));
                T.swap(tempSet);
            }

            T.insert(*vi);
            if (T != get(domMap, *vi))
            {
                change = true;
                get(domMap, *vi).swap(T);
            }
        } // end of for (boost::tie(vi, viend) = vertices(g)
    } // end of while(change)

    // 2. Build dominator tree
    // 2.1. Drop each vertex from its own set, leaving its strict dominators.
    for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
        get(domMap, *vi).erase(*vi);

    // (The original built an unused "Graph domTree(numOfVertices)" here; it
    // has been removed because nothing ever read it.)

    // 2.2. For every non-entry vertex, discard each candidate that is itself
    //      a strict dominator of another candidate s.
    for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
    {
        if (*vi == entry) continue;

        // We have to iterate through copied dominator set
        const std::set<Vertex> tempSet(get(domMap, *vi));
        typename std::set<Vertex>::const_iterator s;
        for (s = tempSet.begin(); s != tempSet.end(); ++s)
        {
            typename std::set<Vertex>::iterator t;
            for (t = get(domMap, *vi).begin(); t != get(domMap, *vi).end(); )
            {
                typename std::set<Vertex>::iterator old_t = t;
                ++t; // Done early because t may become invalid
                if (*old_t == *s) continue;
                if (get(domMap, *s).find(*old_t) != get(domMap, *s).end())
                    get(domMap, *vi).erase(old_t);
            }
        }
    }

    // 2.3. A vertex left with exactly one candidate has that candidate as its
    //      immediate dominator.
    for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
    {
        if (*vi != entry && get(domMap, *vi).size() == 1)
        {
            Vertex temp = *get(domMap, *vi).begin();
            put(domTreePredMap, *vi, temp);
        }
    }
}
template<class Graph, class DomTreePredMap>
void
iterative_bit_vector_dominator_tree
(const Graph& g,
const typename graph_traits<Graph>::vertex_descriptor& entry,
DomTreePredMap domTreePredMap)
{
typename property_map<Graph, vertex_index_t>::const_type
indexMap = get(vertex_index, g);
iterative_bit_vector_dominator_tree(g, entry, indexMap, domTreePredMap);
}
} // namespace boost_ue2
#endif // BOOST_GRAPH_DOMINATOR_HPP

10
libhs.pc.in Normal file
View File

@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=@CMAKE_INSTALL_PREFIX@/lib
includedir=@CMAKE_INSTALL_PREFIX@/include
Name: libhs
Description: Intel(R) Hyperscan Library
Version: @HS_VERSION@
Libs: -L${libdir} -lhs
Cflags: -I${includedir}/hs

109
src/alloc.c Normal file
View File

@ -0,0 +1,109 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions for setting custom allocators.
*/
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
/* Routines used for a memory category when no custom one has been supplied:
 * the C library's malloc and free. normalise_alloc/normalise_free below
 * substitute these for NULL arguments. */
#define default_malloc malloc
#define default_free free
/* Current allocation routine for each memory category (database, misc,
 * scratch, stream). Each starts out as the default and is replaced by the
 * corresponding hs_set_*_allocator call. */
hs_alloc_t hs_database_alloc = default_malloc;
hs_alloc_t hs_misc_alloc = default_malloc;
hs_alloc_t hs_scratch_alloc = default_malloc;
hs_alloc_t hs_stream_alloc = default_malloc;
/* Matching free routine for each memory category. */
hs_free_t hs_database_free = default_free;
hs_free_t hs_misc_free = default_free;
hs_free_t hs_scratch_free = default_free;
hs_free_t hs_stream_free = default_free;
/** Return \a a unchanged, or the default allocator when \a a is NULL. */
static
hs_alloc_t normalise_alloc(hs_alloc_t a) {
    return a ? a : default_malloc;
}
/** Return \a f unchanged, or the default free routine when \a f is NULL. */
static
hs_free_t normalise_free(hs_free_t f) {
    return f ? f : default_free;
}
/**
 * Install \a allocfunc / \a freefunc for all four memory categories
 * (database, misc, stream, scratch). Always returns HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    /* The per-category setters in this file always return HS_SUCCESS, so
     * their results are deliberately not inspected. */
    (void)hs_set_database_allocator(allocfunc, freefunc);
    (void)hs_set_misc_allocator(allocfunc, freefunc);
    (void)hs_set_stream_allocator(allocfunc, freefunc);
    (void)hs_set_scratch_allocator(allocfunc, freefunc);

    return HS_SUCCESS;
}
/**
 * Set the allocate/free pair used for database memory. A NULL argument
 * selects the corresponding default routine. Always returns HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_database_alloc = chosen_alloc;
    hs_database_free = chosen_free;
    return HS_SUCCESS;
}
/**
 * Set the allocate/free pair used for miscellaneous memory. A NULL argument
 * selects the corresponding default routine. Always returns HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_misc_alloc = chosen_alloc;
    hs_misc_free = chosen_free;
    return HS_SUCCESS;
}
/**
 * Set the allocate/free pair used for scratch memory. A NULL argument
 * selects the corresponding default routine. Always returns HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_scratch_alloc = chosen_alloc;
    hs_scratch_free = chosen_free;
    return HS_SUCCESS;
}
/**
 * Set the allocate/free pair used for stream memory. A NULL argument
 * selects the corresponding default routine. Always returns HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_stream_alloc = chosen_alloc;
    hs_stream_free = chosen_free;
    return HS_SUCCESS;
}

66
src/allocator.h Normal file
View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ALLOCATOR_H
#define ALLOCATOR_H
#include "hs_common.h"
#include "ue2common.h"
/* The declarations below are given C linkage so that C and C++ translation
 * units refer to the same objects. */
#ifdef __cplusplus
extern "C"
{
#endif
/* Allocation function pointer for each memory category (database, misc,
 * scratch, stream). */
extern hs_alloc_t hs_database_alloc;
extern hs_alloc_t hs_misc_alloc;
extern hs_alloc_t hs_scratch_alloc;
extern hs_alloc_t hs_stream_alloc;
/* Free function pointer for each memory category. */
extern hs_free_t hs_database_free;
extern hs_free_t hs_misc_free;
extern hs_free_t hs_scratch_free;
extern hs_free_t hs_stream_free;
#ifdef __cplusplus
} /* extern C */
#endif
/** \brief Validate a block returned by one of the hs allocators.
 *
 * Returns HS_NOMEM when \a mem is NULL, HS_BAD_ALLOC when it is not aligned
 * to alignof(unsigned long long), and HS_SUCCESS otherwise. The block is not
 * released here; on error the caller should free it. */
static really_inline
hs_error_t hs_check_alloc(const void *mem) {
    if (!mem) {
        return HS_NOMEM;
    }
    if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
        return HS_BAD_ALLOC;
    }
    return HS_SUCCESS;
}
#endif

310
src/compiler/asserts.cpp Normal file
View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*
* This pass converts the temporary assert vertices created by the Glushkov
* construction process above (vertices with special assertions flags) into
* edges between those vertices' neighbours in the graph.
*
* These edges have the appropriate flags applied to them -- a path (u,t,v)
* through an assert vertex t will be replaced with the edge (u,v) with the
* assertion flags from t.
*
* Edges with mutually incompatible flags (such as the conjunction of
* word-to-word and word-to-nonword) are dropped.
*/
#include "asserts.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_prune.h"
#include "nfagraph/ng_redundancy.h"
#include "nfagraph/ng_util.h"
#include "parser/position.h" // for POS flags
#include "util/compile_error.h"
#include "util/graph_range.h"
#include <queue>
#include <set>
using namespace std;
namespace ue2 {
/** Hard limit on the maximum number of edges we'll clone before we throw up
 * our hands and report 'Pattern too large.' */
static const size_t MAX_ASSERT_EDGES = 300000;
/** Flags representing the word-boundary assertions, \\b or \\B. */
static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
| POS_FLAG_ASSERT_WORD_TO_NONWORD
| POS_FLAG_ASSERT_NONWORD_TO_WORD
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD
| POS_FLAG_ASSERT_WORD_TO_WORD_UCP
| POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
| POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
/* Edge flag value meaning "no assertion": conjunct() treats it as the
 * identity and disjunct() yields it whenever either live input carries it. */
#define OPEN_EDGE 0U
/* Edge flag value meaning "impassable": conjunct() produces it for two
 * assertions with no flags in common, and disjunct() ignores it in favour of
 * the other input. */
#define DEAD_EDGE (~0U)
/** Combine the flags of two assertions that sit in parallel (either path may
 * be taken). A DEAD_EDGE input is ignored in favour of the other input;
 * otherwise an OPEN_EDGE input makes the result OPEN_EDGE; otherwise the
 * result is the union of both flag sets. */
static
u32 disjunct(u32 flags1, u32 flags2) {
    DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);

    u32 merged = flags1 | flags2; // general case: union of both flag sets
    if (flags1 == DEAD_EDGE) {
        merged = flags2;
    } else if (flags2 == DEAD_EDGE) {
        merged = flags1;
    } else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
        merged = OPEN_EDGE;
    }

    DEBUG_PRINTF("--> %x\n", merged);
    return merged;
}
/** Combine the flags of two assertions that sit in series (both must hold).
 * An OPEN_EDGE input is the identity; otherwise the result is the set of
 * flags common to both, or DEAD_EDGE when they have none in common. */
static
u32 conjunct(u32 flags1, u32 flags2) {
    DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);

    const u32 common = flags1 & flags2;
    u32 combined;
    if (flags1 == OPEN_EDGE) {
        combined = flags2;
    } else if (flags2 == OPEN_EDGE) {
        combined = flags1;
    } else {
        /* the conjunction of two different word boundary assertions is
         * impassable */
        combined = common ? common : DEAD_EDGE;
    }

    DEBUG_PRINTF("--> %x\n", combined);
    return combined;
}
/** Cache mapping a (source, target) vertex pair to the edge joining them. */
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
/**
 * Replace assert vertex \a t with direct edges between its neighbours.
 *
 * Every path (u,t,v) through \a t becomes an edge (u,v) carrying assertion
 * flags, unless the flags along the path are mutually incompatible, in which
 * case the path is dropped. Afterwards all edges of \a t are cleared.
 *
 * \param g graph being rewritten
 * \param t assert vertex to replace
 * \param edge_cache (u,v) -> edge lookup, extended with each edge added here
 * \param assert_edge_count running count of edges added; a CompileError is
 *        thrown once it exceeds MAX_ASSERT_EDGES
 */
static
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index);
const u32 flags = g[t].assert_flags;
DEBUG_PRINTF("consider assert vertex %u with flags %u\n",
g[t].index, flags);
// Wire up all the predecessors to all the successors.
for (const auto &inEdge : in_edges_range(t, g)) {
NFAVertex u = source(inEdge, g);
if (u == t) {
continue; // ignore self-loops
}
// Flags required to get from u through t: the in-edge's own flags combined
// in series with t's flags.
const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
flags);
if (flags_inc_in == DEAD_EDGE) {
DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
g[inEdge].assert_flags);
continue;
}
for (const auto &outEdge : out_edges_range(t, g)) {
NFAVertex v = target(outEdge, g);
DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index,
g[t].index, g[v].index);
if (v == t) {
continue; // ignore self-loops
}
// Flags for the whole path (u,t,v).
const u32 flags_final = conjunct(g[outEdge].assert_flags,
flags_inc_in);
if (flags_final == DEAD_EDGE) {
DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
g[outEdge].assert_flags);
continue;
}
if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
&& v == g.acceptEod) {
DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
continue;
}
/* Replace path (u,t,v) with direct edge (u,v), unless the edge
 * already exists, in which case we just need to edit its
 * properties.
 *
 * Use edge_cache to prevent us going O(N).
 */
auto cache_key = make_pair(u, v);
auto ecit = edge_cache.find(cache_key);
if (ecit == edge_cache.end()) {
DEBUG_PRINTF("adding edge %u %u\n", g[u].index,
g[v].index);
NFAEdge e = add_edge(u, v, g).first;
edge_cache.emplace(cache_key, e);
// NOTE(review): a new edge is given t's own flags, whereas the existing-edge
// branch below merges in flags_final (the flags of the whole path). Confirm
// whether flags_final was intended here as well.
g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) {
throw CompileError(g.expressionIndex,
"Pattern is too large.");
}
} else {
NFAEdge e = ecit->second;
DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index,
g[v].index, g[t].index);
// Edge already exists.
// The existing edge and the new path are alternatives, so their flags are
// combined in parallel.
u32 &e_flags = g[e].assert_flags;
e_flags = disjunct(e_flags, flags_final);
assert(e_flags != DEAD_EDGE);
}
}
}
// Clear vertex t to remove all the old edges.
/* no need to clear the cache, as we will never look up its edge as it is
 * unreachable */
clear_vertex(t, g);
}
/** Attach a single report to vertex \a v: the basic internal report that the
 * ReportManager builds for graph \a g with offset adjustment \a adj.
 * \a v must be a non-special vertex that has no reports yet. */
static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
    // Preconditions: special vertices never get a report ID assigned here,
    // and the vertex must not carry any report already.
    assert(!is_special(v, g));
    assert(g[v].reports.empty());

    Report internal = rm.getBasicInternalReport(g, adj);
    const auto report_id = rm.getInternalId(internal);
    g[v].reports.insert(report_id);

    DEBUG_PRINTF("set report id for vertex %u, adj %d\n",
                 g[v].index, adj);
}
/** Resolve multi-line start assertions on the successors of the start vertex.
 *
 * For each such vertex, every edge leading straight to accept is rerouted
 * through a new vertex that matches any character and reports with an
 * adjustment of -1, so that ^ doesn't match a trailing \\n. The
 * POS_FLAG_MULTILINE_START flag is cleared once the vertex has been handled.
 */
static
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
    // Edges from a multi-line start vertex directly to accept; collected
    // first and rewired afterwards.
    vector<NFAEdge> to_accept;

    for (auto v : adjacent_vertices_range(g.start, g)) {
        if (g[v].assert_flags & POS_FLAG_MULTILINE_START) {
            /* we have found a multi-line start (maybe more than one) */
            DEBUG_PRINTF("mls %u %08x\n", g[v].index,
                         g[v].assert_flags);

            for (const auto &e : out_edges_range(v, g)) {
                if (target(e, g) != g.accept) {
                    continue;
                }
                to_accept.push_back(e);
            }

            /* assert has been resolved; clear flag */
            g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
        }
    }

    // Interpose a dummy "dot" vertex on each collected edge. The new in-edge
    // copies the properties of the edge it replaces.
    for (const auto &e : to_accept) {
        NFAVertex dot = add_vertex(g);
        g[dot].char_reach.setall();
        setReportId(rm, g, dot, -1);
        add_edge(source(e, g), dot, g[e], g);
        add_edge(dot, g.accept, g);
    }

    // The original direct edges are no longer wanted.
    remove_edges(to_accept, g);
}
/** \brief Returns true if any vertex of \a g has one of the
 * WORDBOUNDARY_FLAGS set in its assert_flags. */
static
bool hasAssertVertices(const NGHolder &g) {
    for (auto v : vertices_range(g)) {
        if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
            return true;
        }
    }

    return false;
}
/** \brief Convert temporary assert vertices (from construction method) to
 * edge-based flags.
 *
 * The construction method leaves temporary assert vertices in the graph; this
 * pass replaces each of them with flagged edges, resolves multiline-start
 * flags, and then prunes and renumbers the graph, so that later stages never
 * encounter an assert vertex. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
    DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));

    // Bail out early when no vertex has an assertion flag set.
    if (!hasAssertVertices(g)) {
        DEBUG_PRINTF("no assert vertices, done\n");
        return;
    }

    // Seed the (u, v) vertex pair -> edge descriptor cache with the edges
    // currently in the graph.
    edge_cache_t edge_cache;
    for (const auto &e : edges_range(g)) {
        const auto endpoints = make_pair(source(e, g), target(e, g));
        edge_cache[endpoints] = e;
    }

    // Running count of edges added by replaceAssertVertex, shared across all
    // calls.
    u32 assert_edge_count = 0;
    size_t resolved = 0;

    for (auto v : vertices_range(g)) {
        if (!(g[v].assert_flags & WORDBOUNDARY_FLAGS)) {
            continue;
        }
        replaceAssertVertex(g, v, edge_cache, assert_edge_count);
        ++resolved;
    }

    checkForMultilineStart(rm, g);

    if (resolved > 0) {
        DEBUG_PRINTF("resolved %zu assert vertices\n", resolved);
        pruneUseless(g);
        pruneEmptyVertices(g);
        g.renumberVertices();
        g.renumberEdges();
    }

    DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
    assert(!hasAssertVertices(g));
}
} // namespace ue2

51
src/compiler/asserts.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*/
#ifndef ASSERTS_H
#define ASSERTS_H
namespace ue2 {
class ReportManager;
class NGWrapper;
/** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence.
*
* \param rm Report manager; used to create the report attached to any dummy
*           vertex inserted while resolving multiline-start flags.
* \param g  Graph to rewrite; modified in place.
*/
void removeAssertVertices(ReportManager &rm, NGWrapper &g);
} // namespace ue2
#endif // ASSERTS_H

459
src/compiler/compiler.cpp Normal file
View File

@ -0,0 +1,459 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end interface.
*/
#include "asserts.h"
#include "compiler.h"
#include "database.h"
#include "grey.h"
#include "hs_internal.h"
#include "hs_runtime.h"
#include "ue2common.h"
#include "nfagraph/ng_builder.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "parser/buildstate.h"
#include "parser/dump.h"
#include "parser/Component.h"
#include "parser/parse_error.h"
#include "parser/Parser.h" // for flags
#include "parser/position.h"
#include "parser/position_dump.h"
#include "parser/position_info.h"
#include "parser/prefilter.h"
#include "parser/shortcut_literal.h"
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "smallwrite/smallwrite_build.h"
#include "rose/rose_build.h"
#include "rose/rose_build_dump.h"
#include "som/slot_manager_dump.h"
#include "util/alloc.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <memory>
#include <sstream>
using namespace std;
namespace ue2 {
/** \brief Check that the extended parameters in \a ext are consistent.
 *
 * Throws CompileError if an unknown flag bit is set, or if a supplied
 * min_offset or min_length exceeds a supplied max_offset. */
static
void validateExt(const hs_expr_ext &ext) {
    static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
                                                    HS_EXT_FLAG_MAX_OFFSET |
                                                    HS_EXT_FLAG_MIN_LENGTH;
    if (ext.flags & ~ALL_EXT_FLAGS) {
        throw CompileError("Invalid hs_expr_ext flag set.");
    }

    // A field only takes part in validation when its flag says it is in use.
    const bool has_min_offset = (ext.flags & HS_EXT_FLAG_MIN_OFFSET) != 0;
    const bool has_max_offset = (ext.flags & HS_EXT_FLAG_MAX_OFFSET) != 0;
    const bool has_min_length = (ext.flags & HS_EXT_FLAG_MIN_LENGTH) != 0;

    if (has_min_offset && has_max_offset && ext.min_offset > ext.max_offset) {
        throw CompileError("In hs_expr_ext, min_offset must be less than or "
                           "equal to max_offset.");
    }

    if (has_min_length && has_max_offset && ext.min_length > ext.max_offset) {
        throw CompileError("In hs_expr_ext, min_length must be less than or "
                           "equal to max_offset.");
    }
}
/** \brief Parse \a expression and record its flags and extended parameters.
*
* \param index_in   Stored as \a index.
* \param expression Pattern text handed to parse().
* \param flags      HS_FLAG_* bits; also used to build the ParseMode.
* \param actionId   Stored as \a id.
* \param ext        Optional extended parameters; may be null.
*
* Throws ParseError if the expression is in UTF-8 mode but is not valid UTF-8,
* or if parse() produced no component. Throws CompileError for unrecognised
* flag bits, for HS_FLAG_SINGLEMATCH or HS_FLAG_PREFILTER combined with
* HS_FLAG_SOM_LEFTMOST, and (via validateExt) for inconsistent \a ext values.
* Note that the expression is parsed before the flag checks are made.
*/
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID actionId,
const hs_expr_ext *ext)
: utf8(false),
allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
highlander(flags & HS_FLAG_SINGLEMATCH),
prefilter(flags & HS_FLAG_PREFILTER),
som(SOM_NONE),
index(index_in),
id(actionId),
min_offset(0),
max_offset(MAX_OFFSET),
min_length(0) {
ParseMode mode(flags);
component = parse(expression, mode);
utf8 = mode.utf8; /* utf8 may be set by parse() */
if (utf8 && !isValidUtf8(expression)) {
throw ParseError("Expression is not valid UTF-8.");
}
if (!component) {
assert(0); // parse() should have thrown a ParseError.
throw ParseError("Parse error.");
}
// Reject any flag bit outside the known set.
if (flags & ~HS_FLAG_ALL) {
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
throw CompileError("Unrecognised flag.");
}
// FIXME: we disallow highlander + SOM, see UE-1850.
if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
// FIXME: we disallow prefilter + SOM, see UE-1899.
if ((flags & HS_FLAG_PREFILTER) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_PREFILTER is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
som = SOM_LEFT;
}
// Set extended parameters, if we have them.
if (ext) {
// Ensure that the given parameters make sense.
validateExt(*ext);
// Only copy the fields whose flag marks them as supplied; the others keep
// the defaults from the initialiser list.
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
max_offset = ext->max_offset;
}
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
min_length = ext->min_length;
}
}
// These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold.
assert(max_offset >= min_offset);
assert(max_offset >= min_length);
// Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well.
if (flags & HS_FLAG_PREFILTER && min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
min_length = 0;
}
}
#if defined(DUMP_SUPPORT) || defined(DEBUG)
/**
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode.
*
* \param expr  Parsed expression whose component tree is dumped.
* \param stage Label for the current processing stage; becomes part of the
*              dump file name.
* \param grey  Supplies dumpFlags and dumpPath for the on-disk dump.
*/
void dumpExpression(UNUSED const ParsedExpression &expr,
UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
expr.index);
ostringstream debug_tree;
dumpTree(debug_tree, expr.component.get());
printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG
#if defined(DUMP_SUPPORT)
// Only written when parse dumping has been requested via the grey box.
if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss;
ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
<< stage << ".txt";
ofstream out(ss.str().c_str());
out << "Component Tree for " << expr.id << endl;
dumpTree(out, expr.component.get());
if (expr.utf8) {
out << "UTF8 mode" << endl;
}
}
#endif // DUMP_SUPPORT
}
#endif // DUMP_SUPPORT || DEBUG
/** \brief Run Component tree optimisations on \a expr.
 *
 * Nothing is done when the expression has a min_length constraint or an SOM
 * mode set. */
static
void optimise(ParsedExpression &expr) {
    const bool eligible = !expr.min_length && !expr.som;
    if (eligible) {
        DEBUG_PRINTF("optimising\n");
        expr.component->optimise(true /* root is connected to sds */);
    }
}
/** \brief Parse, check and add a single expression to \a ng.
 *
 * The pattern is parsed, optionally prefiltered and optimised, then either fed
 * to the literal shortcut or built into an NFA graph and added to \a ng.
 * Failures are reported by throwing (CompileError here; the parsing helpers
 * may throw ParseError). */
void addExpression(NG &ng, unsigned index, const char *expression,
                   unsigned flags, const hs_expr_ext *ext, ReportID id) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    const auto &grey = cc.grey;
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
                 expression);

    // Refuse patterns that are longer (in characters) than the grey-box limit.
    const size_t pattern_len = strlen(expression);
    if (pattern_len > grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // Per-expression processing: any error here propagates to our caller as
    // an exception.
    ParsedExpression expr(index, expression, flags, id, ext);
    dumpExpression(expr, "orig", grey);

    // Prefiltering transformations, when requested.
    if (expr.prefilter) {
        prefilterTree(expr.component, ParseMode(flags));
        dumpExpression(expr, "prefiltered", grey);
    }

    // Zero-width assertions and other extended pcre constructs aren't
    // supported yet; checkUnsupported throws a ParseError if the component
    // tree contains one.
    checkUnsupported(*expr.component);

    expr.component->checkEmbeddedStartAnchor(true);
    expr.component->checkEmbeddedEndAnchor(true);

    if (grey.optimiseComponentTree) {
        optimise(expr);
        dumpExpression(expr, "opt", grey);
    }

    DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
                 expr.component.get(), expr.index, expr.id);

    // SOM flags in streaming mode are only usable when an SOM precision mode
    // has also been specified.
    const bool wants_som = expr.som != SOM_NONE;
    if (wants_som && cc.streaming && !ng.ssm.somPrecision()) {
        throw CompileError("To use a SOM expression flag in streaming mode, "
                           "an SOM precision mode (e.g. "
                           "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
    }

    // A pure literal can go straight to Rose without building an NFA graph.
    if (shortcutLiteral(ng, expr)) {
        DEBUG_PRINTF("took literal short cut\n");
        return;
    }

    auto graph = buildWrapper(ng.rm, cc, expr);
    if (!graph) {
        DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
                     "thrown.\n", expr.id);
        throw CompileError("Internal error.");
    }

    if (!expr.allow_vacuous && matches_everywhere(*graph)) {
        throw CompileError("Pattern matches empty buffer; use "
                           "HS_FLAG_ALLOWEMPTY to enable support.");
    }

    if (!ng.addGraph(*graph)) {
        DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
        throw CompileError("Error compiling expression.");
    }
}
/** \brief Build the Rose engine for \a ng, adding a small-write engine where
 * one can be built.
 *
 * Returns nullptr (after asserting) if the Rose build produced nothing. The
 * dump helpers are invoked on the result before it is returned. */
static
aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
    // An unbounded minimum width is represented by ROSE_BOUND_INF.
    u32 minWidth = ROSE_BOUND_INF;
    if (ng.minWidth.is_finite()) {
        minWidth = verify_u32(ng.minWidth);
    }

    auto rose = ng.rose->buildRose(minWidth);
    if (!rose) {
        DEBUG_PRINTF("error building rose\n");
        assert(0);
        return nullptr;
    }

    /* avoid building a smwr if just a pure floating case. */
    const bool pure_literal = roseIsPureLiteral(rose.get());
    if (!pure_literal) {
        const u32 quality = roseQuality(rose.get());
        auto smwr = ng.smwr->build(quality);
        if (smwr) {
            rose = roseAddSmallWrite(rose.get(), smwr.get());
        }
    }

    const auto &grey = ng.cc.grey;
    dumpRose(*ng.rose, rose.get(), grey);
    dumpReportManager(ng.rm, grey);
    dumpSomSlotManager(ng.ssm, grey);
    dumpSmallWrite(rose.get(), grey);

    return rose;
}
/** \brief Translate \a target_info into platform_t flag bits.
 *
 * HS_PLATFORM_NOAVX2 is set when the target reports no AVX2 support. */
platform_t target_to_platform(const target_t &target_info) {
    platform_t platform = 0;

    if (!target_info.has_avx2()) {
        platform |= HS_PLATFORM_NOAVX2;
    }

    return platform;
}
/** \brief Generate the Rose bytecode for \a ng and wrap it in a database.
 *
 * On success \a *length holds the engine size reported by roseSize(). Throws
 * CompileError if no engine could be generated, if the engine has zero length,
 * or if the database could not be created. */
struct hs_database *build(NG &ng, unsigned int *length) {
    assert(length);

    auto engine = generateRoseEngine(ng);
    if (!engine) {
        throw CompileError("Unable to generate bytecode.");
    }

    *length = roseSize(engine.get());
    if (*length == 0) {
        DEBUG_PRINTF("RoseEngine has zero length\n");
        assert(0);
        throw CompileError("Internal error.");
    }

    // The engine is handed to dbCreate as raw bytes along with the platform
    // flags for the compile target.
    const platform_t platform = target_to_platform(ng.cc.target_info);
    const char *bytecode = reinterpret_cast<const char *>(engine.get());

    struct hs_database *db = dbCreate(bytecode, *length, platform);
    if (db == nullptr) {
        throw CompileError("Could not allocate memory for bytecode.");
    }

    return db;
}
/** \brief Erase every element of \a v that compares equal to
 * PositionInfo(pos). */
static
void stripFromPositions(vector<PositionInfo> &v, Position pos) {
    const PositionInfo unwanted(pos);
    v.erase(remove(v.begin(), v.end(), unwanted), v.end());
}
/** \brief Wire the start and startDs states to the first positions of the
 * expression.
 *
 * Each epsilon among the first positions is turned into an accept connection
 * from both start states, carrying the epsilon's flags; the remaining
 * positions are connected as a region following the start states. */
static
void connectInitialStates(GlushkovBuildState &bs,
                          const ParsedExpression &expr) {
    vector<PositionInfo> firsts = expr.component->first();

    const NFABuilder &builder = bs.getBuilder();
    const Position start = builder.getStart();
    const Position start_ds = builder.getStartDotStar();

    DEBUG_PRINTF("wiring initials = %s\n",
                 dumpPositions(firsts.begin(), firsts.end()).c_str());

    vector<PositionInfo> starts = {start, start_ds};

    // start and startDs can show up among the firsts due to boundaries; they
    // must not be wired to themselves.
    stripFromPositions(firsts, start);
    stripFromPositions(firsts, start_ds);

    // replace epsilons with accepts
    for (const auto &first : firsts) {
        if (first.pos == GlushkovBuildState::POS_EPSILON) {
            assert(starts.size() == 2); /* start, startds */
            vector<PositionInfo> flagged_starts = starts;
            for (auto &st : flagged_starts) {
                st.flags = first.flags;
            }
            bs.connectAccepts(flagged_starts);
        }
    }

    if (!firsts.empty()) {
        bs.connectRegions(starts, firsts);
    }
}
/** \brief Connect the last positions of the expression to the accept
 * states. */
static
void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
    vector<PositionInfo> lasts = expr.component->last();

    DEBUG_PRINTF("wiring finals = %s\n",
                 dumpPositions(lasts.begin(), lasts.end()).c_str());

    bs.connectAccepts(lasts);
}
#ifndef NDEBUG
/** \brief Assertion helper: true if checkUnsupported() accepts \a c without
 * throwing a ParseError. Only compiled when assertions are enabled. */
static
bool isSupported(const Component &c) {
    bool supported = true;
    try {
        checkUnsupported(c);
    } catch (ParseError &) {
        supported = false;
    }
    return supported;
}
#endif
/** \brief Construct the NFA graph for a parsed expression.
 *
 * Runs the Glushkov construction over the component tree (note positions,
 * wire the start states, build the follow sets, wire the accepts, create the
 * edges), then removes the temporary assert vertices from the resulting
 * graph. */
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
                                   const ParsedExpression &expr) {
    assert(isSupported(*expr.component));

    const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
    assert(builder);

    // Sets up the START and ACCEPT states.
    const auto state = makeGlushkovBuildState(*builder, expr.prefilter);
    auto &root = *expr.component;

    // Map position IDs to characters/components.
    root.notePositions(*state);

    // Start and start-dotstar lead into the firsts.
    connectInitialStates(*state, expr);

    DEBUG_PRINTF("wire up body of expr\n");

    // Remainder of the FOLLOW set, seeded with the two start states.
    vector<PositionInfo> start_positions = {builder->getStartDotStar(),
                                            builder->getStart()};
    root.buildFollowSet(*state, start_positions);

    // Lasts lead into the accept state.
    connectFinalStates(*state, expr);

    // Materialise the edges.
    state->buildEdges();

    auto graph = builder->getGraph();
    assert(graph);

    dumpDotWrapper(*graph, "00_before_asserts", cc.grey);
    removeAssertVertices(rm, *graph);

    return graph;
}
} // namespace ue2

152
src/compiler/compiler.h Normal file
View File

@ -0,0 +1,152 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end interface
*/
#ifndef COMPILER_H
#define COMPILER_H
#include "ue2common.h"
#include "database.h"
#include "parser/Component.h"
#include "som/som.h"
#include <memory>
#include <boost/core/noncopyable.hpp>
struct hs_database;
struct hs_expr_ext;
namespace ue2 {
struct CompileContext;
struct Grey;
struct target_t;
class NG;
class ReportManager;
class NGWrapper;
/** Class gathering together the pieces of a parsed expression.
* Note: Owns the provided component.
*/
class ParsedExpression : boost::noncopyable {
public:
/** \brief Parse \a expression and record its flags and extended parameters.
*
* \param index      index of the expression; stored in \ref index
* \param expression pattern text to parse
* \param flags      HS_FLAG_* bits for this expression
* \param actionId   user-specified pattern ID; stored in \ref id
* \param ext        optional extended parameters, or nullptr if none
*/
ParsedExpression(unsigned index, const char *expression, unsigned flags,
ReportID actionId, const hs_expr_ext *ext = nullptr);
bool utf8; //!< UTF-8 mode; taken from the parse mode after parsing
/** \brief root node of parsed component tree. */
std::unique_ptr<ue2::Component> component;
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
const bool prefilter; //!< HS_FLAG_PREFILTER specified
som_type som; //!< chosen SOM mode, or SOM_NONE
/** \brief index in expressions array passed to \ref hs_compile_multi */
const unsigned index;
const ReportID id; //!< user-specified pattern ID
u64a min_offset; //!< 0 if not used
u64a max_offset; //!< MAX_OFFSET if not used
u64a min_length; //!< 0 if not used
};
/**
* Add an expression to the compiler.
*
* @param ng
* The global NG object.
* @param index
* The index of the expression (used for errors)
* @param expression
* NULL-terminated PCRE expression
* @param flags
* The full set of Hyperscan flags associated with this rule.
* @param ext
* Struct containing extra parameters for this expression, or NULL if
* none.
* @param actionId
* The identifier to associate with the expression; returned by engine on
* match.
*/
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
/**
* Build a Hyperscan database out of the expressions we've been given. A
* fatal error will result in an exception being thrown.
*
* @param ng
* The global NG object.
* @param[out] length
* The number of bytes occupied by the compiled structure.
* @return
* The compiled structure. Should be deallocated with the
* hs_database_free() function.
*/
struct hs_database *build(NG &ng, unsigned int *length);
/**
* Constructs an NFA graph from the given expression tree.
*
* @param rm
* Global ReportManager for this compile.
* @param cc
* Global compile context for this compile.
* @param expr
* ParsedExpression object.
* @return
* nullptr on error.
*/
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
const CompileContext &cc,
const ParsedExpression &expr);
/**
* Build a platform_t out of a target_t.
*/
platform_t target_to_platform(const target_t &target_info);
/**
* Dump the component tree of \a expr, labelled with \a stage. The real
* implementation only exists in DUMP_SUPPORT or DEBUG builds; otherwise an
* empty inline stub is provided so that callers need no conditional
* compilation of their own.
*/
#if defined(DUMP_SUPPORT) || defined(DEBUG)
void dumpExpression(const ParsedExpression &expr, const char *stage,
const Grey &grey);
#else
static really_inline
void dumpExpression(UNUSED const ParsedExpression &expr,
UNUSED const char *stage, UNUSED const Grey &grey) {
}
#endif
} // namespace
#endif // COMPILER_H

95
src/compiler/error.cpp Normal file
View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compile-time error utils.
*/
#include "allocator.h"
#include "error.h"
#include "ue2common.h"
#include "hs_compile.h"
#include "util/compile_error.h"
#include <cstring>
#include <string>
using std::string;
// Message text for the statically allocated error objects below.
static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error.";
// Statically allocated error objects. hs_enomem is what generateCompileError()
// returns when it cannot allocate; freeCompileError() recognises both objects
// by address and never frees them.
extern const hs_compile_error_t hs_enomem = {
const_cast<char *>(failureNoMemory), 0
};
extern const hs_compile_error_t hs_einternal = {
const_cast<char *>(failureInternal), 0
};
namespace ue2 {
/** \brief Allocate a compile error carrying a copy of \a err and the given
 * expression index.
 *
 * Both the error object and its message are allocated with hs_misc_alloc. If
 * either allocation fails, the static hs_enomem error is returned instead. */
hs_compile_error_t *generateCompileError(const string &err, int expression) {
    hs_compile_error_t *const out_of_memory =
        const_cast<hs_compile_error_t *>(&hs_enomem);

    hs_compile_error_t *error =
        (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
    if (!error) {
        return out_of_memory;
    }

    // Copy the message including its terminating NUL.
    const auto msg_bytes = err.size() + 1;
    char *text = (char *)hs_misc_alloc(msg_bytes);
    if (!text) {
        // Don't leak the half-built error object.
        hs_misc_free(error);
        return out_of_memory;
    }
    memcpy(text, err.c_str(), msg_bytes);

    error->message = text;
    error->expression = expression;
    return error;
}
/** \brief Build a compile error from a CompileError exception.
 *
 * The expression index is taken from \a e when it has one, otherwise -1. */
hs_compile_error_t *generateCompileError(const CompileError &e) {
    int expression = -1;
    if (e.hasIndex) {
        expression = (int)e.index;
    }
    return generateCompileError(e.reason, expression);
}
/** \brief Release an error returned by generateCompileError().
 *
 * Null pointers and the static hs_enomem / hs_einternal objects are ignored;
 * anything else has its message and then the object itself freed with
 * hs_misc_free. */
void freeCompileError(hs_compile_error_t *error) {
    // The two static errors are not heap allocated and must not be freed.
    const bool is_static = (error == &hs_enomem) || (error == &hs_einternal);
    if (!error || is_static) {
        return;
    }

    hs_misc_free(error->message);
    hs_misc_free(error);
}
} // namespace ue2

55
src/compiler/error.h Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compile-time error utils.
*/
#ifndef COMPILE_ERROR_H
#define COMPILE_ERROR_H
#include <string>
struct hs_compile_error;
// Special errors that aren't allocated with hs_alloc/hs_free.
extern const hs_compile_error hs_enomem;
extern const hs_compile_error hs_einternal;
namespace ue2 {
class CompileError;
/** \brief Allocate an error holding a copy of \a err and the given expression
* index. Returns the static hs_enomem object if allocation fails. */
hs_compile_error *generateCompileError(const std::string &err, int expression);
/** \brief Build an error from a CompileError; the expression index is -1 when
* the exception carries no index. */
hs_compile_error *generateCompileError(const CompileError &e);
/** \brief Free an error produced by generateCompileError(). Null and the
* static hs_enomem / hs_einternal objects are ignored. */
void freeCompileError(hs_compile_error *error);
} // namespace ue2
#endif

652
src/crc32.c Normal file
View File

@ -0,0 +1,652 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "crc32.h"
#include "config.h"
#include "ue2common.h"
#if defined(HAVE_C_X86INTRIN_H)
#include <x86intrin.h>
#elif defined(HAVE_C_INTRIN_H)
#include <intrin.h>
#endif
#ifndef __SSE4_2__
/***
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
*** licensed and available from http://sourceforge.net/projects/slicing-by-8/
***/
/*
* Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
*
*
* This software program is licensed subject to the BSD License,
* available at http://www.opensource.org/licenses/bsd-license.html.
*
* Abstract:
*
* Tables for software CRC generation
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o32[256] =
{
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
};
/*
* end of the CRC lookup table crc_tableil8_o32
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o40[256] =
{
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
};
/*
* end of the CRC lookup table crc_tableil8_o40
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o48[256] =
{
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
};
/*
* end of the CRC lookup table crc_tableil8_o48
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o56[256] =
{
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
};
/*
* end of the CRC lookup table crc_tableil8_o56
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o64[256] =
{
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
};
/*
* end of the CRC lookup table crc_tableil8_o64
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o72[256] =
{
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
};
/*
* end of the CRC lookup table crc_tableil8_o72
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o80[256] =
{
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
};
/*
* end of the CRC lookup table crc_tableil8_o80
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o88[256] =
{
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
};
/*
* end of the CRC lookup table crc_tableil8_o88
*/
//#define VERIFY_ASSERTION
#ifdef VERIFY_ASSERTION
// Trivial byte-by-byte version, used only as a reference: define
// VERIFY_ASSERTION to make Crc32c_ComputeBuf assert that the slicing variant
// produces the same result as this function.
//
// running_crc: CRC state to continue from (used as-is).
// p_buf:       first byte to checksum.
// length:      number of bytes to checksum.
// Returns the updated CRC state.
static really_inline
u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
    u32 state = running_crc;
    for (size_t idx = 0; idx < length; idx++) {
        // Fold one input byte into the low byte of the state and look up
        // the resulting remainder.
        const u32 slot = (state ^ p_buf[idx]) & 0x000000FF;
        state = (state >> 8) ^ crc_tableil8_o32[slot];
    }
    return state;
}
#endif // VERIFY_ASSERTION
// Slicing-by-8 approach, which is much faster than the byte-by-byte loop.
// Derived from Intel's BSD-licensed code, with additions to handle an
// unaligned start and a short tail automatically.
//
// running_crc: CRC state to continue from (used as-is, no inversion here).
// p_buf:       first byte to checksum; any alignment is accepted.
// length:      number of bytes to checksum; may be zero.
// Returns the updated CRC state.
static really_inline
u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
                      const size_t length) {
    u32 crc = running_crc;

    // Split the buffer into an unaligned head, a run of whole 8-byte groups
    // and a tail. ROUNDUP_PTR is defined outside this file; it is assumed to
    // round p_buf up to the next 4-byte boundary.
    const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
    size_t init_bytes = aligned_buf - p_buf;
    // A buffer that ends before the first aligned address is consumed
    // entirely by the head loop. Without this clamp, length - init_bytes
    // would wrap around and the loops below would run off the buffer.
    if (init_bytes > length) {
        init_bytes = length;
    }
    size_t running_length = ((length - init_bytes)/8)*8;
    size_t end_bytes = length - init_bytes - running_length;

    // Process byte-by-byte until p_buf is aligned (or the input runs out).
    for (size_t li = 0; li < init_bytes; li++) {
        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
    }

    // Main aligned loop, processes eight bytes at a time as two 32-bit loads.
    // NOTE(review): the word loads appear to assume a little-endian host
    // (the low byte of each word is treated as the first byte in memory) --
    // confirm before porting to a big-endian target.
    u32 term1, term2;
    for (size_t li = 0; li < running_length/8; li++) {
        // First word: fold into the running CRC, then look up each byte of
        // the result in the four "far" tables.
        u32 block = *(const u32 *)p_buf;
        crc ^= block;
        p_buf += 4;
        term1 = crc_tableil8_o88[crc & 0x000000FF] ^
                crc_tableil8_o80[(crc >> 8) & 0x000000FF];
        term2 = crc >> 16;
        crc = term1 ^
              crc_tableil8_o72[term2 & 0x000000FF] ^
              crc_tableil8_o64[(term2 >> 8) & 0x000000FF];

        // Second word: looked up directly in the four "near" tables and
        // combined with the contribution of the first word.
        block = *(const u32 *)p_buf;
        term1 = crc_tableil8_o56[block & 0x000000FF] ^
                crc_tableil8_o48[(block >> 8) & 0x000000FF];
        term2 = block >> 16;
        crc = crc ^
              term1 ^
              crc_tableil8_o40[term2 & 0x000000FF] ^
              crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
        p_buf += 4;
    }

    // Remaining bytes (fewer than eight).
    for (size_t li = 0; li < end_bytes; li++) {
        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
    }

    return crc;
}
#else // __SSE4_2__
// Word size used by the main loop of crc32c_sse42:
//   CRC_WORD - number of bytes consumed per iteration (also the alignment
//              the head loop advances to),
//   CRC_TYPE - integer type loaded from the buffer each iteration,
//   CRC_FUNC - intrinsic applied to each loaded word.
// ARCH_64_BIT is not defined in this file; presumably it comes from the
// project headers included above.
#ifdef ARCH_64_BIT
#define CRC_WORD 8
#define CRC_TYPE u64a
#define CRC_FUNC _mm_crc32_u64
#else
#define CRC_WORD 4
#define CRC_TYPE u32
#define CRC_FUNC _mm_crc32_u32
#endif
/*
 * Use the crc32 instruction from SSE4.2 to compute our checksum - same
 * polynomial as the above function.
 *
 * Folds the `length` bytes starting at p_buf into running_crc and returns the
 * updated value: bytes up to the next CRC_WORD boundary are fed one at a
 * time, then whole CRC_TYPE words, then any trailing bytes.
 */
static really_inline
u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
                 const size_t length) {
    u32 crc = running_crc;

    // Process byte-by-byte until p_buf is aligned. The prologue is capped at
    // `length`: a short, unaligned buffer may end before the next CRC_WORD
    // boundary, in which case the unsigned subtractions below would wrap
    // around and the loops would read far beyond the caller's data.
    const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
    size_t init_bytes = aligned_buf - p_buf;
    if (init_bytes > length) {
        init_bytes = length;
    }
    size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
    size_t end_bytes = length - init_bytes - running_length;

    for (size_t li = 0; li < init_bytes; li++) {
        crc = _mm_crc32_u8(crc, *p_buf++);
    }

    // Main aligned loop, processes a word at a time.
    for (size_t li = 0; li < running_length/CRC_WORD; li++) {
        CRC_TYPE block = *(const CRC_TYPE *)p_buf;
        crc = CRC_FUNC(crc, block);
        p_buf += CRC_WORD;
    }

    // Remaining bytes
    for(size_t li = 0; li < end_bytes; li++) {
        crc = _mm_crc32_u8(crc, *p_buf++);
    }

    return crc;
}
#endif
#ifdef VERIFY_ASSERTION
#include <assert.h>
#endif
// Externally visible function: continues the checksum inCrc32 over the bufLen
// bytes at buf, using the SSE4.2 routine when __SSE4_2__ is defined and the
// table-driven routine otherwise.
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
    const unsigned char *data = (const unsigned char *)buf;
    u32 crc;
#ifdef __SSE4_2__
    crc = crc32c_sse42(inCrc32, data, bufLen);
#else
    crc = crc32c_sb8_64_bit(inCrc32, data, bufLen);
#endif
#ifdef VERIFY_ASSERTION
    // Cross-check against the byte-at-a-time reference.
    assert(crc == crc32c(inCrc32, data, bufLen));
#endif
    return crc;
}

46
src/crc32.h Normal file
View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CRC32_H_36A5015B5840C1
#define CRC32_H_36A5015B5840C1
#include "ue2common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/*
 * Continue the running checksum inCrc32 over the bufLen bytes starting at buf
 * and return the updated checksum. The database code passes 0 as inCrc32 to
 * start a fresh computation.
 */
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
#ifdef __cplusplus
}
#endif
#endif /* CRC32_H_36A5015B5840C1 */

507
src/database.c Normal file
View File

@ -0,0 +1,507 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for hs_database manipulation.
*/
#include <stdio.h>
#include <string.h>
#include "allocator.h"
#include "hs_common.h"
#include "hs_internal.h"
#include "hs_version.h"
#include "ue2common.h"
#include "database.h"
#include "crc32.h"
#include "rose/rose_internal.h"
#include "util/unaligned.h"
/** \brief Returns the result of ISALIGNED_N for \p db against the alignment
 * of unsigned long long; callers treat zero as "misaligned". */
static really_inline
int db_correctly_aligned(const void *db) {
return ISALIGNED_N(db, alignof(unsigned long long));
}
/** Release a database through hs_database_free. A non-NULL pointer whose
 * header does not carry HS_DB_MAGIC is rejected with HS_INVALID and left
 * untouched; a NULL pointer skips the magic check and is handed on as-is. */
HS_PUBLIC_API
hs_error_t hs_free_database(hs_database_t *db) {
    const int wrong_magic = (db != NULL) && (db->magic != HS_DB_MAGIC);
    if (wrong_magic) {
        return HS_INVALID;
    }

    hs_database_free(db);
    return HS_SUCCESS;
}
/** Serialize \p db into a buffer obtained from hs_misc_alloc.
 *
 * The stream is sizeof(struct hs_database) + db->length bytes long. It holds
 * eight 32-bit header words (magic, version, length, platform as two words,
 * crc32, reserved0, reserved1) followed by the bytecode; the rest of the
 * buffer stays zeroed.
 *
 * On success \p *bytes receives the buffer and \p *serialized_length its
 * size. Returns HS_INVALID for NULL arguments, HS_BAD_ALIGN for a misaligned
 * database, or whatever validDatabase / hs_check_alloc report. */
HS_PUBLIC_API
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
                                 size_t *serialized_length) {
    if (!db || !bytes || !serialized_length) {
        return HS_INVALID;
    }
    if (!db_correctly_aligned(db)) {
        return HS_BAD_ALIGN;
    }

    hs_error_t err = validDatabase(db);
    if (err != HS_SUCCESS) {
        return err;
    }

    const size_t total = sizeof(struct hs_database) + db->length;
    char *stream = hs_misc_alloc(total);
    err = hs_check_alloc(stream);
    if (err != HS_SUCCESS) {
        hs_misc_free(stream);
        return err;
    }
    memset(stream, 0, total);

    // Header layout, in 32-bit words.
    u32 *words = (u32 *)stream;
    words[0] = db->magic;
    words[1] = db->version;
    words[2] = db->length;
    memcpy(&words[3], &db->platform, sizeof(u64a)); // occupies words 3 and 4
    words[5] = db->crc32;
    words[6] = db->reserved0;
    words[7] = db->reserved1;

    // Bytecode follows the header words directly.
    memcpy(&words[8], hs_get_bytecode(db), db->length);

    *bytes = stream;
    *serialized_length = total;
    return HS_SUCCESS;
}
// Accept a database header platform value only if it equals one of the two
// platform constants this runtime was built with (hs_current_platform or
// hs_current_platform_no_avx2); anything else is HS_DB_PLATFORM_ERROR.
static
hs_error_t db_check_platform(const u64a p) {
    if (p == hs_current_platform || p == hs_current_platform_no_avx2) {
        return HS_SUCCESS;
    }
    return HS_DB_PLATFORM_ERROR;
}
// Decode and check the database header, returning appropriate errors or
// HS_SUCCESS if it's OK. The header should be allocated on the stack
// and later copied into the deserialized database.
static
hs_error_t db_decode_header(const char **bytes, const size_t length,
struct hs_database *header) {
if (!*bytes) {
return HS_INVALID;
}
if (length < sizeof(struct hs_database)) {
return HS_INVALID;
}
// There's no requirement, really, that the serialized stream of bytes
// we've been given is 4-byte aligned, so we use unaligned loads here.
const u32 *buf = (const u32 *)*bytes;
// Zero header so that none of it (e.g. its padding) is uninitialized.
memset(header, 0, sizeof(struct hs_database));
header->magic = unaligned_load_u32(buf++);
if (header->magic != HS_DB_MAGIC) {
return HS_INVALID;
}
header->version = unaligned_load_u32(buf++);
if (header->version != HS_DB_VERSION) {
return HS_DB_VERSION_ERROR;
}
header->length = unaligned_load_u32(buf++);
if (length != sizeof(struct hs_database) + header->length) {
DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
sizeof(struct hs_database) + header->length);
return HS_INVALID;
}
header->platform = unaligned_load_u64a(buf);
buf += 2;
header->crc32 = unaligned_load_u32(buf++);
header->reserved0 = unaligned_load_u32(buf++);
header->reserved1 = unaligned_load_u32(buf++);
*bytes = (const char *)buf;
return HS_SUCCESS; // Header checks out
}
// Recompute the checksum over the database's bytecode (db->length bytes,
// seeded with 0) and compare it with the value stored in the header.
static
hs_error_t db_check_crc(const hs_database_t *db) {
    const u32 computed = Crc32c_ComputeBuf(0, hs_get_bytecode(db), db->length);
    if (computed == db->crc32) {
        return HS_SUCCESS;
    }

    DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", computed, db->crc32);
    return HS_INVALID;
}
// Place the serialized bytecode inside db and record where it went.
//
// The destination is the address of db->bytes with its low six bits cleared,
// i.e. the 64-byte boundary at or below db->bytes; db->bytecode stores that
// position as an offset from the start of the database. db->length must
// already be set, as it determines how many bytes are copied.
static
void db_copy_bytecode(const char *serialized, hs_database_t *db) {
    const uintptr_t misalign = (uintptr_t)db->bytes & 0x3f;
    db->bytecode = offsetof(struct hs_database, bytes) - misalign;

    memcpy((char *)db + db->bytecode, serialized, db->length);
}
/** Deserialize the stream at \p bytes into caller-provided storage \p db.
 *
 * \p db must be 8-byte aligned (HS_BAD_ALIGN otherwise). The header is
 * decoded and platform-checked before \p db is touched; after that the
 * region of sizeof(struct hs_database) + bytecode length is zeroed, filled
 * in, and its CRC verified (HS_INVALID on mismatch). */
HS_PUBLIC_API
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
                                      hs_database_t *db) {
    if (!bytes || !db) {
        return HS_INVALID;
    }
    // We require the user to deserialize into an 8-byte aligned region.
    if (!ISALIGNED_N(db, 8)) {
        return HS_BAD_ALIGN;
    }

    // Decode the header, then make sure it targets our platform.
    hs_database_t hdr;
    hs_error_t err = db_decode_header(&bytes, length, &hdr);
    if (err == HS_SUCCESS) {
        err = db_check_platform(hdr.platform);
    }
    if (err != HS_SUCCESS) {
        return err;
    }

    // Zero the destination, then drop in the header and the bytecode.
    memset(db, 0, sizeof(struct hs_database) + hdr.length);
    memcpy(db, &hdr, sizeof(hdr));
    db_copy_bytecode(bytes, db);

    return (db_check_crc(db) == HS_SUCCESS) ? HS_SUCCESS : HS_INVALID;
}
/** Deserialize the stream at \p bytes into a database allocated with
 * hs_database_alloc.
 *
 * \p *db is set to NULL up front and only receives the new database once the
 * header, platform and CRC checks have all passed; on a CRC mismatch the
 * allocation is released and HS_INVALID is returned. */
HS_PUBLIC_API
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
                                   hs_database_t **db) {
    if (!bytes || !db) {
        return HS_INVALID;
    }
    *db = NULL;

    // Decode the header, then make sure it targets our platform.
    hs_database_t hdr;
    hs_error_t err = db_decode_header(&bytes, length, &hdr);
    if (err == HS_SUCCESS) {
        err = db_check_platform(hdr.platform);
    }
    if (err != HS_SUCCESS) {
        return err;
    }

    // Allocate space for new database
    const size_t total = sizeof(struct hs_database) + hdr.length;
    struct hs_database *fresh = hs_database_alloc(total);
    err = hs_check_alloc(fresh);
    if (err != HS_SUCCESS) {
        hs_database_free(fresh);
        return err;
    }

    // Zero the new space, then drop in the header and the bytecode.
    memset(fresh, 0, total);
    memcpy(fresh, &hdr, sizeof(hdr));
    db_copy_bytecode(bytes, fresh);

    if (db_check_crc(fresh) != HS_SUCCESS) {
        hs_database_free(fresh);
        return HS_INVALID;
    }

    *db = fresh;
    return HS_SUCCESS;
}
/** Report the in-memory size of \p db: the fixed header plus db->length.
 * Returns HS_INVALID when \p size is NULL, otherwise whatever validDatabase
 * reports for \p db. */
HS_PUBLIC_API
hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
    if (size == NULL) {
        return HS_INVALID;
    }

    const hs_error_t status = validDatabase(db);
    if (unlikely(status != HS_SUCCESS)) {
        return status;
    }

    *size = sizeof(struct hs_database) + db->length;
    return HS_SUCCESS;
}
/** Report how large the database encoded in the stream \p bytes would be once
 * deserialized. The header is decoded and validated first, so header errors
 * take precedence over a NULL \p size (HS_INVALID). */
HS_PUBLIC_API
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
                                       size_t *size) {
    // Decode and check the header
    hs_database_t hdr;
    const hs_error_t status = db_decode_header(&bytes, length, &hdr);
    if (status != HS_SUCCESS) {
        return status;
    }

    if (size == NULL) {
        return HS_INVALID;
    }

    *size = sizeof(struct hs_database) + hdr.length;
    return HS_SUCCESS;
}
/** Full consistency check of a database: magic, version, platform, 16-byte
 * alignment of the bytecode, and finally the CRC over the bytecode. The first
 * failing check decides the error code. \p db is dereferenced without a NULL
 * check. */
hs_error_t dbIsValid(const hs_database_t *db) {
    if (db->magic != HS_DB_MAGIC) {
        DEBUG_PRINTF("bad magic\n");
        return HS_INVALID;
    }

    if (db->version != HS_DB_VERSION) {
        DEBUG_PRINTF("bad version\n");
        return HS_DB_VERSION_ERROR;
    }

    const int platform_ok = (db_check_platform(db->platform) == HS_SUCCESS);
    if (!platform_ok) {
        DEBUG_PRINTF("bad platform\n");
        return HS_DB_PLATFORM_ERROR;
    }

    const void *bytecode = hs_get_bytecode(db);
    if (!ISALIGNED_16(bytecode)) {
        DEBUG_PRINTF("bad alignment\n");
        return HS_INVALID;
    }

    const hs_error_t crc_status = db_check_crc(db);
    if (crc_status != HS_SUCCESS) {
        DEBUG_PRINTF("bad crc\n");
    }
    return crc_status;
}
/** \brief Wrap the given bytecode (RoseEngine) in a newly-allocated
 * \ref hs_database.
 *
 * The bytecode is copied to the 64-byte boundary at or below db->bytes, the
 * header fields are filled in and the CRC is computed over the copied
 * bytecode. Returns NULL if the allocation check fails. */
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
    const size_t total = sizeof(struct hs_database) + len;
    DEBUG_PRINTF("db size %zu\n", total);
    DEBUG_PRINTF("db platform %llx\n", platform);

    struct hs_database *db = (struct hs_database *)hs_database_alloc(total);
    if (hs_check_alloc(db) != HS_SUCCESS) {
        hs_database_free(db);
        return NULL;
    }

    // So that none of our database is uninitialized
    memset(db, 0, total);

    db->magic = HS_DB_MAGIC;
    db->version = HS_DB_VERSION;
    db->length = len;
    db->platform = platform;

    // we need to align things manually
    const size_t shift = (uintptr_t)db->bytes & 0x3f;
    DEBUG_PRINTF("shift is %zu\n", shift);
    db->bytecode = offsetof(struct hs_database, bytes) - shift;
    char *dest = (char *)db + db->bytecode;
    assert(ISALIGNED_CL(dest));

    // Copy bytecode, then checksum the copy.
    memcpy(dest, in_bytecode, len);
    db->crc32 = Crc32c_ComputeBuf(0, dest, db->length);
    return db;
}
#if defined(_WIN32)
#define SNPRINTF_COMPAT _snprintf
#else
#define SNPRINTF_COMPAT snprintf
#endif
/** Allocates a buffer with hs_misc_alloc and prints the database info
 * ("Version: M.m.r Features: ... Mode: ...") into it.
 *
 * \param s receives the buffer on success; set to NULL on entry.
 * \param version packed version word: major in bits 31..24, minor in bits
 * 23..16, release in bits 15..8.
 * \param plat platform bitmask; only HS_PLATFORM_NOAVX2 is inspected.
 * \param raw_mode one of HS_MODE_STREAM / HS_MODE_VECTORED / HS_MODE_BLOCK;
 * any other value is printed as "BLOCK" (and trips the assert when asserts
 * are enabled).
 *
 * Returns HS_SUCCESS, the hs_check_alloc error for a failed allocation, or
 * HS_NOMEM if the formatting call reports an error. */
static
hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
u32 raw_mode) {
assert(s);
*s = NULL;
// Unpack the three version components from the packed word.
u8 release = (version >> 8) & 0xff;
u8 minor = (version >> 16) & 0xff;
u8 major = (version >> 24) & 0xff;
const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2";
const char *mode = NULL;
if (raw_mode == HS_MODE_STREAM) {
mode = "STREAM";
} else if (raw_mode == HS_MODE_VECTORED) {
mode = "VECTORED";
} else {
assert(raw_mode == HS_MODE_BLOCK);
mode = "BLOCK";
}
// Initial allocation size, which should be large enough to print our info.
// If it isn't, snprintf will tell us and we can resize appropriately.
size_t len = 256;
// Each pass allocates a buffer of the current len and tries to format into
// it; the loop ends on success, on allocation failure, or on a negative
// return from the formatting call.
while (1) {
char *buf = hs_misc_alloc(len);
hs_error_t ret = hs_check_alloc(buf);
if (ret != HS_SUCCESS) {
hs_misc_free(buf);
return ret;
}
// Note: SNPRINTF_COMPAT is a macro defined above, to cope with systems
// that don't have snprintf but have a workalike.
// NOTE(review): the resize branch below relies on the C99 snprintf
// contract (return value = length the full output needs). Confirm that
// the _WIN32 workalike behaves acceptably when the output is truncated.
int p_len = SNPRINTF_COMPAT(
buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
major, minor, release, avx2, mode);
if (p_len < 0) {
DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
hs_misc_free(buf);
break;
} else if ((size_t)p_len < len) { // output fit within buffer.
assert(buf[p_len] == '\0');
*s = buf;
return HS_SUCCESS;
} else { // output didn't fit: resize and reallocate.
len = (size_t)p_len + 1; // must add one for null terminator.
hs_misc_free(buf);
}
}
// Only reached via the p_len < 0 break above.
return HS_NOMEM;
}
/** Build the info string for a serialized database without deserializing it.
 *
 * Only the magic is validated; version and platform are read from the header
 * words and the mode is read from the RoseEngine that follows the eight
 * header words. \p *info is NULL on every failure path. */
HS_PUBLIC_API
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
                                       char **info) {
    if (!info) {
        return HS_INVALID;
    }
    *info = NULL;

    if (!bytes || length < sizeof(struct hs_database)) {
        return HS_INVALID;
    }

    const char *cursor = bytes;

    const u32 magic = unaligned_load_u32(cursor);
    if (magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }
    cursor += sizeof(u32);

    const u32 version = unaligned_load_u32(cursor);
    cursor += sizeof(u32);

    cursor += sizeof(u32); /* length */

    const platform_t plat = unaligned_load_u64a(cursor);
    cursor += 2 * sizeof(u32); /* platform spans two header words */

    cursor += 3 * sizeof(u32); /* crc, reserved 0, reserved 1 */

    // cursor now sits on the first byte of the serialized bytecode.
    const u32 mode =
        unaligned_load_u32(cursor + offsetof(struct RoseEngine, mode));

    return print_database_string(info, version, plat, mode);
}
/** Build the info string for an in-memory database. \p *info is NULL on every
 * failure path; a NULL, misaligned or wrong-magic \p db yields HS_INVALID. */
HS_PUBLIC_API
hs_error_t hs_database_info(const hs_database_t *db, char **info) {
    if (!info) {
        return HS_INVALID;
    }
    *info = NULL;

    if (!db) {
        return HS_INVALID;
    }
    if (!db_correctly_aligned(db)) {
        return HS_INVALID;
    }
    if (db->magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }

    const platform_t plat = db->platform;
    const struct RoseEngine *rose = hs_get_bytecode(db);

    return print_database_string(info, db->version, plat, rose->mode);
}

119
src/database.h Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for hs_database manipulation.
*/
#ifndef DATABASE_H_D467FD6F343DDE
#define DATABASE_H_D467FD6F343DDE
#ifdef __cplusplus
extern "C"
{
#endif
#include "hs_compile.h" // for HS_MODE_ flags
#include "hs_version.h"
#include "ue2common.h"
#define HS_DB_VERSION HS_VERSION_32BIT
#define HS_DB_MAGIC (0xdbdbdbdbU)
// Values in here cannot (easily) change - add new ones!
// CPU type is the low 6 bits (we can't need more than 64, surely!)
#define HS_PLATFORM_INTEL 1
#define HS_PLATFORM_CPU_MASK 0x3F
#define HS_PLATFORM_NOAVX2 (4<<13)
/** \brief Platform features bitmask. */
typedef u64a platform_t;
/* Platform value for this build: HS_PLATFORM_NOAVX2 is set when the compiler
 * does not define __AVX2__, otherwise the value is 0. */
static UNUSED
const platform_t hs_current_platform = {
#if !defined(__AVX2__)
HS_PLATFORM_NOAVX2 |
#endif
0,
};
/* Platform value with HS_PLATFORM_NOAVX2 always set, regardless of how this
 * build was compiled. */
static UNUSED
const platform_t hs_current_platform_no_avx2 = {
HS_PLATFORM_NOAVX2 |
0,
};
/*
 * a header to enclose the actual bytecode - useful for keeping info about the
 * compiled data.
 */
struct hs_database {
u32 magic; // HS_DB_MAGIC in a valid database
u32 version; // compared against HS_DB_VERSION
u32 length; // size of the bytecode in bytes (header not included)
u64a platform; // platform_t bitmask the database was built for
u32 crc32; // checksum over the `length` bytes of bytecode
u32 reserved0; // carried through serialization; no other use visible here
u32 reserved1; // carried through serialization; no other use visible here
u32 bytecode; // offset relative to db start
u32 padding[16]; // 64 bytes of slack before bytes[]; the bytecode start is
// moved back to a 64-byte boundary and so may begin in here
char bytes[]; // trailing storage for the bytecode
};
/** Address of the bytecode: the start of the database plus the byte offset
 * stored in db->bytecode. */
static really_inline
const void *hs_get_bytecode(const struct hs_database *db) {
    const char *base = (const char *)db;
    return base + db->bytecode;
}
/**
 * Cheap database sanity checks used in block mode scan calls and streaming
 * mode open calls: a NULL pointer or wrong magic gives HS_INVALID, a version
 * other than HS_DB_VERSION gives HS_DB_VERSION_ERROR.
 */
static really_inline
hs_error_t validDatabase(const hs_database_t *db) {
    if (!db) {
        return HS_INVALID;
    }
    if (db->magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }
    return (db->version == HS_DB_VERSION) ? HS_SUCCESS : HS_DB_VERSION_ERROR;
}
/* Thorough validation of a database: magic, version, platform, bytecode
 * alignment and CRC. Does not accept NULL. */
hs_error_t dbIsValid(const struct hs_database *db);
/* Wrap `len` bytes of bytecode in a newly allocated database tagged with
 * `platform`; returns NULL when the allocation check fails. */
struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* DATABASE_H_D467FD6F343DDE */

39
src/fdr/CMakeLists.txt Normal file
View File

@ -0,0 +1,39 @@
# The set of rules and other nastiness for generating FDR/Teddy source

# Python sources of the generator. They are not discovered automatically, so
# they are listed as explicit dependencies of every generated file.
set(AUTOGEN_PY_FILES
    arch.py
    autogen.py
    autogen_utils.py
    base_autogen.py
    fdr_autogen.py
    teddy_autogen.py
)

# fdr_autogen(<type> <out>)
#
# Adds a rule that runs `autogen.py <type>` and captures its standard output in
# ${CMAKE_CURRENT_BINARY_DIR}/<out>, plus a target `autogen_<type>` that
# depends on that file.
#
# Example: fdr_autogen(runtime fdr_autogen.c)
function(fdr_autogen type out)
    add_custom_command(
        COMMENT "AUTOGEN ${out}"
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        COMMAND ${PYTHON} "${CMAKE_CURRENT_SOURCE_DIR}/autogen.py" ${type} > "${CMAKE_CURRENT_BINARY_DIR}/${out}"
        DEPENDS ${AUTOGEN_PY_FILES}
        # Escape arguments identically on every generator so that paths
        # containing spaces survive; the `>` redirection is still passed
        # through to the shell.
        VERBATIM
    )
    add_custom_target(autogen_${type} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${out}")
endfunction()

# now build the functions
fdr_autogen(runtime fdr_autogen.c)
fdr_autogen(compiler fdr_autogen_compiler.cpp)
fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)

# Same paths as the OUTPUTs declared by fdr_autogen() above.
set(fdr_GENERATED_SRC
    "${CMAKE_CURRENT_BINARY_DIR}/fdr_autogen.c"
    "${CMAKE_CURRENT_BINARY_DIR}/fdr_autogen_compiler.cpp"
    "${CMAKE_CURRENT_BINARY_DIR}/teddy_autogen.c"
    "${CMAKE_CURRENT_BINARY_DIR}/teddy_autogen_compiler.cpp"
)

# The list has to exist in this scope for the property call to see any files;
# a set(... PARENT_SCOPE) on its own leaves the local variable undefined.
set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)

# Hand the list to the parent directory, which consumes it.
set(fdr_GENERATED_SRC ${fdr_GENERATED_SRC} PARENT_SCOPE)

include_directories(${CMAKE_CURRENT_BINARY_DIR})

58
src/fdr/arch.py Executable file
View File

@ -0,0 +1,58 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import autogen_utils
# wrapper for architectures
class Arch:
    """Describes a target architecture for the code generator.

    Attributes:
        name: identifier of the architecture.
        extensions: list of extension names (for example "AVX2").
        target: target-feature expression; None until a subclass sets it.
        guard_list: preprocessor conditions that must all hold for code
            generated for this architecture to be compiled.
    """

    def __init__(self, name, extensions = None):
        self.name = name
        # A literal [] default would be one list object shared by every
        # instance created without an explicit argument.
        self.extensions = [] if extensions is None else extensions
        self.target = None
        # Defined here so get_guard() also works on a plain Arch; subclasses
        # replace or extend it.
        self.guard_list = []

    def get_guard(self):
        """Return the "#if ..." line guarding code for this architecture."""
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if not self.guard_list:
            return "#if 1"
        return "#if " + " && ".join(self.guard_list)
class X86Arch(Arch):
    """x86 architecture description.

    target starts as "0" and guard_list as empty; when "AVX2" is among the
    extensions, the AVX2 CPU feature flag is OR-ed into target and a
    defined(__AVX2__) condition is added to guard_list.
    """

    def __init__(self, name, extensions = None):
        # Avoid a mutable default argument shared between instances.
        if extensions is None:
            extensions = []
        Arch.__init__(self, name, extensions)
        self.guard_list = [ ]
        self.target = "0"
        if "AVX2" in extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += [ "defined(__AVX2__)" ]
arch_x86_64 = X86Arch("x86_64", extensions = [ ])
arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions = [ "AVX2" ])

159
src/fdr/autogen.py Executable file
View File

@ -0,0 +1,159 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from fdr_autogen import *
from teddy_autogen import *
from arch import *
# FDR setup

# these are either produced - if the guard succeeds, or #defined to zeroes.
# either the function or the zero is fine in our array of function pointers
def produce_fdr_runtimes(l):
    """Ask every matcher in l, in order, to emit its runtime code."""
    for matcher in l:
        matcher.produce_code()
def produce_fdr_compiles(l):
    """Print the C++ definition of getFdrDescriptions().

    Each matcher in l contributes its table entry through
    produce_compile_call(), between the fixed prologue and epilogue.
    """
    # Parenthesised single-argument print gives the same output under
    # Python 2 and also parses under Python 3.
    print("void getFdrDescriptions(vector<FDREngineDescription> *out) {")
    print(" static const FDREngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(FDREngineDescription(defns[i]));")
    print(" }")
    print("}")
def build_fdr_matchers():
    """Build the list of FDR (M3) matchers.

    Domains 8 and 10-13 each get strides 1, 2 and 4 (in that order); domains
    14 and 15 get stride 1 only. Every matcher shares the same state width,
    bucket count, extract frequency and architecture.
    """
    common = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }

    matchers = [ ]
    for d in [8, 10, 11, 12, 13]:
        for s in (1, 2, 4):
            matchers.append(M3(stride = s, domain = d, **common))
    for d in [ 14, 15 ]:
        matchers.append(M3(stride = 1, domain = d, **common))
    return matchers
# teddy setup

def build_teddy_matchers():
    """Build the list of Teddy matchers.

    Order: the two AVX2 "fast" matchers (unpacked, packed), then the AVX2
    "fat" matchers for 1-4 masks with 16 buckets, then the plain x86_64
    matchers for 1-4 masks with 8 buckets. Within each mask count the
    unpacked variant precedes the packed one.
    """
    packings = (False, True)
    mask_counts = range(1, 5)

    # AVX2
    matchers = [ MTFast(arch = arch_x86_64_avx2, packed = p) for p in packings ]
    for n_msk in mask_counts:
        for p in packings:
            matchers.append(MTFat(arch = arch_x86_64_avx2, packed = p, num_masks = n_msk, num_buckets = 16))

    # SSE/SSE2/SSSE3
    for n_msk in mask_counts:
        for p in packings:
            matchers.append(MT(arch = arch_x86_64, packed = p, num_masks = n_msk, num_buckets = 8))

    return matchers
def produce_teddy_compiles(l):
    """Print the C++ definition of getTeddyDescriptions().

    Each matcher in l contributes its table entry through
    produce_compile_call(), between the fixed prologue and epilogue.
    """
    # Parenthesised single-argument print gives the same output under
    # Python 2 and also parses under Python 3.
    print("void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {")
    print(" static const TeddyEngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(TeddyEngineDescription(defns[i]));")
    print(" }")
    print("}")
# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
# are linked. So we either generate the function or we don't - then at the point of the
# header in fdr_autogen.c we either generate the header or we #define the zero.
def produce_teddy_runtimes(l):
    """Print guarded prototypes for every matcher in l, then guarded code.

    Two passes are made over l: the first emits each matcher's header between
    its guard open/close, the second emits each matcher's code the same way.
    """
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        # Parenthesised print: same output on Python 2, valid on Python 3.
        print(m.produce_header(visible = True, header_only = True))
        m.close_guard()

    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()
# see produce_teddy_runtimes() comment for the rationale
def produce_teddy_headers(l):
    """For every matcher in l: open its guard, print its prototype, then let
    the matcher emit its zero alternative."""
    for m in l:
        m.produce_guard()
        # Parenthesised print: same output on Python 2, valid on Python 3.
        print(m.produce_header(visible = True, header_only = True))
        m.produce_zero_alternative()
# general utilities

def make_fdr_function_pointers(matcher_list):
    """Print the FDRFUNCTYPE typedef and the funcs[] table.

    The table holds one entry per matcher, named by its get_name(), in list
    order and separated by commas.
    """
    # Parenthesised single-argument print gives the same output under
    # Python 2 and also parses under Python 3.
    print("""
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
""")
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print(all_funcs)
    print("""
};
""")
def assign_ids(matcher_list, next_id):
    """Give each matcher a consecutive id starting at next_id.

    Returns the first id that was not handed out, so calls can be chained.
    """
    current = next_id
    for matcher in matcher_list:
        matcher.id = current
        current += 1
    return current
# Main entry point

# FDR matchers take the first ids, Teddy matchers continue from there.
m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)

# Exactly one generation mode is expected on the command line. Standard output
# is captured into a source file by the build, so a missing or misspelt mode
# must fail loudly rather than exit successfully with an empty file.
_MODES = "compiler|runtime|teddy_runtime|teddy_compiler"
if len(sys.argv) < 2:
    sys.stderr.write("usage: autogen.py <%s>\n" % _MODES)
    sys.exit(1)

if sys.argv[1] == "compiler":
    produce_fdr_compiles(m)
elif sys.argv[1] == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif sys.argv[1] == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif sys.argv[1] == "teddy_compiler":
    produce_teddy_compiles(tm)
else:
    sys.stderr.write("autogen.py: unknown mode '%s' (expected %s)\n" % (sys.argv[1], _MODES))
    sys.exit(1)

285
src/fdr/autogen_utils.py Executable file
View File

@ -0,0 +1,285 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
def fail_out(msg = ""):
    """Report an internal generator failure on stderr and exit with status 1."""
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # Python 2-only "print >>stream" statement.
    sys.stderr.write("Internal failure in autogen.py: " + msg + "\n")
    sys.exit(1)
class IntegerType:
    """A scalar integer type of `size` bits, used to build C expression strings."""

    def __init__(self, size):
        self.size = size

    def get_name(self):
        """C type name for this width; raises KeyError for an unlisted size."""
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        # Floor division keeps the result an integer on Python 3 as well.
        return self.size // 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        """Hex literal for n truncated to this width; 64-bit values get ULL."""
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        """Integer with the n lowest bits set."""
        return (1 << n) - 1

    def highbits(self, n):
        """Complement of the low (size - n) bits; negative until truncated by
        constant_to_string."""
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        """Expression extracting bits [low, high) of expr_string."""
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            # Shifting by the full width or more leaves nothing behind.
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    # code is:
    # "normal" (always between buf and len) - the default
    # "aligned" (means normal + aligned to a natural boundary)
    # "cautious_forward" (means may go off the end of buf+len)
    # "cautious_backward" (means may go off the start of buf)
    # "cautious_everywhere" (means may go off both)
    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        """Return a call to the lv_<type>[suffix] load helper for `code`.

        An unrecognised code yields None, as it always has.
        """
        # Codes are compared by value: "is" tests object identity and only
        # appeared to work when both strings happened to be interned.
        suffixes = {
            "normal" : "",
            "aligned" : "_a",
            "cautious_forward" : "_cf",
            "cautious_backward" : "_cb",
            "cautious_everywhere" : "_ce",
        }
        if code not in suffixes:
            return None
        if code == "aligned" and self.size == 8:
            fail_out("no aligned byte loads")
        return "lv_%s%s(%s + %d, %s, %s)" % (self.get_name(), suffixes[code], base_string, offset, bounds_lo, bounds_hi)
class SIMDIntegerType(IntegerType):
    # IntegerType variant whose expressions use the 128-bit helpers seen
    # below (zeroes128, ones128, movd/movq, _mm_slli_si128/_mm_srli_si128).
    # Shifts and masks are only available at byte granularity.

    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        # Move the low 32 or 64 bits into a scalar and mask there.
        if n <= 32:
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif n <= 64:
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        else:
            # Previously this path hit an unbound local instead of reporting
            # the unsupported width.
            fail_out("Unimplemented low bit extract of more than 64 bits on m128")
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    # `flip` now has a default so the base-class call shape (expr, low, high)
    # reaches the diagnostic instead of raising a TypeError.
    def bit_extract_expr(self, expr_string, low, high, flip = None):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        # n is in bits; positive shifts left, negative shifts right.
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            # floor division keeps the byte count an integer on any Python
            return "_mm_slli_si128(%s, %s)" % (expr_string, n // 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, (-n) // 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        # all-ones shifted right by (128 - n) bits leaves the low n bits set
        return self.shift_expr("ones128()", -(128 - n))
def getRequiredType(bits):
    # Pick the type object for a value of `bits` bits: exactly 128 maps to
    # SIMDIntegerType, otherwise the first of 8/16/32/64 that is wide enough.
    # Returns None when nothing fits.
    if bits == 128:
        return SIMDIntegerType(bits)
    wide_enough = [width for width in (8, 16, 32, 64) if bits <= width]
    if wide_enough:
        return IntegerType(wide_enough[0])
    return None
class IntegerVariable:
    # A named variable paired with the type object that describes it.
    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        # Emit "<type> <name>;" or, when a non-empty initializer is given,
        # "<type> <name> = <initializer>;".
        declaration = "%s %s" % (self.type.get_name(), self.name)
        if not initialization_string:
            return declaration + ";"
        return "%s = %s;" % (declaration, initialization_string)
class Step:
    # One unit of generated code. Subclasses set self.val to the text to emit;
    # the step registers itself with its context on construction.
    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []
        self.latest = None
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        pad = " " * (self.offset*2 + self.matcher.default_body_indent)
        text = "\n".join(pad + piece for piece in self.val.split("\n"))
        if not self.latest:
            return text
        # scheduling annotation, appended as a trailing comment
        text += " // " + str(self.debug_step) + " L" + str(self.latency) + " LTST:%d" % self.latest
        if self.dependency_list:
            text += " Derps: " + "".join("%d/%d " % (d.debug_step, l)
                                         for (d, l) in self.dependency_list)
        return text

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        # Anti- and output-dependencies are recorded with weight 1; a true
        # dependency is recorded with the producing step's latency.
        if anti_dependency or output_dependency:
            weight = 1
        else:
            weight = step.latency
        self.dependency_list.append((step, weight))

    def nv(self, type, var_name):
        # Shorthand for declaring a new variable written by this step.
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        # Shorthand for looking up a variable on behalf of this step.
        return self.context.get_var(self, var_name, reader = reader, writer = writer)
# utility steps, generic
# Step that emits a C label "<label_prefix><offset>:" followed by "UNUSED;".
class LabelStep(Step):
def __init__(self, context, offset = 0, label_prefix = "off"):
Step.__init__(self, context, offset)
self.val = "%s%d: UNUSED;" % (label_prefix, offset)
# Step that emits an opening brace.
class OpenScopeStep(Step):
def __init__(self, context, offset = 0):
Step.__init__(self, context, offset)
self.val = "{"
# Step that emits a closing brace.
class CloseScopeStep(Step):
def __init__(self, context, offset = 0):
Step.__init__(self, context, offset)
self.val = "}"
class CodeGenContext:
    # Collects the steps and variables of one generated code region and
    # derives dependencies between steps from variable reads and writes.
    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {} # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        # Declare var_name and record `step` (may be None) as its writer.
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        # Look up var_name for `step`, recording the dependencies implied by
        # the access: read-after-write, write-after-write (write-only access)
        # and write-after-read against readers of the previous value.
        if reader:
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            # "in" instead of dict.has_key(), which no longer exists on Python 3
            if var_name in self.var_readers:
                for prior_reader in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(prior_reader, anti_dependency = True)
                self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        # Append the step and stamp it with its creation index.
        self.steps.append(step)
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        # Emit steps in creation order; `finals` is accepted but not used.
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        # Give every step reachable from `finals` a `latest` value at least
        # (dependent.latest + edge weight) for each of its dependents, then
        # emit steps in decreasing `latest` order.
        for f in finals:
            f.latest = f.latency
        worklist = list(finals)
        while worklist:
            current = worklist.pop(0)
            for (dep, lat) in current.dependency_list:
                required = current.latest + lat
                # Compare against the value being assigned. The old test used
                # dep.latency, which for weight-1 edges could overwrite a
                # larger `latest` with a smaller one and place `dep` after a
                # step that depends on it.
                if dep.latest is None or dep.latest < required:
                    dep.latest = required
                    if dep not in worklist:
                        worklist.append(dep)
        # Steps never reached keep latest == None and sort last, which is what
        # comparing None did on Python 2; the tuple key also works on Python 3.
        self.steps.sort(reverse = True,
                        key = lambda s : (s.latest is not None, s.latest or 0))
        return "\n".join( [ s.emit() for s in self.steps ] )

167
src/fdr/base_autogen.py Normal file
View File

@ -0,0 +1,167 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class MatcherBase:
    # Common C-source fragments for generated matcher functions. The methods
    # read attributes that this class does not set itself (id, arch,
    # num_buckets, conf_top_level_split, conf_pull_back).

    def __init__(self):
        pass

    def get_name(self):
        # Generated function name: "fdr_exec_" plus the id zero-padded to 3 digits.
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        # Function signature; ends in ";" for a prototype or "{" to open a body.
        linkage = "" if visible else "static never_inline"
        signature = """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        closer = ";" if header_only else "{"
        return linkage + signature + closer + "\n"

    def produce_guard(self):
        print(self.arch.get_guard())

    def produce_zero_alternative(self):
        # Fallback branch that defines the function name as 0.
        print("""
#else
#define %s 0
#endif
""" % self.get_name())

    # trivial function for documentation/modularity
    def close_guard(self):
        print("#endif")

    def produce_common_declarations(self):
        # Local declarations placed at the top of each generated function.
        return """
const u8 * buf = a->buf;
const size_t len = a->len;
const u8 * ptr = buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t * control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 * tryFloodDetect = a->firstFloodDetect;
UNUSED u32 bit, bitRem, confSplit, idx;
u32 byte, cf;
const struct FDRConfirm *fdrc;
u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        # Early return once the control value asks to terminate matching.
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        # Calls floodDetect() once ptr has passed tryFloodDetect.
        return """
if (P0(ptr > tryFloodDetect)) {
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}
"""

    def produce_footer(self):
        # Normal function exit: store the control value and report success.
        return """
*a->groups = controlVal;
return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        # Build the confirm loop that drains the set bits of conf_var_name.
        caution_string = "VECTORING" if cautious else "NOT_CAUTIOUS"
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)

        # optional fast path taken when fdrc->mult is zero
        quick_check_string = ""
        if enable_confirmless:
            quick_check_string = """
if (!fdrc->mult) {
u32 id = fdrc->nBitsOrSoleID;
if ((last_match == id) && (fdrc->flags & NoRepeat))
continue;
last_match = id;
controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
continue;
} """

        # optional skip of positions outside [buf + start_offset, buf + len)
        bailout_string = ""
        if do_bailout:
            bailout_string = """
if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""

        confirm_template = Template("""
if (P0(!!$CONFVAR)) {
do {
bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
byte = bit / $NUM_BUCKETS + $OFFSET;
bitRem = bit % $NUM_BUCKETS;
$BAILOUT_STRING
confSplit = *(ptr+byte) & $SPLIT_MASK;
idx = confSplit * $NUM_BUCKETS + bitRem;
cf = confBase[idx];
if (!cf)
continue;
fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
if (!(fdrc->groups & *control))
continue;
$QUICK_CHECK_STRING
confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
} while(P0(!!$CONFVAR));
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}""")
        return confirm_template.substitute(
            CONFVAR = conf_var_name,
            CONFVAR_SIZE = conf_var_size,
            NUM_BUCKETS = self.num_buckets,
            OFFSET = offset,
            SPLIT_MASK = conf_split_mask,
            QUICK_CHECK_STRING = quick_check_string,
            BAILOUT_STRING = bailout_string,
            CAUTION_STRING = caution_string,
            CONF_PULL_BACK = self.conf_pull_back)
def indent(block, depth):
    # Prefix every line of `block` with 4*depth copies of the pad string and
    # rejoin with newlines (a trailing newline in `block` is not preserved).
    prefix = " " * (4*depth)
    return "\n".join(prefix + row for row in block.splitlines())

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "engine_description.h"
#include "hs_compile.h" // for hs_platform_info
#include "util/target_info.h"
namespace ue2 {

// Defined out of line; the class declares this destructor virtual.
EngineDescription::~EngineDescription() {}

/** Returns whether code built for this engine's code_target can run on
 * \a target_in, as decided by target_t::can_run_on_code_built_for(). */
bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
    return target_in.can_run_on_code_built_for(code_target);
}

/** Builds a target_t with generic tuning and the given CPU feature mask. */
target_t targetByArchFeatures(u64a cpu_features) {
    // Value-initialise so that any members not assigned below are zero
    // rather than indeterminate when the struct is handed to target_t.
    hs_platform_info p = {};
    p.tune = HS_TUNE_FAMILY_GENERIC;
    p.cpu_features = cpu_features;
    return target_t(p);
}

} // namespace ue2

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINE_DESCRIPTION_H
#define ENGINE_DESCRIPTION_H
#include "ue2common.h"
#include "util/target_info.h"
namespace ue2 {
/**
 * Describes one generated engine: its numeric id, the target it was built
 * for, and the bucket/confirm parameters passed to the constructor.
 * Abstract: subclasses must implement getDefaultFloodSuffixLength().
 */
class EngineDescription {
u32 id;
target_t code_target; // the target that we built this code for
u32 numBuckets;
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
public:
/** Stores each argument in the member of the corresponding name. */
EngineDescription(u32 id_in, const target_t &code_target_in,
u32 numBuckets_in, u32 confirmPullBackDistance_in,
u32 confirmTopLevelSplit_in)
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
confirmPullBackDistance(confirmPullBackDistance_in),
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
virtual ~EngineDescription();
// Plain accessors for the values given at construction.
u32 getID() const { return id; }
u32 getNumBuckets() const { return numBuckets; }
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
/** Returns whether \a target_in can run code built for code_target. */
bool isValidOnTarget(const target_t &target_in) const;
/** Must be provided by each concrete engine description. */
virtual u32 getDefaultFloodSuffixLength() const = 0;
/** Defaults to true; virtual so that subclasses may override. */
virtual bool typicallyHoldsOneCharLits() const { return true; }
};
/** Returns a target given a CPU feature set value. */
target_t targetByArchFeatures(u64a cpu_features);
} // namespace ue2
#endif

126
src/fdr/fdr.c Normal file
View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/simd_utils.h"
#define P0(cnd) unlikely(cnd)
#include "fdr.h"
#include "fdr_internal.h"
#include "teddy_internal.h"
#include "flood_runtime.h"
#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
#include "fdr_streaming_runtime.h"
#include "fdr_loadval.h"
/**
 * Returns the low numBits bits of the data that ends one byte before the
 * scan start position.
 *
 * With start_offset == 0 the preceding byte is the last history byte; for
 * numBits > 8 it is combined with buf[0] as the high byte. Otherwise the
 * value is read from buf at start_offset - 1 (one byte, or a u16 load for
 * numBits > 8).
 *
 * The start_offset == 0 path reads buf_history[len_history - 1], so the
 * caller must ensure len_history is non-zero; the generated state
 * initialisation only calls this when a->len_history is set.
 */
static really_inline UNUSED
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
    u32 r;
    if (a->start_offset == 0) {
        r = a->buf_history[a->len_history - 1];
        if (numBits > 8) {
            r |= (a->buf[0] << 8);
        }
    } else if (numBits <= 8) {
        r = a->buf[a->start_offset - 1];
    } else {
        r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
    }
    /* build the mask in unsigned arithmetic: shifting a signed 1 into the
     * sign bit is undefined */
    return r & ((1U << numBits) - 1);
}
#include "fdr_autogen.c"
#define FAKE_HISTORY_SIZE 16
static const u8 fake_history[FAKE_HISTORY_SIZE];
/* Block-mode entry point: packs the arguments into an FDR_Runtime_Args with
 * an empty (zero-length) history and dispatches to the engine function
 * selected by fdr->engineID. */
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
                     HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
    const struct FDR_Runtime_Args a = {
        buf,
        len,
        fake_history,
        0,
        fake_history, // nocase
        0,
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        0
    };

    /* nothing to scan when the start offset is at or past the end */
    if (unlikely(a.start_offset >= a.len)) {
        return HWLM_SUCCESS;
    }

    assert(funcs[fdr->engineID]);
    return funcs[fdr->engineID](fdr, &a);
}
/* Streaming-mode entry point: like fdrExec but with a caller-supplied
 * history buffer, and with stream state unpacked before and packed after
 * the scan. */
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
                              size_t hlen, const u8 *buf, size_t len,
                              size_t start, HWLMCallback cb, void *ctxt,
                              hwlm_group_t groups, u8 * stream_state) {
    struct FDR_Runtime_Args a = {
        buf,
        len,
        hbuf,
        hlen,
        hbuf, // nocase - start same as caseful, override later if needed
        hlen, // nocase
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
             : (u64a)0
    };
    hwlm_error_t ret = HWLM_SUCCESS;

    fdrUnpackState(fdr, &a, stream_state);

    /* state is packed again below even when there is nothing to scan */
    if (unlikely(a.start_offset >= a.len)) {
        /* ret already holds HWLM_SUCCESS */
    } else {
        assert(funcs[fdr->engineID]);
        ret = funcs[fdr->engineID](fdr, &a);
    }

    fdrPackState(fdr, &a, stream_state);
    return ret;
}

91
src/fdr/fdr.h Normal file
View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: runtime API.
*/
#ifndef FDR_H
#define FDR_H
#include "ue2common.h"
#include "hwlm/hwlm.h"
// C linkage in the API
#ifdef __cplusplus
extern "C" {
#endif
struct FDR;
/** \brief Returns size in bytes of the given FDR engine. */
size_t fdrSize(const struct FDR *fdr);
/** \brief Returns non-zero if the contents of the stream state indicate that
* there is active FDR history beyond the regularly used history. */
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
/**
* \brief Block-mode scan.
*
* \param fdr FDR matcher engine.
* \param buf Buffer to scan.
* \param len Length of buffer to scan.
* \param start First offset in buf at which a match may end.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
*
* \return HWLM_SUCCESS without scanning when start >= len; otherwise the
* result of the engine function selected by the engine's ID.
*/
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups);
/**
* \brief Streaming-mode scan.
*
* \param fdr FDR matcher engine.
* \param hbuf History buffer.
* \param hlen Length of history buffer (hbuf).
* \param buf Buffer to scan.
* \param len Length of buffer to scan (buf).
* \param start First offset in buf at which a match may end.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
* \param stream_state Persistent stream state for use by FDR.
*
* \return HWLM_SUCCESS without scanning when start >= len; otherwise the
* result of the engine function selected by the engine's ID. The stream
* state is unpacked before and packed after the scan in both cases.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // FDR_H

574
src/fdr/fdr_autogen.py Executable file
View File

@ -0,0 +1,574 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class OrStep(Step):
    # ORs the variable "st<offset>" into "s": the |= operator for widths
    # below 128, an or<width>() call otherwise.
    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        reach_var = self.gv("st%d" % offset)
        if width >= 128:
            self.val = "s = or%d(s, %s);" % (width, reach_var.name)
        else:
            self.val = "s |= %s;" % reach_var.name
class ShiftStateStep(Step):
    # Shifts the matcher's state variable right by stride_used * num_buckets
    # bits (a negative distance in shift_expr terms).
    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_var = matcher.state_variable
        distance = -1 * stride_used * matcher.num_buckets
        shifted = state_var.type.shift_expr(state_var.name, distance)
        self.val = "%s = %s;" % (state_var.name, shifted)
class BulkLoadStep(Step):
    # Loads "current_data_<offset>" using the matcher's bulk load type, either
    # declaring the variable or assigning to an existing one. The `size`
    # argument is accepted but not used.
    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        load_type = matcher.bulk_load_type
        if aligned:
            load_text = load_type.load_expr_data(self.offset, code = "aligned")
        else:
            load_text = load_type.load_expr_data(self.offset)
        var_name = "current_data_%d" % offset
        if not define_var:
            # registers this step as the variable's writer
            self.gv(var_name, reader = False, writer = True)
            self.val = "%s = %s;" % (var_name, load_text)
        else:
            declared = self.nv(load_type, var_name)
            self.val = declared.gen_initializer_stmt(load_text)
class ValueExtractStep(Step):
    # Declares "v<offset>": the value at this offset, shifted into position
    # and masked with the matcher's reach_mask.
    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 2
        bytes_per_load = matcher.datasize_bytes
        pos_in_load = offset % bytes_per_load
        adjust = matcher.reach_shift_adjust

        crosses_load_end = matcher.domain > 8 and pos_in_load == bytes_per_load - 1
        if crosses_load_end:
            # Case 1: the value runs past the end of the bulk load, so it is
            # fetched with a separate single load
            self.latency = 4
            load_code = "cautious_forward" if sub_load_cautious else "normal"
            single_load = matcher.single_load_type.load_expr_data(self.offset, load_code)
            shifted = "(%s << %d)" % (single_load, adjust)
        elif matcher.fdr2_force_naive_load:
            # Case 2, naive: always use a separate single load
            single_load = matcher.single_load_type.load_expr_data(self.offset, "normal")
            shifted = "(%s << %d)" % (single_load, adjust)
        else:
            # Case 2: the value lies entirely inside the current bulk load
            bulk_var = self.gv("current_data_%d" % (offset - pos_in_load))
            if pos_in_load == 0:
                # Case 2a: value sits at the LSB end; only the reach
                # adjustment (a left shift) is needed
                shifted = "(%s << %d)" % (bulk_var.name, adjust)
            else:
                # Case 2b: value sits mid-register; shift right into place,
                # less the reach adjustment
                shifted = "(%s >> %d)" % (bulk_var.name, pos_in_load*8 - adjust)

        masked = "(%s) & 0x%x" % (shifted, matcher.reach_mask)
        value_var = self.nv(matcher.value_extract_type, "v%d" % offset)
        self.val = value_var.gen_initializer_stmt(masked)
class TableLookupStep(Step):
    # Declares "st<offset>" as the state-typed entry read from the table
    # pointer "ft" at byte offset v<offset> * reach_multiplier.
    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        index_var = self.gv("v%d" % offset)
        entry_var = self.nv(matcher.state_type, "st%d" % offset)
        type_name = matcher.state_type.get_name()
        lookup = "*(const %s *)(ft + %s*%dU)" % (type_name, index_var.name, reach_multiplier)
        self.val = entry_var.gen_initializer_stmt(lookup)
class ShiftReachMaskStep(Step):
    # Shifts "st<offset>" left by num_buckets bits for each position the
    # offset lies past the previous multiple of extract_frequency.
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        pos_in_extract = offset % matcher.extract_frequency
        reach_var = self.gv("st%d" % offset, writer = True)
        shifted = reach_var.type.shift_expr(reach_var.name, pos_in_extract * matcher.num_buckets)
        self.val = "%s = %s;" % (reach_var.name, shifted)
class ConfExtractStep(Step):
    # Declares "extr<offset>" as the low extract_size bits of "s". Latency is
    # raised to 2 when the state type reports isSIMDOnIntel().
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_type = matcher.state_type
        if state_type.isSIMDOnIntel():
            self.latency = 2
        low_bits = state_type.lowbit_extract_expr("s", matcher.extract_size)
        extract_var = self.nv(matcher.extr_type, "extr%d" % offset)
        self.val = extract_var.gen_initializer_stmt(low_bits)
# Folds "extr<extract_offset>" (cast to the conf type) into
# "conf<conf_offset>": a plain copy when both offsets are equal, otherwise a
# shifted OR.
class ConfAccumulateStep(Step):
def __init__(self, context, extract_offset, conf_offset, define_var = True):
Step.__init__(self, context, extract_offset)
m = self.matcher
extr_var = self.gv("extr%d" % extract_offset)
extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name)
if extract_offset == conf_offset:
# create conf_var as a straight copy of extr
if define_var:
conf_var = self.nv(m.conf_type, "conf%d" % conf_offset)
self.val = conf_var.gen_initializer_stmt(extr_var_cast)
else:
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
self.val = "%s = %s;" % (conf_var.name, extr_var_cast)
else:
# shift extr_var and insert/OR it in conf_var
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
# the extract sits (extract_offset - conf_offset) positions above the
# base of the conf word, num_buckets bits per position
shift_dist = (extract_offset - conf_offset) * m.num_buckets
self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist))
# NOTE(review): presumably this latency applies only to the shift-and-OR
# branch above; the nesting is not recoverable from this view - confirm.
self.latency = 2
class ConfirmFlipStep(Step):
    # XORs the low (confirm_frequency * num_buckets) bits of "conf<offset>"
    # in place.
    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        conf_var = self.gv("conf%d" % self.offset, writer = True)
        flip_width = self.matcher.confirm_frequency * matcher.num_buckets
        flipped = conf_var.type.flip_lowbits_expr(conf_var.name, flip_width)
        self.val = "%s = %s;" % (conf_var.name, flipped)
class ConfirmStep(Step):
    # Emits the matcher's confirm loop over "conf<offset>". The confirmless
    # fast path is enabled only for stride 1; bailout is always off.
    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        conf_var = self.gv("conf%d" % offset, writer = True)
        single_stride = matcher.stride == 1
        self.val = matcher.produce_confirm_base(conf_var.name, conf_var.type.size, offset, cautious,
                                                enable_confirmless = single_stride, do_bailout = False)
class M3(MatcherBase):
def get_hash_safety_parameters(self):
    # Returns (behind, ahead): zero bytes behind, and one less than the
    # single load type's size in bytes ahead.
    load_bytes = self.single_load_type.size_in_bytes()
    behind, ahead = 0, load_bytes - 1
    return (behind, ahead)
def produce_compile_call(self):
    # Print one brace-enclosed, comma-terminated row holding this matcher's
    # parameters.
    fields = (self.id, self.state_width, self.num_buckets,
              self.stride, self.domain,
              self.arch.target, self.conf_pull_back, self.conf_top_level_split)
    print(" { %d, %d, %d, %d, %d, %s, %d, %d }," % fields)
# Print the C code for one pass over loop_bytes bytes of input.
#
# With switch_variant False this is the main for-loop, whose steps are
# ordered by CodeGenContext.schedule(). With switch_variant True it is a
# switch on (iterBytes - dist) that jumps to a per-offset label, uses
# cautious loads/confirms, and emits steps in creation order.
def produce_main_loop(self, switch_variant = False):
stride_offsets = xrange(0, self.loop_bytes, self.stride)
stride_offsetSet = set(stride_offsets)
so_steps_last_block = []
sh = None
last_confirm = None
ctxt = CodeGenContext(self)
if switch_variant:
print " ptr -= (iterBytes - dist);"
print " { " # need an extra scope around switch variant to stop its globals escaping
else:
print " if (doMainLoop) {"
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
print self.produce_flood_check()
print " __builtin_prefetch(ptr + (iterBytes*4));"
print " assert(((size_t)ptr % START_MOD) == 0);"
# just do globally for now
if switch_variant:
subsidiary_load_cautious = True
confirm_cautious = True
else:
subsidiary_load_cautious = False
confirm_cautious = False
# Offsets that get a bulk load: multiples of datasize_bytes for which
# range(off, off + datasize_bytes - 1) contains at least one stride offset.
if not self.fdr2_force_naive_load:
bulk_load_steps = [ off for off in range(self.loop_bytes)
if off % self.datasize_bytes == 0 and
(set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)]
else:
bulk_load_steps = []
# One conf variable per multiple of confirm_frequency.
confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]
# Declare the bulk-load and conf variables up front, with no writer step.
for off in bulk_load_steps:
lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
print " " + lb_var.gen_initializer_stmt()
for off in confirm_steps:
var_name = "conf%d" % off
conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
if switch_variant:
init_string = "(%s)-1" % self.conf_type.get_name()
else:
init_string = ""
print " " + conf_def_var.gen_initializer_stmt(init_string)
# Switch variant only: one case per entry offset that fixes up conf, state
# and (when not on a load boundary) current_data before jumping to the label.
if switch_variant:
print " switch(iterBytes - dist) {"
for i in range(0, self.loop_bytes):
print " case %d:" % i
# init and poison conf; over-precise but harmless
conf_id = (i / self.confirm_frequency) * self.confirm_frequency
if i % self.confirm_frequency:
conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
else:
print " conf%d = 0;" % conf_id
# init state
state_fixup = i % self.extract_frequency
state = self.state_variable
shift_distance = self.num_buckets * state_fixup
if state_fixup:
print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
if self.state_width < 128:
print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
else:
print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))
if not self.fdr2_force_naive_load:
# init current_data (could poison it in some cases)
load_mod = i % self.datasize_bytes
load_offset = i - load_mod
if load_mod:
# not coming in on an even boundary means having to do a load var
# actually, there are a bunch of things we can do on this bulk load
# to avoid having to be 'cautious_backwards' but I'm not completely
# sure they are good ideas
init_string = self.bulk_load_type.load_expr_data(load_offset,
code = "cautious_backward")
var_name = "current_data_%d" % load_offset
lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
print " %s = %s;" % (lb_var.name, init_string)
print " goto off%d;" % i
print " case %d: goto skipSwitch;" % self.loop_bytes
print " }"
print " {"
# Build the steps for every byte offset of the block. Constructing a Step
# registers it with ctxt, so results are only kept where they are needed
# for explicit dependencies.
for off in range(self.loop_bytes):
# X_mod is the offset we're up to relative to the last X operation
# X_offset is which of the last X operations matches this iteration
if (switch_variant):
LabelStep(ctxt, off)
if off in bulk_load_steps:
if not self.fdr2_force_naive_load:
BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)
if off in stride_offsets:
if switch_variant:
OpenScopeStep(ctxt, off)
ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
TableLookupStep(ctxt, self.reach_mult, off)
if off % self.extract_frequency:
ShiftReachMaskStep(ctxt, off)
so = OrStep(ctxt, off, self.state_width)
if switch_variant:
CloseScopeStep(ctxt, off)
if sh != None:
so.add_dependency(sh)
so_steps_last_block += [ so ]
extract_mod = off % self.extract_frequency
extract_offset = off - extract_mod
extract_ready = extract_mod == self.extract_frequency - 1
if extract_ready:
if switch_variant:
OpenScopeStep(ctxt, off)
ex = ConfExtractStep(ctxt, extract_offset)
# NOTE(review): confirm_offset is only assigned further down in this loop
# body, so the value used here is the one left by the previous iteration
# (and is unbound if this runs at off == 0) - confirm that is intended.
ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
for so_step in so_steps_last_block:
ex.add_dependency(so_step)
if switch_variant:
CloseScopeStep(ctxt, off)
so_steps_last_block = []
sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
sh.add_dependency(ex)
confirm_mod = off % self.confirm_frequency
confirm_offset = off - confirm_mod
confirm_ready = confirm_mod == self.confirm_frequency - 1
if confirm_ready:
cflip = ConfirmFlipStep(ctxt, confirm_offset)
cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious )
# chain confirms so they stay in order relative to each other
if last_confirm:
cf.add_dependency(last_confirm)
last_confirm = cf
# The main loop is reordered by schedule(); the switch variant keeps
# creation order.
if not switch_variant:
print ctxt.schedule([ last_confirm, sh ])
else:
print ctxt.dontschedule([ last_confirm, sh ])
if switch_variant:
print "skipSwitch:;"
print " ptr += iterBytes;"
print " }" # close extra scope around switch variant
print " }"
def produce_init_state(self):
    # Build (and return, without printing) the C snippet that declares the
    # state variable 's' and gives it its starting value: either a table
    # entry selected via getPreStartVal() followed by a shift of the state
    # by -num_buckets (when history is available) or the 'start' value
    # stored in the FDR structure.
    state_var = self.state_variable
    state_ty = self.state_type
    distance = -1 * self.num_buckets
    shifted = state_var.type.shift_expr(state_var.name, distance)
    shift_expr = "%s = %s" % (state_var.name, shifted)
    snippet = Template("""
$TYPENAME s;
if (a->len_history) {
u32 tmp = getPreStartVal(a, $DOMAIN);
s = *((const $TYPENAME *)ft + tmp);
$SHIFT_EXPR;
} else {
s = *(const $TYPENAME *)&fdr->start;
}
""")
    # ZERO_EXPR is supplied although the template text does not reference it
    return snippet.substitute(TYPENAME = state_ty.get_name(),
                              ZERO_EXPR = state_ty.zero_expression(),
                              DOMAIN = self.domain,
                              SHIFT_EXPR = shift_expr)
def produce_code(self):
# Print the complete C source for this matcher to stdout: header, common
# declarations, table/confirm base pointers, initial state, the outer
# "while (ptr < buf + len)" driver and both variants of the main loop
# (switch variant first, then the plain variant), followed by the footer.
# As a side effect this sets self.reach_mult, self.reach_shift_adjust and
# self.reach_mask before the main loops are generated.
(behind, ahead) = self.get_hash_safety_parameters()
# NOTE(review): loop_read_behind is assigned but not used in this method.
loop_read_behind = behind
loop_read_ahead = self.loop_bytes + ahead
# we set up mask and shift stuff for extracting our masks from registers
#
# we have a choice as to whether to mask out the value early or
# extract the value (shift first) then mask it
#
# Intel has a free scaling factor from 1/2/4/8 so we want to combine
# the extra needed shift for SSE registers with the mask operation
ssb = self.state_type.size / 8 # state size in bytes
# Intel path
if ssb == 16 and self.domain == 16:
# obscure corner - we don't have the room in the register to
# do this for all values so we don't. domain==16 is pretty
# bad anyhow, of course
self.reach_mult = 8
else:
self.reach_mult = ssb
# log2 of the ratio between the state size and the multiplier chosen above
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
# 'domain' low bits set, moved up by the shift adjustment
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
print self.produce_header(visible = False)
# one-line summary comment in the generated code; the trailing commas keep
# the following prints on the same output line (Python 2 print statement)
print "// ",
print " Arch: " + self.arch.name,
print " State type: " + self.state_type.get_name(),
print " Num buckets: %d" % self.num_buckets,
print " Domain: %d" % self.domain,
print " Stride: %d" % self.stride
print self.produce_common_declarations()
print
print "\tconst size_t tabSize = %d;" % self.table_size
# the reach table follows the 16-byte-rounded FDR header; confirm data
# follows the table
print """
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
const u32 * confBase = (const u32 *)(ft + tabSize);
"""
print self.produce_init_state()
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
# outer driver loop: decides how many bytes ('dist') to consume before the
# main loop and whether the main loop may run at all ('doMainLoop')
print """
while (ptr < buf + len) {
u8 doMainLoop = 1;
size_t remaining = len - (ptr - buf);
size_t dist;
if (remaining <= iterBytes) {
dist = remaining; // once through the switch and we're done
} else if (remaining < 2 * iterBytes) {
// nibble some stuff off the front, skip the main loop,
// then come back here
dist = iterBytes; // maybe could be cleverer
} else {
// now, we need to see if we can make it to a main loop iteration
// if so, we need to ensure that the main loop iteration is aligned
// to a START_MOD boundary and i >= 8 so we can read ptr + i - 8
// see if we can do it - if not, just switch the main loop off,
// eat iterBytes in cautious mode, and come back to this loop
const u8 * target = MAX(buf + 8, ptr);
target = ROUNDUP_PTR(target, START_MOD);
dist = target - ptr;
if (dist > iterBytes) {
doMainLoop = 0;
dist = iterBytes;
}
}
"""
# the switch variant is emitted first, then the non-switch variant
self.produce_main_loop(switch_variant = True)
self.produce_main_loop(switch_variant = False)
# closes the "while (ptr < buf + len)" loop opened above
print """
}
"""
print self.produce_footer()
def get_name(self):
    # Name of the generated C function, built from the architecture name,
    # domain, stride and state width of this matcher.
    parts = (self.arch.name, self.domain, self.stride, self.state_width)
    return "fdr_exec_%s_d%d_s%d_w%d" % parts
def __init__(self, state_width, domain, stride,
             arch,
             table_state_width = None,
             num_buckets = 8,
             extract_frequency = None,
             confirm_frequency = None):
    # Validate the matcher parameters and derive every value the code
    # generator needs. Invalid combinations are reported through fail_out().
    #
    #   state_width        width in bits of the state value (only 128)
    #   domain             bits of input used to index the reach table (8..16)
    #   stride             bytes between reach-table accesses (1, 2, 4 or 8)
    #   arch               architecture description; stored as self.arch
    #   table_state_width  accepted for interface compatibility; the value is
    #                      overwritten with state_width below
    #   num_buckets        number of buckets (only 8)
    #   extract_frequency  bytes between state extractions; None selects
    #                      datasize_bytes
    #   confirm_frequency  bytes between confirms; None selects
    #                      extract_frequency

    # First - set up the values that are fundamental to how this matcher will operate
    self.arch = arch

    # get the width of the state width on which we operate internally
    if state_width not in [ 128 ]:
        fail_out("Unknown state width: %d" % state_width)
    self.state_width = state_width
    self.state_type = getRequiredType(self.state_width)
    self.state_variable = IntegerVariable("s", self.state_type)

    # the table always uses the state width; the argument is not consulted
    table_state_width = state_width
    self.table_state_width = state_width
    self.table_state_type = getRequiredType(self.table_state_width)

    # domain is the number of bits that we draw from our input to
    # index our 'reach' table
    if not 8 <= domain <= 16:
        fail_out("Unsupported domain: %d" % domain)
    self.domain = domain
    # this is the load type required for this domain if we want to
    # load it one at a time
    self.single_load_type = getRequiredType(self.domain)

    # table size
    self.table_size = 2**domain * table_state_width // 8

    # stride is the frequency with which we make data-driven
    # accesses to our reach table
    if stride not in [ 1, 2, 4, 8]:
        fail_out("Unsupported stride: %d" % stride)
    if stride * num_buckets > state_width:
        fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
    self.stride = stride

    if num_buckets != 8:
        fail_out("Unsupported number of buckets: %d" % num_buckets)
    if state_width % num_buckets and state_width == 128:
        fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
    self.num_buckets = num_buckets

    # Second - set up derived or optimization values - these can be
    # overridden by arguments that are passed in
    self.datasize = 64
    self.bulk_load_type = IntegerType(self.datasize)
    # floor division, as used for table_size above
    self.datasize_bytes = self.datasize // 8
    self.value_extract_type = IntegerType(self.datasize)

    self.fdr2_force_naive_load = False # disable everywhere for trunk

    # extract frequency is how frequently (in bytes) we destructively shift
    # our state value after having pulled out that many bytes into a
    # confirm register (of one sort or another).
    # none means a default value - datasize, our biggest easily available GPR
    if extract_frequency is None:
        extract_frequency = self.datasize_bytes
    self.extract_frequency = extract_frequency
    self.extract_size = self.extract_frequency*self.num_buckets
    if extract_frequency < stride:
        fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
    if extract_frequency not in [ None, 1, 2, 4, 8, 16]:
        fail_out("Weird extract frequency: %d" % extract_frequency)

    if self.extract_size <= 32:
        self.extr_type = IntegerType(32)
    elif self.extract_size <= 64:
        self.extr_type = IntegerType(64)
    else:
        # report the size that was actually computed (the previous code
        # referenced an undefined name here and raised NameError instead)
        fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

    # extract_frequency is how often we pull out our state and place
    # it somewhere in a lossless fashion
    # confirm_frequency, on the other hand, is how frequently we
    # take the state extracted by extract_frequency and cobble it
    # together into a matching loop
    # confirm_frequency must be a multiple of extract_frequency
    # and must fit into a fast register; for now; we're going to
    # stay in the GPR domain
    if confirm_frequency is None:
        confirm_frequency = self.extract_frequency
    self.confirm_frequency = confirm_frequency
    if confirm_frequency % self.extract_frequency:
        fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

    self.conf_size = self.confirm_frequency * self.num_buckets
    if self.conf_size <= 32:
        self.conf_type = IntegerType(32)
    elif self.conf_size <= 64:
        self.conf_type = IntegerType(64)
    else:
        fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

    # how many bytes in flight at once
    self.loop_bytes = 16

    # confirm configuration

    # how many entries in the top-level confirm table - 256 means
    # complete split on the last character
    self.conf_top_level_split = 256

    # how much we 'pull back' in confirm - this is obviously related
    # to the first level conf but we will keep two separate parameters
    # for this to avoid the risk of conflating these
    self.conf_pull_back = 1

    if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
        fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

    # minor stuff
    self.default_body_indent = 8

562
src/fdr/fdr_compile.cpp Normal file
View File

@ -0,0 +1,562 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: build API.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile.h"
#include "fdr_confirm.h"
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "teddy_compile.h"
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/dump_mask.h"
#include "util/target_info.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
using namespace std;
namespace ue2 {
namespace {
/**
 * \brief Builds an FDR bytecode structure from a set of literals.
 *
 * build() assigns the literals to buckets, fills in the table of masks
 * ('tab') and then lays out the final FDR structure. The object keeps
 * references to the literal vector and the engine description handed to the
 * constructor, so both must outlive it.
 */
class FDRCompiler : boost::noncopyable {
private:
const FDREngineDescription &eng; // engine description used for all sizes
vector<u8> tab; // table of masks, eng.getTabSizeBytes() bytes
const vector<hwlmLiteral> &lits; // literals being compiled
// literal indices (into lits) assigned to each bucket
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
bool make_small; // forwarded to setupFullMultiConfs()
// pointer to the mask bytes of one table entry
u8 *tabIndexToMask(u32 indexInTable);
void assignStringToBucket(LiteralIndex l, BucketIndex b);
void assignStringsToBuckets();
#ifdef DEBUG
void dumpMasks(const u8 *defaultMask);
#endif
void setupTab();
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
void createInitialState(FDR *fdr);
public:
// the table is sized up front from the engine description
FDRCompiler(const vector<hwlmLiteral> &lits_in,
const FDREngineDescription &eng_in, bool make_small_in)
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
make_small(make_small_in) {}
// runs the whole build; 'link' is an optional (pointer, size) blob that is
// copied to the end of the structure and freed by setupFDR()
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
};
/** \brief Pointer to the first mask byte of table entry \p indexInTable. */
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    assert(indexInTable < tab.size());
    // every entry occupies getSchemeWidth() bits
    const auto bytesPerEntry = eng.getSchemeWidth() / 8;
    u8 *base = &tab[0];
    return base + (indexInTable * bytesPerEntry);
}
/** \brief Set bit number \p bit in the byte array \p msk (bit 0 is the least
 * significant bit of msk[0]). */
static
void setbit(u8 *msk, u32 bit) {
    const u32 byteIdx = bit / 8;
    const u32 bitInByte = bit % 8;
    msk[byteIdx] |= 1U << bitInByte;
}
/** \brief Clear bit number \p bit in the byte array \p msk (bit 0 is the
 * least significant bit of msk[0]). */
static
void clearbit(u8 *msk, u32 bit) {
    const u32 byteIdx = bit / 8;
    const u32 bitInByte = bit % 8;
    msk[byteIdx] &= ~(1U << bitInByte);
}
/** \brief Byte-wise AND of \p a and \p b over \p num_bytes bytes, written to
 * \p dest. Each output byte only depends on the input bytes at the same
 * index. */
static
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
    u32 idx = 0;
    while (idx < num_bytes) {
        dest[idx] = a[idx] & b[idx];
        ++idx;
    }
}
/**
 * \brief Fill in the 'start' field of \p fdr.
 *
 * For every bucket, the scheme bits for positions 0 .. min_len - 2 are set,
 * where min_len is the length of the shortest literal in that bucket; the
 * remaining bits are left as they are.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    /* initial state should be 1 in each slot in the bucket up to bucket
     * minlen - 1, and 0 thereafter */
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Shortest literal in this bucket; stays at ~0U for an empty bucket.
        const vector<LiteralIndex> &members = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex &idx : members) {
            min_len = min(min_len, verify_u32(lits[idx].s.length()));
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        const u32 ones_limit = min_len - 1;
        for (PositionInBucket pos = 0; pos < eng.getBucketWidth(b); pos++) {
            if (pos >= ones_limit) {
                continue;
            }
            setbit(start, eng.getSchemeBit(b, pos));
        }
    }
}
/**
 * \brief Allocate the final FDR structure and copy all components into it.
 *
 * Layout, in order: FDR header (rounded up to 16 bytes), the table of masks,
 * the confirm data, the flood control data and finally the optional 'link'
 * blob. The temporary confirm and flood control buffers, and link.first if it
 * is non-null, are released with aligned_free() once copied.
 */
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
size_t tabSize = eng.getTabSizeBytes();
// both helpers return a (buffer, size) pair that this function frees below
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp =
setupFullMultiConfs(lits, eng, bucketToLits, make_small);
// every component is expected to be a multiple of 16 bytes in size
assert(ISALIGNED_16(tabSize));
assert(ISALIGNED_16(confirmTmp.second));
assert(ISALIGNED_16(floodControlTmp.second));
assert(ISALIGNED_16(link.second));
size_t headerSize = ROUNDUP_16(sizeof(FDR));
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second +
floodControlTmp.second + link.second);
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
"total=%zu\n",
headerSize, tabSize, confirmTmp.second, floodControlTmp.second,
size);
aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
assert(fdr); // otherwise would have thrown std::bad_alloc
fdr->size = size;
fdr->engineID = eng.getID();
fdr->maxStringLen = verify_u32(maxLen(lits));
createInitialState(fdr.get());
// 'ptr' walks through the structure as each component is appended
u8 *fdr_base = (u8 *)fdr.get();
u8 * ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
copy(tab.begin(), tab.end(), ptr);
ptr += tabSize;
memcpy(ptr, confirmTmp.first, confirmTmp.second);
ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
// offsets stored in the header are relative to the start of the structure
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
// a link offset of zero means that no link blob is present
if (link.first) {
fdr->link = verify_u32(ptr - fdr_base);
memcpy(ptr, link.first, link.second);
aligned_free(link.first);
} else {
fdr->link = 0;
}
return fdr;
}
/** \brief Append literal index \p l to the list kept for bucket \p b. */
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
    vector<LiteralIndex> &members = bucketToLits[b];
    members.push_back(l);
}
struct LitOrder {
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
bool operator()(const u32 &i1, const u32 &i2) const {
const string &i1s = vl[i1].s;
const string &i2s = vl[i2].s;
size_t len1 = i1s.size(), len2 = i2s.size();
if (len1 != len2) {
return len1 < len2;
} else {
string::const_reverse_iterator it1, it2;
tie(it1, it2) =
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
if (it1 == i1s.rend()) {
return false;
}
return *it1 < *it2;
}
}
private:
const vector<hwlmLiteral> &vl;
};
/**
 * \brief Cost of a group of \p count literals of length \p len; lower is
 * better. A length of zero yields the maximum possible score. Lengths above
 * 128 are treated as 128.
 */
static u64a getScoreUtil(u32 len, u32 count) {
    if (!len) {
        return (u64a)-1;
    }

    const u32 LEN_THRESH = 128;
    u32 elen = len;
    if (elen > LEN_THRESH) {
        elen = LEN_THRESH;
    }

    const u32 threshCubed = LEN_THRESH * LEN_THRESH * LEN_THRESH;
    const u32 lenCubed = elen * elen * elen;
    const u64a lenScore = threshCubed / lenCubed;

    // deemphasize count - possibly more than needed
    // this might be overkill in the other direction
    return count * lenScore;
}
//#define DEBUG_ASSIGNMENT
/**
 * \brief Distribute all literals over the engine's buckets.
 *
 * The literals are sorted with LitOrder and cut into at most CHUNK_MAX
 * contiguous chunks. A dynamic programme over (chunk, number of buckets)
 * then picks the chunk boundaries at which each bucket starts so that the
 * sum of getScoreUtil() over the buckets is minimised. The result is written
 * to bucketToLits via assignStringToBucket().
 *
 * With no literals there is nothing to assign and the function returns
 * immediately.
 */
void FDRCompiler::assignStringsToBuckets() {
    typedef u64a SCORE; // 'Score' type
    const SCORE MAX_SCORE = (SCORE)-1;
    const u32 CHUNK_MAX = 512;
    const u32 BUCKET_MAX = 16;
    typedef pair<SCORE, u32> SCORE_INDEX_PAIR;

    u32 ls = verify_u32(lits.size());
    if (!ls) {
        // No chunk would ever be opened below, and the chunk bookkeeping
        // would then index count[] with (u32)-1.
        return;
    }

    // make a vector that contains our literals as pointers or u32 LiteralIndex values
    vector<LiteralIndex> vli;
    vli.resize(ls);
    map<u32, u32> lenCounts;
    for (LiteralIndex l = 0; l < ls; l++) {
        vli[l] = l;
        lenCounts[lits[l].s.size()]++;
    }

    // sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
    stable_sort(vli.begin(), vli.end(), LitOrder(lits));

#ifdef DEBUG_ASSIGNMENT
    for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
         i != e; ++i) {
        printf("l<%d>:%d ", i->first, i->second);
    }
    printf("\n");
#endif

    // TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
    // yes - after we factor this out and merge in the Teddy style of building we can look
    // at this, although the teddy merge modelling is quite different. It's still probably
    // adaptable to some extent for this class of problem

    u32 firstIds[CHUNK_MAX]; // index in vli of the first literal of this chunk
                             // (the entry after the last chunk holds the end bound)
    u32 count[CHUNK_MAX]; // how many are in this chunk
    u32 length[CHUNK_MAX]; // how long things in the chunk are

    const u32 MAX_CONSIDERED_LENGTH = 16;
    u32 currentChunk = 0;
    u32 currentSize = 0;
    u32 chunkStartID = 0;
    u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;

    // A new chunk starts whenever the literal length changes (for lengths
    // below MAX_CONSIDERED_LENGTH) or the current chunk is full.
    for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
        LiteralIndex l = vli[i];
        if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
            (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
            currentSize = lits[l].s.size();
            if (currentChunk) {
                count[currentChunk - 1 ] = i - chunkStartID;
            }
            chunkStartID = firstIds[currentChunk] = i;
            length[currentChunk] = currentSize;
            currentChunk++;
        }
    }
    // at least one chunk must exist for the index below to be valid
    assert(currentChunk > 0);
    count[currentChunk - 1] = ls - chunkStartID;
    // close off chunks with an empty row
    firstIds[currentChunk] = ls;
    length[currentChunk] = 0;
    count[currentChunk] = 0;
    u32 nChunks = currentChunk + 1;

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < nChunks; j++) {
        printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
    }
#endif

    // t[j][i]: best score for placing chunks j.. into i + 1 buckets, paired
    // with the chunk index at which the following bucket starts
    SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
    u32 nb = eng.getNumBuckets();

    // base case: everything from chunk j onwards goes into a single bucket
    for (u32 j = 0; j < nChunks; j++) {
        u32 cnt = 0;
        for (u32 k = j; k < nChunks; ++k) {
            cnt += count[k];
        }
        t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
    }

    for (u32 i = 1; i < nb; i++) {
        for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
            SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
            u32 cnt = count[j];
            for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
                SCORE score = getScoreUtil(length[j], cnt);
                if (score > best.first) {
                    break; // if we're now worse locally than our best score, give up
                }
                score += t[k][i-1].first;
                if (score < best.first) {
                    best = make_pair(score, k);
                }
            }
            t[j][i] = best;
        }
        t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
    }

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < nChunks; j++) {
        for (u32 i = 0; i < nb; i++) {
            SCORE_INDEX_PAIR v = t[j][i];
            printf("<%7lld,%3d>", v.first, v.second);
        }
        printf("\n");
    }
#endif

    // our best score is in t[0][nb - 1] and we can follow the links
    // to find where our buckets should start and what goes into them
    for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
        u32 j = t[i][n - 1].second;
        if (j == 0) {
            // a link of zero means "everything that is left"
            j = nChunks - 1;
        }

        // put chunks between i - j into bucket (NBUCKETS-1) - n
#ifdef DEBUG_ASSIGNMENT
        printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
               nb - n);
#endif
        for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
            assignStringToBucket((LiteralIndex)vli[k], nb - n);
        }
        i = j;
    }
}
#ifdef DEBUG
/** \brief Debug helper: print \p defaultMask and every table entry whose mask
 * differs from it. */
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
    const size_t width = eng.getSchemeWidth();
    printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());

    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        const u8 *entry = tabIndexToMask(i);
        if (!memcmp(entry, defaultMask, width / 8)) {
            continue; // same as the default, not worth printing
        }
        printf("tab %04x: %s\n", i, dumpMask(entry, width).c_str());
    }
}
#endif
static
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
const vector<LiteralIndex> &vl,
const vector<hwlmLiteral> &lits,
SuffixPositionInString pos,
std::map<u32, ue2::unordered_set<u32> > &m2) {
u32 distance = 0;
if (eng.bits <= 8) {
distance = 1;
} else if (eng.bits <= 16) {
distance = 2;
} else if (eng.bits <= 32) {
distance = 4;
}
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
i != e; ++i) {
if (e - i > 5) {
__builtin_prefetch(&lits[*(i + 5)]);
}
const hwlmLiteral &lit = lits[*i];
const size_t sz = lit.s.size();
u32 mask = 0;
u32 dontCares = 0;
for (u32 cnt = 0; cnt < distance; cnt++) {
int newPos = pos - cnt;
u8 dontCareByte = 0x0;
u8 maskByte = 0x0;
if (newPos < 0 || ((u32)newPos >= sz)) {
dontCareByte = 0xff;
} else {
u8 c = lit.s[sz - newPos - 1];
maskByte = c;
u32 remainder = eng.bits - cnt * 8;
assert(remainder != 0);
if (remainder < 8) {
u8 cmask = (1U << remainder) - 1;
maskByte &= cmask;
dontCareByte |= ~cmask;
}
if (lit.nocase && ourisalpha(c)) {
maskByte &= 0xdf;
dontCareByte |= 0x20;
}
}
u32 loc = cnt * 8;
mask |= maskByte << loc;
dontCares |= dontCareByte << loc;
}
// truncate m and dc down to nBits
mask &= (1U << eng.bits) - 1;
dontCares &= (1U << eng.bits) - 1;
if (dontCares == ((1U << eng.bits) - 1)) {
return true;
}
m2[dontCares].insert(mask);
}
return false;
}
/**
 * \brief Fill in the table of masks ('tab').
 *
 * Every entry starts with all bits set. For each bucket and suffix position,
 * the scheme bit is cleared in every table entry whose index is compatible
 * with one of the bucket's literals at that position. If some literal is
 * entirely don't-care at a position, the bit is instead cleared in the
 * default mask, which is ANDed into every entry at the end.
 */
void FDRCompiler::setupTab() {
const size_t mask_size = eng.getSchemeWidth() / 8;
assert(mask_size);
// start with every bit set in every table entry
vector<u8> defaultMask(mask_size, 0xff);
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
}
// maps a don't-care bit mask to the set of values seen with that mask
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
const vector<LiteralIndex> &vl = bucketToLits[b];
SuffixPositionInString pLimit = eng.getBucketWidth(b);
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
u32 bit = eng.getSchemeBit(b, pos);
M2SET m2;
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
if (done) {
// some literal accepts any input here: clear the bit for all entries
clearbit(&defaultMask[0], bit);
continue;
}
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
++i) {
u32 dc = i->first;
const ue2::unordered_set<u32> &mskSet = i->second;
// Enumerate every subset of the don't-care bits: v always has all
// non-dc bits set, so adding the lowest dc bit carries through them
// and steps (v & dc) to the next subset; the loop ends when the
// subset wraps back to empty.
u32 v = ~dc;
do {
u32 b2 = v & dc;
for (ue2::unordered_set<u32>::const_iterator
i2 = mskSet.begin(),
e2 = mskSet.end();
i2 != e2; ++i2) {
// fixed bits from the literal, don't-care bits from the subset
u32 val = (*i2 & ~dc) | b2;
clearbit(tabIndexToMask(val), bit);
}
v = (v + (dc & -dc)) | ~dc;
} while (v != ~dc);
}
}
}
// apply the bits cleared in the default mask to every entry
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
u8 *m = tabIndexToMask(i);
andMask(m, m, &defaultMask[0], mask_size);
}
#ifdef DEBUG
dumpMasks(&defaultMask[0]);
#endif
}
/**
 * \brief Run the full build: bucket assignment, table construction and final
 * layout. \p link is passed through to setupFDR().
 */
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
    // the order matters: the table and the layout both read bucketToLits
    assignStringsToBuckets();
    setupTab();
    aligned_unique_ptr<FDR> fdr = setupFDR(link);
    return fdr;
}
} // namespace
/**
 * \brief Common implementation behind fdrBuildTable() and
 * fdrBuildTableHinted().
 *
 * If \p stream_control is given, a streaming link table is built first. When
 * grey.fdrAllowTeddy is set, a Teddy build is attempted and its result
 * returned on success. Otherwise an FDR engine is selected (automatically
 * when \p hint is HINT_INVALID, else from the hint) and the table is built
 * with FDRCompiler.
 *
 * \return the built structure, or nullptr if no engine description could be
 * obtained.
 */
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
                      const target_t &target, const Grey &grey, u32 hint,
                      hwlmStreamingControl *stream_control) {
    pair<u8 *, size_t> link(nullptr, 0);
    if (stream_control) {
        link = fdrBuildTableStreaming(lits, stream_control);
    }

    DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");

    if (grey.fdrAllowTeddy) {
        aligned_unique_ptr<FDR> fdr
            = teddyBuildTableHinted(lits, make_small, hint, target, link);
        if (fdr) {
            DEBUG_PRINTF("build with teddy succeeded\n");
            return fdr;
        } else {
            DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
        }
    }

    const unique_ptr<FDREngineDescription> des =
        (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
                               : getFdrDescription(hint);

    if (!des) {
        // The link buffer is normally released by the builder that consumes
        // it; on this path nobody has taken it, so free it here.
        if (link.first) {
            aligned_free(link.first);
        }
        return nullptr;
    }

    FDRCompiler fc(lits, *des, make_small);
    return fc.build(link);
}
/**
 * \brief Build an FDR (or Teddy) table for \p lits, letting the builder pick
 * the engine: forwards to fdrBuildTableInternal() with HINT_INVALID.
 */
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
bool make_small, const target_t &target,
const Grey &grey,
hwlmStreamingControl *stream_control) {
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID,
stream_control);
}
#if !defined(RELEASE_BUILD)

/**
 * \brief Non-release variant of fdrBuildTable() that forwards an explicit
 * engine \p hint to fdrBuildTableInternal().
 */
aligned_unique_ptr<FDR>
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
                    const target_t &target, const Grey &grey,
                    hwlmStreamingControl *stream_control) {
    // the streaming link table is created inside fdrBuildTableInternal()
    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
                                 stream_control);
}

#endif
} // namespace ue2
// FIXME: should be compile-time only
/** \brief Returns the 'size' field stored in the header of \p fdr, which must
 * not be null. */
size_t fdrSize(const FDR *fdr) {
assert(fdr);
return fdr->size;
}

66
src/fdr/fdr_compile.h Normal file
View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: build API.
*/
#ifndef FDR_COMPILE_H
#define FDR_COMPILE_H
#include "ue2common.h"
#include "util/alloc.h"
#include <vector>
struct FDR;
namespace ue2 {
struct hwlmLiteral;
struct hwlmStreamingControl;
struct Grey;
struct target_t;
/**
 * \brief Build a literal matcher table for \p lits with automatic engine
 * selection.
 *
 * \param stream_control optional; when non-null a streaming table is built
 *        and linked into the result.
 */
ue2::aligned_unique_ptr<FDR>
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey,
hwlmStreamingControl *stream_control = nullptr);
#if !defined(RELEASE_BUILD)
/**
 * \brief As fdrBuildTable(), but with an explicit engine \p hint. Only
 * available when RELEASE_BUILD is not defined.
 */
ue2::aligned_unique_ptr<FDR>
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target, const Grey &grey,
hwlmStreamingControl *stream_control = nullptr);
#endif
} // namespace ue2
#endif

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_COMPILE_INTERNAL_H
#define FDR_COMPILE_INTERNAL_H
#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
#include <map>
#include <utility>
#include <vector>
struct FDRConfirm;
struct LitInfo;
namespace ue2 {
// a pile of decorative typedefs
// good for documentation purposes more than anything else
typedef u32 LiteralIndex;
typedef u32 ConfirmIndex;
typedef u32 SuffixPositionInString; // zero is last byte, counting back
// into the string
typedef u32 BucketIndex;
typedef u32 SchemeBitIndex;
typedef u32 PositionInBucket; // zero is "we are matching right now!",
// counting towards future matches
class EngineDescription;
class FDREngineDescription;
struct hwlmStreamingControl;
// NOTE(review): presumably builds one FDRConfirm for 'lits', stores it in
// *fdrc_p and returns its size in bytes -- confirm against the definition.
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool make_small);
// Returns a (buffer, size in bytes) pair holding the confirm data for all
// buckets. The FDR builder copies the buffer into the final structure and
// releases it with aligned_free().
std::pair<u8 *, size_t> setupFullMultiConfs(
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
bool make_small);
// all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our
// state; if we don't have a flood that's long enough we won't be in the
// right state yet to allow blindly advancing
//
// Returns a (buffer, size in bytes) pair; the FDR builder copies the buffer
// into the final structure and releases it with aligned_free().
std::pair<u8 *, size_t>
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng);
// Returns the (buffer, size in bytes) "link" blob that the FDR builder
// appends to the end of the final structure.
std::pair<u8 *, size_t>
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control);
// hint value meaning "no hint given": the engine is chosen automatically
static constexpr u32 HINT_INVALID = 0xffffffff;
// fdr_compile_util.cpp utilities
// length of the longest literal (0 for an empty vector)
size_t maxLen(const std::vector<hwlmLiteral> &lits);
// length of the shortest literal ((size_t)-1 for an empty vector); *count
// receives the number of literals that have that length
size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
// absolute difference |i - j|
u32 absdiff(u32 i, u32 j);
} // namespace ue2
#endif

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_literal.h"
#include <algorithm>
#include <vector>
using namespace std;
namespace ue2 {
size_t maxLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
for (const auto &lit : lits) {
rv = max(rv, lit.s.size());
}
return rv;
}
size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
size_t rv = (size_t)-1;
*count = 0;
for (const auto &lit : lits) {
if (lit.s.size() < rv) {
rv = lit.s.size();
*count = 1;
} else if (lit.s.size() == rv) {
(*count)++;
}
}
return rv;
}
/** \brief Absolute difference between \p i and \p j. */
u32 absdiff(u32 i, u32 j) {
    if (i > j) {
        return i - j;
    }
    return j - i;
}
} // namespace ue2

100
src/fdr/fdr_confirm.h Normal file
View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_CONFIRM_H
#define FDR_CONFIRM_H
#include "ue2common.h"
#include "hwlm/hwlm.h"
/**
 * \brief Multiplicative hash of a 64-bit value down to \p nBits bits.
 *
 * The input is masked with \p andmsk, multiplied by \p mult and the top
 * \p nBits bits of the 64-bit product are returned.
 *
 * NOTE(review): \p nBits == 0 would make the shift count equal to the width
 * of u64a; callers visible here always pass a non-zero bit count.
 */
static really_inline
u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
    const u64a masked = lv & andmsk;
    const u64a product = masked * mult;
    return product >> (sizeof(u64a) * 8 - nBits);
}
// data structures
// TODO: fix this hard-coding
// CONF_TYPE is the integer type holding the confirm value/mask of a literal
// (LitInfo::v, LitInfo::msk) and the hash parameters in FDRConfirm.
#define CONF_TYPE u64a
// CONF_HASH_CALL is the hash function applied to a CONF_TYPE value to select
// an entry in the confirm structure's lit index array.
#define CONF_HASH_CALL mul_hash_64
/** \brief Bit flags stored in LitInfo::flags. */
typedef enum LitInfoFlags {
// no special handling
NoFlags = 0,
// literal is matched case-insensitively
Caseless = 1,
// do not report this literal again if it was also the last match reported
NoRepeat = 2,
// literal carries a mask longer than its string (see LitInfo::extended_size)
ComplexConfirm = 4
} LitInfoFlags;
/**
* \brief Structure describing a literal, linked to by FDRConfirm.
*
* This structure is followed in memory by a variable-sized string prefix at
* LitInfo::s, for strings that are longer than CONF_TYPE.
*/
struct LitInfo {
// expected value of the last sizeof(CONF_TYPE) bytes, after masking with msk
CONF_TYPE v;
// mask applied to the loaded bytes before comparing against v
CONF_TYPE msk;
// groups this literal belongs to; checked against the caller's control word
hwlm_group_t groups;
// length of the literal string in bytes
u32 size;
u32 id; // literal ID as passed in
u8 flags; /* LitInfoFlags */
// 0 for the last literal in a hash chain; otherwise an adjustment such that
// the next LitInfo lives at (this address + next + size)
u8 next;
// length of the literal's mask; only written when ComplexConfirm is set
u8 extended_size;
u8 s[1]; // literal prefix, which continues "beyond" this struct.
};
// Set in FDRConfirm::flags when the structure describes a single literal
// that needs no hashed confirm step.
#define FDRC_FLAG_NO_CONFIRM 1
/**
* \brief FDR confirm header.
*
* This structure is followed in memory by:
*
* -# lit index mapping (array of u32)
* -# list of LitInfo structures
*/
struct FDRConfirm {
// 'and' of the masks of all literals in this structure; applied before hashing
CONF_TYPE andmsk;
// hash multiplier; the runtime treats a zero value as "no full confirm needed"
CONF_TYPE mult;
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
// NOTE(review): the compiler also ORs NoRepeat into flags and the runtime
// tests for it, so the "sole meaning" remark above looks out of date.
// union of the groups of all literals in this structure
hwlm_group_t groups;
// no-confirm case only: literal length minus one
u32 soleLitSize;
// no-confirm case only: expected bytes of the literal, last byte in the top
// byte of the word
u32 soleLitCmp;
// no-confirm case only: mask applied to the input bytes before comparing
// against soleLitCmp
u32 soleLitMsk;
};
static really_inline
const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
const u8 *base = (const u8 *)fdrc;
const u32 *litIndex =
(const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
assert(ISALIGNED(litIndex));
return litIndex;
}
#endif // FDR_CONFIRM_H

View File

@ -0,0 +1,479 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cstring>
#include <set>
using namespace std;
namespace ue2 {
// Value of the top-level confirm split: a literal's final byte masked with
// (confirmTopLevelSplit - 1).
typedef u8 ConfSplitType;
// Key identifying one confirm structure: (bucket, split value).
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
// Maps each (bucket, split) key to its built FDRConfirm and that structure's
// size in bytes.
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
// Sum, over all strings in lits, of the number of bytes by which each string
// exceeds the length threshold; each per-string excess is rounded up to a
// multiple of 8 before being added.
static
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
    size_t total = 0;
    for (const auto &lit : lits) {
        const size_t len = lit.s.size();
        if (len <= threshold) {
            continue; // fits within the threshold, contributes nothing
        }
        total += ROUNDUP_N(len - threshold, 8);
    }
    return total;
}
/**
 * \brief Pack a byte vector of at most sizeof(u64a) bytes into a u64a.
 *
 * The bytes are copied, in order, into the highest-addressed bytes of the
 * result; any remaining lower-addressed bytes stay zero. An empty vector
 * yields 0.
 *
 * \throws std::exception if \p v holds more than sizeof(u64a) bytes.
 */
static
u64a make_u64a_mask(const vector<u8> &v) {
    assert(v.size() <= sizeof(u64a));
    if (v.size() > sizeof(u64a)) {
        throw std::exception();
    }

    u64a mask = 0;
    const size_t len = v.size(); // <= sizeof(mask) after the check above
    if (len) {
        // Only touch the vector's storage when it has elements: indexing
        // element 0 of an empty vector is undefined behaviour.
        unsigned char *m = (unsigned char *)&mask;
        memcpy(m + sizeof(mask) - len, v.data(), len);
    }
    return mask;
}
/**
* Build a temporary vector of LitInfo structures (without the corresponding
* pointers to the actual strings; these cannot be laid out yet). These
* stay in 1:1 correspondence with the lits[] vector as that's the only
* place we have to obtain our full strings.
*/
static
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
CONF_TYPE &andmsk) {
const CONF_TYPE all_ones = ~(u64a)0;
andmsk = all_ones; // fill in with 'and' of all literal masks
for (LiteralIndex i = 0; i < lits.size(); i++) {
const hwlmLiteral &lit = lits[i];
LitInfo &info = tmpLitInfo[i];
memset(&info, 0, sizeof(info));
info.id = lit.id;
u8 flags = NoFlags;
if (lit.nocase) {
flags |= Caseless;
}
if (lit.noruns) {
flags |= NoRepeat;
}
if (lit.msk.size() > lit.s.size()) {
flags |= ComplexConfirm;
info.extended_size = verify_u8(lit.msk.size());
}
info.flags = flags;
info.size = verify_u32(lit.s.size());
info.groups = lit.groups;
// these are built up assuming a LE machine
CONF_TYPE msk = all_ones;
CONF_TYPE val = 0;
for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
if (j >= lit.s.size()) {
msk &= ~((CONF_TYPE)0xff << shiftLoc);
} else {
u8 c = lit.s[lit.s.size() - j - 1];
if (lit.nocase && ourisalpha(c)) {
msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
} else {
val |= (CONF_TYPE)c << shiftLoc;
}
}
}
info.v = val;
info.msk = msk;
if (!lit.msk.empty()) {
u64a l_msk = make_u64a_mask(lit.msk);
u64a l_cmp = make_u64a_mask(lit.cmp);
// test for consistency - if there's intersection, then v and msk
// values must line up
UNUSED u64a intersection = l_msk & info.msk;
assert((info.v & intersection) == (l_cmp & intersection));
// incorporate lit.msk, lit.cmp into v and msk
info.msk |= l_msk;
info.v |= l_cmp;
}
andmsk &= info.msk;
}
}
//#define FDR_CONFIRM_DUMP 1
/**
 * Build one FDRConfirm structure (header, lit index array and LitInfo chain
 * data) for the given set of literals.
 *
 * \param lits            literals to be confirmed by this structure; the
 *                        no-confirm path reads lits[0], so callers are
 *                        expected to pass a non-empty vector.
 * \param fdrc_p          output: the newly allocated structure (allocated with
 *                        aligned_zmalloc; the caller owns it).
 * \param applyOneCharOpt allow the no-confirm form for a single literal with
 *                        an empty string and no mask.
 * \param make_small      use fewer hash bits (smaller lit index array).
 * \param make_confirm    when false, always build the no-confirm form.
 * \return the number of bytes actually used, rounded up to
 *         alignof(FDRConfirm).
 */
static
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool applyOneCharOpt, bool make_small, bool make_confirm) {
vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk);
#ifdef FDR_CONFIRM_DUMP
printf("-------------------\n");
#endif
// just magic numbers and crude measures for now
u32 nBits;
if (make_small) {
nBits = min(10U, lg2(lits.size()) + 1);
} else {
nBits = min(13U, lg2(lits.size()) + 4);
}
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
u32 flags = 0;
// we use next three variables for 'confirmless' case to speed-up
// confirmation process
u32 soleLitSize = 0;
u32 soleLitCmp = 0;
u32 soleLitMsk = 0;
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
lits[0].msk.empty()) || make_confirm == false) {
flags = FDRC_FLAG_NO_CONFIRM;
if (lits[0].noruns) {
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
}
// a zero multiplier is what the runtime tests to pick the confirmless path
mult = 0;
// NOTE(review): when this branch is entered via the one-char optimisation
// the string is empty, so size() - 1 wraps; confirm that no runtime path
// reads soleLitSize in that case.
soleLitSize = lits[0].s.size() - 1;
// we can get to this point only in confirmless case;
// it means that we have only one literal per FDRConfirm (no packing),
// with no literal mask and size of literal is less or equal
// to the number of masks of Teddy engine;
// maximum number of masks for Teddy is 4, so the size of
// literal is definitely less or equal to size of u32
assert(lits[0].s.size() <= sizeof(u32));
// pack the literal into a u32 with its last character in the top byte
for (u32 i = 0; i < lits[0].s.size(); i++) {
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
u8 c = lits[0].s[lits[0].s.size() - i - 1];
if (lits[0].nocase && ourisalpha(c)) {
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
}
else {
soleLitCmp |= (u32)c << shiftLoc;
soleLitMsk |= (u32)0xff << shiftLoc;
}
}
}
// we can walk the vector and assign elements from the vectors to a
// map by hash value
map<u32, vector<LiteralIndex> > res2lits;
hwlm_group_t gm = 0;
for (LiteralIndex i = 0; i < lits.size(); i++) {
LitInfo & li = tmpLitInfo[i];
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
res2lits[hash].push_back(i);
gm |= li.groups;
}
#ifdef FDR_CONFIRM_DUMP
// print out the literals reversed - makes it easier to line up analyses
// that are end-offset based
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
e = res2lits.end(); i != e; ++i) {
u32 hash = i->first;
vector<LiteralIndex> & vlidx = i->second;
if (vlidx.size() > 1) {
printf("%x -> %zu literals\n", hash, vlidx.size());
u32 min_len = lits[vlidx.front()].s.size();
vector<set<u8> > vsl; // contains the set of chars at each location
// reversed from the end
vsl.resize(1024);
u32 total_string_size = 0;
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
total_string_size += lits[litIdx].s.size();
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
}
min_len = MIN(min_len, lits[litIdx].s.size());
}
printf("common ");
for (u32 j = 0; j < min_len; j++) {
if (vsl[j].size() == 1) {
printf("%02x", (u32)*vsl[j].begin());
} else {
printf("__");
}
}
printf("\n");
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
u32 dist_from_end = lits[litIdx].s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
} else {
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
}
}
printf("\n");
}
u32 total_compares = 0;
for (u32 j = 0; j < 1024; j++) { // naughty
total_compares += vsl[j].size();
}
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
}
}
#endif
// one u32 slot per possible hash value
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
// this size can now be a worst-case as we can always be a bit smaller
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
sizeof(LitInfo) * lits.size() + totalLitSize;
size = ROUNDUP_N(size, alignof(FDRConfirm));
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
assert(fdrc); // otherwise would have thrown std::bad_alloc
fdrc->andmsk = andmsk;
fdrc->mult = mult;
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
fdrc->flags = flags;
fdrc->soleLitSize = soleLitSize;
fdrc->soleLitCmp = soleLitCmp;
fdrc->soleLitMsk = soleLitMsk;
fdrc->groups = gm;
// After the FDRConfirm, we have the lit index array.
u8 *fdrc_base = (u8 *)fdrc;
u8 *ptr = fdrc_base + sizeof(*fdrc);
ptr = ROUNDUP_PTR(ptr, alignof(u32));
u32 *bitsToLitIndex = (u32 *)ptr;
ptr += bitsToLitIndexSize;
// After the lit index array, we have the LitInfo structures themselves,
// which vary in size (as each may have a variable-length string after it).
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
// Walk the map by hash value assigning indexes and laying out the
// elements (and their associated string confirm material) in memory.
for (std::map<u32, vector<LiteralIndex> >::const_iterator
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
const u32 hash = i->first;
const vector<LiteralIndex> &vlidx = i->second;
// index entry is the byte offset of this chain's first LitInfo from the
// start of the FDRConfirm; entries for unused hashes are never written
// (presumably left zero by aligned_zmalloc - the runtime treats 0 as empty)
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
// Write LitInfo header.
u8 *oldPtr = ptr;
LitInfo &finalLI = *(LitInfo *)ptr;
finalLI = tmpLitInfo[litIdx];
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
// Write literal prefix (everything before the last N characters,
// as the last N are already confirmed).
const string &t = lits[litIdx].s;
if (t.size() > sizeof(CONF_TYPE)) {
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
ptr = &finalLI.s[0] + prefix_len;
}
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (i2 + 1 == e2) {
// last literal in this hash chain
finalLI.next = 0x0;
} else {
// our next field represents an adjustment on top of
// current address + the actual size of the literal
// so we track any rounding up done for alignment and
// add this in - that way we don't have to use bigger
// than a u8 (for now)
assert((size_t)(ptr - oldPtr) > t.size());
finalLI.next = verify_u8(ptr - oldPtr - t.size());
}
}
assert((size_t)(ptr - fdrc_base) <= size);
}
*fdrc_p = fdrc;
// Return actual used size, not worst-case size. Must be rounded up to
// FDRConfirm alignment so that the caller can lay out a sequence of these.
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
alignof(FDRConfirm));
assert(actual_size <= size);
return actual_size;
}
/**
 * Build one FDRConfirm per non-empty (bucket, split value) pair.
 *
 * For every bucket, the literals are distributed over
 * eng.getConfirmTopLevelSplit() sub-lists keyed by their last character
 * masked with (split - 1), and each non-empty sub-list is handed to
 * getFDRConfirm(). The resulting structures and their sizes are recorded in
 * bc2Conf.
 *
 * \return the sum of the sizes of all confirm structures built.
 */
static
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
                       const EngineDescription &eng, BC2CONF &bc2Conf,
                       map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
                       bool make_small) {
    const u32 pullBack = eng.getConfirmPullBackDistance();
    const u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
    const bool splitHasCase = splitMask & 0x20;

    // Teddy engines may be able to skip the confirm step entirely.
    bool makeConfirm = true;
    unique_ptr<TeddyEngineDescription> teddyDescr =
        getTeddyDescription(eng.getID());
    if (teddyDescr) {
        makeConfirm = teddyDescr->needConfirm(lits);
    }

    u32 totalConfirmSize = 0;
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        const vector<LiteralIndex> &bucket = bucketToLits[b];
        if (bucket.empty()) {
            continue;
        }

        vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
        for (const LiteralIndex lit_idx : bucket) {
            hwlmLiteral lit = lits[lit_idx]; // copy
            // c is last char of this literal
            u8 c = *(lit.s.rbegin());
            bool suppressSplit = false;

            if (pullBack) {
                // make a shorter string to work over if we're pulling back
                // getFDRConfirm doesn't know about that stuff
                assert(lit.s.size() >= pullBack);
                lit.s.resize(lit.s.size() - pullBack);

                u8 c_sub = 0;
                u8 c_sub_msk = 0;
                if (!lit.msk.empty()) {
                    c_sub = *(lit.cmp.rbegin());
                    c_sub_msk = *(lit.msk.rbegin());
                    // trim the pulled-back bytes off the mask as well
                    const size_t trimmed_len = lit.msk.size() -
                        min(lit.msk.size(), (size_t)pullBack);
                    lit.msk.resize(trimmed_len);
                    lit.cmp.resize(trimmed_len);
                }

                // if c_sub_msk is 0xff and lit.nocase
                // resteer 'c' to an exact value and set suppressSplit
                if ((c_sub_msk == 0xff) && (lit.nocase)) {
                    suppressSplit = true;
                    c = c_sub;
                }
            }

            const bool both_cases = !suppressSplit && splitHasCase &&
                                    lit.nocase && ourisalpha(c);
            if (both_cases) {
                // the split distinguishes case, so file the literal under
                // both the upper- and lower-case form of its last char
                vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
                vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
            } else {
                vl[c & splitMask].push_back(lit);
            }
        }

        for (u32 split = 0; split < eng.getConfirmTopLevelSplit(); split++) {
            if (vl[split].empty()) {
                continue;
            }
            DEBUG_PRINTF("b %d c %02x sz %zu\n", b, split, vl[split].size());
            FDRConfirm *fdrc;
            const size_t fdrc_size =
                getFDRConfirm(vl[split], &fdrc,
                              eng.typicallyHoldsOneCharLits(), make_small,
                              makeConfirm);
            const BucketSplitPair key = make_pair(b, split);
            bc2Conf[key] = make_pair(fdrc, fdrc_size);
            totalConfirmSize += fdrc_size;
        }
    }
    return totalConfirmSize;
}
/**
 * Build all confirm structures for the engine and pack them into a single
 * buffer.
 *
 * Layout of the returned buffer: a switch table of
 * (confirmTopLevelSplit * numBuckets) u32 offsets, indexed by
 * (split * numBuckets + bucket), followed by the FDRConfirm structures
 * themselves. Each table entry is the offset of the matching structure from
 * the start of the buffer.
 *
 * \return the buffer (allocated with aligned_zmalloc, owned by the caller)
 *         and its total size in bytes.
 */
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
        const EngineDescription &eng,
        map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
        bool make_small) {
    BC2CONF bc2Conf;
    const u32 totalConfirmSize =
        setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, make_small);

    const u32 primarySwitch = eng.getConfirmTopLevelSplit();
    const u32 nBuckets = eng.getNumBuckets();
    const u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
    const u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);

    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf;
    u8 *ptr = buf + totalConfSwitchSize;

    for (const auto &entry : bc2Conf) {
        FDRConfirm *fdrc = entry.second.first;
        const size_t fdrc_size = entry.second.second;

        // confirm offset is relative to the base of this structure, now
        const u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
        memcpy(ptr, fdrc, fdrc_size);
        ptr += fdrc_size;
        aligned_free(fdrc); // the temporary copy is no longer needed

        const BucketIndex b = entry.first.first;
        const u8 c = entry.first.second;
        const u32 idx = c * nBuckets + b;
        confBase[idx] = confirm_offset;
    }

    return make_pair(buf, totalSize);
}
} // namespace ue2

View File

@ -0,0 +1,244 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_CONFIRM_RUNTIME_H
#define FDR_CONFIRM_RUNTIME_H
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "hwlm/hwlm.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/compare.h"
// Loader used by confWithBit to fetch the CONF_TYPE value to confirm when the
// scan is not in a cautious zone or the load starts inside the buffer.
#define CONF_LOADVAL_CALL lv_u64a
// Loader used by confWithBit when the load would start before the beginning of
// the buffer; the caller then ORs in bytes taken from history.
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce
// this is ordinary confirmation function which runs through
// the whole confirmation procedure
//
// fdrc:           confirm structure to check against
// a:              runtime arguments (buffer, history, callback, context)
// i:              offset in a->buf of the candidate match end
// r:              caution reason; selects the plain or the cautious load
// pullBackAmount: number of bytes before i at which the confirmed bytes end
// control:        in/out group mask; updated with the callback's return value
// last_match:     in/out id of the last literal reported (for NoRepeat)
static really_inline
void confWithBit(const struct FDRConfirm * fdrc,
const struct FDR_Runtime_Args * a,
size_t i,
CautionReason r,
u32 pullBackAmount,
hwlmcb_rv_t *control,
u32 * last_match) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
const u8 * buf = a->buf;
const size_t len = a->len;
CONF_TYPE v;
// the 8-byte confirm window ends at offset (i - pullBackAmount)
const u8 * confirm_loc = buf + i - pullBackAmount - 7;
if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
} else { // r == VECTORING, confirm_loc < buf
u64a histBytes = a->histBytes;
v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
// stitch together v (which doesn't move) and history (which does)
u32 overhang = buf - confirm_loc;
histBytes >>= 64 - (overhang * 8);
v |= histBytes;
}
// hash the loaded value to pick a chain of candidate literals
u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
u32 start = getConfirmLitIndex(fdrc)[c];
// a zero index entry means no literal hashes to this value
if (P0(start)) {
const struct LitInfo *l =
(const struct LitInfo *)((const u8 *)fdrc + start);
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(l));
// cheap check first: masked comparison of the loaded bytes
if (P0( (v & l->msk) != l->v)) {
goto out;
}
if ((*last_match == l->id) && (l->flags & NoRepeat)) {
goto out;
}
// loc is where the literal would start in the buffer
const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
u8 caseless = l->flags & Caseless;
if (loc < buf) {
// literal would start before the buffer: part of it lies in history
u32 full_overhang = buf - loc;
const u8 * history = (caseless) ?
a->buf_history_nocase : a->buf_history;
size_t len_history = (caseless) ?
a->len_history_nocase : a->len_history;
// can't do a vectored confirm either if we don't have
// the bytes
if (full_overhang > len_history) {
goto out;
}
// as for the regular case, no need to do a full confirm if
// we're a short literal
if (unlikely(l->size > sizeof(CONF_TYPE))) {
// compare the stored prefix in two pieces: the part that falls in
// history (s1 vs loc1) and the part that falls in the buffer (s2 vs loc2)
const u8 * s1 = l->s;
const u8 * s2 = s1 + full_overhang;
const u8 * loc1 = history + len_history - full_overhang;
const u8 * loc2 = buf;
size_t size1 = MIN(full_overhang,
l->size - sizeof(CONF_TYPE));
size_t wind_size2_back = sizeof(CONF_TYPE) +
full_overhang;
size_t size2 = wind_size2_back > l->size ?
0 : l->size - wind_size2_back;
if (cmpForward(loc1, s1, size1, caseless)) {
goto out;
}
if (cmpForward(loc2, s2, size2, caseless)) {
goto out;
}
}
} else { // NON-VECTORING PATH
// if string < conf_type we don't need regular string cmp
if (unlikely(l->size > sizeof(CONF_TYPE))) {
if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
goto out;
}
}
}
// skip literals whose groups are all switched off in *control
if (P0(!(l->groups & *control))) {
goto out;
}
if (unlikely(l->flags & ComplexConfirm)) {
// the literal's mask reaches back extended_size bytes; reject if that
// would need more history than is available
const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = (caseless) ?
a->len_history_nocase : a->len_history;
if (full_overhang > len_history) {
goto out;
}
}
}
*last_match = l->id;
*control = a->cb(loc - buf, i, l->id, a->ctxt);
out:
// advance to the next LitInfo in this hash chain (next == 0 ends the chain)
oldNext = l->next; // oldNext is either 0 or an 'adjust' value
l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
} while (oldNext);
}
}
// Light-weight confirm used by 1-mask Teddy.
// A non-zero multiplier in the confirm structure means a real confirm is
// required, so the work is delegated to confWithBit (with no pull-back).
// Otherwise this is the confirmless case: the sole literal is reported
// directly through the callback with start == end == i, unless it is a
// NoRepeat literal that was also the last match reported.
static really_inline
void confWithBit1(const struct FDRConfirm * fdrc,
                  const struct FDR_Runtime_Args * a,
                  size_t i,
                  CautionReason r,
                  hwlmcb_rv_t *control,
                  u32 * last_match) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

    if (unlikely(fdrc->mult)) {
        confWithBit(fdrc, a, i, r, 0, control, last_match);
        return;
    }

    const u32 sole_id = fdrc->nBitsOrSoleID;
    if ((*last_match == sole_id) && (fdrc->flags & NoRepeat)) {
        return; // suppress back-to-back repeats of the same literal
    }

    *last_match = sole_id;
    *control = a->cb(i, i, sole_id, a->ctxt);
}
// Light-weight confirm used by 2-3-4-mask Teddy.
// Candidates ending before a->start_offset are ignored. When the confirm
// structure has a non-zero multiplier the full confWithBit procedure runs.
// Otherwise (confirmless case) the sole literal is reported directly; only
// when scanning cautiously (VECTORING) and the literal reaches back past
// start_offset is a 32-bit masked comparison done, using bytes from the
// buffer and, if the literal starts before the buffer, from histBytes.
static really_inline
void confWithBitMany(const struct FDRConfirm * fdrc,
                     const struct FDR_Runtime_Args * a,
                     size_t i,
                     CautionReason r,
                     hwlmcb_rv_t *control,
                     u32 * last_match) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

    if (i < a->start_offset) {
        return;
    }

    if (unlikely(fdrc->mult)) {
        confWithBit(fdrc, a, i, r, 0, control, last_match);
        return;
    }

    const u32 sole_id = fdrc->nBitsOrSoleID;
    // number of bytes the literal extends back from its final byte
    const u32 back_len = fdrc->soleLitSize;

    if ((*last_match == sole_id) && (fdrc->flags & NoRepeat)) {
        return;
    }

    if (r == VECTORING && back_len > i - a->start_offset) {
        if (back_len > (i + a->len_history)) {
            return; // not enough buffer + history to hold the literal
        }

        const int fits_in_buf = (back_len <= i);
        // bytes before position i that can be read from the buffer itself
        const u32 from_buf = fits_in_buf ? back_len : (u32)i;

        // assemble the window with the final byte in the top byte
        u32 window = (u32)a->buf[i] << 24;
        for (u32 j = 1; j <= from_buf; j++) {
            window |= (u32)a->buf[i - j] << (24 - (j * 8));
        }
        if (!fits_in_buf) {
            // the rest of the literal lies in history
            window |= (u32)(a->histBytes >> (40 + i * 8));
        }

        if ((fdrc->soleLitMsk & window) != fdrc->soleLitCmp) {
            return;
        }
    }

    *last_match = sole_id;
    *control = a->cb(i - back_len, i, sole_id, a->ctxt);
}
#endif

98
src/fdr/fdr_dump.cpp Normal file
View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_dump.h"
#include "fdr_engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include <cstdio>
#include <memory>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using std::unique_ptr;
namespace ue2 {
static
bool fdrIsTeddy(const FDR *fdr) {
assert(fdr);
u32 engine = fdr->engineID;
/* teddys don't have an fdr engine description (which is why the dump code
* is so broken). */
return !getFdrDescription(engine);
}
void fdrPrintStats(const FDR *fdr, FILE *f) {
const bool isTeddy = fdrIsTeddy(fdr);
if (isTeddy) {
fprintf(f, "TEDDY: %u\n", fdr->engineID);
} else {
fprintf(f, "FDR: %u\n", fdr->engineID);
}
if (isTeddy) {
unique_ptr<TeddyEngineDescription> des =
getTeddyDescription(fdr->engineID);
if (des) {
fprintf(f, " masks %u\n", des->numMasks);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
} else {
fprintf(f, " <unknown engine>\n");
}
} else {
unique_ptr<FDREngineDescription> des =
getFdrDescription(fdr->engineID);
if (des) {
fprintf(f, " stride %u\n", des->stride);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " width %u\n", des->schemeWidth);
} else {
fprintf(f, " <unknown engine>\n");
}
}
fprintf(f, " strings ???\n");
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
fprintf(f, " max length %u\n", fdr->maxStringLen);
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
}
} // namespace ue2

49
src/fdr/fdr_dump.h Normal file
View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: dump API.
*/
#ifndef FDR_DUMP_H
#define FDR_DUMP_H
#if defined(DUMP_SUPPORT)
#include <cstdio>
struct FDR;
namespace ue2 {
/**
 * \brief Print a human-readable summary of the given FDR matcher to \p f.
 *
 * Only declared when DUMP_SUPPORT is defined.
 */
void fdrPrintStats(const struct FDR *fdr, FILE *f);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif // FDR_DUMP_H

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "hs_compile.h"
#include "util/target_info.h"
#include "util/compare.h" // for ourisalpha()
#include "util/make_unique.h"
#include <cassert>
#include <cstdlib>
#include <map>
#include <string>
using namespace std;
namespace ue2 {
#include "fdr_autogen_compiler.cpp"
/**
 * \brief Build an engine description from a static engine definition.
 *
 * The common fields (ID, target derived from the required CPU features,
 * bucket count, confirm pull-back distance and confirm top-level split) go to
 * the EngineDescription base; the FDR-specific scheme width, stride and bits
 * are copied into this class.
 */
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {}
/**
 * \brief Default flood suffix length: scheme width divided by the number of
 * buckets, rounded up, plus one.
 */
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    const auto width = getSchemeWidth();
    const auto buckets = getNumBuckets();
    // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
    const auto per_bucket = (width + buckets - 1) / buckets;
    // the +1 avoids pain due to various reach choices
    return per_bucket + 1;
}
/**
 * \brief Heuristically pick the stride we would like the engine to use.
 *
 * \param num_lits      number of literals.
 * \param min_len       length of the shortest literal.
 * \param min_len_count number of literals that have length \p min_len.
 * \return the desired stride; 1 is the safe fallback.
 */
static
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
    u32 stride = 1; // always our safe fallback
    const bool multi_byte = min_len > 1;

    if (multi_byte && num_lits < 250) {
        // small cases we just go for it
        stride = min_len;
    } else if (multi_byte && num_lits < 800) {
        // intermediate cases
        stride = min_len - 1;
    } else if (multi_byte && num_lits < 5000) {
        // for larger but not huge sizes, go to stride 2 only if we have at
        // least minlen 3
        stride = MIN(min_len - 1, 2);
    }

    // patch if count is quite large - a ton of length 2 literals can
    // break things
#ifdef TRY_THIS_LATER
    if ((min_len == 2) && (stride == 2) && (min_len_count > 20)) {
        stride = 1;
    }
#endif

    // patch stuff just for the stride 4 case; don't let min_len=4,
    // desiredStride=4 through as even a few length 4 literals can break things
    // (far more fragile)
    const bool fragile_stride4 =
        (min_len == 4) && (stride == 4) && (min_len_count > 2);
    if (fragile_stride4) {
        stride = 2;
    }

    return stride;
}
/**
 * \brief Choose the best FDR engine for a literal set on the given target.
 *
 * Every engine that is valid on \p target and whose stride does not exceed
 * the shortest literal length is scored. The score starts at 100 and is
 * reduced by the distance between the engine's stride and the desired stride
 * and by the distance between the engine's table bits and a heuristic ideal;
 * engines whose stride does not exceed the desired stride get a bonus equal
 * to their stride. The highest-scoring engine wins, earlier engines winning
 * ties.
 *
 * \param target     target platform.
 * \param vl         literals to be matched.
 * \param make_small prefer smaller tables.
 * \return a copy of the chosen engine description, or nullptr if no engine
 *         is usable.
 */
unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
                                              const vector<hwlmLiteral> &vl,
                                              bool make_small) {
    vector<FDREngineDescription> allDescs;
    getFdrDescriptions(&allDescs);

    // find desired stride
    size_t count;
    size_t msl = minLenCount(vl, &count);
    u32 desiredStride = findDesiredStride(vl.size(), msl, count);

    DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
                 desiredStride);

    const FDREngineDescription *best = nullptr;
    // Scores are signed and 64-bit: the penalties below can exceed the base
    // score (e.g. a large desired stride), and an unsigned score would wrap
    // around and make the worst engine look like the best one.
    long long best_score = 0;

    for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
        const FDREngineDescription &eng = allDescs[engineID];
        if (!eng.isValidOnTarget(target)) {
            continue;
        }
        if (msl < eng.stride) {
            continue;
        }

        long long score = 100;

        score -= (long long)absdiff(desiredStride, eng.stride);

        if (eng.stride <= desiredStride) {
            score += eng.stride;
        }

        u32 effLits = vl.size(); /* * desiredStride;*/
        u32 ideal;
        if (effLits < eng.getNumBuckets()) {
            if (eng.stride == 1) {
                ideal = 8;
            } else {
                ideal = 10;
            }
        } else if (effLits < 20) {
            ideal = 10;
        } else if (effLits < 100) {
            ideal = 11;
        } else if (effLits < 1000) {
            ideal = 12;
        } else if (effLits < 10000) {
            ideal = 13;
        } else {
            ideal = 15;
        }

        if (ideal != 8 && eng.schemeWidth == 32) {
            ideal += 1;
        }

        if (make_small) {
            ideal -= 2;
        }

        if (eng.stride > 1) {
            ideal++;
        }

        DEBUG_PRINTF("effLits %u\n", effLits);

        if (target.is_atom_class() && !make_small && effLits < 4000) {
            /* Unless it is a very heavy case, we want to build smaller tables
             * on lightweight machines due to their small caches. */
            ideal -= 2;
        }

        score -= (long long)absdiff(ideal, eng.bits);

        DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
                     "-> score=%lld\n",
                     eng.getID(), eng.schemeWidth, eng.bits,
                     eng.getNumBuckets(), eng.stride, score);

        if (!best || score > best_score) {
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<FDREngineDescription>(*best);
}
/** \brief Maps (bucket, position within bucket) to a bit index in the scheme.
 *
 * Positions of one bucket are spaced getNumBuckets() bits apart, with the
 * bucket index selecting the bit inside each group. */
SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
                                                  PositionInBucket p) const {
    assert(p < getBucketWidth(b));

    const SchemeBitIndex bit = p * getNumBuckets() + b;
    assert(bit < getSchemeWidth());

    return bit;
}
/** \brief Number of scheme bits owned by each bucket; identical for every
 * bucket, so the bucket index argument is ignored. */
u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
    const u32 width = getSchemeWidth();
    const u32 buckets = getNumBuckets();

    // the scheme must divide evenly between the buckets
    assert(width % buckets == 0);

    return width / buckets;
}
/** \brief Returns a copy of the engine description stored at index
 * \a engineID in the list built by getFdrDescriptions(), or nullptr if the
 * index is out of range. */
unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
    vector<FDREngineDescription> descs;
    getFdrDescriptions(&descs);

    if (engineID < descs.size()) {
        return ue2::make_unique<FDREngineDescription>(descs[engineID]);
    }

    return nullptr;
}
} // namespace ue2

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_ENGINE_DESCRIPTION_H
#define FDR_ENGINE_DESCRIPTION_H
#include "engine_description.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>
#include <vector>
namespace ue2 {
/** \brief Plain-data definition of one FDR engine variant.
 *
 * The FDREngineDescription constructor copies every field out of this
 * struct. */
struct FDREngineDef {
u32 id;
u32 schemeWidth;
u32 numBuckets;
u32 stride;
u32 bits;
u64a cpu_features; //!< converted to a target via targetByArchFeatures()
u32 confirmPullBackDistance;
u32 confirmTopLevelSplit;
};
/** \brief Description of a single FDR engine variant: the generic
 * EngineDescription properties plus FDR's scheme width, stride and table
 * bits. */
class FDREngineDescription : public EngineDescription {
public:
u32 schemeWidth; //!< scheme width in bits (getTabSizeBytes() divides by 8)
u32 stride;
u32 bits; //!< log2 of the number of table entries
u32 getSchemeWidth() const { return schemeWidth; }
u32 getBucketWidth(BucketIndex b) const;
SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
/** \brief Number of table entries: 2^bits. */
u32 getNumTableEntries() const { return 1 << bits; }
/** \brief Table size in bytes: schemeWidth / 8 bytes per table entry. */
u32 getTabSizeBytes() const {
return schemeWidth / 8 * getNumTableEntries();
}
explicit FDREngineDescription(const FDREngineDef &def);
u32 getDefaultFloodSuffixLength() const override;
/** \brief True only for engines with stride 1. */
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
};
/** \brief Chooses an FDR engine for the literals \a vl on \a target;
 * \a make_small biases the choice towards smaller tables. Returns nullptr
 * when no engine is usable. */
std::unique_ptr<FDREngineDescription>
chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
bool make_small);
/** \brief Returns a copy of the engine description at index \a engineID, or
 * nullptr if the index is out of range. */
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
/** \brief Writes the available engine descriptions into \a out.
 * NOTE(review): the definition is outside this chunk; confirm whether \a out
 * is cleared or appended to. */
void getFdrDescriptions(std::vector<FDREngineDescription> *out);
} // namespace ue2
#endif

111
src/fdr/fdr_internal.h Normal file
View File

@ -0,0 +1,111 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: data structures.
*/
#ifndef FDR_INTERNAL_H
#define FDR_INTERNAL_H
#include "ue2common.h"
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
/** \brief Caution level attached to a read.
 * NOTE(review): the consumers of this enum are outside this chunk; confirm
 * the exact semantics there. */
typedef enum {
NOT_CAUTIOUS, //!< not near a boundary (quantify?)
VECTORING //!< potentially vectoring
} CautionReason;
/** \brief number of different ids that can be triggered by floods of any given
* character. */
#define FDR_FLOOD_MAX_IDS 16
/** \brief Flood-handling record: the literal ids (with their groups and
 * lengths) that fire once per character during a flood.
 * NOTE(review): presumably one record per flood character; confirm against
 * the flood compile/runtime code. */
struct FDRFlood {
hwlm_group_t allGroups; //!< all the groups or'd together
u32 suffix; // NOTE(review): meaning not visible in this chunk
/** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
 * a flood.
 * If larger we won't handle this through the flood path at all. */
u16 idCount;
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
};
/** \brief FDR structure.
 *
 * This struct is the header of a larger bytecode blob laid out as:
 * 1. struct as-is
 * 2. primary matching table
 * 3. confirm stuff
 */
struct FDR {
u32 engineID;
u32 size;
u32 maxStringLen;
u32 floodOffset; // NOTE(review): presumably offset of the flood data; confirm
/** link is the relative offset of a secondary included FDR table for
 * stream handling if we're a primary FDR table or the subsidiary tertiary
 * structures (spillover strings and hash table) if we're a secondary
 * structure. */
u32 link;
u32 pad1;
u32 pad2;
u32 pad3;
// Start value, viewable at three different widths.
// NOTE(review): which member is used presumably depends on the engine.
union {
u32 s_u32;
u64a s_u64a;
m128 s_m128;
} start;
};
/** \brief FDR runtime arguments.
 *
 * This structure handles read-only things that are passed extensively around
 * the FDR run-time functions. They are set by the API, passed by value into
 * the main function, then a pointer is passed around to all the various
 * sub-functions (confirm & flood). */
struct FDR_Runtime_Args {
const u8 *buf; // buffer pointer and its length
size_t len;
const u8 *buf_history; // history buffer pointer and its length
size_t len_history;
const u8 *buf_history_nocase; // NOTE(review): presumably a case-folded copy of the history; confirm
size_t len_history_nocase;
size_t start_offset;
HWLMCallback cb; // callback and the opaque context pointer stored with it
void *ctxt;
hwlm_group_t *groups;
const u8 *firstFloodDetect;
const u64a histBytes;
};
#endif

216
src/fdr/fdr_loadval.h Normal file
View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_LOADVAL_H
#define FDR_LOADVAL_H
#include "fdr_internal.h"
#include "ue2common.h"
#include "util/unaligned.h"
#include "util/simd_utils.h"
/* Opens the definition of a load function `name` returning `type`. Every
 * loader takes the read pointer `ptr` and the bounds [lo, hi) of the memory
 * that may be read; lo and hi are marked UNUSED because some variants only
 * reference them inside assertions. */
#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)

/* Assertion: the whole `type`-sized read lies inside [lo, hi). */
#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)
/* As NORMAL_SAFE, and ptr is a multiple of sizeof(type). The trailing
 * semicolon is kept so that existing uses expand exactly as before. */
#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0);

// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)

/* Per-byte bounds conditions used by MAKE_LOOP; `i` is the loop index. Each
 * expands to a single parenthesized expression so it can be combined safely
 * with other operators at the point of use. */
#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
#define CE_INDEX_CHECK ((lo <= ptr + i) && (ptr + i < hi))

/* Body of a cautious loader: assembles a TYPE one byte at a time. Byte i is
 * shifted left by SHIFT_FIDDLE * 8 bits and added to the result only when
 * COND holds for it; bytes failing COND contribute zero. */
#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE) \
    TYPE v = 0; \
    for (TYPE i = 0; i < sizeof(TYPE); i++) { \
        if (COND) { \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8); \
        } \
    } \
    return v;

/* Big-endian assembly: byte 0 lands in the most significant position. */
#define MAKE_LOOP_BE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

/* Little-endian assembly: byte 0 lands in the least significant position. */
#define MAKE_LOOP_LE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, i)

/* Endianness x caution (CF = forward, CB = backward, CE = everywhere). */
#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)
// no suffix = normal (unaligned)
// _a = aligned
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
// _ce = cautious everywhere (in both directions); test against hi and lo
// u8 loadvals
// Plain single-byte load; asserts that the byte lies within [lo, hi).
MAKE_LOADVAL(u8, lv_u8) {
NORMAL_SAFE(u8);
return *ptr;
}
// Cautious-forward byte load: ptr is known to be at or after lo; a read at
// or beyond hi yields zero instead of touching memory.
MAKE_LOADVAL(u8, lv_u8_cf) {
    CAUTIOUS_FORWARD_SAFE(u8);
    return (ptr < hi) ? *ptr : 0;
}
// Cautious-backward byte load: ptr is known to be before hi; a read before
// lo yields zero instead of touching memory.
MAKE_LOADVAL(u8, lv_u8_cb) {
    CAUTIOUS_BACKWARD_SAFE(u8);
    return (ptr >= lo) ? *ptr : 0;
}
// Cautious-everywhere byte load: any read outside [lo, hi) yields zero.
MAKE_LOADVAL(u8, lv_u8_ce) {
    if (ptr < lo || ptr >= hi) {
        return 0;
    }
    return *ptr;
}
// Unaligned 16-bit load; asserts that both bytes lie within [lo, hi).
MAKE_LOADVAL(u16, lv_u16) {
NORMAL_SAFE(u16);
return unaligned_load_u16(ptr);
}
// Aligned 16-bit load; additionally asserts ptr is a multiple of sizeof(u16).
MAKE_LOADVAL(u16, lv_u16_a) {
ALIGNED_SAFE(u16);
return *(const u16 *)ptr;
}
// Unaligned 32-bit load; asserts that all four bytes lie within [lo, hi).
MAKE_LOADVAL(u32, lv_u32) {
NORMAL_SAFE(u32);
return unaligned_load_u32(ptr);
}
// Aligned 32-bit load; additionally asserts ptr is a multiple of sizeof(u32).
MAKE_LOADVAL(u32, lv_u32_a) {
ALIGNED_SAFE(u32);
return *(const u32 *)ptr;
}
// Unaligned 64-bit load. The bounds assertion covers the full eight bytes
// that unaligned_load_u64a() reads, so a load running past hi is caught in
// debug builds.
MAKE_LOADVAL(u64a, lv_u64a) {
    NORMAL_SAFE(u64a);
    return unaligned_load_u64a(ptr);
}
// Aligned 64-bit load; asserts the read lies within [lo, hi) and that ptr is
// a multiple of sizeof(u64a).
MAKE_LOADVAL(u64a, lv_u64a_a) {
ALIGNED_SAFE(u64a);
return *(const u64a *)ptr;
}
// Cautious multi-byte loaders generated from the MAKE_LOOP_LE_* macros: the
// value is assembled byte by byte in little-endian order, and any byte that
// fails the variant's bounds check (_cf: against hi, _cb: against lo,
// _ce: against both) contributes zero.
MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }
MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }
MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }
// Unaligned 128-bit load via loadu128(); asserts that all sixteen bytes lie
// within [lo, hi).
MAKE_LOADVAL(m128, lv_m128) {
NORMAL_SAFE(m128);
return loadu128(ptr);
}
// Aligned 128-bit load. ALIGNED_SAFE asserts both that the read lies within
// [lo, hi) and that ptr is a multiple of sizeof(m128), so no separate
// alignment assertion is needed here.
MAKE_LOADVAL(m128, lv_m128_a) {
    ALIGNED_SAFE(m128);
    return *(const m128 *)ptr;
}
// m128 cases need to be manually created
// Cautious-forward 128-bit load: bytes at or beyond hi are read as zero.
MAKE_LOADVAL(m128, lv_m128_cf) {
    CAUTIOUS_FORWARD_SAFE(m128);

    // assemble the vector through a byte view of the same storage
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        u.val8[i] = (ptr + i < hi) ? ptr[i] : 0;
    }

    return u.val128;
}
// Cautious-backward 128-bit load: bytes located before lo are read as zero.
MAKE_LOADVAL(m128, lv_m128_cb) {
    CAUTIOUS_BACKWARD_SAFE(m128);

    // assemble the vector through a byte view of the same storage
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        u.val8[i] = (lo <= ptr + i) ? ptr[i] : 0;
    }

    return u.val128;
}
// Cautious-everywhere 128-bit load: every byte outside [lo, hi) is read as
// zero.
MAKE_LOADVAL(m128, lv_m128_ce) {
    // assemble the vector through a byte view of the same storage
    union {
        u8 val8[16];
        m128 val128;
    } u;

    for (u32 i = 0; i < 16; i++) {
        const u8 *p = ptr + i;
        u.val8[i] = ((lo <= p) && (p < hi)) ? *p : 0;
    }

    return u.val128;
}
#endif

View File

@ -0,0 +1,445 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_streaming_internal.h"
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <deque>
#include <set>
#include <boost/dynamic_bitset.hpp>
using namespace std;
using boost::dynamic_bitset;
namespace ue2 {
namespace {

/** \brief Strict weak ordering for long literals: caseful literals sort
 * before caseless ones; within one case class, literals are ordered
 * lexicographically by their string. */
struct LongLitOrder {
    bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
        if (i1.nocase == i2.nocase) {
            return i1.s < i2.s;
        }
        return i1.nocase < i2.nocase;
    }
};

}
/** \brief Two literals are considered equal when they have the same case
 * sensitivity and the same string; ids, groups and masks are ignored. */
static
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
    if (l1.nocase != l2.nocase) {
        return false;
    }
    return l1.s == l2.s;
}
/** \brief Rounds \a x up to the nearest power of two; a power of two is
 * returned unchanged. The arithmetic is modulo 2^32, so an input of 0 (and
 * any input above 2^31) yields 0. */
static
u32 roundUpToPowerOfTwo(u32 x) {
    x -= 1;
    // smear the highest set bit into every lower bit position
    for (u32 shift = 1; shift < 32; shift <<= 1) {
        x |= (x >> shift);
    }
    return x + 1;
}
/**
* \brief Creates a long literals vector containing all literals of length > max_len.
*
* The last char of each literal is trimmed as we're not interested in full
* matches, only partial matches.
*
* Literals are sorted (by caseful/caseless, then lexicographical order) and
* made unique.
*
* The ID of each literal is set to its position in the vector.
*
* \return False if there aren't any long literals.
*/
static
bool setupLongLits(const vector<hwlmLiteral> &lits,
vector<hwlmLiteral> &long_lits, size_t max_len) {
long_lits.reserve(lits.size());
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
it != lits.end(); ++it) {
if (it->s.length() > max_len) {
hwlmLiteral tmp = *it; // copy
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
tmp.id = 0; // recalc later
tmp.groups = 0; // filled in later by hash bucket(s)
long_lits.push_back(tmp);
}
}
if (long_lits.empty()) {
return false;
}
// sort long_literals by caseful/caseless and in lexicographical order,
// remove duplicates
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
vector<hwlmLiteral>::iterator new_end =
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
long_lits.erase(new_end, long_lits.end());
// fill in ids; not currently used
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
i->id = i - long_lits.begin();
}
return true;
}
// boundaries are the 'start' boundaries for each 'mode'
// so boundary[CASEFUL] is the index one above the largest caseful index
// positions[CASEFUL] is the # of positions in caseful strings (stream)
// hashedPositions[CASEFUL] is the # of positions in caseful strings
// (not returned - a temporary)
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
// (rounded up to the nearest power of two)
static
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
u32 *boundaries, u32 *positions, u32 *hashEntries) {
u32 hashedPositions[MAX_MODES];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
boundaries[m] = verify_u32(long_lits.size());
positions[m] = 0;
hashedPositions[m] = 0;
}
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
if (i->nocase) {
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
break;
}
}
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
MODES m = i->nocase ? CASELESS : CASEFUL;
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
hashedPositions[m]++;
}
positions[m] += i->s.size();
}
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
hashEntries[m] = hashedPositions[m]
? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
: 0;
}
#ifdef DEBUG_COMPILE
printf("analyzeLits:\n");
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
printf("mode %s boundary %d positions %d hashedPositions %d "
"hashEntries %d\n",
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
positions[m], hashedPositions[m], hashEntries[m]);
}
printf("\n");
#endif
}
/** \brief Hashes the literal's string starting \a offset bytes in, using
 * streaming_hash() in mode \a m. */
static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
    const u8 *start = (const u8 *)l.s.c_str() + offset;
    return streaming_hash(start, max_len, m);
}
// Orders (literal id, offset) pairs by 'distance from start': the pair with
// the larger offset comes first; pairs with equal offsets are ordered by
// ascending literal id.
namespace {

struct OffsetIDFromEndOrder {
    const vector<hwlmLiteral> &lits; // not currently used

    explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
        : lits(lits_in) {}

    bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
        if (i1.second == i2.second) {
            return i1.first < i2.first;
        }
        // longest is 'first', so > not <
        return i1.second > i2.second;
    }
};

}
/**
 * \brief Populates the hash table for one mode (caseful or caseless).
 *
 * For every literal of mode \a m, each offset j in 1 .. size - max_len is
 * hashed with hashLit(). The low nbits of the hash select the bucket; the
 * next six bits select one bit of that bucket's 64-bit bitfield. All
 * (literal id, offset) pairs landing in one bucket are stored as a chain: the
 * first pair goes into the bucket itself and each further pair goes into the
 * nearest still-unused table entry, connected through the 'link' field.
 *
 * \param long_lits       the long literals (ids equal to vector indices)
 * \param max_len         number of bytes hashed at each offset
 * \param tab             hash table to fill, \a numEntries entries
 * \param numEntries      table size; lg2() of it gives the bucket bit count
 * \param m               mode whose literals are processed
 * \param litToOffsetVal  literal id -> offset of its string; combined with
 *                        the in-string offset to form each entry's state
 */
static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
FDRSHashEntry *tab, size_t numEntries, MODES m,
map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
const hwlmLiteral &l = *i;
// only handle literals belonging to the requested mode
if ((m == CASELESS) != i->nocase) {
continue;
}
// NOTE(review): the bound is computed in size_t, so this relies on
// s.size() >= max_len - 1; a shorter string would wrap the bound.
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
u32 h = hashLit(l, j, max_len, m);
u32 h_ent = h & ((1U << nbits) - 1); // bucket index: low nbits
u32 h_low = (h >> nbits) & 63; // bit within the bucket's bitfield
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
e = bucketToBitfield.end();
i != e; ++i) {
u32 bucket = i->first;
u64a contents = i->second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (map<u32, deque<pair<u32, u32> > >::iterator
i = bucketToLitOffPairs.begin(),
e = bucketToLitOffPairs.end();
i != e; ++i) {
u32 bucket = i->first;
deque<pair<u32, u32> > &d = i->second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
FDRSHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
// state: offset of the literal's string plus the position just past the
// max_len bytes hashed at this offset
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
// end of chain for now; overwritten below if more pairs remain
ent->link = (u32)LINK_INVALID;
d.pop_front();
if (d.empty()) {
break;
}
// now, if there is another value
// find a bucket for it and put in 'bucket' and repeat
// all we really need to do is find something not in filledBuckets,
// ideally something close to bucket
// we search backward and forward from bucket, trying to stay as
// close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
// odd k probes bucket + k/2, even k probes bucket - k/2
// NOTE(review): the two conditional operands are int and u32, so the
// backward step is converted to unsigned and the sum relies on modular
// wraparound before being stored in an int.
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
}
}
assert(found);
// continue the chain in the free entry that was found
bucket = bucket_candidate;
ent->link = bucket;
}
}
}
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
vector<hwlmLiteral>::const_iterator it, ite;
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
rv = max(rv, it->msk.size());
}
return rv;
}
/**
 * \brief Builds the streaming ("long literal") table for a literal set.
 *
 * The table is one allocation laid out as: header (FDRSTableHeader), literal
 * offset table (long_lits.size() + 1 FDRSLiteral entries), the packed literal
 * strings, the caseful hash table, then the caseless hash table.
 *
 * On return, stream_control->literal_history_required and
 * stream_control->literal_stream_state_required are set.
 *
 * \param lits            all literals of the matcher
 * \param stream_control  in: history_min / history_max limits; out: the
 *                        history and stream state requirements
 * \return the table (allocated with aligned_zmalloc) and its size in bytes,
 *         or (nullptr, 0) when no literal is longer than the chosen history
 *         length and no table is needed
 * \throws std::logic_error if history_max is below the minimum history (32)
 *         required for long-literal support
 */
pair<u8 *, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control) {
// refuse to compile if we are forced to have smaller than minimum
// history required for long-literal support, full stop
// otherwise, choose the maximum of the preferred history quantity
// (currently a fairly extravagant 32) or the already used history
// quantity - subject to the limitation of stream_control->history_max
const size_t MIN_HISTORY_REQUIRED = 32;
// NOTE(review): std::logic_error lives in <stdexcept>, which is not among
// this file's direct includes; presumably it arrives transitively.
if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
throw std::logic_error("Cannot set history to minimum history required");
}
size_t max_len =
MIN(stream_control->history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
assert(max_len >= MIN_HISTORY_REQUIRED);
size_t max_mask_len = maxMaskLen(lits);
vector<hwlmLiteral> long_lits;
// the '|| false' has no effect on the condition
if (!setupLongLits(lits, long_lits, max_len) || false) {
// "Don't need to do anything" path, not really a fail
DEBUG_PRINTF("Streaming literal path produces no table\n");
// we want enough history to manage the longest literal and the longest
// mask.
// NOTE(review): if both lengths are 0 the '- 1' wraps around (size_t).
stream_control->literal_history_required =
max(maxLen(lits), max_mask_len) - 1;
stream_control->literal_stream_state_required = 0;
return make_pair(nullptr, size_t{0});
}
// Ensure that we have enough room for the longest mask.
if (max_mask_len) {
max_len = max(max_len, max_mask_len - 1);
}
u32 boundary[MAX_MODES];
u32 positions[MAX_MODES];
u32 hashEntries[MAX_MODES];
analyzeLits(long_lits, max_len, boundary, positions, hashEntries);
// first assess the size and find our caseless threshold
// each region is rounded up to a multiple of 16 bytes
size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));
size_t litTabOffset = headerSize;
size_t litTabNumEntries = long_lits.size() + 1; // +1 for the end marker
size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));
size_t wholeLitTabOffset = litTabOffset + litTabSize;
size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
positions[CASELESS]);
size_t htOffset[MAX_MODES];
size_t htSize[MAX_MODES];
htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);
size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);
// need to add +2 to both of these to allow space for the actual largest
// value as well as handling the fact that we add one to the space when
// storing out a position to allow zero to mean "no stream state value"
u8 streamBits[MAX_MODES];
streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
// both modes' bit counts together, rounded up to whole bytes
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
// then fill it in
u8 * ptr = secondaryTable;
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
// fill in header
header->pseudoEngineID = (u32)0xffffffff;
header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
header->boundary[m] = boundary[m];
header->hashOffset[m] = verify_u32(htOffset[m]);
header->hashNBits[m] = lg2(hashEntries[m]);
header->streamStateBits[m] = streamBits[m];
}
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8
ptr += headerSize;
// now fill in the rest
FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
ptr += litTabSize;
// ptr now points at the start of the packed string area
map<u32, u32> litToOffsetVal;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
u32 entry = verify_u32(i - long_lits.begin());
u32 offset = verify_u32(ptr - secondaryTable);
// point the table entry to the string location
litTabPtr[entry].offset = offset;
litToOffsetVal[entry] = offset;
// copy the string into the string location
// (no terminating NUL is stored; the next entry's offset marks the end)
memcpy(ptr, i->s.c_str(), i->s.size());
ptr += i->s.size(); // and the string location
}
// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);
// fill hash tables
ptr = secondaryTable + htOffset[CASEFUL];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
(MODES)m, litToOffsetVal);
ptr += htSize[m];
}
// tell the world what we did
stream_control->literal_history_required = max_len;
stream_control->literal_stream_state_required = tot_state_bytes;
return make_pair(secondaryTable, tabSize);
}
} // namespace ue2

View File

@ -0,0 +1,152 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_INTERNAL_H
#define FDR_STREAMING_INTERNAL_H
#include "ue2common.h"
#include "fdr_internal.h"
#include "util/unaligned.h"
// tertiary table:
// a header (FDRSTableHeader)
// long_lits.size()+1 entries holding an offset to the string in the
// 'whole literal table' (FDRSLiteral structure)
// the whole literal table - every string packed in (freeform)
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)
// Literal matching modes. The values index the per-mode arrays of
// FDRSTableHeader; MAX_MODES is the number of modes.
typedef enum {
CASEFUL = 0,
CASELESS = 1,
MAX_MODES = 2
} MODES;
// We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings
// All arrays below are indexed by MODES (CASEFUL / CASELESS).
struct FDRSTableHeader {
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
// string id one beyond the maximum entry for this type of literal
// boundary[CASEFUL] is the end of the caseful literals
// boundary[CASELESS] is the end of the caseless literals and one beyond
// the largest literal id (the size of the littab)
u32 boundary[MAX_MODES];
// offsets are 0 if no such table exists
// offset from the base of the tertiary structure to the hash table
u32 hashOffset[MAX_MODES];
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
u8 streamStateBits[MAX_MODES]; // stream state bits needed per mode
u8 streamStateBytes; // total size of packed stream state in bytes
u8 N; // prefix lengths
u16 pad; // explicit padding
};
// One of these structures per literal entry in our secondary FDR table.
struct FDRSLiteral {
// offset of this literal's string
// NOTE(review): presumably relative to the start of the table; confirm
// against the table compiler.
u32 offset;
// potentially - another u32 to point to the 'next lesser included literal'
// which would be a literal that overlaps this one in such a way that a
// failure to match _this_ literal can leave us in a state that we might
// still match that literal. Offset information might also be called for,
// in which case we might be wanting to use a FDRSLiteralOffset
};
typedef u32 FDRSLiteralOffset;
#define LINK_INVALID 0xffffffff
// One of these structures per hash table entry in our secondary FDR table
struct FDRSHashEntry {
u64a bitfield; // 64 one-bit flags, tested with has_bit()
FDRSLiteralOffset state;
u32 link; // LINK_INVALID appears to mark "no link"; NOTE(review): confirm
};
// Index of the first literal of mode m: 0 for caseful literals, otherwise
// the boundary of the preceding mode.
static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
    if (m == CASEFUL) {
        return 0;
    }
    return h->boundary[m - 1];
}
// Index one past the last literal of mode m (the mode's boundary value).
static really_inline
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
return h->boundary[m];
}
// Returns the literal table, which starts right after the header, with the
// header size rounded up to a multiple of 16 bytes.
static really_inline
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
    const u8 *base = (const u8 *)h;
    const u8 *tab = base + ROUNDUP_16(sizeof(struct FDRSTableHeader));
    return (const struct FDRSLiteral *)tab;
}
// Offset stored for the first literal of mode m.
static really_inline
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
    const struct FDRSLiteral *tab = getLitTab(h);
    const u32 first = get_start_lit_idx(h, m);
    return tab[first].offset;
}
// Converts a state value v into its packed form for mode m: the distance
// from the mode's base literal offset, plus one. (The unpacking code treats
// a packed value of zero as "no state".)
static really_inline
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
    const u32 base = getBaseOffsetOfLits(h, m);
    return (v - base) + 1;
}
// Inverse of packStateVal(): turns a packed value v for mode m back into a
// state value by adding the mode's base literal offset and subtracting one.
static really_inline
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
    const u32 base = getBaseOffsetOfLits(h, m);
    return (v + base) - 1;
}
// Returns 1 if the given bit of the entry's 64-bit bitfield is set, else 0.
static really_inline
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
    const u64a shifted = ent->bitfield >> bit;
    return (u32)(shifted & 0x1);
}
// Hashes the 24 bytes starting at ptr into a 32-bit value.
//
// Three unaligned 64-bit words are loaded (offsets 0, 8 and 16); in CASELESS
// mode each is masked with 0xdf per byte first. The words are multiplied by
// the first, second and third power of a fixed multiplier respectively, the
// top 32 bits of each product are kept, and the three results are XORed.
//
// len is only checked by assertion (it must be at least 32).
static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
    const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
    const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
    const u64a mult_sq = MULTIPLIER * MULTIPLIER;
    const u64a mult_cu = mult_sq * MULTIPLIER;

    assert(len >= 32);

    // An all-ones mask leaves the caseful input untouched.
    const u64a fold = (mode == CASELESS) ? CASEMASK : ~(u64a)0;

    const u64a w0 = unaligned_load_u64a(ptr) & fold;
    const u64a w1 = unaligned_load_u64a(ptr + 8) & fold;
    const u64a w2 = unaligned_load_u64a(ptr + 16) & fold;

    const u64a h0 = (w0 * MULTIPLIER) >> 32;
    const u64a h1 = (w1 * mult_sq) >> 32;
    const u64a h2 = (w2 * mult_cu) >> 32;

    return h0 ^ h1 ^ h2;
}
#endif

View File

@ -0,0 +1,365 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_RUNTIME_H
#define FDR_STREAMING_RUNTIME_H
#include "fdr_streaming_internal.h"
#include "util/partial_store.h"
// Returns the streaming table header linked from the given FDR engine.
//
// The table lives fdr->link bytes past the start of the FDR structure. In
// debug builds we check the pointers first and then confirm that the linked
// structure starts with the 'pseudo engine id' marker (0xffffffff) rather
// than a real engine ID.
static really_inline
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
    assert(fdr);
    const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
    // validate the pointer before reading through it
    assert(linkPtr);
    // test if it's not really an engineID, but a 'pseudo engine id'
    assert(*(const u32 *)linkPtr == 0xffffffff);
    return (const struct FDRSTableHeader *)linkPtr;
}
// Loads the packed stream state and splits it into one value per mode.
//
// The caseful value occupies the low streamStateBits[CASEFUL] bits of the
// packed word and the caseless value sits directly above it. Results are
// written to table[CASEFUL] and table[CASELESS].
static really_inline
void getStreamStates(const struct FDRSTableHeader * streamingTable,
                     const u8 * stream_state, u32 * table) {
    assert(streamingTable);
    assert(stream_state);
    assert(table);

    const u8 total_bytes = streamingTable->streamStateBytes;
    const u8 caseful_bits = streamingTable->streamStateBits[CASEFUL];
    UNUSED u8 caseless_bits = streamingTable->streamStateBits[CASELESS];
    assert(total_bytes == (caseful_bits + caseless_bits + 7) / 8);

#if defined(ARCH_32_BIT)
    // When the whole state fits in four bytes, a 32-bit host can stay
    // entirely in 32-bit arithmetic.
    if (total_bytes <= 4) {
        const u32 packed32 = partial_load_u32(stream_state, total_bytes);
        const u32 low_mask32 = (1U << caseful_bits) - 1;
        table[CASEFUL] = (u32)(packed32 & low_mask32);
        table[CASELESS] = (u32)(packed32 >> caseful_bits);
        return;
    }
#endif

    const u64a packed = partial_load_u64a(stream_state, total_bytes);
    const u64a low_mask = (1ULL << caseful_bits) - 1;
    table[CASEFUL] = (u32)(packed & low_mask);
    table[CASELESS] = (u32)(packed >> (u64a)caseful_bits);
}
#ifndef NDEBUG
// Debug-only sanity check, used inside an assert: returns 1 if either mode's
// table value has bits set outside the width allotted to it (ssb bits for
// caseful, ssb_nc bits for caseless), and 0 otherwise.
static really_inline UNUSED
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
    const u32 caseful_mask = (1ULL << (ssb)) - 1;
    // OR-ing in the mask changes nothing unless a bit lies outside it
    if ((table[CASEFUL] | caseful_mask) != caseful_mask) {
        return 1;
    }

    const u32 caseless_mask = (1ULL << (ssb_nc)) - 1;
    if ((table[CASELESS] | caseless_mask) != caseless_mask) {
        return 1;
    }

    return 0;
}
#endif
// Packs the per-mode values in table into the stream state bytes.
//
// table[CASEFUL] goes in the low bits and table[CASELESS] is shifted up by
// streamStateBits[CASEFUL]; the combined word is stored using
// streamStateBytes bytes.
static really_inline
void setStreamStates(const struct FDRSTableHeader * streamingTable,
                     u8 * stream_state, u32 * table) {
    assert(streamingTable);
    assert(stream_state);
    assert(table);

    const u8 total_bytes = streamingTable->streamStateBytes;
    const u8 caseful_bits = streamingTable->streamStateBits[CASEFUL];
    UNUSED u8 caseless_bits = streamingTable->streamStateBits[CASELESS];
    assert(total_bytes == (caseful_bits + caseless_bits + 7) / 8);
    assert(!streamingTableOverflow(table, caseful_bits, caseless_bits));

#if defined(ARCH_32_BIT)
    // When the whole state fits in four bytes, a 32-bit host can stay
    // entirely in 32-bit arithmetic.
    if (total_bytes <= 4) {
        const u32 packed32 = table[CASEFUL] | (table[CASELESS] << caseful_bits);
        partial_store_u32(stream_state, packed32, total_bytes);
        return;
    }
#endif

    const u64a low_part = (u64a)table[CASEFUL];
    const u64a high_part = (u64a)table[CASELESS] << ((u64a)caseful_bits);
    partial_store_u64a(stream_state, low_part | high_part, total_bytes);
}
// Returns 1 if any byte of the packed stream state is non-zero, and 0 if
// the state is all zero or stream_state is NULL.
//
// NOTE(review): this function has external linkage although it is defined
// in a header -- confirm the header is only included from one translation
// unit.
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
    if (!stream_state) {
        return 0;
    }

    const struct FDRSTableHeader * hdr = getSHDR(fdr);
    const u8 * cur = stream_state;
    const u8 * const end = stream_state + hdr->streamStateBytes;

    // A plain byte scan is enough, since we only care whether any bit is
    // set; this is faster than a partial_load_u64a (especially on 32-bit
    // hosts).
    while (cur != end) {
        if (*cur) {
            return 1;
        }
        cur++;
    }

    return 0;
}
// Binary search for the literal table index that contains the given state.
//
// Searches the entries of mode m, i.e. indices in
// [get_start_lit_idx(), get_end_lit_idx()), and returns the index e such
// that litTab[e].offset <= stateValue - 1 < litTab[e + 1].offset.
//
// The debug assertions read litTab[hi], so the table is expected to hold a
// valid entry at the mode's end index as well.
static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
                    u32 stateValue, MODES m) {
    const struct FDRSLiteral * litTab = getLitTab(streamingTable);
    u32 lo = get_start_lit_idx(streamingTable, m);
    u32 hi = get_end_lit_idx(streamingTable, m);

    // Now move stateValue back by one so that we're looking for the
    // litTab entry that includes the string, not the one 'one past' it
    stateValue -= 1;
    assert(lo != hi);
    assert(litTab[lo].offset <= stateValue);
    assert(litTab[hi].offset > stateValue);

    // binary search to find the entry e such that:
    // litTab[e].offset <= stateValue < litTab[e+1].offset
    while (lo + 1 < hi) {
        // written as lo + (hi - lo) / 2 so the midpoint cannot overflow u32
        u32 mid = lo + (hi - lo) / 2;
        if (litTab[mid].offset <= stateValue) {
            lo = mid;
        } else { // litTab[mid].offset > stateValue
            hi = mid;
        }
    }

    assert(litTab[lo].offset <= stateValue);
    assert(litTab[hi].offset > stateValue);
    return lo;
}
// Restores the history buffer for one mode from its unpacked state value.
//
// A zero entry in state_table means there is no state for this mode and the
// runtime args are left untouched. Otherwise the state is mapped back to a
// literal in the streaming table, and the history pointer/length for the
// mode are pointed at the start of that literal's text.
static really_inline
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
                        const struct FDRSTableHeader *streamingTable,
                        const struct FDRSLiteral * litTab,
                        const u32 *state_table,
                        const MODES m) {
    const u32 packed = state_table[m];
    if (packed == 0) {
        return;
    }

    const u32 stateValue = unpackStateVal(streamingTable, m, packed);
    const u32 lit_idx = findLitTabEntry(streamingTable, stateValue, m);

    const size_t lit_offset = litTab[lit_idx].offset;
    const u8 * hist_buf = (const u8 *)streamingTable + lit_offset;
    const size_t hist_len = stateValue - lit_offset;

    if (m != CASEFUL) {
        a->buf_history_nocase = hist_buf;
        a->len_history_nocase = hist_len;
    } else {
        a->buf_history = hist_buf;
        a->len_history = hist_len;
    }
}
// Unpacks the stream state into the runtime args: reads the packed per-mode
// values and restores the caseful and caseless history buffers from them.
// Does nothing when stream_state is NULL.
static really_inline
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
                    const u8 * stream_state) {
    // without stream state there is nothing to restore
    if (stream_state == NULL) {
        return;
    }

    const struct FDRSTableHeader * hdr = getSHDR(fdr);
    const struct FDRSLiteral * lits = getLitTab(hdr);

    u32 unpacked[MAX_MODES];
    getStreamStates(hdr, stream_state, unpacked);

    fdrUnpackStateMode(a, hdr, lits, unpacked, CASEFUL);
    fdrUnpackStateMode(a, hdr, lits, unpacked, CASELESS);
}
// Checks whether the data seen so far ends with the literal prefix that
// hashState denotes.
//
// hashState identifies a position inside a literal of mode m; the bytes
// from the start of that literal up to the position must match the tail of
// the available data. If the prefix is longer than the current buffer, the
// leading part is compared against the end of the mode's history buffer.
//
// Returns hashState on a match (the new state), or 0 if the comparison
// fails or there is not enough buffer plus history to cover the prefix.
static really_inline
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
                      const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
    const struct FDRSLiteral * litTab = getLitTab(streamingTable);
    const u32 lit_idx = findLitTabEntry(streamingTable, hashState, m);
    const size_t lit_offset = litTab[lit_idx].offset;

    const u8 * lit = (const u8 *)streamingTable + lit_offset;
    assert(hashState > lit_offset);
    size_t remaining = hashState - lit_offset;

    const u8 * buf = a->buf;
    const size_t len = a->len;
    const char nocase = m != CASEFUL;

    if (remaining > len) {
        // The prefix reaches back before the current buffer: pick the
        // history that belongs to this mode.
        const u8 * hist;
        size_t hist_len;
        if (nocase) {
            hist = a->buf_history_nocase;
            hist_len = a->len_history_nocase;
        } else {
            hist = a->buf_history;
            hist_len = a->len_history;
        }

        if (remaining > len + hist_len) {
            return 0; // Break out - not enough total history
        }

        const size_t from_hist = remaining - len;
        assert(from_hist <= hist_len);
        if (cmpForward(hist + hist_len - from_hist, lit, from_hist, nocase)) {
            return 0;
        }

        lit += from_hist;
        remaining -= from_hist;
    }

    // Either no history was needed, or the history part compared equal;
    // what is left must match the tail of the current buffer.
    assert(remaining <= len);
    if (cmpForward(buf + len - remaining, lit, remaining, nocase)) {
        return 0;
    }

    return hashState; // our new state
}
// Computes the streaming hashes over the last hash_len bytes of data.
//
// If the current buffer holds at least hash_len bytes the hash window is
// taken straight from its tail. Otherwise the missing leading bytes are
// copied from the end of the (caseful) history buffer into a local scratch
// buffer, followed by the whole current buffer.
//
// hashes[CASEFUL] / hashes[CASELESS] are only written for modes whose hash
// table exists (non-zero hashNBits).
static really_inline
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
                          const struct FDRSTableHeader *streamingTable,
                          u8 hash_len, u32 *hashes) {
    u8 scratch[128];
    const u8 *window;

    if (hash_len <= a->len) {
        window = a->buf + a->len - hash_len;
    } else {
        assert(hash_len <= 128);
        const size_t from_hist = hash_len - a->len;
        assert(from_hist <= a->len_history);
        memcpy(scratch, a->buf_history + a->len_history - from_hist,
               from_hist);
        memcpy(scratch + from_hist, a->buf, a->len);
        window = scratch;
    }

    if (streamingTable->hashNBits[CASEFUL]) {
        hashes[CASEFUL] = streaming_hash(window, hash_len, CASEFUL);
    }
    if (streamingTable->hashNBits[CASELESS]) {
        hashes[CASELESS] = streaming_hash(window, hash_len, CASELESS);
    }
}
// Looks up the hash table entry for hash value h in mode m.
//
// The low hashNBits[m] bits of h select the table slot; the next six bits
// select a bit in that slot's 64-bit bitfield, which acts as a cheap filter.
//
// Returns the entry, or NULL if mode m has no hash table (hashNBits is 0)
// or the filter bit is not set.
static really_inline
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
                                   u32 h, const MODES m) {
    u32 nbits = streamingTable->hashNBits[m];
    if (!nbits) {
        return NULL;
    }

    // Build the index mask with an unsigned shift: shifting a signed 1 is
    // undefined once nbits reaches 31.
    u32 h_ent = h & ((1U << nbits) - 1);
    u32 h_low = (h >> nbits) & 63;

    const struct FDRSHashEntry *tab =
        (const struct FDRSHashEntry *)((const u8 *)streamingTable
                                       + streamingTable->hashOffset[m]);
    const struct FDRSHashEntry *ent = tab + h_ent;

    if (!has_bit(ent, h_low)) {
        return NULL;
    }

    return ent;
}
// Walks the chain of hash entries starting at ent and records the first one
// that confirms against the data.
//
// Each entry's state is tried with do_single_confirm(); on the first
// success the packed form of the confirmed state is stored in
// state_table[m]. If the chain ends (link == LINK_INVALID) without a
// confirm, state_table[m] is left as it was.
static really_inline
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
                      const struct FDRSTableHeader *streamingTable,
                      const struct FDRSHashEntry *ent, const MODES m) {
    assert(ent);
    assert(streamingTable->hashNBits[m]);

    const struct FDRSHashEntry *tab =
        (const struct FDRSHashEntry *)((const u8 *)streamingTable
                                       + streamingTable->hashOffset[m]);

    for (;;) {
        const u32 confirmed =
            do_single_confirm(streamingTable, a, ent->state, m);
        if (confirmed != 0) {
            state_table[m] = packStateVal(streamingTable, m, confirmed);
            return;
        }

        const u32 next = ent->link;
        if (next == LINK_INVALID) {
            return;
        }
        ent = &tab[next];
    }
}
// Computes the stream state for the data seen so far and writes it out.
//
// If buffer plus history hold at least N bytes, the last N bytes are hashed
// per mode, the matching hash entries are looked up and each chain is
// confirmed to obtain the state for that mode. Modes without a confirmed
// entry keep a state of zero. The packed result is always stored, unless
// stream_state is NULL, in which case nothing happens.
static really_inline
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
                  u8 *stream_state) {
    // without stream state there is nowhere to write to
    if (stream_state == NULL) {
        return;
    }

    // the streaming table hangs off the FDR structure
    const struct FDRSTableHeader *hdr = getSHDR(fdr);
    const u8 prefix_len = hdr->N;
    assert(prefix_len);

    u32 packed[MAX_MODES] = {0, 0};

    // with fewer than N bytes available there is nothing to hash, and the
    // state stays zero
    if (prefix_len <= a->len + a->len_history) {
        u32 hashes[MAX_MODES] = {0, 0};
        fdrFindStreamingHash(a, hdr, prefix_len, hashes);

        const struct FDRSHashEntry *caseful_ent =
            getEnt(hdr, hashes[CASEFUL], CASEFUL);
        const struct FDRSHashEntry *caseless_ent =
            getEnt(hdr, hashes[CASELESS], CASELESS);

        if (caseful_ent != NULL) {
            fdrPackStateMode(packed, a, hdr, caseful_ent, CASEFUL);
        }
        if (caseless_ent != NULL) {
            fdrPackStateMode(packed, a, hdr, caseless_ent, CASELESS);
        }
    }

    setStreamStates(hdr, stream_state, packed);
}
#endif

222
src/fdr/flood_compile.cpp Normal file
View File

@ -0,0 +1,222 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_confirm.h"
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>
using namespace std;
namespace ue2 {
namespace {
struct FloodComparator {
bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
}
};
}
/**
 * Returns true if the two characters differ. When caseless is set, both are
 * lower-cased with mytolower() before being compared.
 */
static
bool isDifferent(u8 oldC, u8 c, bool caseless) {
    return caseless ? (mytolower(oldC) != mytolower(c)) : (oldC != c);
}
/**
 * Raises the flood suffix length recorded for character c to at least
 * suffix + 1. The record is never lowered.
 */
static
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
    FDRFlood &fl = tmpFlood[c];
    const u32 wanted = suffix + 1;
    if (fl.suffix < wanted) {
        fl.suffix = wanted;
    }
    DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
}
/**
 * Registers literal lit as a flood literal for character c.
 *
 * The suffix length for c is raised to at least suffix + 1. If the record
 * still has room (fewer than FDR_FLOOD_MAX_IDS ids), the literal's id,
 * groups and length are appended and its groups are merged into allGroups.
 * Once the record is full, further literals only affect the suffix length.
 */
static
void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
              u32 suffix) {
    FDRFlood &fl = tmpFlood[c];

    const u32 wanted = suffix + 1;
    if (fl.suffix < wanted) {
        fl.suffix = wanted;
    }

    // when idCount gets to max_ids this flood no longer happens; it is
    // only incremented one more time to avoid arithmetic overflow
    if (fl.idCount >= FDR_FLOOD_MAX_IDS) {
        return;
    }

    fl.allGroups |= lit.groups;
    fl.ids[fl.idCount] = lit.id;
    fl.groups[fl.idCount] = lit.groups;
    fl.len[fl.idCount] = suffix;
    DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
                 c, fl.suffix, fl.idCount, suffix);
    fl.idCount++;
}
// Builds the flood-control table for a set of literals.
//
// One FDRFlood record is accumulated per byte value (N_CHARS of them). For
// each literal, the run of its final character at the end of the literal
// (and of its mask, if any) is measured: a literal that consists entirely of
// that character is added to the character's record via addFlood(),
// otherwise only the record's suffix length is raised via
// updateFloodSuffix(). Identical records are then de-duplicated.
//
// The result is a single zeroed, aligned allocation laid out as:
//   - N_CHARS u32 indices, one per byte value, followed by
//   - the distinct FDRFlood records those indices refer to.
//
// Returns the buffer and its total size in bytes (rounded up to 16).
// NOTE(review): the buffer is returned as a raw pointer; presumably the
// caller releases it with the deallocator matching aligned_zmalloc --
// confirm.
// NOTE(review): lit.s[litSize - 1] assumes every literal is non-empty --
// confirm that callers guarantee this.
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng) {
vector<FDRFlood> tmpFlood(N_CHARS);
u32 default_suffix = eng.getDefaultFloodSuffixLength();
// zero everything to avoid spurious distinctions in the compares
// (FloodComparator compares the raw bytes of the records)
memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));
for (u32 c = 0; c < N_CHARS; c++) {
tmpFlood[c].suffix = default_suffix;
}
for (const auto &lit : lits) {
DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
lit.nocase ? " (nocase)" : "");
u32 litSize = verify_u32(lit.s.size());
u32 maskSize = (u32)lit.msk.size();
// c is the candidate flood character: the last character of the literal
u8 c = lit.s[litSize - 1];
// caselessness only matters if the flood character is alphabetic
bool nocase = ourisalpha(c) ? lit.nocase : false;
// if the mask pins the case bit of the final byte, use the case that the
// mask demands and treat the character as case-sensitive from here on
if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
nocase = false;
}
u32 iEnd = MAX(litSize, maskSize);
u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length
// for case-less, or as a suffix length for case-sensitive;
u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix
// length;
// walk backwards from the end of the literal/mask; a suffix value that
// ends up below iEnd records the first position (counted from the end)
// that breaks the flood
for (u32 i = 0; i < iEnd; i++) {
if (i < litSize) {
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
i, c, lit.s[litSize - i - 1]);
upSuffix = MIN(upSuffix, i);
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
break;
}
}
if (i < maskSize) {
u8 m = lit.msk[maskSize - i - 1];
u8 cm = lit.cmp[maskSize - i - 1] & m;
if(nocase) {
// for caseless literals the upper- and lower-case forms are
// checked against the mask independently
if ((mytoupper(c) & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
i, mytoupper(c), cm);
upSuffix = MIN(upSuffix, i);
}
if ((mytolower(c) & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
i, mytolower(c), cm);
loSuffix = MIN(loSuffix, i);
}
// stop once both case forms have been broken
if (loSuffix != iEnd && upSuffix != iEnd) {
break;
}
} else if ((c & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
upSuffix = MIN(upSuffix, i);
break;
}
}
}
// upSuffix == iEnd means the whole literal is a run of c, so it can be
// reported directly by the flood code; otherwise only the suffix length
// for c is raised
if(upSuffix != iEnd) {
updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix);
} else {
addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix);
}
// caseless literals also update the record for the lower-case form
if (nocase) {
if(loSuffix != iEnd) {
updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
} else {
addFlood(tmpFlood, mytolower(c), lit, loSuffix);
}
}
}
#ifdef DEBUG
// dump every record that carries at least one literal id
for (u32 i = 0; i < N_CHARS; i++) {
FDRFlood &fl = tmpFlood[i];
if (!fl.idCount) {
continue;
}
printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
"%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
for (u32 j = 0; j < fl.idCount; j++) {
printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
fl.groups[j], fl.len[j]);
}
}
#endif
// group the byte values by identical flood record so that each distinct
// record is stored only once
map<FDRFlood, CharReach, FloodComparator> flood2chars;
for (u32 i = 0; i < N_CHARS; i++) {
FDRFlood fl = tmpFlood[i];
flood2chars[fl].set(i);
}
u32 nDistinctFloods = flood2chars.size();
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc
// layout: [u32 index per byte value][distinct FDRFlood records]
u32 *floodHeader = (u32 *)buf;
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);
u32 currentFloodIndex = 0;
for (const auto &m : flood2chars) {
const FDRFlood &fl = m.first;
const CharReach &cr = m.second;
layoutFlood[currentFloodIndex] = fl;
// point every byte value that shares this record at its index
for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
floodHeader[c] = currentFloodIndex;
}
currentFloodIndex++;
}
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
floodHeaderSize, floodStructSize, totalSize);
return make_pair((u8 *)buf, totalSize);
}
} // namespace ue2

347
src/fdr/flood_runtime.h Normal file
View File

@ -0,0 +1,347 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FLOOD_RUNTIME
#define FLOOD_RUNTIME
// Select the word width used by the flood scanning code below: 64-bit words
// when ARCH_64_BIT is defined, 32-bit words otherwise.
#if defined(ARCH_64_BIT)
#define FLOOD_64
#else
#define FLOOD_32
#endif
// Buffers shorter than this many bytes are never probed for floods
// (see nextFloodDetect).
#define FLOOD_MINIMUM_SIZE 256
// NOTE(review): presumably the initial flood back-off distance; it is not
// used in this header -- confirm against the callers.
#define FLOOD_BACKOFF_START 32
// Decides where the first flood-detection attempt should take place.
//
// Three places in the buffer are sampled (start, middle and near the end);
// at each, two adjacent aligned words are compared. If any pair is equal a
// flood is considered possible and buf + floodBackoff is returned.
// Otherwise, or if the buffer is shorter than FLOOD_MINIMUM_SIZE, buf + len
// is returned.
//
// All word reads are made through pointers rounded up to the word size.
static really_inline
const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
// if we don't have a flood at either the start or end,
// or have a very small buffer, don't bother with flood detection
if (len < FLOOD_MINIMUM_SIZE) {
return buf + len;
}
/* entry points in runtime.c prefetch relevant data */
#ifndef FLOOD_32
// 64-bit probes: start of buffer
u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
if (x11 == x12) {
return buf + floodBackoff;
}
// middle of buffer
u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
if (x21 == x22) {
return buf + floodBackoff;
}
// near the end of the buffer
u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
if (x31 == x32) {
return buf + floodBackoff;
}
#else
// 32-bit probes: start of buffer
u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
if (x11 == x12) {
return buf + floodBackoff;
}
// middle of buffer
u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
if (x21 == x22) {
return buf + floodBackoff;
}
// near the end of the buffer
u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
if (x31 == x32) {
return buf + floodBackoff;
}
#endif
// no sampled position looked like a flood
return buf + len;
}
// Attempts to detect and fast-forward over a flood (a long run of a single
// byte value) at the current scan position.
//
// Parameters:
//   fdr             - engine; its flood table is found at fdr->floodOffset
//   a               - runtime args (buffer, length, callback and context)
//   ptrPtr          - in/out: current scan pointer; advanced past the part
//                     of the flood that was handled here
//   tryFloodDetect  - position at which this attempt was triggered (only
//                     used for debug output before being recomputed)
//   floodBackoffPtr - in/out: back-off distance, doubled whenever the
//                     attempt does not find a usable flood
//   control         - in/out: callback control word; updated with the
//                     return value of every callback made
//   iterBytes       - bytes consumed per main-loop iteration by the caller;
//                     the scan pointer only advances in multiples of this
//
// Returns the position at which the next flood-detection attempt should be
// made (buf + mainLoopLen means "never again").
static really_inline
const u8 * floodDetect(const struct FDR * fdr,
const struct FDR_Runtime_Args * a,
const u8 ** ptrPtr,
const u8 * tryFloodDetect,
u32 * floodBackoffPtr,
hwlmcb_rv_t * control,
u32 iterBytes) {
DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
const u8 * buf = a->buf;
const size_t len = a->len;
HWLMCallback cb = a->cb;
void * ctxt = a->ctxt;
const u8 * ptr = *ptrPtr;
// tryFloodDetect is never put in places where unconditional
// reads a short distance forward or backward here
// (presumably: "... would be out of bounds" -- the original sentence is
// incomplete)
// TODO: rationale for this line needs to be rediscovered!!
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
const u32 i = ptr - buf;
u32 j = i;
// go from c to our FDRFlood structure
u8 c = buf[i];
const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
u32 fIdx = ((const u32 *)fBase)[c];
const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
const struct FDRFlood * fl = &fsb[fIdx];
// cmpVal is the byte c replicated across a whole word; probe is the
// aligned word at or just after the current position
#ifndef FLOOD_32
u64a cmpVal = c;
cmpVal |= cmpVal << 8;
cmpVal |= cmpVal << 16;
cmpVal |= cmpVal << 32;
u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
#else
u32 cmpVal = c;
cmpVal |= cmpVal << 8;
cmpVal |= cmpVal << 16;
u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
#endif
// not a run of c here, or this character's flood record is saturated:
// back off and try again later
if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
*floodBackoffPtr *= 2;
goto floodout;
}
// too close to the start of the buffer to step back by the suffix length
if (i < fl->suffix + 7) {
*floodBackoffPtr *= 2;
goto floodout;
}
// start scanning from 'suffix' bytes before the current position, aligned
// down to the word size; j ends up at the first position that is not part
// of the run (bounded by mainLoopLen)
j = i - fl->suffix;
#ifndef FLOOD_32
j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
for (; j + 32 < mainLoopLen; j += 32) {
u64a v = *(const u64a *)(buf + j);
u64a v2 = *(const u64a *)(buf + j + 8);
u64a v3 = *(const u64a *)(buf + j + 16);
u64a v4 = *(const u64a *)(buf + j + 24);
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
break;
}
}
for (; j + 8 < mainLoopLen; j += 8) {
u64a v = *(const u64a *)(buf + j);
if (v != cmpVal) {
break;
}
}
#else
j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
for (; j + 16 < mainLoopLen; j += 16) {
u32 v = *(const u32 *)(buf + j);
u32 v2 = *(const u32 *)(buf + j + 4);
u32 v3 = *(const u32 *)(buf + j + 8);
u32 v4 = *(const u32 *)(buf + j + 12);
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
break;
}
}
for (; j + 4 < mainLoopLen; j += 4) {
u32 v = *(const u32 *)(buf + j);
if (v != cmpVal) {
break;
}
}
#endif
// finish the scan one byte at a time
for (; j < mainLoopLen; j++) {
u8 v = *(const u8 *)(buf + j);
if (v != c) {
break;
}
}
if (j > i ) {
j--; // needed for some reaches
// only whole main-loop iterations are skipped
u32 itersAhead = (j-i)/iterBytes;
u32 floodSize = itersAhead*iterBytes;
DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
"*control %016llx fl->allGroups %016llx\n",
floodSize, j, i, fl->idCount, *control, fl->allGroups);
DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
mainLoopLen, len);
// report every flood literal at every position of the skipped region,
// for as long as the control word still has one of its groups switched on
if (fl->idCount && (*control & fl->allGroups)) {
switch (fl->idCount) {
#if !defined(FLOOD_DEBUG)
// Carefully unrolled code
case 1:
// one literal, four positions per loop iteration
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
t += 4) {
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
u32 len0 = fl->len[0] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
}
}
break;
case 2:
// two literals, four positions per loop iteration
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control =
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
}
}
break;
case 3:
// three literals, two positions per loop iteration
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
}
}
break;
default:
// slow generalized loop
// (four or more literals: the first four are unrolled, the rest are
// handled by the inner loops; two positions per loop iteration)
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
u32 len3 = fl->len[3] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
}
if (*control & fl->groups[3]) {
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
}
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
}
if (*control & fl->groups[3]) {
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
}
}
}
break;
#else
// Fallback for debugging
// (one position and one literal at a time, no unrolling)
default:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
}
}
}
#endif
}
}
// skip the caller's scan pointer over the region handled above
ptr += floodSize;
} else {
// the run did not extend past the current position
*floodBackoffPtr *= 2;
}
floodout:
// schedule the next attempt, or disable further attempts when we are
// within 128 bytes (plus back-off) of the end of the main loop region
if (j + *floodBackoffPtr < mainLoopLen - 128) {
tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
} else {
tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
}
*ptrPtr = ptr;
DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
ptr, tryFloodDetect);
return tryFloodDetect;
}
#endif

244
src/fdr/teddy.c Normal file
View File

@ -0,0 +1,244 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/*
 * Byte-mask lookup table used by vectoredLoad128().
 *
 * Row k (0 <= k <= 16) is 32 bytes long: 16 bytes of 0x00, then k bytes of
 * 0xff, then (16 - k) bytes of 0x00. A 16-byte load taken at offset (16 - s)
 * inside row k therefore yields s zero bytes followed by k 0xff bytes and
 * zero padding, so one load produces a mask selecting bytes [s, s + k).
 */
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
/*
 * Cautious 16-byte load for a window that may overlap either edge of the
 * buffer [lo, hi).
 *
 * p_mask is an output parameter: it receives a poison mask holding 0xff in
 * each byte position that was filled from inside [lo, hi) and 0x00 elsewhere.
 *
 * When the window starts before lo, up to MIN(lo - ptr, len_history,
 * nMasks - 1) trailing bytes of buf_history are copied into the positions
 * immediately preceding the buffer start. Those history bytes are present in
 * the returned data but are not selected by the mask. Every byte that is
 * neither history nor buffer data is returned as zero.
 */
UNUSED static really_inline
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history,
                     const u32 nMasks) {
    union {
        u8 val8[16];
        m128 val128;
    } block;
    block.val128 = zeroes128();

    if (ptr < lo) {
        // Window begins before the buffer: 'lead' bytes precede lo.
        const u32 lead = (u32)(lo - ptr);
        const u32 from_history = MIN(lead, MIN(len_history, nMasks - 1));
        const u32 stop = MIN(16, (u32)(hi - ptr));
        u32 pos = lead - from_history;

        // Fill the slots just before lo from the tail of the history buffer.
        while (ptr + pos < lo) {
            block.val8[pos] = buf_history[len_history - (lo - (ptr + pos))];
            pos++;
        }

        // Mask selects positions [lead, stop): the bytes taken from the buffer.
        *p_mask = loadu128((const void *)(p_mask_arr[stop - lead] + 16 - lead));

        // Remaining slots come straight from the buffer.
        while (pos < stop) {
            block.val8[pos] = ptr[pos];
            pos++;
        }
        return block.val128;
    }

    // Window begins inside the buffer; only the far edge can be a problem.
    const u32 remaining = (u32)(hi - ptr);
    if (remaining >= 16) {
        // A full vector is available: every byte is valid.
        *p_mask = load128((const void *)(p_mask_arr[16] + 16));
        return loadu128(ptr);
    }

    // Short tail: copy what exists and mask off the rest.
    *p_mask = load128((const void *)(p_mask_arr[remaining] + 16));
    for (u32 k = 0; k < remaining; k++) {
        block.val8[k] = ptr[k];
    }
    return block.val128;
}
#if defined(__AVX2__)
/*
 * 256-bit front end for vectoredLoad128(): performs one cautious 16-byte
 * load, then passes both the loaded data and its poison mask through
 * set2x128() to form the m256 return value and *p_mask respectively.
 * (set2x128 is defined elsewhere; presumably it duplicates the m128 into
 * both halves of the m256 - confirm against util/simd_utils.h.)
 */
UNUSED static really_inline
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                       const u8 *buf_history, size_t len_history,
                       const u32 nMasks) {
    m128 mask_half;
    const m128 data_half = vectoredLoad128(&mask_half, ptr, lo, hi,
                                           buf_history, len_history, nMasks);

    *p_mask = set2x128(mask_half);
    return set2x128(data_half);
}
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
/*
 * Cautious 32-byte load for a window that may overlap either edge of the
 * buffer [lo, hi).
 *
 * p_mask:       output; receives 0xff in every byte position that was filled
 *               from inside [lo, hi) and 0x00 elsewhere.
 * ptr:          start of the 32-byte window (may lie before lo).
 * lo, hi:       bounds of the readable buffer.
 * buf_history,
 * len_history:  history buffer; currently not read (see the ptr < lo branch).
 *
 * Returns the window contents, with every byte that lies outside [lo, hi)
 * set to zero.
 */
UNUSED static really_inline
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history) {
    union {
        u8 val8[32];
        m256 val256;
    } u;
    // The partial-copy paths below write only some of the 32 bytes. Zero the
    // vector first so the bytes they skip are defined rather than
    // indeterminate stack contents (vectoredLoad128 does the same).
    u.val256 = zeroes256();

    if (ptr >= lo) {
        u32 avail = (u32)(hi - ptr);
        if (avail >= 32) {
            // Whole window is inside the buffer: all bytes valid.
            *p_mask = load256((const void*)(p_mask_arr256[32] + 32));
            return loadu256(ptr);
        }
        // Short tail: copy the bytes that exist, mask off the remainder.
        *p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
        for (u32 i = 0; i < avail; i++) {
            u.val8[i] = ptr[i];
        }
    } else {
        // Window begins 'start' bytes before the buffer.
        // NOTE(review): unlike vectoredLoad128, no bytes are pulled from
        // history here. The loop starts at i == start, where ptr + i == lo,
        // so its condition is false on entry and it never executes. It is
        // kept so that buf_history/len_history remain referenced; confirm
        // whether history was ever intended to be used by this variant.
        u32 start = (u32)(lo - ptr);
        u32 i;
        for (i = start; ptr + i < lo; i++) {
            u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
        }
        u32 end = MIN(32, (u32)(hi - ptr));
        // Mask selects positions [start, end): the bytes taken from the buffer.
        *p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
        for (; i < end; i++) {
            u.val8[i] = ptr[i];
        }
    }
    return u.val256;
}
#endif // __AVX2__
/* Shorthand used by the generated Teddy code (teddy_autogen.c, included
 * below): P0(cnd) expands to unlikely(cnd). unlikely() is defined elsewhere;
 * presumably a branch-prediction hint marking cnd as rarely true. */
#define P0(cnd) unlikely(cnd)
#include "fdr.h"
#include "fdr_internal.h"
#include "flood_runtime.h"
#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
#include "fdr_loadval.h"
#include "util/bitutils.h"
#include "teddy_internal.h"
#include "teddy_autogen.c"

545
src/fdr/teddy_autogen.py Executable file
View File

@ -0,0 +1,545 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
# Generator for the C source of a 128-bit (m128) Teddy matcher function.
# Every produce_* method prints fragments of C to stdout; produce_code()
# emits the complete function, whose name is given by get_name().
# Helpers such as produce_header, produce_common_declarations,
# produce_flood_check, produce_footer and produce_confirm_base are not
# defined in this class; presumably they come from MatcherBase.
class MT(MatcherBase):
# Emit the C code that confirms the candidate matches held in the scalar C
# variable named var_name.
#   iter, offset - iter*16 + offset is added to the byte position derived
#                  from each set bit
#   bits         - width of var_name: 64 selects findAndClearLSB_64,
#                  anything else findAndClearLSB_32
#   cautious     - selects the VECTORING / NOT_CAUTIOUS argument passed to
#                  the emitted confirm call
# Packed matchers delegate entirely to produce_confirm_base().
def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
if self.packed:
print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False)
else:
# A single-mask matcher uses the dedicated confWithBit1 confirm function.
if self.num_masks == 1:
conf_func = "confWithBit1"
else:
conf_func = "confWithBitMany"
if cautious:
caution_string = "VECTORING"
else:
caution_string = "NOT_CAUTIOUS"
# Emitted C: loop over the set bits of var_name; each bit index yields a
# byte position (bit / num_buckets) and a bucket index (bit % num_buckets).
print " if (P0(!!%s)) {" % var_name
print " do {"
if bits == 64:
print " bit = findAndClearLSB_64(&%s);" % (var_name)
else:
print " bit = findAndClearLSB_32(&%s);" % (var_name)
print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset)
print " idx = bit %% %d;" % self.num_buckets
print " cf = confBase[idx];"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
print " if (!(fdrc->groups & *control))"
print " continue;"
print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string)
print " } while(P0(!!%s));" % var_name
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
print " *a->groups = controlVal;"
print " return HWLM_TERMINATED;"
print " }"
print " }"
# Emit declarations for the C locals used by max_iterations unrolled
# iterations: the poison mask, and per iteration the loaded value, its
# low/high nibble vectors, one result per mask (plus a shifted copy for
# every mask after the first), the combined result r_<iter> and the scalar
# parts it is split into (two u64a under ARCH_64_BIT, otherwise four u32).
def produce_needed_temporaries(self, max_iterations):
print " m128 p_mask;"
for iter in range(0, max_iterations):
print " m128 val_%d;" % iter
print " m128 val_%d_lo;" % iter
print " m128 val_%d_hi;" % iter
for x in range(self.num_masks):
print " m128 res_%d_%d;" % (iter, x)
if x != 0:
print " m128 res_shifted_%d_%d;" % (iter, x)
print " m128 r_%d;" % iter
print "#ifdef ARCH_64_BIT"
print " u64a r_%d_lopart;" % iter
print " u64a r_%d_hipart;" % iter
print "#else"
print " u32 r_%d_part1;" % iter
print " u32 r_%d_part2;" % iter
print " u32 r_%d_part3;" % iter
print " u32 r_%d_part4;" % iter
print "#endif"
# Emit the load and mask-lookup code for unrolled iteration number iter.
#   cautious - use vectoredLoad128 (which also fills p_mask) instead of a
#              plain load128, and AND the first result with p_mask
#   save_old - on the last iteration (iter == effective_num_iterations - 1)
#              store each res_<iter>_<x>, x > 0, into res_old_<x>
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
cautious, save_old):
if cautious:
print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
else:
print " val_%d = load128(ptr + %d);" % (iter, iter*16)
# Split each input byte into its low and high nibble.
print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter)
print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter)
print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter)
print
# Each mask x uses two tables, maskBase[2x] (low nibble) and maskBase[2x+1]
# (high nibble); their pshufb lookups are ANDed together.
for x in range(self.num_masks):
print Template("""
res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),
pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
# Masks after the first are combined via palignr with the previous result:
# res_old_<x> on iteration 0, otherwise the preceding iteration's result.
if x != 0:
if iter == 0:
print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
else:
print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
if x != 0 and iter == effective_num_iterations - 1 and save_old:
print " res_old_%d = res_%d_%d;" % (x, iter, x)
print
if cautious:
print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter)
else:
print " r_%d = res_%d_0;" % (iter, iter)
for x in range(1, self.num_masks):
print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
print
# Emit code that, when r_<iter> is non-zero, splits it into scalar parts and
# runs produce_confirm on each. Each setup tuple is
# (byte offset of the part, C variable name, C initialiser expression).
def produce_one_iteration_confirm(self, iter, confirmCautious):
setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
(8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ]
setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter),
(4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter),
(8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter),
(12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ]
print " if (P0(isnonzero128(r_%d))) {" % (iter)
print "#ifdef ARCH_64_BIT"
for (off, val, init) in setup64:
print " %s = %s;" % (val, init)
for (off, val, init) in setup64:
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
print "#else"
for (off, val, init) in setup32:
print " %s = %s;" % (val, init)
for (off, val, init) in setup32:
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
print "#endif"
print " }"
# Emit one full iteration: the state calculation followed by the confirm.
def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
confirmCautious = True, save_old = True):
self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old)
self.produce_one_iteration_confirm(iter, confirmCautious)
# Emit the complete matcher function: header, declarations and temporaries,
# pointer setup, then four phases -
#   1. a cautious iteration when ptr is below its 16-byte round-up,
#   2. one plain 16-byte iteration if more than 16 bytes remain,
#   3. the main loop of num_iterations unrolled iterations per pass, with a
#      prefetch and the flood check,
#   4. a cautious loop over whatever is left -
# followed by the footer.
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
# confBase sits after the mask tables: 32 bytes (two m128) per mask.
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
# res_old_<x> starts as all-ones for every mask after the first.
for x in range(self.num_masks):
if (x != 0):
print " m128 res_old_%d = ones128();" % x
print " m128 lomask = set16x8(0xf);"
print " if (ptr < mainStart) {"
print " ptr = mainStart - 16;"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
print " if (ptr + 16 < buf + len) {"
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range(self.num_iterations):
self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)
print " }"
print " for (; ptr < buf + len; ptr += 16) {"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " }"
print self.produce_footer()
# Print one brace-enclosed C initialiser row describing this matcher: id,
# arch target, num_masks, num_buckets, packed ("true"/"false"),
# conf_pull_back and conf_top_level_split.
def produce_compile_call(self):
packed_str = { False : "false", True : "true"}[self.packed]
print " { %d, %s, %d, %d, %s, %d, %d }," % (
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
self.conf_pull_back, self.conf_top_level_split)
# Build the generated function's name:
# fdr_exec_teddy_<arch>_msks<num_masks>[_pck][_fat], where "_pck" marks a
# packed matcher and "_fat" a 16-bucket one.
def get_name(self):
if self.packed:
pck_string = "_pck"
else:
pck_string = ""
if self.num_buckets == 16:
type_string = "_fat"
else:
type_string = ""
return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks, pck_string, type_string)
# Record the matcher configuration. num_iterations (the main-loop unroll
# factor) is fixed at 2; conf_top_level_split is 32 for packed matchers and
# 1 otherwise; conf_pull_back is 0.
def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
self.arch = arch
self.packed = packed
self.num_masks = num_masks
self.num_buckets = num_buckets
self.num_iterations = 2
if packed:
self.conf_top_level_split = 32
else:
self.conf_top_level_split = 1
self.conf_pull_back = 0
# 256-bit (m256) variant of MT. It overrides the temporaries, the top-level
# code, the state calculation and the confirm split so that the emitted C
# uses m256 values and the 256-bit helpers (load2x128, and256, vpshufb,
# vpalignr, ...). produce_confirm, get_name and __init__ are inherited.
class MTFat(MT):
# Emit declarations for the C locals used by max_iterations unrolled
# iterations; same set as MT but typed m256, with r_<iter> split into four
# u64a parts under ARCH_64_BIT or eight u32 parts otherwise.
def produce_needed_temporaries(self, max_iterations):
print " m256 p_mask;"
for iter in range(0, max_iterations):
print " m256 val_%d;" % iter
print " m256 val_%d_lo;" % iter
print " m256 val_%d_hi;" % iter
for x in range(self.num_masks):
print " m256 res_%d_%d;" % (iter, x)
if x != 0:
print " m256 res_shifted_%d_%d;" % (iter, x)
print " m256 r_%d;" % iter
print "#ifdef ARCH_64_BIT"
print " u64a r_%d_part1;" % iter
print " u64a r_%d_part2;" % iter
print " u64a r_%d_part3;" % iter
print " u64a r_%d_part4;" % iter
print "#else"
print " u32 r_%d_part1;" % iter
print " u32 r_%d_part2;" % iter
print " u32 r_%d_part3;" % iter
print " u32 r_%d_part4;" % iter
print " u32 r_%d_part5;" % iter
print " u32 r_%d_part6;" % iter
print " u32 r_%d_part7;" % iter
print " u32 r_%d_part8;" % iter
print "#endif"
# Emit the complete matcher function. The structure matches MT.produce_code
# (cautious head, one plain iteration, unrolled main loop with flood check,
# cautious tail); the differences are the m256 types and the confBase offset.
# The input pointer still advances 16 bytes per iteration.
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));"
# confBase sits after the mask tables: 64 bytes (two m256) per mask.
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
# res_old_<x> starts as all-ones for every mask after the first.
for x in range(self.num_masks):
if (x != 0):
print " m256 res_old_%d = ones256();" % x
print " m256 lomask = set32x8(0xf);"
print " if (ptr < mainStart) {"
print " ptr = mainStart - 16;"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
print " if (ptr + 16 < buf + len) {"
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range(self.num_iterations):
self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)
print " }"
print " for (; ptr < buf + len; ptr += 16) {"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " }"
print self.produce_footer()
# Emit the load and mask-lookup code for unrolled iteration number iter,
# using the 256-bit helpers. cautious selects vectoredLoad2x128 (which also
# fills p_mask) over load2x128; save_old stores the last iteration's
# results for masks x > 0 into res_old_<x>.
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
cautious, save_old):
if cautious:
print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
else:
print " val_%d = load2x128(ptr + %d);" % (iter, iter*16)
# Split each input byte into its low and high nibble.
print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter)
print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter)
print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter)
print
for x in range(self.num_masks):
print Template("""
res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),
vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
# Masks after the first are combined via vpalignr with the previous result:
# res_old_<x> on iteration 0, otherwise the preceding iteration's result.
if x != 0:
if iter == 0:
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
else:
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
if x != 0 and iter == effective_num_iterations - 1 and save_old:
print " res_old_%d = res_%d_%d;" % (x, iter, x)
print
if cautious:
print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter)
else:
print " r_%d = res_%d_0;" % (iter, iter)
for x in range(1, self.num_masks):
print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
print
# Emit code that, when r_<iter> is non-zero, splits it into scalar parts and
# runs produce_confirm on each. Each setup tuple is
# (byte offset of the part, C variable name, C initialiser expression).
# The emitted C first builds r = interleave256lo(r_<iter>, r_swap). The
# initialiser of the last part read from that value (part2 in the 64-bit
# list, part4 in the 32-bit list) carries a second C statement that
# reassigns r = interleave256hi(r_<iter>, r_swap), so the remaining parts
# are extracted from the high interleave.
def produce_one_iteration_confirm(self, iter, confirmCautious):
setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"),
(4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
(8, "r_%d_part3" % iter, "extractlow64from256(r)"),
(12, "r_%d_part4" % iter, "extract64from256(r, 1)") ]
setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"),
(2, "r_%d_part2" % iter, "extract32from256(r, 1)"),
(4, "r_%d_part3" % iter, "extract32from256(r, 2)"),
(6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
(8, "r_%d_part5" % iter, "extractlow32from256(r)"),
(10, "r_%d_part6" % iter, "extract32from256(r, 1)"),
(12, "r_%d_part7" % iter, "extract32from256(r, 2)"),
(14, "r_%d_part8" % iter, "extract32from256(r, 3)") ]
print " if (P0(isnonzero256(r_%d))) {" % (iter)
print " m256 r_swap = swap128in256(r_%d);" % (iter)
print " m256 r = interleave256lo(r_%d, r_swap);" % (iter)
print "#ifdef ARCH_64_BIT"
for (off, val, init) in setup64:
print " %s = %s;" % (val, init)
for (off, val, init) in setup64:
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
print "#else"
for (off, val, init) in setup32:
print " %s = %s;" % (val, init)
for (off, val, init) in setup32:
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
print "#endif"
print " }"
# Generator class whose produce_* methods print fragments of C source for a
# Teddy matcher variant built on 256-bit (m256) operations.
class MTFast(MatcherBase):
# Print the C confirm loop over the bit positions collected in bitArr[0..arrCnt).
# `cautious` only selects which literal ("VECTORING" or "NOT_CAUTIOUS") is
# passed to the emitted confWithBit / confWithBit1 call.
def produce_confirm(self, cautious):
if cautious:
cautious_str = "VECTORING"
else:
cautious_str = "NOT_CAUTIOUS"
print " for (u32 i = 0; i < arrCnt; i++) {"
print " byte = bitArr[i] / 8;"
if self.packed:
# NOTE(review): conf_split_mask is computed here but never referenced in
# the lines below; the emitted C uses the literal mask 0x1f instead.
conf_split_mask = IntegerType(32).constant_to_string(
self.conf_top_level_split - 1)
print " bitRem = bitArr[i] % 8;"
print " confSplit = *(ptr+byte) & 0x1f;"
print " idx = confSplit * %d + bitRem;" % self.num_buckets
print " cf = confBase[idx];"
print " if (!cf)"
print " continue;"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
print " if (!(fdrc->groups & *control))"
print " continue;"
print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str
else:
# Unpacked: the confirm offset is looked up directly by bit index modulo 8.
print " cf = confBase[bitArr[i] % 8];"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
print " *a->groups = controlVal;"
print " return HWLM_TERMINATED;"
print " }"
print " }"
# Print the C declarations of the temporaries used by the generated matcher.
# max_iterations is accepted but not read in this body; the declared set is
# fixed. r_0_part is u64a under ARCH_64_BIT and u32 otherwise.
def produce_needed_temporaries(self, max_iterations):
print " u32 arrCnt;"
print " u16 bitArr[512];"
print " m256 p_mask;"
print " m256 val_0;"
print " m256 val_0_lo;"
print " m256 val_0_hi;"
print " m256 res_0;"
print " m256 res_1;"
print " m128 lo_part;"
print " m128 hi_part;"
print "#ifdef ARCH_64_BIT"
print " u64a r_0_part;"
print "#else"
print " u32 r_0_part;"
print "#endif"
# Print a C loop that repeatedly clears the lowest set bit of r_0_part and
# appends its index, plus (bits * offset), to bitArr.
# `bits` picks the 64-bit helper when equal to 64 and the 32-bit helper otherwise.
def produce_bit_scan(self, offset, bits):
print " while (P0(!!r_0_part)) {"
if bits == 64:
print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset)
else:
print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset)
print " }"
# Print the C code that scans one m128 variable (named by var_name) for set bits.
# `offset` is expressed in 64-bit units: the 64-bit path scans two words at
# offset and offset + 1, and the 32-bit path scans four words at
# offset * 2 + step for step 0..3, so both paths cover the same bit range.
def produce_bit_check_128(self, var_name, offset):
print " if (P0(isnonzero128(%s))) {" % (var_name)
print "#ifdef ARCH_64_BIT"
print " r_0_part = movq(%s);" % (var_name)
self.produce_bit_scan(offset, 64)
print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name)
self.produce_bit_scan(offset + 1, 64)
print "#else"
print " r_0_part = movd(%s);" % (var_name)
self.produce_bit_scan(offset * 2, 32)
# Remaining three 32-bit words are reached by shifting right 4 bytes per step.
for step in range(1, 4):
print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4)
self.produce_bit_scan(offset * 2 + step, 32)
print "#endif"
print " }"
# Print the C code that checks res_<iter> for matches: the 256-bit result is
# split into its two 128-bit halves and each half is scanned into bitArr.
# When single_iter is true the emitted code also resets arrCnt first and the
# confirm loop is produced here (with the given `cautious` value); otherwise
# the caller is expected to do both around a batch of iterations.
def produce_bit_check_256(self, iter, single_iter, cautious):
print " if (P0(isnonzero256(res_%d))) {" % (iter)
if single_iter:
print " arrCnt = 0;"
print " lo_part = cast256to128(res_%d);" % (iter)
print " hi_part = cast256to128(swap128in256(res_%d));" % (iter)
# Each iteration owns four 64-bit units of bit indices: low half first.
self.produce_bit_check_128("lo_part", iter * 4)
self.produce_bit_check_128("hi_part", iter * 4 + 2)
if single_iter:
self.produce_confirm(cautious)
print " }"
# Print the C code that loads 32 bytes at ptr + iter * 32 and computes res_<iter>
# as the AND of two vpshufb lookups (maskLo on the low nibbles, maskHi on the
# high nibbles). With `cautious` the load goes through vectoredLoad256, which
# also fills p_mask, and the result is additionally ANDed with p_mask.
def produce_one_iteration_state_calc(self, iter, cautious):
if cautious:
print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32)
else:
print " val_0 = load256(ptr + %d);" % (iter * 32)
print " val_0_lo = and256(val_0, lomask);"
print " val_0_hi = rshift4x64(val_0, 4);"
print " val_0_hi = and256(val_0_hi, lomask);"
print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter)
if cautious:
print " res_%d = and256(res_%d, p_mask);" % (iter, iter)
# Print the complete C function for this matcher: header, common declarations,
# temporaries and setup, followed by four scanning phases and the footer.
# Phases, in emitted order:
#   1. if ptr is not 32-byte aligned, one cautious block ending at mainStart;
#   2. if more than 32 bytes remain, one single non-cautious block;
#   3. the main loop, consuming iterBytes (num_iterations * 32) per pass;
#   4. a cautious loop over whatever is left, 32 bytes at a time.
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
print " const m256 maskLo = set2x128(maskBase[0]);"
print " const m256 maskHi = set2x128(maskBase[1]);"
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);"
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 32)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
print " const m256 lomask = set32x8(0xf);"
# Phase 1: unaligned head, handled with a cautious (masked) load.
print " if (ptr < mainStart) {"
print " ptr = mainStart - 32;"
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " ptr += 32;"
print " }"
# Phase 2: one plain load, but the confirm is still emitted as cautious.
print " if (ptr + 32 < buf + len) {"
self.produce_one_iteration_state_calc(iter = 0, cautious = False)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " ptr += 32;"
print " }"
# Phase 3: main loop; all iterations share one bitArr and one confirm loop.
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range (0, self.num_iterations):
self.produce_one_iteration_state_calc(iter = iter, cautious = False)
print " arrCnt = 0;"
for iter in range (0, self.num_iterations):
self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
self.produce_confirm(cautious = False)
print " }"
# Phase 4: tail, again with cautious loads.
print " for (; ptr < buf + len; ptr += 32) {"
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " }"
print self.produce_footer()
# Return the name of the generated C function:
# fdr_exec_teddy_<arch name>_msks<num_masks>[_pck]_fast, where the "_pck"
# part is present only for packed variants.
def get_name(self):
if self.packed:
pck_string = "_pck"
else:
pck_string = ""
return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)
# Print one brace-enclosed C initialiser row describing this engine:
# { id, arch target, num_masks, num_buckets, packed, conf_pull_back,
#   conf_top_level_split }.
# NOTE(review): the field order presumably mirrors struct TeddyEngineDef -- confirm.
def produce_compile_call(self):
packed_str = { False : "false", True : "true"}[self.packed]
print " { %d, %s, %d, %d, %s, %d, %d }," % (
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
self.conf_pull_back, self.conf_top_level_split)
# Configure the generator: one mask, eight buckets, two 32-byte iterations per
# main-loop pass and no confirm pull-back. conf_top_level_split ends up as 32
# for packed variants and 1 otherwise.
def __init__(self, arch, packed = False):
self.arch = arch
self.packed = packed
self.num_masks = 1
self.num_buckets = 8
self.num_iterations = 2
# Defaults; both are assigned again below with the same or packed-specific values.
self.conf_top_level_split = 1
self.conf_pull_back = 0
if packed:
self.conf_top_level_split = 32
else:
self.conf_top_level_split = 1
self.conf_pull_back = 0

459
src/fdr/teddy_compile.cpp Normal file
View File

@ -0,0 +1,459 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "fdr_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/popcount.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include "teddy_compile.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
using namespace std;
namespace ue2 {
namespace {
//#define TEDDY_DEBUG
/**
 * \brief Builds a Teddy literal matcher table for one set of literals and one
 * engine description.
 *
 * The literal vector and the engine description are held by reference, so both
 * must outlive the compiler object.
 */
class TeddyCompiler : boost::noncopyable {
const TeddyEngineDescription &eng;
const vector<hwlmLiteral> &lits;
// forwarded unchanged to setupFullMultiConfs() when the confirm data is built
bool make_small;
public:
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
const TeddyEngineDescription &eng_in, bool make_small_in)
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
// Build the table; returns nullptr if the literals cannot be handled.
// `link` is an optional (buffer, size) blob appended to the table.
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
// Assign literals to buckets; returns false if they do not fit.
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
};
class TeddySet {
const vector<hwlmLiteral> &lits;
u32 len;
// nibbleSets is a series of bitfields over 16 predicates
// that represent the whether shufti nibble set
// so for num_masks = 4 we will represent our strings by
// 8 u16s in the vector that indicate what a shufti bucket
// would have to look like
vector<u16> nibbleSets;
set<u32> litIds;
public:
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
const set<u32> & getLits() const { return litIds; }
size_t litCount() const { return litIds.size(); }
bool operator<(const TeddySet & s) const {
return litIds < s.litIds;
}
#ifdef TEDDY_DEBUG
void dump() const {
printf("TS: ");
for (u32 i = 0; i < nibbleSets.size(); i++) {
printf("%04x ", (u32)nibbleSets[i]);
}
printf("\nnlits: %zu\nLit ids: ", litCount());
printf("Prob: %llu\n", probability());
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
printf("%u ", *i);
}
printf("\n");
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
}
#endif
bool identicalTail(const TeddySet & ts) const {
return nibbleSets == ts.nibbleSets;
}
void addLiteral(u32 lit_id) {
const string &s = lits[lit_id].s;
for (u32 i = 0; i < len; i++) {
if (i < s.size()) {
u8 c = s[s.size() - i - 1];
u8 c_hi = (c >> 4) & 0xf;
u8 c_lo = c & 0xf;
nibbleSets[i*2] = 1 << c_lo;
if (lits[lit_id].nocase && ourisalpha(c)) {
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
} else {
nibbleSets[i*2+1] = 1 << c_hi;
}
} else {
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
}
}
litIds.insert(lit_id);
}
void merge(const TeddySet &ts) {
for (u32 i = 0; i < nibbleSets.size(); i++) {
nibbleSets[i] |= ts.nibbleSets[i];
}
litIds.insert(ts.litIds.begin(), ts.litIds.end());
}
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
// likelihood of this TeddySet firing a first-stage accept
// if it was given a bucket of its own and random data were
// to be passed in
u64a probability() const {
u64a val = 1;
for (size_t i = 0; i < nibbleSets.size(); i++) {
val *= popcount32((u32)nibbleSets[i]);
}
return val;
}
// return a score based around the chance of this hitting times
// a small fixed cost + the cost of traversing some sort of followup
// (assumption is that the followup is linear)
u64a heuristic() const {
return probability() * (2+litCount());
}
bool isRunProne() const {
u16 lo_and = 0xffff;
u16 hi_and = 0xffff;
for (u32 i = 0; i < len; i++) {
lo_and &= nibbleSets[i*2];
hi_and &= nibbleSets[i*2+1];
}
// we're not flood-prone if there's no way to get
// through with a flood
if (!lo_and || !hi_and) {
return false;
}
return true;
}
};
/**
 * \brief Greedily merge per-literal TeddySets until they fit the engine's
 * buckets, then record which literals ended up in which bucket.
 *
 * \param bucketToLits output map from bucket index to the literal indices
 * assigned to it; entries are appended to.
 * \return false if more sets than eng.getNumBuckets() remain once no further
 * merge candidate can be found, true otherwise.
 */
bool TeddyCompiler::pack(map<BucketIndex,
std::vector<LiteralIndex> > &bucketToLits) {
// start with one single-literal set per literal
set<TeddySet> sts;
for (u32 i = 0; i < lits.size(); i++) {
TeddySet ts(lits, eng.numMasks);
ts.addLiteral(i);
sts.insert(ts);
}
// each pass picks one pair (m1, m2) to merge, or stops if there is none
while (1) {
#ifdef TEDDY_DEBUG
printf("Size %zu\n", sts.size());
for (set<TeddySet>::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
printf("\n"); i1->dump();
}
printf("\n===============================================\n");
#endif
set<TeddySet>::iterator m1 = sts.end(), m2 = sts.end();
u64a best = 0xffffffffffffffffULL;
// consider every unordered pair (s1, s2) of current sets
for (set<TeddySet>::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
set<TeddySet>::iterator i2 = i1;
++i2;
const TeddySet &s1 = *i1;
for (set<TeddySet>::iterator e2 = sts.end(); i2 != e2; ++i2) {
const TeddySet &s2 = *i2;
// be more conservative if we don't absolutely need to
// keep packing
if ((sts.size() <= eng.getNumBuckets()) &&
!s1.identicalTail(s2)) {
continue;
}
// score the would-be merged set against the two originals
TeddySet tmpSet(lits, eng.numMasks);
tmpSet.merge(s1);
tmpSet.merge(s2);
u64a newScore = tmpSet.heuristic();
u64a oldScore = s1.heuristic() + s2.heuristic();
if (newScore < oldScore) {
// merging strictly improves the score: take this pair
// NOTE(review): the break only leaves the inner loop; the outer
// scan carries on and a later pair with score < best can still
// replace m1/m2 -- confirm that this is intended.
m1 = i1;
m2 = i2;
break;
} else {
// otherwise remember the pair with the smallest score increase,
// skipping merges that would make the result flood-prone when
// the inputs were not both flood-prone
u64a score = newScore - oldScore;
bool oldRunProne = s1.isRunProne() && s2.isRunProne();
bool newRunProne = tmpSet.isRunProne();
if (newRunProne && !oldRunProne) {
continue;
}
if (score < best) {
best = score;
m1 = i1;
m2 = i2;
}
}
}
}
// if we didn't find a merge candidate, bail out
if ((m1 == sts.end()) || (m2 == sts.end())) {
break;
}
// do the merge
TeddySet nts(lits, eng.numMasks);
nts.merge(*m1);
nts.merge(*m2);
#ifdef TEDDY_DEBUG
printf("Merging\n");
printf("m1 = \n");
m1->dump();
printf("m2 = \n");
m2->dump();
printf("nts = \n");
nts.dump();
printf("\n===============================================\n");
#endif
// replace the two originals with the merged set
sts.erase(m1);
sts.erase(m2);
sts.insert(nts);
}
u32 cnt = 0;
// still too many sets for the available buckets: packing failed
if (sts.size() > eng.getNumBuckets()) {
return false;
}
// number the surviving sets in set order; each becomes one bucket
for (set<TeddySet>::const_iterator i = sts.begin(), e = sts.end(); i != e;
++i) {
for (set<u32>::const_iterator i2 = i->getLits().begin(),
e2 = i->getLits().end();
i2 != e2; ++i2) {
bucketToLits[cnt].push_back(*i2);
}
cnt++;
}
return true;
}
/**
 * \brief Build the Teddy table: header, nibble masks, confirm structures,
 * flood control data and (optionally) the linked blob, laid out in that order.
 *
 * \param link (buffer, size) pair. When link.first is non-null its contents
 * are copied to the end of the table and the buffer is released with
 * aligned_free(); on the early nullptr returns below it is left untouched.
 * \return the table, or nullptr if there are too many literals for the engine
 * or they cannot be packed into its buckets.
 */
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return nullptr;
}
#ifdef TEDDY_DEBUG
for (size_t i = 0; i < lits.size(); i++) {
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
lits[i].nocase ? "caseless" : "caseful");
for (size_t j = 0; j < lits[i].s.size(); j++) {
printf("%02x", ((u32)lits[i].s[j])&0xff);
}
printf("\n");
}
#endif
// decide the bucket for each literal: pack() when a confirm stage is
// needed, otherwise one literal per bucket
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
if(eng.needConfirm(lits)) {
if (!pack(bucketToLits)) {
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
lits.size(), eng.getNumBuckets());
return nullptr;
}
} else {
for (u32 i = 0; i < lits.size(); i++) {
bucketToLits[i].push_back(i);
}
}
// one byte of mask width per eight buckets; each mask has a lo and a hi
// table of 16 entries
u32 maskWidth = eng.getNumBuckets() / 8;
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +
confirmTmp.second +
floodControlTmp.second +
link.second, 16 * maskWidth);
aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy;
teddy->size = size;
teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits));
// confirm structures go directly after the masks
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.first, confirmTmp.second);
ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
// then the flood control data, whose offset is recorded in the header
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
// then the optional linked blob; link == 0 means "no link"
if (link.first) {
teddy->link = verify_u32(ptr - teddy_base);
memcpy(ptr, link.first, link.second);
aligned_free(link.first);
} else {
teddy->link = 0;
}
// fill in the nibble masks, which start right after the header
u8 *baseMsk = teddy_base + sizeof(Teddy);
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator
i = bucketToLits.begin(),
e = bucketToLits.end();
i != e; ++i) {
const u32 bucket_id = i->first;
const vector<LiteralIndex> &ids = i->second;
// this bucket's bit within its mask byte
const u8 bmsk = 1U << (bucket_id % 8);
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(),
e2 = ids.end();
i2 != e2; ++i2) {
LiteralIndex lit_id = *i2;
const hwlmLiteral & l = lits[lit_id];
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
const u32 sz = verify_u32(l.s.size());
// fill in masks
for (u32 j = 0; j < eng.numMasks; j++) {
// mask j looks at the character j places from the end of the literal
u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
// if we don't have a char at this position, fill in all 16
// locations in these masks with '1'
if (j >= sz) {
for (u32 n = 0; n < 16; n++) {
baseMsk[msk_id_lo * 16 + n] |= bmsk;
baseMsk[msk_id_hi * 16 + n] |= bmsk;
}
} else {
u8 c = l.s[sz - 1 - j];
// if we do have a char at this position
const u32 hiShift = 4;
u32 n_hi = (c >> hiShift) & 0xf;
u32 n_lo = c & 0xf;
if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
// literal carries a non-zero and/cmp mask byte here: accept
// every nibble value that agrees with cmp under msk
// NOTE(review): l.cmp is indexed with l.msk.size(), which
// assumes both vectors have the same length -- confirm.
u8 m = l.msk[l.msk.size() - 1 - j];
u8 m_hi = (m >> hiShift) & 0xf;
u8 m_lo = m & 0xf;
u8 cmp = l.cmp[l.msk.size() - 1 - j];
u8 cmp_lo = cmp & 0xf;
u8 cmp_hi = (cmp >> hiShift) & 0xf;
for (u8 cm = 0; cm < 0x10; cm++) {
if ((cm & m_lo) == (cmp_lo & m_lo)) {
baseMsk[msk_id_lo * 16 + cm] |= bmsk;
}
if ((cm & m_hi) == (cmp_hi & m_hi)) {
baseMsk[msk_id_hi * 16 + cm] |= bmsk;
}
}
} else{
if (l.nocase && ourisalpha(c)) {
// caseless letter: accept the high nibble with the 0x20
// case bit both cleared and set
u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
baseMsk[msk_id_hi * 16 + (n_hi & cmHalfClear)] |= bmsk;
baseMsk[msk_id_hi * 16 + (n_hi | cmHalfSet )] |= bmsk;
} else {
baseMsk[msk_id_hi * 16 + n_hi] |= bmsk;
}
baseMsk[msk_id_lo * 16 + n_lo] |= bmsk;
}
}
}
}
}
#ifdef TEDDY_DEBUG
// NOTE(review): this dump walks numMasks * 2 rows of 16 bytes, while the
// rows above are indexed with maskWidth folded in; for maskWidth > 1 it
// appears to show only part of the mask area -- confirm.
for (u32 i = 0; i < eng.numMasks * 2; i++) {
for (u32 j = 0; j < 16; j++) {
u8 val = baseMsk[i * 16 + j];
for (u32 k = 0; k < 8; k++) {
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
}
printf(" ");
}
printf("\n");
}
#endif
return fdr;
}
} // namespace
/**
 * \brief Pick a Teddy engine and build its table for the given literals.
 *
 * With hint == HINT_INVALID the engine is selected by chooseTeddyEngine();
 * any other hint is looked up as an engine id. Returns nullptr when no engine
 * is available or the build itself fails.
 */
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
                                              bool make_small, u32 hint,
                                              const target_t &target,
                                              pair<u8 *, size_t> link) {
    const unique_ptr<TeddyEngineDescription> engine =
        (hint == HINT_INVALID) ? chooseTeddyEngine(target, lits)
                               : getTeddyDescription(hint);

    if (engine) {
        TeddyCompiler compiler(lits, *engine, make_small);
        return compiler.build(link);
    }

    // no suitable engine description
    return nullptr;
}
} // namespace ue2

56
src/fdr/teddy_compile.h Normal file
View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: Teddy build API.
*/
#ifndef TEDDY_COMPILE_H
#define TEDDY_COMPILE_H
#include "ue2common.h"
#include "util/alloc.h"
#include <vector>
#include <utility> // std::pair
struct FDR;
struct target_t;
namespace ue2 {
struct hwlmLiteral;
/**
 * \brief Build a Teddy matcher table for a set of literals.
 *
 * \param lits literals to match.
 * \param make_small passed through to the confirm-structure builder.
 * \param hint engine id to use, or HINT_INVALID to have an engine chosen
 * for \p target.
 * \param target target the engine is chosen for when no hint is given.
 * \param link optional (buffer, size) blob to append to the table; pass a
 * null buffer for none.
 * \return the table, or a null pointer if no engine is available or the
 * literals cannot be built into a Teddy table.
 */
ue2::aligned_unique_ptr<FDR>
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target,
std::pair<u8 *, size_t> link);
} // namespace ue2
#endif // TEDDY_COMPILE_H

View File

@ -0,0 +1,207 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "ue2common.h"
#include "hs_internal.h"
#include "fdr_engine_description.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include "util/make_unique.h"
#include <cmath>
using namespace std;
namespace ue2 {
/**
 * \brief Construct a description from a static engine definition.
 *
 * The cpu feature flags are converted with targetByArchFeatures(); the id,
 * bucket count and confirm parameters are handed to the EngineDescription
 * base, and the mask count and packed flag are kept in this class.
 */
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
numMasks(def.numMasks), packed(def.packed) {}
/** \brief The default flood suffix length for Teddy is its number of masks. */
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
return numMasks;
}
/**
 * \brief True if matches reported by the masks must be confirmed.
 *
 * That is the case for packed engines, when there are more literals than
 * buckets, or when any literal is longer than the number of masks or carries
 * a mask of its own.
 */
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
    bool confirm = packed || lits.size() > getNumBuckets();

    // stop scanning as soon as one literal demands confirmation
    for (auto it = lits.begin(); !confirm && it != lits.end(); ++it) {
        confirm = it->s.size() > numMasks || !it->msk.empty();
    }

    return confirm;
}
#include "teddy_autogen_compiler.cpp"
static
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
size_t max_flood_tail = 0;
for (const auto &lit : vl) {
const string &s = lit.s;
assert(!s.empty());
size_t j;
for (j = 1; j < s.length(); j++) {
if (s[s.length() - j - 1] != s[s.length() - 1]) {
break;
}
}
max_flood_tail = max(max_flood_tail, j);
}
return max_flood_tail;
}
/**
 * \brief True if this Teddy engine is qualified to handle this set of literals
 * on this target.
 *
 * An engine is rejected when it is not valid on the target, when it has too
 * few buckets for the literals, when it has more masks than the longest
 * literal, or (for more than 40 literals) when over a fifth of the literals
 * are shorter than the number of masks.
 */
static
bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
               const size_t max_lit_len, const target_t &target) {
    const size_t num_lits = vl.size();

    if (!eng.isValidOnTarget(target)) {
        DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
        return false;
    }

    // unpacked engines need a bucket per literal
    if (!eng.packed && num_lits > eng.getNumBuckets()) {
        DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
                     eng.getID());
        return false;
    }

    // even packed engines have a per-bucket load limit
    if (num_lits > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
                     eng.getID());
        return false;
    }

    if (max_lit_len < eng.numMasks) {
        DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
                     eng.getID(), max_lit_len);
        return false;
    }

    // the short-literal check only applies to larger literal sets
    if (num_lits <= 40) {
        return true;
    }

    u32 n_small_lits = 0;
    for (auto it = vl.begin(); it != vl.end(); ++it) {
        if (it->s.length() < eng.numMasks) {
            ++n_small_lits;
        }
    }

    if (n_small_lits * 5 > num_lits) {
        DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
        return false;
    }

    return true;
}
/**
 * \brief Score every Teddy engine that is allowed for these literals on this
 * target and return a copy of the best one, or nullptr if none is allowed.
 *
 * On equal scores the engine that appears first in the description list wins.
 */
unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
    vector<TeddyEngineDescription> candidates;
    getTeddyDescriptions(&candidates);

    const size_t max_lit_len = maxLen(vl);
    const size_t max_flood_tail = maxFloodTailLen(vl);
    DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
                 max_lit_len, max_flood_tail);

    const TeddyEngineDescription *best = nullptr;
    u32 best_score = 0;

    for (const TeddyEngineDescription &eng : candidates) {
        if (!isAllowed(vl, eng, max_lit_len, target)) {
            continue;
        }

        // We prefer unpacked Teddy models.
        u32 score = eng.packed ? 0 : 100;

        if (vl.size() <= 4 * eng.getNumBuckets()) {
            // Lightly loaded cases are great.
            score += 100;
        } else {
            // If we're heavily loaded, we prefer to have more masks.
            score += eng.numMasks * 4;
        }

        // We want enough masks to avoid becoming flood-prone.
        if (eng.numMasks > max_flood_tail) {
            score += 50;
        }

        // We prefer having 3 masks. 3 is just right.
        score += 6 / (abs(3 - (int)eng.numMasks) + 1);

        // We prefer cheaper, smaller Teddy models.
        score += 16 / eng.getNumBuckets();

        DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.numMasks, eng.getNumBuckets(),
                     eng.packed ? 1U : 0U, score);

        // strictly-greater comparison keeps the earliest engine on ties
        if (!best || score > best_score) {
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<TeddyEngineDescription>(*best);
}
/**
 * \brief Return a copy of the Teddy engine description whose id equals
 * \p engineID, or nullptr if there is no such engine.
 */
unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
    vector<TeddyEngineDescription> all;
    getTeddyDescriptions(&all);

    for (auto it = all.begin(); it != all.end(); ++it) {
        if (it->getID() == engineID) {
            return ue2::make_unique<TeddyEngineDescription>(*it);
        }
    }

    return nullptr;
}
} // namespace ue2

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TEDDY_ENGINE_DESCRIPTION_H
#define TEDDY_ENGINE_DESCRIPTION_H
#include "engine_description.h"
#include "fdr_compile_internal.h"
#include <memory>
#include <vector>
namespace ue2 {
#define TEDDY_BUCKET_LOAD 6
/**
 * \brief Plain description of one Teddy engine variant, from which a
 * TeddyEngineDescription is constructed.
 */
struct TeddyEngineDef {
u32 id; // engine id, reported by getID() on the description
u64a cpu_features; // feature flags, converted with targetByArchFeatures()
u32 numMasks; // number of masks the engine uses
u32 numBuckets; // number of buckets literals can be assigned to
bool packed; // packed engines may share a bucket between literals
u32 confirmPullBackDistance; // handed to the EngineDescription base
u32 confirmTopLevelSplit; // handed to the EngineDescription base
};
/**
 * \brief EngineDescription for a Teddy engine, adding the mask count and the
 * packed flag to the common engine properties.
 */
class TeddyEngineDescription : public EngineDescription {
public:
u32 numMasks; // number of masks the engine uses
bool packed; // true if several literals may share a bucket
explicit TeddyEngineDescription(const TeddyEngineDef &def);
// Returns numMasks.
u32 getDefaultFloodSuffixLength() const override;
// True if mask hits for these literals need a confirm step: the engine is
// packed, there are more literals than buckets, or some literal is longer
// than numMasks or has a mask of its own.
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
};
/**
 * \brief Choose the best-scoring Teddy engine that can handle the literals
 * \p vl on \p target; returns a null pointer if no engine qualifies.
 */
std::unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
/** \brief Look up a Teddy engine by id; returns a null pointer if not found. */
std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
/** \brief Fill \p out with the known Teddy engine descriptions. */
void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
} // namespace ue2
#endif

46
src/fdr/teddy_internal.h Normal file
View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TEDDY_INTERNAL_H
#define TEDDY_INTERNAL_H
#include "ue2common.h"
// first part is compatible with an FDR
/**
 * \brief Header at the start of a Teddy table.
 *
 * The table builder places the nibble masks directly after this header,
 * followed by the confirm structures, the flood control data and the
 * optional linked blob. Offsets are in bytes from the start of the header.
 */
struct Teddy {
u32 engineID; // id of the engine the table was built for
u32 size; // total size of the table in bytes
u32 maxStringLen; // length of the longest literal
u32 floodOffset; // offset of the flood control data
u32 link; // offset of the linked blob, or 0 if there is none
u32 pad1; // padding
u32 pad2; // padding
u32 pad3; // padding
};
#endif

374
src/grey.cpp Normal file
View File

@ -0,0 +1,374 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "grey.h"
#include "ue2common.h"
#include <algorithm>
#include <cstdlib> // exit
#include <string>
#include <vector>
#define DEFAULT_MAX_HISTORY 60
using namespace std;
namespace ue2 {
/**
 * \brief Construct a Grey holding the default value of every tunable.
 *
 * The initialisers below appear in the same order as the members are declared
 * in struct Grey (grey.h); keep the two lists in step when adding a member.
 * dumpPath has no initialiser here and is therefore default-constructed as an
 * empty string.
 */
Grey::Grey(void) :
optimiseComponentTree(true),
performGraphSimplification(true),
prefilterReductions(true),
removeEdgeRedundancy(true),
allowGough(true),
allowHaigLit(true),
allowLitHaig(true),
allowLbr(true),
allowMcClellan(true),
allowPuff(true),
allowRose(true),
allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true),
allowSidecar(true),
allowAnchoredAcyclic(true),
allowSmallLiteralSet(true),
allowCastle(true),
allowDecoratedLiteral(true),
allowNoodle(true),
fdrAllowTeddy(true),
puffImproveHead(true),
castleExclusive(true),
mergeSEP(true), /* short exhaustible passthroughs */
mergeRose(true), // roses inside rose
mergeSuffixes(true), // suffix nfas inside rose
mergeOutfixes(true),
onlyOneOutfix(false),
allowShermanStates(true),
allowMcClellan8(true),
highlanderPruneDFA(true),
minimizeDFA(true),
accelerateDFA(true),
accelerateNFA(true),
reverseAccelerate(true),
squashNFA(true),
compressNFAState(true),
numberNFAStatesWrong(false), /* debugging only */
highlanderSquash(true),
allowZombies(true),
floodAsPuffette(false),
nfaForceSize(0),
nfaForceShifts(0),
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
minHistoryAvailable(0), /* debugging only */
maxAnchoredRegion(63), /* for rose's atable to run over */
minRoseLiteralLength(3),
minRoseNetflowLiteralLength(2),
maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
minExtBoundedRepeatSize(32),
goughCopyPropagate(true),
goughRegisterAllocate(true),
shortcutLiterals(true),
roseGraphReduction(true),
roseRoleAliasing(true),
roseMasks(true),
roseMaxBadLeafLength(5),
roseConvertInfBadLeaves(true),
roseConvertFloodProneSuffixes(true),
roseMergeRosesDuringAliasing(true),
roseMultiTopRoses(true),
roseHamsterMasks(true),
roseLookaroundMasks(true),
roseMcClellanPrefix(1),
roseMcClellanSuffix(1),
roseMcClellanOutfix(2),
roseTransformDelay(true),
roseDesiredSplit(4),
earlyMcClellanPrefix(true),
earlyMcClellanInfix(true),
earlyMcClellanSuffix(true),
allowCountingMiracles(true),
allowSomChain(true),
somMaxRevNfaLength(126),
hamsterAccelForward(true),
hamsterAccelReverse(false),
miracleHistoryBonus(16),
equivalenceEnable(true),
allowSmallWrite(true), // McClellan dfas for small patterns
smallWriteLargestBuffer(70), // largest buffer that can be
// considered a small write
// all blocks larger than this
// are given to rose &co
smallWriteLargestBufferBad(35),
limitSmallWriteOutfixSize(1048576), // 1 MB
dumpFlags(0),
// dumpPath (declared between dumpFlags and the limits) is deliberately absent:
// std::string default-constructs to empty.
limitPatternCount(8000000), // 8M patterns
limitPatternLength(16000), // 16K bytes
limitGraphVertices(500000), // 500K vertices
limitGraphEdges(1000000), // 1M edges
limitReportCount(4*8000000),
limitLiteralCount(8000000), // 8M literals
limitLiteralLength(16000),
limitLiteralMatcherChars(1073741824), // 1 GB
limitLiteralMatcherSize(1073741824), // 1 GB
limitRoseRoleCount(4*8000000),
limitRoseEngineCount(8000000), // 8M engines
limitRoseAnchoredSize(1073741824), // 1 GB
limitEngineSize(1073741824), // 1 GB
limitDFASize(1073741824), // 1 GB
limitNFASize(1048576), // 1 MB
limitLBRSize(1048576) // 1 MB
{
// Sanity check on the default chosen above; applyGreyOverrides() repeats the
// same check after user overrides have been applied.
assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
} // namespace ue2
#ifndef RELEASE_BUILD
#include <boost/lexical_cast.hpp>
using boost::lexical_cast;
namespace ue2 {
/**
 * \brief Apply a string of debugging overrides to a Grey structure.
 *
 * The override string is a sequence of "key:value" pairs separated by ';',
 * for example "allowRose:0;maxHistoryAvailable:100". Every value is parsed as
 * an unsigned int and assigned to the Grey member named by the key. A handful
 * of compound keys ("simple_som", "forceOutfixesNFA", "forceOutfixesDFA",
 * "forceOutfixes") set several members at once; their value still has to be
 * a valid unsigned int but is otherwise ignored.
 *
 * The special string "help" (or "help:") prints every overridable member
 * together with its default value and changes nothing.
 *
 * Parsing stops silently at the first segment that contains no ':'.
 *
 * If an unknown key or a value that is not an unsigned integer is seen, a
 * diagnostic and the help listing are printed and the process exits with
 * status 1. Overrides preceding the bad entry have already been applied to
 * \p g by then.
 *
 * \param g Grey structure to modify.
 * \param s Override string as described above.
 */
void applyGreyOverrides(Grey *g, const string &s) {
    string::const_iterator p = s.begin();
    string::const_iterator pe = s.end();
    string help = "help:0";
    bool invalid_key_seen = false;
    Grey defaultg; // source of the default values shown by "help"

    if (s == "help" || s == "help:") {
        printf("Valid grey overrides:\n");
        // Re-point the parser at a synthetic "help:0" entry so that the
        // G_UPDATE table below prints one line per member.
        p = help.begin();
        pe = help.end();
    }

    while (p != pe) {
        string::const_iterator ke = find(p, pe, ':');

        if (ke == pe) {
            break; // no "key:" left in the remaining text
        }

        string key(p, ke);

        string::const_iterator ve = find(ke, pe, ';');
        const string value_str(ke + 1, ve);

        // lexical_cast throws on empty or non-numeric text (e.g.
        // "allowRose:true"); treat that like any other invalid override
        // instead of letting the exception escape to the caller.
        unsigned int value = 0;
        try {
            value = lexical_cast<unsigned int>(value_str);
        } catch (const boost::bad_lexical_cast &) {
            printf("Invalid grey override value %s:%s\n", key.c_str(),
                   value_str.c_str());
            invalid_key_seen = true;
            break;
        }
        bool done = false;

        /* surely there exists a nice template to go with this macro to make
         * all the boring code disappear */
#define G_UPDATE(k) do {                                                \
        if (key == ""#k) { g->k = value; done = true; }                 \
        if (key == "help") {                                            \
            printf("\t%-30s\tdefault: %s\n", #k,                        \
                   lexical_cast<string>(defaultg.k).c_str());           \
        }                                                               \
    } while (0)

        G_UPDATE(optimiseComponentTree);
        G_UPDATE(performGraphSimplification);
        G_UPDATE(prefilterReductions);
        G_UPDATE(removeEdgeRedundancy);
        G_UPDATE(allowGough);
        G_UPDATE(allowHaigLit);
        G_UPDATE(allowLitHaig);
        G_UPDATE(allowLbr);
        G_UPDATE(allowMcClellan);
        G_UPDATE(allowPuff);
        G_UPDATE(allowRose);
        G_UPDATE(allowExtendedNFA);
        G_UPDATE(allowLimExNFA);
        G_UPDATE(allowSidecar);
        G_UPDATE(allowAnchoredAcyclic);
        G_UPDATE(allowSmallLiteralSet);
        G_UPDATE(allowCastle);
        G_UPDATE(allowDecoratedLiteral);
        G_UPDATE(allowNoodle);
        G_UPDATE(fdrAllowTeddy);
        G_UPDATE(puffImproveHead);
        G_UPDATE(castleExclusive);
        G_UPDATE(mergeSEP);
        G_UPDATE(mergeRose);
        G_UPDATE(mergeSuffixes);
        G_UPDATE(mergeOutfixes);
        G_UPDATE(onlyOneOutfix);
        G_UPDATE(allowShermanStates);
        G_UPDATE(allowMcClellan8);
        G_UPDATE(highlanderPruneDFA);
        G_UPDATE(minimizeDFA);
        G_UPDATE(accelerateDFA);
        G_UPDATE(accelerateNFA);
        G_UPDATE(reverseAccelerate);
        G_UPDATE(squashNFA);
        G_UPDATE(compressNFAState);
        G_UPDATE(numberNFAStatesWrong);
        G_UPDATE(allowZombies);
        G_UPDATE(floodAsPuffette);
        G_UPDATE(nfaForceSize);
        G_UPDATE(nfaForceShifts);
        G_UPDATE(highlanderSquash);
        G_UPDATE(maxHistoryAvailable);
        G_UPDATE(minHistoryAvailable);
        G_UPDATE(maxAnchoredRegion);
        G_UPDATE(minRoseLiteralLength);
        G_UPDATE(minRoseNetflowLiteralLength);
        G_UPDATE(maxRoseNetflowEdges);
        G_UPDATE(minExtBoundedRepeatSize);
        G_UPDATE(goughCopyPropagate);
        G_UPDATE(goughRegisterAllocate);
        G_UPDATE(shortcutLiterals);
        G_UPDATE(roseGraphReduction);
        G_UPDATE(roseRoleAliasing);
        G_UPDATE(roseMasks);
        G_UPDATE(roseMaxBadLeafLength);
        G_UPDATE(roseConvertInfBadLeaves);
        G_UPDATE(roseConvertFloodProneSuffixes);
        G_UPDATE(roseMergeRosesDuringAliasing);
        G_UPDATE(roseMultiTopRoses);
        G_UPDATE(roseHamsterMasks);
        G_UPDATE(roseLookaroundMasks);
        G_UPDATE(roseMcClellanPrefix);
        G_UPDATE(roseMcClellanSuffix);
        G_UPDATE(roseMcClellanOutfix);
        G_UPDATE(roseTransformDelay);
        G_UPDATE(roseDesiredSplit);
        G_UPDATE(earlyMcClellanPrefix);
        G_UPDATE(earlyMcClellanInfix);
        G_UPDATE(earlyMcClellanSuffix);
        G_UPDATE(allowSomChain);
        G_UPDATE(allowCountingMiracles);
        G_UPDATE(somMaxRevNfaLength);
        G_UPDATE(hamsterAccelForward);
        G_UPDATE(hamsterAccelReverse);
        G_UPDATE(miracleHistoryBonus);
        G_UPDATE(equivalenceEnable);
        G_UPDATE(allowSmallWrite);
        G_UPDATE(smallWriteLargestBuffer);
        G_UPDATE(smallWriteLargestBufferBad);
        G_UPDATE(limitSmallWriteOutfixSize);
        G_UPDATE(limitPatternCount);
        G_UPDATE(limitPatternLength);
        G_UPDATE(limitGraphVertices);
        G_UPDATE(limitGraphEdges);
        G_UPDATE(limitReportCount);
        G_UPDATE(limitLiteralCount);
        G_UPDATE(limitLiteralLength);
        G_UPDATE(limitLiteralMatcherChars);
        G_UPDATE(limitLiteralMatcherSize);
        G_UPDATE(limitRoseRoleCount);
        G_UPDATE(limitRoseEngineCount);
        G_UPDATE(limitRoseAnchoredSize);
        G_UPDATE(limitEngineSize);
        G_UPDATE(limitDFASize);
        G_UPDATE(limitNFASize);
        G_UPDATE(limitLBRSize);

#undef G_UPDATE

        // Compound keys: each one sets a fixed combination of members.
        if (key == "simple_som") {
            g->allowHaigLit = false;
            g->allowLitHaig = false;
            g->allowSomChain = false;
            g->somMaxRevNfaLength = 0;
            done = true;
        }
        if (key == "forceOutfixesNFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = false;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixesDFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = false;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixes") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = true;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }

        if (!done && key != "help") {
            printf("Invalid grey override key %s:%u\n", key.c_str(), value);
            invalid_key_seen = true;
        }

        // Step over the ';' separator, if there is one.
        p = ve;

        if (p != pe) {
            ++p;
        }
    }

    if (invalid_key_seen) {
        // Show the user what would have been accepted, then give up.
        applyGreyOverrides(g, "help");
        exit(1);
    }

    assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
} // namespace ue2
#endif

197
src/grey.h Normal file
View File

@ -0,0 +1,197 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GREY_H
#define GREY_H
#include <vector>
#include <string>
#include "ue2common.h"
namespace ue2 {
/**
 * \brief Collection of compile-time tunables and resource limits.
 *
 * Default values are assigned by the constructor in grey.cpp, whose
 * initialiser list follows the declaration order used here; add new members
 * to both places in the same position. In builds without RELEASE_BUILD,
 * applyGreyOverrides() can change most members from a "key:value" string.
 */
struct Grey {
Grey(void);
bool optimiseComponentTree;
bool performGraphSimplification;
bool prefilterReductions;
bool removeEdgeRedundancy;
bool allowGough;
bool allowHaigLit;
bool allowLitHaig;
bool allowLbr;
bool allowMcClellan;
bool allowPuff;
bool allowRose;
bool allowExtendedNFA;
bool allowLimExNFA;
bool allowSidecar;
bool allowAnchoredAcyclic;
bool allowSmallLiteralSet;
bool allowCastle;
bool allowDecoratedLiteral;
bool allowNoodle;
bool fdrAllowTeddy;
bool puffImproveHead;
bool castleExclusive; // enable castle mutual exclusion analysis
bool mergeSEP;
bool mergeRose;
bool mergeSuffixes;
bool mergeOutfixes;
bool onlyOneOutfix; // if > 1 outfix, fail compile
bool allowShermanStates;
bool allowMcClellan8;
bool highlanderPruneDFA;
bool minimizeDFA;
bool accelerateDFA;
bool accelerateNFA;
bool reverseAccelerate;
bool squashNFA;
bool compressNFAState;
bool numberNFAStatesWrong;
bool highlanderSquash;
bool allowZombies;
bool floodAsPuffette;
u32 nfaForceSize;
u32 nfaForceShifts;
u32 maxHistoryAvailable;
u32 minHistoryAvailable;
u32 maxAnchoredRegion; // must stay below 64 (asserted in grey.cpp)
u32 minRoseLiteralLength;
u32 minRoseNetflowLiteralLength;
u32 maxRoseNetflowEdges;
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
bool goughCopyPropagate;
bool goughRegisterAllocate;
bool shortcutLiterals;
bool roseGraphReduction;
bool roseRoleAliasing;
bool roseMasks;
u32 roseMaxBadLeafLength;
bool roseConvertInfBadLeaves;
bool roseConvertFloodProneSuffixes;
bool roseMergeRosesDuringAliasing;
bool roseMultiTopRoses;
bool roseHamsterMasks;
bool roseLookaroundMasks;
u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
* always */
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
bool roseTransformDelay;
u32 roseDesiredSplit;
bool earlyMcClellanPrefix;
bool earlyMcClellanInfix;
bool earlyMcClellanSuffix;
bool allowCountingMiracles;
bool allowSomChain;
u32 somMaxRevNfaLength;
bool hamsterAccelForward;
bool hamsterAccelReverse; // currently not implemented
u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
* something dignified */
bool equivalenceEnable;
// SmallWrite engine
bool allowSmallWrite;
u32 smallWriteLargestBuffer; // largest buffer that can be small write
// NOTE(review): the comment on the next member duplicates the one above; the
// distinction implied by the "Bad" suffix is not stated here -- confirm
// against the users of this field.
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
// Bit flags combined into dumpFlags below.
enum DumpFlags {
DUMP_NONE = 0,
DUMP_BASICS = 1 << 0, // Dump basic textual data
DUMP_PARSE = 1 << 1, // Dump component tree to .txt
DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
DUMP_IMPL = 1 << 3 // Dump implementation graphs
};
u32 dumpFlags;
std::string dumpPath; // not set by the constructor's initialiser list
/* Resource limits. These are somewhat arbitrary, but are intended to bound
* the input to many of our internal structures. Exceeding one of these
* limits will cause an error to be returned to the user.
*
* NOTE: Raising these limits may cause smoke to come out of parts of
* the runtime. */
u32 limitPatternCount; //!< max number of patterns
u32 limitPatternLength; //!< max number of characters in a regex
u32 limitGraphVertices; //!< max number of states in built NFA graph
u32 limitGraphEdges; //!< max number of edges in built NFA graph
u32 limitReportCount; //!< max number of ReportIDs allocated internally
// HWLM literal matcher limits.
u32 limitLiteralCount; //!< max number of literals in an HWLM table
u32 limitLiteralLength; //!< max number of characters in a literal
u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
// Rose limits.
u32 limitRoseRoleCount; //!< max number of Rose roles
u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
// Engine (DFA/NFA/etc) limits.
u32 limitEngineSize; //!< max size of an engine (in bytes)
u32 limitDFASize; //!< max size of a DFA (in bytes)
u32 limitNFASize; //!< max size of an NFA (in bytes)
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
};
#ifndef RELEASE_BUILD
/**
 * \brief Apply a ';'-separated list of "key:value" overrides to \p g.
 *
 * Only declared when RELEASE_BUILD is not defined. <string> is already
 * included at the top of this header, outside namespace ue2; standard headers
 * must not be included from inside a namespace, so no include is repeated
 * here.
 */
void applyGreyOverrides(Grey *g, const std::string &overrides);
#endif
} // namespace ue2
#endif

419
src/hs.cpp Normal file
View File

@ -0,0 +1,419 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end, including public API calls for compilation.
*/
#include "allocator.h"
#include "ue2common.h"
#include "grey.h"
#include "hs_compile.h"
#include "hs_internal.h"
#include "database.h"
#include "compiler/compiler.h"
#include "compiler/error.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_expr_info.h"
#include "parser/parse_error.h"
#include "parser/Parser.h"
#include "parser/prefilter.h"
#include "util/compile_error.h"
#include "util/cpuid_flags.h"
#include "util/depth.h"
#include "util/popcount.h"
#include "util/target_info.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <limits.h>
#include <string>
#include <vector>
using namespace std;
using namespace ue2;
/**
 * \brief Quick sanity check on a mode word.
 *
 * \return true when \p mode contains no bits other than the HS_MODE_ flags
 * listed below.
 */
static
bool validModeFlags(unsigned int mode) {
    const unsigned knownBits = HS_MODE_BLOCK
                             | HS_MODE_STREAM
                             | HS_MODE_VECTORED
                             | HS_MODE_SOM_HORIZON_LARGE
                             | HS_MODE_SOM_HORIZON_MEDIUM
                             | HS_MODE_SOM_HORIZON_SMALL;

    // Anything left after masking off the known bits is unexpected.
    const unsigned strayBits = mode & ~knownBits;
    return strayBits == 0;
}
/**
 * \brief Validate the combination of mode flags passed to a compile call.
 *
 * Rules enforced, in order:
 *  - only recognised bits may be set;
 *  - exactly one of block, streaming or vectored mode must be selected;
 *  - a SOM horizon flag requires HS_MODE_STREAM, and at most one horizon
 *    flag may be given.
 *
 * \param mode       Mode word supplied by the caller.
 * \param comp_error Receives a newly generated compile error on failure;
 *                   not written on success.
 * \return true if the mode is acceptable, false otherwise.
 */
static
bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
    // Reject unknown bits before reasoning about combinations.
    if (!validModeFlags(mode)) {
        *comp_error = generateCompileError("Invalid parameter: "
                                           "unrecognised mode flags.", -1);
        return false;
    }

    // Exactly one scanning mode has to be chosen.
    const unsigned scanModes
        = mode & (HS_MODE_BLOCK | HS_MODE_STREAM | HS_MODE_VECTORED);
    if (popcount32(scanModes) != 1) {
        *comp_error = generateCompileError(
            "Invalid parameter: mode must have one "
            "(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
            "HS_MODE_VECTORED set.",
            -1);
        return false;
    }

    // SOM precision is optional; nothing more to check when it is absent.
    const unsigned horizonBits = mode & (HS_MODE_SOM_HORIZON_SMALL |
                                         HS_MODE_SOM_HORIZON_MEDIUM |
                                         HS_MODE_SOM_HORIZON_LARGE);
    if (horizonBits == 0) {
        return true;
    }

    const bool streaming = (mode & HS_MODE_STREAM) != 0;
    if (!streaming) {
        *comp_error = generateCompileError("Invalid parameter: the "
            "HS_MODE_SOM_HORIZON_ mode flags may only be set in "
            "streaming mode.", -1);
        return false;
    }

    // horizonBits is non-zero here, so "not exactly one bit" means several.
    if (popcount32(horizonBits) != 1) {
        *comp_error = generateCompileError("Invalid parameter: only one "
            "HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
        return false;
    }

    return true;
}
/**
 * \brief Validate an optional platform description.
 *
 * A null \p p is accepted. Otherwise the CPU feature mask may only contain
 * bits from HS_CPU_FEATURES_ALL and the tune value may not exceed
 * HS_TUNE_LAST.
 *
 * \param p          Platform information, or null.
 * \param comp_error Receives a newly generated compile error on failure.
 * \return true if the platform is acceptable, false otherwise.
 */
static
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW
#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2)
    if (p == nullptr) {
        return true;
    }

    // Any feature bit outside the supported set is an error.
    const bool unknownFeatures
        = (p->cpu_features & ~HS_CPU_FEATURES_ALL) != 0;
    if (unknownFeatures) {
        *comp_error = generateCompileError("Invalid cpu features specified in "
                                           "the platform information.", -1);
        return false;
    }

    // Tune values are checked only against the highest known value.
    if (HS_TUNE_LAST < p->tune) {
        *comp_error = generateCompileError("Invalid tuning value specified in "
                                           "the platform information.", -1);
        return false;
    }

    return true;
}
/**
 * \brief Map the SOM-related mode flags to a precision in bytes.
 *
 * \return 8 for vectored mode or the large horizon, 4 for the medium horizon,
 * 2 for the small horizon, and 0 when none of those flags is set.
 */
static
unsigned getSomPrecision(unsigned mode) {
    // Vectored mode always gets full precision, the same as the large horizon.
    if (mode & (HS_MODE_VECTORED | HS_MODE_SOM_HORIZON_LARGE)) {
        return 8;
    }
    if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
        return 4;
    }
    if (mode & HS_MODE_SOM_HORIZON_SMALL) {
        return 2;
    }
    return 0;
}
namespace ue2 {
/**
 * \brief Shared implementation behind hs_compile(), hs_compile_multi() and
 * hs_compile_ext_multi().
 *
 * Validates the arguments, feeds every expression to an NG instance and
 * builds the database.
 *
 * \param expressions Array of \p elements pattern strings; must not be null.
 * \param flags       Per-expression flags, or null to use 0 for all.
 * \param ids         Per-expression IDs, or null to use 0 for all.
 * \param ext         Per-expression extended parameters, or null for none.
 * \param elements    Number of expressions; must be non-zero and no larger
 *                    than g.limitPatternCount.
 * \param mode        Mode flags, validated by checkMode().
 * \param platform    Target platform, or null to use the current target.
 * \param db          Receives the built database on success and nullptr on
 *                    failure; must not be null.
 * \param comp_error  Receives nullptr on success or an error object on
 *                    failure. If this pointer itself is null, the function
 *                    only returns an error code.
 * \param g           Grey settings to compile with.
 * \return HS_SUCCESS or HS_COMPILER_ERROR.
 */
hs_error_t
hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
                     const unsigned *ids, const hs_expr_ext *const *ext,
                     unsigned elements, unsigned mode,
                     const hs_platform_info_t *platform, hs_database_t **db,
                     hs_compile_error_t **comp_error, const Grey &g) {
    // Check the args: note that it's OK for flags, ids or ext to be null.
    if (!comp_error) {
        if (db) {
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return HS_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error
            = generateCompileError("Invalid parameter: expressions is NULL",
                                   -1);
        return HS_COMPILER_ERROR;
    }
    if (elements == 0) {
        *db = nullptr;
        *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
        return HS_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode.
        return HS_COMPILER_ERROR;
    }

    if (!checkPlatform(platform, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkPlatform.
        return HS_COMPILER_ERROR;
    }

    if (elements > g.limitPatternCount) {
        *db = nullptr;
        *comp_error = generateCompileError("Number of patterns too large", -1);
        return HS_COMPILER_ERROR;
    }

    // This function is simply a wrapper around both the parser and compiler
    bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
    bool isVectored = mode & HS_MODE_VECTORED;
    unsigned somPrecision = getSomPrecision(mode);

    target_t target_info = platform ? target_t(*platform)
                                    : get_current_target();

    CompileContext cc(isStreaming, isVectored, target_info, g);
    NG ng(cc, somPrecision);

    try {
        for (unsigned int i = 0; i < elements; i++) {
            // Add this expression to the compiler
            try {
                addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
                              ext ? ext[i] : nullptr, ids ? ids[i] : 0);
            } catch (CompileError &e) {
                /* Caught a parse error:
                 * throw it upstream as a CompileError with a specific index */
                e.setExpressionIndex(i);
                throw; /* do not slice */
            }
        }

        unsigned length = 0;
        struct hs_database *out = build(ng, &length);

        assert(out);    // should have thrown exception on error
        assert(length);

        *db = out;
        *comp_error = nullptr;

        return HS_SUCCESS;
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateCompileError(e.reason,
                                           e.hasIndex ? (int)e.index : -1);
        return HS_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        // Caught by const reference: no copy of the exception object is made
        // and derived exception types are not sliced.
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }
}
} // namespace ue2
/**
 * Public API: compile a single expression into a database.
 *
 * A null \p expression is rejected here; all other argument validation is
 * done by hs_compile_multi_int(), which is called with a single element, an
 * ID of zero, no extended parameters and default Grey settings.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
                      const hs_platform_info_t *platform, hs_database_t **db,
                      hs_compile_error_t **error) {
    if (expression == nullptr) {
        // db and error may themselves be null (hs_compile_multi_int tolerates
        // that), so neither may be dereferenced unchecked on this path.
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
                                platform, db, error, Grey());
}
/**
 * Public API: compile an array of expressions into one database.
 *
 * Forwards to hs_compile_multi_int() with no extended parameters and default
 * Grey settings.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_multi(const char * const *expressions,
                            const unsigned *flags, const unsigned *ids,
                            unsigned elements, unsigned mode,
                            const hs_platform_info_t *platform,
                            hs_database_t **db, hs_compile_error_t **error) {
    return hs_compile_multi_int(expressions, flags, ids,
                                nullptr /* no extended parameters */,
                                elements, mode, platform, db, error, Grey());
}
/**
 * Public API: compile an array of expressions, each with optional extended
 * parameters, into one database.
 *
 * Forwards every argument unchanged to hs_compile_multi_int() together with
 * default Grey settings.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_ext_multi(const char * const *expressions,
                                const unsigned *flags, const unsigned *ids,
                                const hs_expr_ext * const *ext,
                                unsigned elements, unsigned mode,
                                const hs_platform_info_t *platform,
                                hs_database_t **db,
                                hs_compile_error_t **error) {
    const Grey defaultGrey; // default tunables
    return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
                                platform, db, error, defaultGrey);
}
/**
 * \brief Implementation behind hs_expression_info().
 *
 * Parses \p expression, builds its graph wrapper and fills an hs_expr_info
 * structure from it. The result is copied into memory obtained from
 * hs_misc_alloc() and handed to the caller through \p info.
 *
 * \param expression Pattern to analyse; must not be null.
 * \param flags      Expression flags.
 * \param mode       Mode flags; only used to derive the streaming/vectored
 *                   settings of the compile context.
 * \param info       Receives the allocated result on success and nullptr on
 *                   failure; must not be null.
 * \param error      Receives nullptr on success or an error object on
 *                   failure. If this pointer itself is null, the function
 *                   only returns an error code.
 * \return HS_SUCCESS or HS_COMPILER_ERROR.
 */
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
                                  unsigned int mode, hs_expr_info_t **info,
                                  hs_compile_error_t **error) {
    if (!error) {
        // nowhere to write an error, but we can still return an error code.
        return HS_COMPILER_ERROR;
    }

    if (!info) {
        *error = generateCompileError("Invalid parameter: info is NULL", -1);
        return HS_COMPILER_ERROR;
    }

    // Clear the output first so that every error path below, including the
    // null-expression one, leaves *info in a defined state.
    *info = nullptr;

    if (!expression) {
        *error = generateCompileError("Invalid parameter: expression is NULL",
                                      -1);
        return HS_COMPILER_ERROR;
    }

    *error = nullptr;

    // Results are gathered in a local and copied out only once everything
    // has succeeded.
    hs_expr_info local_info;
    memset(&local_info, 0, sizeof(local_info));

    try {
        bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
        bool isVectored = mode & HS_MODE_VECTORED;

        CompileContext cc(isStreaming, isVectored, get_current_target(),
                          Grey());

        // Ensure that our pattern isn't too long (in characters).
        if (strlen(expression) > cc.grey.limitPatternLength) {
            throw ParseError("Pattern length exceeds limit.");
        }

        ReportManager rm(cc.grey);
        ParsedExpression pe(0, expression, flags, 0);
        assert(pe.component);

        // Apply prefiltering transformations if desired.
        if (pe.prefilter) {
            prefilterTree(pe.component, ParseMode(flags));
        }

        unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);

        if (!g) {
            DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
            throw ParseError("Internal error.");
        }

        fillExpressionInfo(rm, *g, &local_info);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *error = generateCompileError(e);
        return HS_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        // Caught by const reference: no copy, no slicing.
        *error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }

    hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
    if (!rv) {
        *error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }

    *rv = local_info;
    *info = rv;
    return HS_SUCCESS;
}
/**
 * Public API: report information about a single expression.
 *
 * Delegates to hs_expression_info_int(), always passing HS_MODE_BLOCK as the
 * mode.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error) {
    const unsigned int mode = HS_MODE_BLOCK;
    return hs_expression_info_int(expression, flags, mode, info, error);
}
/**
 * Public API: fill \p platform from the values reported by cpuid_flags() and
 * cpuid_tune().
 *
 * \return HS_INVALID if \p platform is null, HS_SUCCESS otherwise.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
    if (platform == nullptr) {
        return HS_INVALID;
    }

    // Zero the whole structure first so that fields not assigned below are 0.
    hs_platform_info_t &out = *platform;
    memset(&out, 0, sizeof(out));

    out.cpu_features = cpuid_flags();
    out.tune = cpuid_tune();

    return HS_SUCCESS;
}
/**
 * Public API: release a compile error object.
 *
 * The work is delegated to freeCompileError(); this entry point always
 * returns HS_SUCCESS.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
    freeCompileError(error);

    return HS_SUCCESS;
}

45
src/hs.h Normal file
View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_H_
#define HS_H_
/**
* @file
* @brief The complete Hyperscan API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header includes both the Hyperscan compiler and runtime components. See
* the individual component headers for documentation.
*/
#include "hs_compile.h"
#include "hs_runtime.h"
#endif /* HS_H_ */

509
src/hs_common.h Normal file
View File

@ -0,0 +1,509 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_COMMON_H_
#define HS_COMMON_H_
#include <stdlib.h>
/**
* @file
* @brief The Hyperscan common API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions available to both the Hyperscan compiler and
* runtime.
*/
#ifdef __cplusplus
extern "C"
{
#endif
/* Forward declaration only: the structure's layout is not exposed here. */
struct hs_database;
/**
* A Hyperscan pattern database.
*
* This is an opaque type: struct hs_database is only forward-declared in this
* header, so applications handle databases exclusively through pointers.
*
* Generated by one of the Hyperscan compiler functions:
* - @ref hs_compile()
* - @ref hs_compile_multi()
* - @ref hs_compile_ext_multi()
*
* A database may also be reconstructed from serialized bytes with @ref
* hs_deserialize_database() or @ref hs_deserialize_database_at().
*/
typedef struct hs_database hs_database_t;
/**
* A type for errors returned by Hyperscan functions.
*
* @ref HS_SUCCESS (zero) indicates success; the error codes defined in this
* header are all negative. See @ref HS_ERROR for the list of values.
*/
typedef int hs_error_t;
/**
* Free a compiled pattern database.
*
* The free callback set by @ref hs_set_database_allocator() (or @ref
* hs_set_allocator()) will be used by this function.
*
* Note: a database reconstructed into caller-provided memory by @ref
* hs_deserialize_database_at() should not be passed to this function; the
* caller is responsible for freeing that memory.
*
* @param db
* A compiled pattern database. NULL may also be safely provided, in which
* case the function does nothing.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_database(hs_database_t *db);
/**
* Serialize a pattern database to a stream of bytes.
*
* The allocator callback set by @ref hs_set_misc_allocator() (or @ref
* hs_set_allocator()) will be used by this function.
*
* @param db
* A compiled pattern database.
*
* @param bytes
* On success, a pointer to an array of bytes will be returned here.
* These bytes can be subsequently relocated or written to disk, and later
* turned back into a database with @ref hs_deserialize_database() or
* @ref hs_deserialize_database_at(). The caller is responsible for
* freeing this block.
*
* @param length
* On success, the number of bytes in the generated byte array will be
* returned here.
*
* @return
* @ref HS_SUCCESS on success, or @ref HS_NOMEM if the byte array cannot be
* allocated; other values may be returned if errors are detected.
*/
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
size_t *length);
/**
* Reconstruct a pattern database from a stream of bytes previously generated
* by @ref hs_serialize_database().
*
* This function will allocate sufficient space for the database using the
* allocator set with @ref hs_set_database_allocator() (or @ref
* hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
* hs_deserialize_database_at() function.
*
* @param bytes
* A byte array generated by @ref hs_serialize_database() representing a
* compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param db
* On success, a pointer to a newly allocated @ref hs_database_t will be
* returned here. This database can then be used for scanning, and
* eventually freed by the caller using @ref hs_free_database().
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
hs_database_t **db);
/**
* Reconstruct a pattern database from a stream of bytes previously generated
* by @ref hs_serialize_database() at a given memory location.
*
* This function (unlike @ref hs_deserialize_database()) will write the
* reconstructed database to the memory location given in the @a db parameter.
* The amount of space required at this location can be determined with the
* @ref hs_serialized_database_size() function.
*
* @param bytes
* A byte array generated by @ref hs_serialize_database() representing a
* compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param db
* Pointer to an 8-byte aligned block of memory of sufficient size to hold
* the deserialized database. On success, the reconstructed database will
* be written to this location. This database can then be used for pattern
* matching. The user is responsible for freeing this memory; the @ref
* hs_free_database() call should not be used.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
hs_database_t *db);
/**
* Provides the size of the stream state allocated by a single stream opened
* against the given database.
*
* @param database
* Pointer to a compiled (streaming mode) pattern database.
*
* @param stream_size
* On success, the size in bytes of an individual stream opened against the
* given database is placed in this parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
/**
* Provides the size of the given database in bytes.
*
* @param database
* Pointer to compiled pattern database.
*
* @param database_size
* On success, the size of the compiled database in bytes is placed in this
* parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_database_size(const hs_database_t *database,
size_t *database_size);
/**
* Utility function for reporting the size that would be required by a
* database if it were deserialized.
*
* This can be used to allocate a shared memory region or other "special"
* allocation prior to deserializing with the @ref hs_deserialize_database_at()
* function.
*
* @param bytes
* Pointer to a byte array generated by @ref hs_serialize_database()
* representing a compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param deserialized_size
* On success, the size of the compiled database that would be generated
* by @ref hs_deserialize_database_at() is returned here.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
size_t *deserialized_size);
/**
* Utility function providing information about a database.
*
* @param database
* Pointer to a compiled database.
*
* @param info
* On success, a string containing the version and platform information for
* the supplied database is placed in this parameter. The string is
* allocated using the allocator supplied in @ref hs_set_misc_allocator()
* (or malloc() if no allocator was set). It should be freed by the caller.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_database_info(const hs_database_t *database, char **info);
/**
* Utility function providing information about a serialized database.
*
* @param bytes
* Pointer to a serialized database.
*
* @param length
* Length in bytes of the serialized database.
*
* @param info
* On success, a string containing the version and platform information
* for the supplied serialized database is placed in this parameter. The
* string is allocated using the allocator supplied in @ref
* hs_set_misc_allocator() (or malloc() if no allocator was set). It
* should be freed by the caller.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
char **info);
/**
* The type of the callback function that will be used by Hyperscan to allocate
* more memory at runtime as required, for example in @ref hs_open_stream() to
* allocate stream state.
*
* If Hyperscan is to be used in a multi-threaded, or similarly concurrent
* environment, the allocation function will need to be re-entrant, or
* similarly safe for concurrent use.
*
* Callbacks of this type are installed with @ref hs_set_allocator(), @ref
* hs_set_database_allocator(), @ref hs_set_misc_allocator(), @ref
* hs_set_scratch_allocator() or @ref hs_set_stream_allocator(). All of these
* require the returned memory to be suitably aligned for the largest
* representable data type on this platform.
*
* @param size
* The number of bytes to allocate.
*
* @return
* A pointer to the region of memory allocated, or NULL on error.
*/
typedef void *(*hs_alloc_t)(size_t size);
/**
* The type of the callback function that will be used by Hyperscan to free
* memory regions previously allocated using the @ref hs_alloc_t function.
*
* @param ptr
* The region of memory to be freed.
*/
typedef void (*hs_free_t)(void *ptr);
/**
* Set the allocate and free functions used by Hyperscan for allocating
* memory at runtime for stream state, scratch space, database bytecode,
* and various other data structures returned by the Hyperscan API.
*
* The function is equivalent to calling @ref hs_set_stream_allocator(),
* @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
* @ref hs_set_misc_allocator() with the provided parameters.
*
* This call will override any previous allocators that have been set.
*
* Note: there is no way to change the allocator used for temporary objects
* created during the various compile calls (@ref hs_compile(), @ref
* hs_compile_multi(), @ref hs_compile_ext_multi()).
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for database bytecode produced by the compile calls (@ref hs_compile(), @ref
* hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
* deserialization (@ref hs_deserialize_database()).
*
* If no database allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous database allocators that have been set.
*
* Note: the database allocator may also be set by calling @ref
* hs_set_allocator().
*
* Note: there is no way to change how temporary objects created during the
* various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
* hs_compile_ext_multi()) are allocated.
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
* hs_expr_info_t and serialized databases.
*
* If no misc allocation functions are set, or if NULL is used in place of both
* parameters, then memory allocation will default to standard methods (such as
* the system malloc() and free() calls).
*
* This call will override any previous misc allocators that have been set.
*
* Note: the misc allocator may also be set by calling @ref hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
*
* If no scratch allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous scratch allocators that have been set.
*
* Note: the scratch allocator may also be set by calling @ref
* hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for stream state by @ref hs_open_stream().
*
* If no stream allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous stream allocators that have been set.
*
* Note: the stream allocator may also be set by calling @ref
* hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Utility function for identifying this release version.
*
* @return
* A string containing the version number of this release build and the
* date of the build. It is allocated statically, so it does not need to
* be freed by the caller.
*/
const char *hs_version(void);
/**
* @defgroup HS_ERROR hs_error_t values
*
* @{
*/
/**
* The engine completed normally.
*/
#define HS_SUCCESS 0
/**
* A parameter passed to this function was invalid.
*/
#define HS_INVALID (-1)
/**
* A memory allocation failed.
*/
#define HS_NOMEM (-2)
/**
* The engine was terminated by callback.
*
* This return value indicates that the target buffer was partially scanned,
* but that the callback function requested that scanning cease after a match
* was located.
*/
#define HS_SCAN_TERMINATED (-3)
/**
* The pattern compiler failed, and the @ref hs_compile_error_t should be
* inspected for more detail.
*/
#define HS_COMPILER_ERROR (-4)
/**
* The given database was built for a different version of Hyperscan.
*/
#define HS_DB_VERSION_ERROR (-5)
/**
* The given database was built for a different platform (i.e., CPU type).
*/
#define HS_DB_PLATFORM_ERROR (-6)
/**
* The given database was built for a different mode of operation. This error
* is returned when streaming calls are used with a block or vectored database
* and vice versa.
*/
#define HS_DB_MODE_ERROR (-7)
/**
* A parameter passed to this function was not correctly aligned.
*/
#define HS_BAD_ALIGN (-8)
/**
* The memory allocator (either malloc() or the allocator set with @ref
* hs_set_allocator()) did not correctly return memory suitably aligned for the
* largest representable data type on this platform.
*/
#define HS_BAD_ALLOC (-9)
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_COMMON_H_ */

848
src/hs_compile.h Normal file
View File

@ -0,0 +1,848 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_COMPILE_H_
#define HS_COMPILE_H_
/**
* @file
* @brief The Hyperscan compiler API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions for compiling regular expressions into
* Hyperscan databases that can be used by the Hyperscan runtime.
*/
#include "hs_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* A type containing error details that is returned by the compile calls (@ref
* hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
* failure. The caller may inspect the values returned in this type to
* determine the cause of failure.
*
* Common errors generated during the compile process include:
*
* - *Invalid parameter*
*
* An invalid argument was specified in the compile call.
*
* - *Unrecognised flag*
*
* An unrecognised value was passed in the flags argument.
*
* - *Pattern matches empty buffer*
*
* By default, Hyperscan only supports patterns that will *always*
* consume at least one byte of input. Patterns that do not have this
* property (such as `/(abc)?/`) will produce this error unless
* the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
* patterns will produce a match for *every* byte when scanned.
*
* - *Embedded anchors not supported*
*
* Hyperscan only supports the use of anchor meta-characters (such as
* `^` and `$`) in patterns where they could *only* match
* at the start or end of a buffer. A pattern containing an embedded
* anchor, such as `/abc^def/`, can never match, as there is no
* way for `abc` to precede the start of the data stream.
*
* - *Bounded repeat is too large*
*
* The pattern contains a repeated construct with very large finite
* bounds.
*
* - *Unsupported component type*
*
* An unsupported PCRE construct was used in the pattern.
*
* - *Unable to generate bytecode*
*
* This error indicates that Hyperscan was unable to compile a pattern
* that is syntactically valid. The most common cause is a pattern that is
* very long and complex or contains a large repeated subpattern.
*
* - *Unable to allocate memory*
*
* The library was unable to allocate temporary storage used during
* compilation time.
*
* - *Internal error*
*
* An unexpected error occurred: if this error is reported, please contact
* the Hyperscan team with a description of the situation.
*/
typedef struct hs_compile_error {
/**
* A human-readable error message describing the error.
*/
char *message;
/**
* The zero-based number of the expression that caused the error (if this
* can be determined). If the error is not specific to an expression, then
* this value will be less than zero.
*/
int expression;
} hs_compile_error_t;
/**
* A type containing information on the target platform which may optionally be
* provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
* @ref hs_compile_ext_multi()).
*
* A hs_platform_info structure may be populated for the current platform by
* using the @ref hs_populate_platform() call.
*/
typedef struct hs_platform_info {
/**
* Information about the target platform which may be used to guide the
* optimisation process of the compile.
*
* Use of this field does not limit the processors that the resulting
* database can run on, but may impact the performance of the resulting
* database.
*/
unsigned int tune;
/**
* Relevant CPU features available on the target platform.
*
* This value may be produced by combining HS_CPU_FEATURES_* flags (such as
* @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together
* to produce the value.
*/
unsigned long long cpu_features;
/**
* Reserved for future use.
*/
unsigned long long reserved1;
/**
* Reserved for future use.
*/
unsigned long long reserved2;
} hs_platform_info_t;
/**
* A type containing information related to an expression that is returned by
* @ref hs_expression_info().
*/
typedef struct hs_expr_info {
/**
* The minimum length in bytes of a match for the pattern.
*/
unsigned int min_width;
/**
* The maximum length in bytes of a match for the pattern. If the pattern
* has an unbounded maximum width, this will be set to the maximum value of
* an unsigned int (UINT_MAX).
*/
unsigned int max_width;
/**
* Whether this expression can produce matches that are not returned in
* order, such as those produced by assertions. Zero if false, non-zero if
* true.
*/
char unordered_matches;
/**
* Whether this expression can produce matches at end of data (EOD). In
* streaming mode, EOD matches are raised during @ref hs_close_stream(),
* since it is only when @ref hs_close_stream() is called that the EOD
* location is known. Zero if false, non-zero if true.
*
* Note: trailing `\b` word boundary assertions may also result in EOD
* matches as end-of-data can act as a word boundary.
*/
char matches_at_eod;
/**
* Whether this expression can *only* produce matches at end of data (EOD).
* In streaming mode, all matches for this expression are raised during
* @ref hs_close_stream(). Zero if false, non-zero if true.
*/
char matches_only_at_eod;
} hs_expr_info_t;
/**
* A structure containing additional parameters related to an expression,
* passed in at build time to @ref hs_compile_ext_multi().
*
* These parameters allow the set of matches produced by a pattern to be
* constrained at compile time, rather than relying on the application to
* process unwanted matches at runtime.
*/
typedef struct hs_expr_ext {
/**
* Flags governing which parts of this structure are to be used by the
* compiler. See @ref HS_EXT_FLAG.
*/
unsigned long long flags;
/**
* The minimum end offset in the data stream at which this expression
* should match successfully. To use this parameter, set the
* @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
*/
unsigned long long min_offset;
/**
* The maximum end offset in the data stream at which this expression
* should match successfully. To use this parameter, set the
* @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
*/
unsigned long long max_offset;
/**
* The minimum match length (from start to end) required to successfully
* match this expression. To use this parameter, set the
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
*/
unsigned long long min_length;
} hs_expr_ext_t;
/**
* @defgroup HS_EXT_FLAG hs_expr_ext_t flags
*
* These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
* are used.
*
* @{
*/
/** Flag indicating that the hs_expr_ext::min_offset field is used. */
#define HS_EXT_FLAG_MIN_OFFSET 1ULL
/** Flag indicating that the hs_expr_ext::max_offset field is used. */
#define HS_EXT_FLAG_MAX_OFFSET 2ULL
/** Flag indicating that the hs_expr_ext::min_length field is used. */
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
/** @} */
/**
* The basic regular expression compiler.
*
* This is the function call with which an expression is compiled into a
* Hyperscan database which can be passed to the runtime functions (such as
* @ref hs_scan(), @ref hs_open_stream(), etc.).
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned in this parameter, providing details of the error condition.
* The caller is responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @a error
* parameter.
*/
hs_error_t hs_compile(const char *expression, unsigned int flags,
unsigned int mode, const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler.
*
* This is the function call with which a set of expressions is compiled into a
* database which can be passed to the runtime functions (such as @ref
* hs_scan(), @ref hs_open_stream(), etc.). Each expression can be labelled
* with a unique integer which is passed into the match callback to identify
* the pattern that has matched.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* hs_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned in this parameter, providing details of the error condition.
* The caller is responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @a error
* parameter.
*/
hs_error_t hs_compile_multi(const char *const *expressions,
const unsigned int *flags, const unsigned int *ids,
unsigned int elements, unsigned int mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler with extended pattern support.
*
* This function call compiles a group of expressions into a database in the
* same way as @ref hs_compile_multi(), but allows additional parameters to be
* specified via an @ref hs_expr_ext_t structure per expression.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* hs_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param ext
* An array of pointers to filled @ref hs_expr_ext_t structures that
* define extended behaviour for each pattern. NULL may be specified if no
* extended behaviour is needed for an individual pattern, or in place of
* the whole array if it is not needed for any expressions. Memory used by
* these structures must be both allocated and freed by the caller.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned in this parameter, providing details of the error condition.
* The caller is responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @a error
* parameter.
*/
hs_error_t hs_compile_ext_multi(const char *const *expressions,
const unsigned int *flags,
const unsigned int *ids,
const hs_expr_ext_t *const *ext,
unsigned int elements, unsigned int mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* Free an error structure generated by @ref hs_compile(), @ref
* hs_compile_multi(), @ref hs_compile_ext_multi() or @ref
* hs_expression_info().
*
* @param error
* The @ref hs_compile_error_t to be freed. NULL may also be safely
* provided.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_compile_error(hs_compile_error_t *error);
/**
* Utility function providing information about a regular expression. The
* information provided in @ref hs_expr_info_t includes the minimum and maximum
* width of a pattern match.
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be described by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param info
* On success, a pointer to the pattern information will be returned in
* this parameter, or NULL on failure. This structure is allocated using
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
* allocator was set) and should be freed by the caller.
*
* @param error
* If the call fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on success; @ref HS_COMPILER_ERROR on
* failure, with details provided in the @a error parameter.
*/
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Populates the platform information based on the current host.
*
* The populated structure has the type accepted by the @a platform argument
* of compile calls such as @ref hs_compile_ext_multi().
*
* @param platform
* Pointer to a caller-provided structure. On success, the pointed-to
* structure is populated based on the current host.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_populate_platform(hs_platform_info_t *platform);
/**
* @defgroup HS_PATTERN_FLAG Pattern flags
*
* Flags that modify the behaviour of an individual expression. They are
* passed in the flags argument of the compile calls, and multiple flags may
* be combined by ORing them together.
*
* @{
*/
/**
* Compile flag: Set case-insensitive matching.
*
* This flag sets the expression to be matched case-insensitively by default.
* The expression may still use PCRE tokens (notably `(?i)` and
* `(?-i)`) to switch case-insensitive matching on and off.
*/
#define HS_FLAG_CASELESS 1
/**
* Compile flag: Matching a `.` will not exclude newlines.
*
* This flag sets any instances of the `.` token to match newline characters as
* well as all other characters. The PCRE specification states that the `.`
* token does not match newline characters by default, so without this flag the
* `.` token will not cross line boundaries.
*/
#define HS_FLAG_DOTALL 2
/**
* Compile flag: Set multi-line anchoring.
*
* This flag instructs the expression to make the `^` and `$` tokens match
* newline characters as well as the start and end of the stream. If this flag
* is not specified, the `^` token will only ever match at the start of a
* stream, and the `$` token will only ever match at the end of a stream within
* the guidelines of the PCRE specification.
*/
#define HS_FLAG_MULTILINE 4
/**
* Compile flag: Set single-match only mode.
*
* This flag sets the expression's match ID to match at most once. In streaming
* mode, this means that the expression will return only a single match over
* the lifetime of the stream, rather than reporting every match as per
* standard Hyperscan semantics. In block mode or vectored mode, only the first
* match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
* returned.
*
* If multiple expressions in the database share the same match ID, then
* either all of them must specify @ref HS_FLAG_SINGLEMATCH or none of them
* may specify it. If a group of expressions sharing a match ID specify the
* flag, then at most one match with the match ID will be generated per
* stream.
*
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
* is not currently supported.
*/
#define HS_FLAG_SINGLEMATCH 8
/**
* Compile flag: Allow expressions that can match against empty buffers.
*
* This flag instructs the compiler to allow expressions that can match against
* empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
* possible match for an expression, such expressions generally execute very
* slowly; the default behaviour is to return an error when an attempt to
* compile one is made. Using this flag will force the compiler to allow such
* an expression.
*/
#define HS_FLAG_ALLOWEMPTY 16
/**
* Compile flag: Enable UTF-8 mode for this expression.
*
* This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
* characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
* database that has been compiled with one or more patterns using this flag
* are undefined.
*/
#define HS_FLAG_UTF8 32
/**
* Compile flag: Enable Unicode property support for this expression.
*
* This flag instructs Hyperscan to use Unicode properties, rather than the
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
* well as the POSIX character classes. It is only meaningful in conjunction
* with @ref HS_FLAG_UTF8.
*/
#define HS_FLAG_UCP 64
/**
* Compile flag: Enable prefiltering mode for this expression.
*
* This flag instructs Hyperscan to compile an "approximate" version of this
* pattern for use in a prefiltering application, even if Hyperscan does not
* support the pattern in normal operation.
*
* The set of matches returned when this flag is used is guaranteed to be a
* superset of the matches specified by the non-prefiltering expression.
*
* If the pattern contains pattern constructs not supported by Hyperscan (such
* as zero-width assertions, back-references or conditional references) these
* constructs will be replaced internally with broader constructs that may
* match more often.
*
* Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
* would otherwise return a "Pattern too large" error at compile time, or for
* performance reasons (subject to the matching guarantee above).
*
* It is generally expected that the application will subsequently confirm
* prefilter matches with another regular expression matcher that can provide
* exact matches for the pattern.
*
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
* is not currently supported.
*/
#define HS_FLAG_PREFILTER 128
/**
* Compile flag: Enable leftmost start of match reporting.
*
* This flag instructs Hyperscan to report the leftmost possible start of match
* offset when a match is reported for this expression. (By default, no start
* of match is returned.)
*
* Enabling this behaviour may reduce performance and increase stream state
* requirements in streaming mode.
*
* One of the SOM_HORIZON compile mode flags (for example @ref
* HS_MODE_SOM_HORIZON_LARGE) must be selected in order to use this flag.
*
* Note: The use of this flag in combination with @ref HS_FLAG_SINGLEMATCH or
* @ref HS_FLAG_PREFILTER is not currently supported.
*/
#define HS_FLAG_SOM_LEFTMOST 256
/** @} */
/**
* @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
*
* @{
*/
/**
* CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
*
* Setting this flag indicates that the target platform supports AVX2
* instructions.
*
* The value is a single bit (bit 2) of type unsigned long long.
*/
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
/** @} */
/**
* @defgroup HS_TUNE_FLAG Tuning flags
*
* Note that, despite the group name, these values are consecutive integers
* (0 to 5) rather than individual bits, so they cannot be meaningfully
* combined by ORing them together: exactly one value identifies the tuning
* target.
*
* @{
*/
/**
* Tuning Parameter - Generic
*
* This indicates that the compiled database should not be tuned for any
* particular target platform.
*/
#define HS_TUNE_FAMILY_GENERIC 0
/**
* Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
*
* This indicates that the compiled database should be tuned for the
* Sandy Bridge microarchitecture.
*/
#define HS_TUNE_FAMILY_SNB 1
/**
* Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
*
* This indicates that the compiled database should be tuned for the
* Ivy Bridge microarchitecture.
*/
#define HS_TUNE_FAMILY_IVB 2
/**
* Tuning Parameter - Intel(R) microarchitecture code name Haswell
*
* This indicates that the compiled database should be tuned for the
* Haswell microarchitecture.
*/
#define HS_TUNE_FAMILY_HSW 3
/**
* Tuning Parameter - Intel(R) microarchitecture code name Silvermont
*
* This indicates that the compiled database should be tuned for the
* Silvermont microarchitecture.
*/
#define HS_TUNE_FAMILY_SLM 4
/**
* Tuning Parameter - Intel(R) microarchitecture code name Broadwell
*
* This indicates that the compiled database should be tuned for the
* Broadwell microarchitecture.
*/
#define HS_TUNE_FAMILY_BDW 5
/** @} */
/**
* @defgroup HS_MODE_FLAG Compile mode flags
*
* The mode flags are used as values for the mode parameter of the various
* compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
* hs_compile_ext_multi()).
*
* A mode value can be built by ORing these flag values together; the only
* required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
* HS_MODE_VECTORED. Other flags may be added to enable support for additional
* features.
*
* @{
*/
/**
* Compiler mode flag: Block scan (non-streaming) database.
*/
#define HS_MODE_BLOCK 1
/**
* Compiler mode flag: Alias for @ref HS_MODE_BLOCK (both have the value 1).
*/
#define HS_MODE_NOSTREAM 1
/**
* Compiler mode flag: Streaming database.
*/
#define HS_MODE_STREAM 2
/**
* Compiler mode flag: Vectored scanning database.
*/
#define HS_MODE_VECTORED 4
/**
* Compiler mode flag: use full precision to track start of match offsets in
* stream state.
*
* This mode will use the most stream state per pattern, but will always return
* an accurate start of match offset regardless of how far back in the past it
* was found.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
/**
* Compiler mode flag: use medium precision to track start of match offsets in
* stream state.
*
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
* will limit start of match accuracy to offsets within 2^32 bytes of the
* end of match offset reported. A start of match lying outside this horizon
* is reported to the match callback as @ref HS_OFFSET_PAST_HORIZON.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
/**
* Compiler mode flag: use limited precision to track start of match offsets in
* stream state.
*
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
* will limit start of match accuracy to offsets within 2^16 bytes of the
* end of match offset reported. A start of match lying outside this horizon
* is reported to the match callback as @ref HS_OFFSET_PAST_HORIZON.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_COMPILE_H_ */

78
src/hs_internal.h Normal file
View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Internal-use only definitions. Available to internal tools.
*/
#ifndef HS_INTERNAL_H
#define HS_INTERNAL_H
#include "ue2common.h"
#include "hs.h"
#ifdef __cplusplus
namespace ue2 {
// Forward declaration only: Grey is used purely by reference below, so its
// definition is not needed by this header.
struct Grey;
/** \brief Internal use only: takes a Grey argument so that we can use it in
* tools.
*
* Apart from \a g, the parameter list parallels that of the public
* hs_compile_ext_multi() call (expressions, flags, ids, ext, elements, mode,
* platform, db), with \a comp_error in the position of its error parameter.
*
* \param g The Grey object supplied by the calling tool.
*/
hs_error_t hs_compile_multi_int(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
const hs_expr_ext *const *ext,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g);
} // namespace ue2
extern "C"
{
#endif
/* NOTE(review): this constant is undocumented in this header. Its name
 * suggests a bit for the `flags` argument of a match callback - confirm
 * against the code that uses it. */
#define HS_MATCH_FLAG_ADJUSTED 1U
/** \brief Union of every pattern flag bit that Hyperscan accepts.
 *
 * The operands are listed in ascending bit order, matching the order in which
 * the HS_FLAG_* constants are declared in the public compile header. */
#define HS_FLAG_ALL                                                            \
    (HS_FLAG_CASELESS | HS_FLAG_DOTALL | HS_FLAG_MULTILINE |                   \
     HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | HS_FLAG_UCP |   \
     HS_FLAG_PREFILTER | HS_FLAG_SOM_LEFTMOST)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

493
src/hs_runtime.h Normal file
View File

@ -0,0 +1,493 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_RUNTIME_H_
#define HS_RUNTIME_H_
#include <stdlib.h>
/**
* @file
* @brief The Hyperscan runtime API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions for using compiled Hyperscan databases for
* scanning data at runtime.
*/
#include "hs_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* Forward declaration of the stream state structure. Its contents are not
* defined in this header; the API below only handles streams through
* pointers.
*/
struct hs_stream;
/**
* The stream identifier returned by @ref hs_open_stream().
*/
typedef struct hs_stream hs_stream_t;
/*
* Forward declaration of the scratch space structure. As with streams, its
* contents are not defined in this header.
*/
struct hs_scratch;
/**
* A Hyperscan scratch space.
*/
typedef struct hs_scratch hs_scratch_t;
/**
* Definition of the match event callback function type.
*
* A callback function matching the defined type must be provided by the
* application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
* hs_scan_stream() functions (or other streaming calls which can produce
* matches).
*
* This callback function will be invoked whenever a match is located in the
* target data during the execution of a scan. The details of the match are
* passed in as parameters to the callback function, and the callback function
* should return a value indicating whether or not matching should continue on
* the target data. If no callbacks are desired from a scan call, NULL may be
* provided in order to suppress match production.
*
* This callback function should not attempt to call Hyperscan API functions on
* the same stream nor should it attempt to reuse the scratch space allocated
* for the API calls that caused it to be triggered. Making another call to the
* Hyperscan library with completely independent parameters should work (for
* example, scanning a different database in a new stream and with new scratch
* space), but reusing data structures like stream state and/or scratch space
* will produce undefined behavior.
*
* @param id
* The ID number of the expression that matched. If the expression was a
* single expression compiled with @ref hs_compile(), this value will be
* zero.
*
* @param from
* - If a start of match flag is enabled for the current pattern, this
* argument will be set to the start of match for the pattern, provided
* that this start of match value lies within the current 'start of match
* horizon' chosen by one of the SOM_HORIZON mode flags.
* - If the start of match value lies outside this horizon (possible only
* when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
* the @a from value will be set to @ref HS_OFFSET_PAST_HORIZON.
* - This argument will be set to zero if the Start of Match flag is not
* enabled for the given pattern.
*
* @param to
* The offset after the last byte that matches the expression.
*
* @param flags
* This is provided for future use and is unused at present.
*
* @param context
* The pointer supplied by the user to the call that triggered this
* callback: @ref hs_scan(), @ref hs_scan_vector(), @ref hs_scan_stream(),
* or another streaming call that accepts a callback (@ref
* hs_close_stream(), @ref hs_reset_stream() or @ref
* hs_reset_and_copy_stream()).
*
* @return
* Non-zero if the matching should cease, else zero. If scanning is
* performed in streaming mode and a non-zero value is returned, any
* subsequent calls to @ref hs_scan_stream() for that stream will
* immediately return with @ref HS_SCAN_TERMINATED.
*/
typedef int (*match_event_handler)(unsigned int id,
unsigned long long from,
unsigned long long to,
unsigned int flags,
void *context);
/**
* Open and initialise a stream.
*
* Every stream created with this function must eventually be passed to @ref
* hs_close_stream().
*
* @param db
* A compiled pattern database.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param stream
* On success, a pointer to the generated @ref hs_stream_t will be
* returned; NULL on failure.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
hs_stream_t **stream);
/**
* Write data to be scanned to the opened stream.
*
* This is the function call in which the actual pattern matching takes place
* as data is written to the stream. Matches will be returned via the @ref
* match_event_handler callback supplied.
*
* @param id
* The stream ID (returned by @ref hs_open_stream()) to which the data
* will be written.
*
* @param data
* Pointer to the data to be scanned.
*
* @param length
* The number of bytes to scan.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param ctxt
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop (including when an
* earlier call on this stream was terminated by the callback, in which
* case this call returns immediately); other values on error.
*/
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *ctxt);
/**
* Close a stream.
*
* This function must be called for any stream created with @ref
* hs_open_stream(), even if scanning has been terminated by a non-zero return
* from the match callback function. To return a stream to its initial state
* so that it can be reused, see @ref hs_reset_stream() instead.
*
* Note: This operation may result in matches being returned (via calls to the
* match event callback) for expressions anchored to the end of the data stream
* (for example, via the use of the `$` meta-character). If these matches are
* not desired, NULL may be provided as the @ref match_event_handler callback.
*
* If NULL is provided as the @ref match_event_handler callback, it is
* permissible to provide a NULL scratch.
*
* @param id
* The stream ID returned by @ref hs_open_stream().
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param ctxt
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
/**
* Reset a stream to an initial state.
*
* Conceptually, this is equivalent to performing @ref hs_close_stream() on the
* given stream, followed by a @ref hs_open_stream(). This new stream replaces
* the original stream in memory, avoiding the overhead of freeing the old
* stream and allocating the new one.
*
* Note: This operation may result in matches being returned (via calls to the
* match event callback) for expressions anchored to the end of the original
* data stream (for example, via the use of the `$` meta-character). If these
* matches are not desired, NULL may be provided as the @ref match_event_handler
* callback.
*
* Note: the reset stream remains tied to the same database as the original
* stream.
*
* @param id
* The stream (as created by @ref hs_open_stream()) to be replaced.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context);
/**
* Duplicate the given stream. The new stream will have the same state as the
* original, including the current stream offset.
*
* To copy stream state onto an already open stream instead of creating a new
* one, see @ref hs_reset_and_copy_stream().
*
* @param to_id
* On success, a pointer to the new, copied @ref hs_stream_t will be
* returned; NULL on failure.
*
* @param from_id
* The stream (as created by @ref hs_open_stream()) to be copied.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
/**
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
* will first be reset (reporting any EOD matches if a non-NULL @a onEvent
* callback handler is provided).
*
* Note: the 'to' stream and the 'from' stream must be open against the same
* database.
*
* @param to_id
* The existing stream to be reset and then overwritten with the state of
* the @a from_id stream. Unlike @ref hs_copy_stream(), no new stream is
* created: this is a plain stream pointer, not an output parameter.
*
* @param from_id
* The stream (as created by @ref hs_open_stream()) to be copied.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
const hs_stream_t *from_id,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context);
/**
* The block (non-streaming) regular expression scanner.
*
* This is the function call in which the actual pattern matching takes place
* for block-mode pattern databases. Matches will be returned via the @ref
* match_event_handler callback supplied.
*
* @param db
* A compiled pattern database.
*
* @param data
* Pointer to the data to be scanned.
*
* @param length
* The number of bytes to scan.
*
* @param flags
* Flags modifying the behaviour of this function. This parameter is
* provided for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
* database.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t hs_scan(const hs_database_t *db, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context);
/**
* The vectored regular expression scanner.
*
* This is the function call in which the actual pattern matching takes place
* for vectored-mode pattern databases.
*
* @param db
* A compiled pattern database.
*
* @param data
* An array of pointers to the data blocks to be scanned.
*
* @param length
* An array of lengths (in bytes) of each data block to scan.
*
* @param count
* Number of data blocks to scan. This should correspond to the size of
* the @a data and @a length arrays.
*
* @param flags
* Flags modifying the behaviour of this function. This parameter is
* provided for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for
* this database.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
* callback indicated that scanning should stop; other values on error.
*/
hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
const unsigned int *length, unsigned int count,
unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
/**
* Allocate a "scratch" space for use by Hyperscan.
*
* This is required for runtime use, and one scratch space per thread, or
* concurrent caller, is required. Any allocator callback set by @ref
* hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
* function.
*
* @param db
* The database, as produced by @ref hs_compile().
*
* @param scratch
* On first allocation, a pointer to NULL should be provided so a new
* scratch can be allocated. If a scratch block has been previously
* allocated, then a pointer to it should be passed back in to see if it
* is valid for this database block. If a new scratch block is required,
* the original will be freed and the new one returned, otherwise the
* previous scratch block will be returned. On success, the scratch block
* will be suitable for use with the provided database in addition to any
* databases that the original scratch space was suitable for.
*
* @return
* @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
* allocation fails. Other errors may be returned if invalid parameters
* are specified.
*/
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
/**
* Allocate a scratch space that is a clone of an existing scratch space.
*
* This is useful when multiple concurrent threads will be using the same set
* of compiled databases, and another scratch space is required. Any allocator
* callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
* will be used by this function.
*
* @param src
* The existing @ref hs_scratch_t to be cloned.
*
* @param dest
* A pointer to the new scratch space will be returned here. The clone is
* released with @ref hs_free_scratch(), like a scratch space obtained
* from @ref hs_alloc_scratch().
*
* @return
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
* Other errors may be returned if invalid parameters are specified.
*/
hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
/**
* Provides the size, in bytes, of the given scratch space.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
* hs_clone_scratch().
*
* @param scratch_size
* On success, the size of the scratch space in bytes is placed in this
* parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
/**
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
* hs_clone_scratch().
*
* The free callback set by @ref hs_set_scratch_allocator() or @ref
* hs_set_allocator() will be used by this function.
*
* @param scratch
* The scratch block to be freed, as returned by @ref hs_alloc_scratch() or
* @ref hs_clone_scratch(). NULL may also be safely provided.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_scratch(hs_scratch_t *scratch);
/**
* Callback 'from' return value, indicating that the start of this match was
* too early to be tracked with the requested SOM_HORIZON precision.
*
* This is the value passed as the @a from argument of a @ref
* match_event_handler in that case; it can only occur when the SOM_HORIZON
* mode in use is not @ref HS_MODE_SOM_HORIZON_LARGE. The value has all bits
* of an unsigned long long set.
*/
#define HS_OFFSET_PAST_HORIZON (~0ULL)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_RUNTIME_H_ */

36
src/hs_version.c Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "ue2common.h"
#include "hs_common.h"
#include "hs_version.h"
/**
 * Report the version of this library build.
 *
 * @return
 *      The HS_VERSION_STRING constant supplied by hs_version.h.
 */
HS_PUBLIC_API
const char *hs_version(void) {
    const char *version = HS_VERSION_STRING;
    return version;
}

40
src/hs_version.h.in Normal file
View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_VERSION_H_C6428FAF8E3713
#define HS_VERSION_H_C6428FAF8E3713
/**
* A version string to identify this release of Hyperscan.
*
* The at-sign delimited placeholders below are presumably filled in by the
* build system when hs_version.h is generated from this template.
*/
#define HS_VERSION_STRING "@HS_VERSION@ @BUILD_DATE@"
/**
* The release version packed into a single integer: the major version is
* shifted into bits 24 and up, the minor version into bits 16 and up, the
* patch version into bits 8 and up, and the low eight bits are zero.
*/
#define HS_VERSION_32BIT ((@HS_MAJOR_VERSION@ << 24) | (@HS_MINOR_VERSION@ << 16) | (@HS_PATCH_VERSION@ << 8) | 0)
#endif /* HS_VERSION_H_C6428FAF8E3713 */

240
src/hwlm/hwlm.c Normal file
View File

@ -0,0 +1,240 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: runtime.
*/
#include "hwlm.h"
#include "hwlm_internal.h"
#include "noodle_engine.h"
#include "scratch.h"
#include "ue2common.h"
#include "fdr/fdr.h"
#include "nfa/accel.h"
#include "nfa/shufti.h"
#include "nfa/vermicelli.h"
#include <string.h>
#define MIN_ACCEL_LEN_BLOCK 16
#define MIN_ACCEL_LEN_STREAM 16
/**
 * \brief Run the acceleration scheme described by \p aux over [ptr, end).
 *
 * Dispatches on aux->accel_type to the matching vermicelli, double vermicelli
 * or shufti scanner and returns whatever pointer that scanner produces. Any
 * other accel type performs no scan at all and hands \p ptr straight back.
 */
static really_inline
const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
                         const u8 *end) {
    /* no acceleration unless a case below says otherwise: stay at ptr */
    const u8 *rv = ptr;

    switch (aux->accel_type) {
    case ACCEL_VERM:
        DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
        rv = vermicelliExec(aux->verm.c, 0, ptr, end);
        break;
    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
        rv = vermicelliExec(aux->verm.c, 1, ptr, end);
        break;
    case ACCEL_DVERM:
        DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
                     aux->dverm.c2);
        rv = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
        break;
    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        rv = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
        break;
    case ACCEL_SHUFTI:
        DEBUG_PRINTF("single shufti\n");
        rv = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
        break;
    default:
        break;
    }

    return rv;
}
/**
 * \brief Block-mode acceleration: try to move \p *start forward.
 *
 * Runs the accel scheme in \p aux over buf[*start, len). The scheme reports
 * the position of a byte that sits aux->generic.offset bytes into a literal,
 * so the earliest place a match can begin is that position minus the offset.
 * \p *start is advanced to that location when it lies beyond the current
 * start; otherwise \p *start is left untouched.
 *
 * Nothing is done when fewer than MIN_ACCEL_LEN_BLOCK bytes remain.
 *
 * \param aux    Acceleration scheme to run.
 * \param buf    Start of the buffer being scanned.
 * \param len    Length of \p buf in bytes.
 * \param start  IN/OUT: offset into \p buf at which scanning should begin.
 */
static really_inline
void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
                    size_t *start) {
    const size_t begin = *start;

    /* The first test keeps the unsigned subtraction from wrapping should a
     * caller ever hand us a start beyond the end of the buffer. */
    if (begin > len || len - begin < MIN_ACCEL_LEN_BLOCK) {
        return;
    }

    const u8 *floor = buf + begin;
    const u8 *found = run_hwlm_accel(aux, floor, buf + len);
    const size_t offset = aux->generic.offset;

    /* Only ever advance. Backing up by the offset is done on the distance
     * from the original start rather than on the pointer itself, so that we
     * never form a pointer in front of buf (undefined behaviour) and never
     * hand the engine a start earlier than the one the caller asked for. */
    if (found > floor && (size_t)(found - floor) > offset) {
        *start = (size_t)(found - buf) - offset;
    }

    assert(*start >= begin);
    assert(*start <= len);
}
/**
 * \brief Returns non-zero for the accel types that do not always catch up to
 * the end boundary of the region they were asked to scan.
 *
 * DSHUFTI is also inaccurate but it is not used by the hamsters, so it is not
 * listed here.
 */
static really_inline
int inaccurate_accel(u8 type) {
    switch (type) {
    case ACCEL_DVERM:
    case ACCEL_DVERM_NOCASE:
        return 1;
    default:
        return 0;
    }
}
/**
* \brief Streaming-mode acceleration: try to move \p *start forward.
*
* Does nothing if the scheme is ACCEL_NONE or fewer than MIN_ACCEL_LEN_STREAM
* bytes of the main buffer remain after \p *start.
*
* When \p *start is zero and there is history, the history buffer is run
* through the accel scheme first. If the scheme stops inside the history and
* the remaining tail cannot be cleared, the function returns without touching
* \p *start. Otherwise the main buffer is scanned and \p *start is advanced,
* never moved backwards.
*
* \param aux    Acceleration scheme to run.
* \param hbuf   History buffer.
* \param hlen   Length of \p hbuf in bytes.
* \param buf    Main buffer.
* \param len    Length of \p buf in bytes.
* \param start  IN/OUT: offset into \p buf at which scanning should begin.
*/
static never_inline
void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
const u8 *buf, size_t len, size_t *start) {
if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
return;
}
const u8 offset = aux->generic.offset;
DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
// Scan history buffer, but only if the start offset (which always refers to
// buf) is zero.
if (!*start && hlen) {
const u8 *ptr1 = hbuf;
const u8 *end1 = hbuf + hlen;
// The accel scheme is only run directly on history of 16 bytes or more.
if (hlen >= 16) {
ptr1 = run_hwlm_accel(aux, ptr1, end1);
}
// Short history, or a scheme that may stop short of the end: if at most
// 16 unscanned history bytes remain, rescan them from a padded copy.
if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
&& end1 != ptr1 && end1 - ptr1 <= 16) {
DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
/* see if we can finish off the history buffer completely */
u8 ALIGN_DIRECTIVE temp[17];
ptrdiff_t tlen = end1 - ptr1;
// temp = remaining history bytes, zero padded out to 17 bytes.
memcpy(temp, ptr1, tlen);
memset(temp + tlen, 0, 17 - tlen);
// Place the first byte of the main buffer directly after the history
// tail so a two-byte scheme can see the pair that straddles the join.
if (len) { /* for dverm */
temp[end1 - ptr1] = *buf;
}
const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
// A stop at or beyond the copied history bytes means nothing of interest
// was found in the history itself.
if (tempp - temp >= tlen) {
ptr1 = end1;
}
DEBUG_PRINTF("got %zu\n", tempp - temp);
}
// The scheme stopped inside the history: leave *start alone.
if (ptr1 != end1) {
DEBUG_PRINTF("bailing in history\n");
return;
}
}
DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
const u8 *ptr2 = buf + *start;
const u8 *end2 = buf + len;
const u8 *found = run_hwlm_accel(aux, ptr2, end2);
// Back up by the accel offset, but only advance *start when the result is
// still at or after the current start position.
if (found >= ptr2 + offset) {
size_t delta = found - offset - ptr2;
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
*start += delta;
} else if (hlen) {
// Debug output only: *start is left unchanged on this path.
UNUSED size_t remaining = offset + ptr2 - found;
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
}
}
/* Block-mode entry point; see hwlm.h for the caller-facing contract.
 *
 * A zero group mask short-circuits to HWLM_SUCCESS. Noodle tables are scanned
 * directly from buf + start. FDR tables first get a chance to skip ahead via
 * do_accel_block(), using accel1 when every requested group is covered by
 * accel1_groups and accel0 otherwise. */
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
                      size_t start, HWLMCallback cb, void *ctxt,
                      hwlm_group_t groups) {
    DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
    if (!groups) {
        DEBUG_PRINTF("groups all off\n");
        return HWLM_SUCCESS;
    }

    assert(start < len);

    if (t->type == HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("calling noodExec\n");
        return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
                        ctxt);
    }

    assert(t->type == HWLM_ENGINE_FDR);

    /* accel1 may only be used if no group outside accel1_groups is on */
    const int only_accel1_groups = (groups & ~t->accel1_groups) == 0;
    if (only_accel1_groups) {
        DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
    }
    const union AccelAux *scheme = only_accel1_groups ? &t->accel1
                                                      : &t->accel0;

    do_accel_block(scheme, buf, len, &start);
    DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
                 start);
    return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
}
/* Streaming-mode entry point; see hwlm.h for the caller-facing contract.
 *
 * The history buffer, its length and the main buffer are taken from
 * scratch->core_info. A zero group mask short-circuits to HWLM_SUCCESS.
 *
 * Noodle tables: a non-zero start is handled as a block-mode scan from that
 * offset; a zero start uses the streaming scan with scratch->fdr_temp_buf.
 *
 * FDR tables: acceleration is attempted only when fdrStreamStateActive()
 * reports no active stream state, using accel1 when every requested group is
 * covered by accel1_groups and accel0 otherwise. */
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
                               size_t len, size_t start, HWLMCallback cb,
                               void *ctxt, hwlm_group_t groups,
                               u8 *stream_state) {
    const u8 *hbuf = scratch->core_info.hbuf;
    const size_t hlen = scratch->core_info.hlen;
    const u8 *buf = scratch->core_info.buf;

    DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
                 len, start, groups);

    if (!groups) {
        return HWLM_SUCCESS;
    }

    assert(start < len);

    if (t->type == HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("calling noodExec\n");
        // If we've been handed a start offset, we can use a block mode scan at
        // that offset.
        if (start) {
            return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
                            cb, ctxt);
        } else {
            return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
                                     ctxt, scratch->fdr_temp_buf,
                                     FDR_TEMP_BUF_SIZE);
        }
    } else {
        // Checked rather than merely commented, matching hwlmExec.
        assert(t->type == HWLM_ENGINE_FDR);

        const union AccelAux *aa = &t->accel0;
        // accel1 may only be used if no group outside accel1_groups is on
        if ((groups & ~t->accel1_groups) == 0) {
            DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
            aa = &t->accel1;
        }

        // if no active stream state, use acceleration
        if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
            do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
        }

        DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
                     start);
        return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
                                start, cb, ctxt, groups, stream_state);
    }
}

142
src/hwlm/hwlm.h Normal file
View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: runtime API.
*/
#ifndef HWLM_H
#define HWLM_H
#include "ue2common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/** \brief Error return type for exec functions. */
typedef int hwlm_error_t;
/** \brief Type representing a set of groups as a bitmap. */
typedef u64a hwlm_group_t;
/** \brief HWLM callback return type. */
typedef hwlm_group_t hwlmcb_rv_t;
/** \brief Value representing all possible literal groups. */
#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
/** \brief Callback return value indicating that we should continue matching. */
#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
/** \brief Callback return value indicating that we should halt matching. */
#define HWLM_TERMINATE_MATCHING 0
/** \brief Matching finished without being terminated by the user. */
#define HWLM_SUCCESS 0
/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
* from the match callback. */
#define HWLM_TERMINATED 1
/** \brief An error occurred during matching.
*
* This should only be used if an unsupported engine was called (like one
* designed for a different architecture). */
#define HWLM_ERROR_UNKNOWN 2
struct hs_scratch;
struct HWLM;
/** \brief The type for an HWLM callback.
*
* This callback receives a start-of-match offset, an end-of-match offset, the
* ID of the match and the context pointer that was passed into \ref
* hwlmExec or \ref hwlmExecStreaming.
*
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
*
* A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
*
* An arbitrary group mask may be given as the return value. This will be taken
* as a hint by the underlying engine that only literals with groups
* overlapping the provided mask need to be reported.
*
* The underlying engine may choose not to report a match if no group
* belonging to the literal was active at the time the end match location
* was first reached.
*/
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
void *context);
/** \brief Match strings in table.
*
* If a match occurs, the callback function given will be called with the
* match offsets and literal ID described at \ref HWLMCallback, together with
* the \p context (passed through without interpretation).
*
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
* returning \ref HWLM_TERMINATE_MATCHING.
*
* If \p groups is zero, no scan is performed and \ref HWLM_SUCCESS is
* returned.
*
* \p start is the first offset at which a match may start. It must be less
* than \p len (this is asserted when \p groups is non-zero).
*
* The underlying engine may choose not to report any match which starts before
* the first possible match of a literal which is in the initial group mask.
*/
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
size_t start, HWLMCallback callback, void *context,
hwlm_group_t groups);
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
*
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
* history length and the main buffer.
*
* \p len is the length of the main buffer to be scanned.
*
* \p start is an advisory hint representing the first offset at which a match
* may start. Some underlying literal matchers may not respect it.
*
* \p stream_state is handed through to the FDR engine; it is not used when the
* table holds a noodle engine.
*
* Two buffers/lengths are provided. Matches that occur entirely within
* the history buffer will not be reported by this function. The offsets
* reported for the main buffer are relative to the start of that buffer (a
* match at byte 10 of the main buffer is reported as 10). Matches that start
* in the history buffer will have starts reported with 'negative' values.
*/
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
void *context, hwlm_group_t groups,
u8 *stream_state);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

635
src/hwlm/hwlm_build.cpp Normal file
View File

@ -0,0 +1,635 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: build code.
*/
#include "grey.h"
#include "hwlm.h"
#include "hwlm_build.h"
#include "hwlm_internal.h"
#include "noodle_engine.h"
#include "noodle_build.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "fdr/fdr.h"
#include "nfa/shufticompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/dump_charclass.h"
#include "util/target_info.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
using namespace std;
namespace ue2 {
static const unsigned int MAX_ACCEL_OFFSET = 16;
static const unsigned int MAX_SHUFTI_WIDTH = 240;
/**
* \brief Try to build a double-vermicelli (two byte) accel scheme.
*
* Every adjacent byte pair in the first MAX_ACCEL_OFFSET bytes of the first
* literal is considered. A pair survives only if it also occurs within the
* first MAX_ACCEL_OFFSET bytes of every other literal; the surviving
* candidates are ranked with candidate::operator>.
*
* \param lits Literals to accelerate; must not be empty (front() is used).
* \param aux  On success, aux->dverm is filled in.
* \return true if a scheme was written to \p aux, false if no pair qualified.
*/
static
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front();
// A candidate byte pair plus what was learned about it while checking it
// against all literals.
struct candidate {
candidate(void)
: c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
candidate(const hwlmLiteral &base, u32 offset)
: c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
b5insens(false), valid(true) {}
char c1;
char c2;
// Largest, over all literals, of the first offset at which the pair occurs.
u32 max_offset;
// True if the pair has to be compared with CASE_CLEAR applied.
bool b5insens;
// False only for the default-constructed "no candidate yet" value.
bool valid;
// Preference order: a valid candidate beats an invalid one; then a pair
// whose two bytes differ; then a case-sensitive pair; then a max_offset
// that is not larger than the other's (so ties favour *this).
bool operator>(const candidate &other) const {
if (!valid) {
return false;
}
if (!other.valid) {
return true;
}
if (other.cdiffers() && !cdiffers()) {
return false;
}
if (!other.cdiffers() && cdiffers()) {
return true;
}
if (!other.b5insens && b5insens) {
return false;
}
if (other.b5insens && !b5insens) {
return true;
}
if (max_offset > other.max_offset) {
return false;
}
return true;
}
// Do the two bytes differ, under the comparison mode this candidate uses?
bool cdiffers(void) const {
if (!b5insens) {
return c1 != c2;
}
return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
}
};
candidate best;
// NOTE(review): the "- 1" is applied to an unsigned value; this relies on
// literals being non-empty, which hwlmBuild asserts.
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
candidate curr(first, i);
/* check to see if this pair appears in each string */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
curr.b5insens = true; /* no choice but to be case insensitive */
}
bool found = false;
bool found_nc = false;
for (u32 j = 0;
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
if (curr.b5insens) {
found = found_nc;
}
}
// Only a case-folded occurrence exists in this literal: downgrade the
// candidate to case-insensitive rather than rejecting it.
if (!curr.b5insens && !found && found_nc) {
curr.b5insens = true;
found = true;
}
if (!found) {
goto next_candidate;
}
}
/* check to find the max offset where this appears */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
j++) {
bool found = false;
if (curr.b5insens) {
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
} else {
found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
}
// Only the first occurrence in each literal counts towards max_offset.
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
break;
}
}
}
if (curr > best) {
best = curr;
}
next_candidate:;
}
if (!best.valid) {
return false;
}
aux->dverm.offset = verify_u8(best.max_offset);
if (!best.b5insens) {
aux->dverm.accel_type = ACCEL_DVERM;
aux->dverm.c1 = best.c1;
aux->dverm.c2 = best.c2;
DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
} else {
// The case-insensitive variant stores the bytes with CASE_CLEAR applied.
aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
aux->dverm.c1 = best.c1 & CASE_CLEAR;
aux->dverm.c2 = best.c2 & CASE_CLEAR;
DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
}
return true;
}
/**
* \brief Try to build a single-vermicelli (one byte) accel scheme.
*
* Every byte in the first MAX_ACCEL_OFFSET bytes of the first literal is
* considered. A byte survives only if it also occurs within the first
* MAX_ACCEL_OFFSET bytes of every other literal; the surviving candidates are
* ranked with candidate::operator>.
*
* \param lits Literals to accelerate; must not be empty (front() is used).
* \param aux  On success, aux->verm is filled in.
* \return true if a scheme was written to \p aux, false if no byte qualified.
*/
static
bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front();
// A candidate byte plus what was learned about it while checking it against
// all literals.
struct candidate {
candidate(void)
: c(0), max_offset(0), b5insens(false), valid(false) {}
candidate(const hwlmLiteral &base, u32 offset)
: c(base.s[offset]), max_offset(0),
b5insens(false), valid(true) {}
char c;
// Largest, over all literals, of the first offset at which the byte occurs.
u32 max_offset;
// True if the byte has to be compared with CASE_CLEAR applied.
bool b5insens;
// False only for the default-constructed "no candidate yet" value.
bool valid;
// Preference order: a valid candidate beats an invalid one; then a
// case-sensitive byte; then a max_offset that is not larger than the
// other's (so ties favour *this).
bool operator>(const candidate &other) const {
if (!valid) {
return false;
}
if (!other.valid) {
return true;
}
if (!other.b5insens && b5insens) {
return false;
}
if (other.b5insens && !b5insens) {
return true;
}
if (max_offset > other.max_offset) {
return false;
}
return true;
}
};
candidate best;
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
candidate curr(first, i);
/* check to see if this character appears in each string */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
if (lit.nocase && ourisalpha(curr.c)) {
curr.b5insens = true; /* no choice but to be case insensitive */
}
bool found = false;
bool found_nc = false;
for (u32 j = 0;
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
found |= curr.c == lit.s[j];
found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
if (curr.b5insens) {
found = found_nc;
}
}
// Only a case-folded occurrence exists in this literal: downgrade the
// candidate to case-insensitive rather than rejecting it.
if (!curr.b5insens && !found && found_nc) {
curr.b5insens = true;
found = true;
}
if (!found) {
goto next_candidate;
}
}
/* check to find the max offset where this appears */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
bool found = false;
if (curr.b5insens) {
found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
} else {
found = curr.c == lit.s[j];
}
// Only the first occurrence in each literal counts towards max_offset.
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
break;
}
}
}
if (curr > best) {
best = curr;
}
next_candidate:;
}
if (!best.valid) {
return false;
}
if (!best.b5insens) {
aux->verm.accel_type = ACCEL_VERM;
aux->verm.c = best.c;
DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
} else {
// The case-insensitive variant stores the byte with CASE_CLEAR applied.
aux->verm.accel_type = ACCEL_VERM_NOCASE;
aux->verm.c = best.c & CASE_CLEAR;
DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
}
aux->verm.offset = verify_u8(best.max_offset);
return true;
}
static
void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
*min_len = MAX_ACCEL_OFFSET;
for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) {
continue;
}
const size_t lit_len = lit.s.length();
if (lit_len < *min_len) {
*min_len = verify_u32(lit_len);
}
filtered_lits->push_back(&lit);
#ifdef DEBUG
DEBUG_PRINTF("lit:");
for (u32 i = 0; i < lit.s.length(); i++) {
printf("%02hhx", lit.s[i]);
}
printf("\n");
#endif
}
}
static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
hwlm_group_t expected_groups, AccelAux *aux) {
DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
u32 min_len = MAX_ACCEL_OFFSET;
vector<const hwlmLiteral *> filtered_lits;
filterLits(lits, expected_groups, &filtered_lits, &min_len);
if (filtered_lits.empty()) {
return;
}
if (findDVerm(filtered_lits, aux)
|| findSVerm(filtered_lits, aux)) {
return;
}
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) {
continue;
}
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
unsigned char c = lit.s[i];
if (lit.nocase) {
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
reach[i].set(mytoupper(c));
reach[i].set(mytolower(c));
} else {
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
reach[i].set(c);
}
}
}
u32 min_count = ~0U;
u32 min_offset = ~0U;
for (u32 i = 0; i < min_len; i++) {
size_t count = reach[i].count();
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
describeClass(reach[i]).c_str(), count);
if (count < min_count) {
min_count = (u32)count;
min_offset = i;
}
}
assert(min_offset <= min_len);
if (min_count > MAX_SHUFTI_WIDTH) {
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
return;
}
const CharReach &cr = reach[min_offset];
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->shufti.accel_type = ACCEL_SHUFTI;
aux->shufti.offset = verify_u8(min_offset);
return;
}
DEBUG_PRINTF("fail\n");
}
/**
 * \brief Fill in both forward acceleration schemes of \p h.
 *
 * accel1 is derived from the literals that share a group with
 * \p expected_groups, and that mask is recorded in accel1_groups. accel0 is
 * derived using HWLM_ALL_GROUPS as the mask.
 */
static
void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
                       hwlm_group_t expected_groups) {
    h->accel1_groups = expected_groups;

    AccelAux *restricted = &h->accel1;
    AccelAux *general = &h->accel0;
    findForwardAccelScheme(lits, expected_groups, restricted);
    findForwardAccelScheme(lits, HWLM_ALL_GROUPS, general);
}
/**
 * \brief Debug aid: print id, groups and escaped text of every literal.
 *
 * Compiles to an empty function unless DEBUG is defined.
 */
static
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
#ifdef DEBUG
    DEBUG_PRINTF("building lit table for:\n");
    for (size_t idx = 0; idx < lits.size(); idx++) {
        const hwlmLiteral &lit = lits[idx];
        const char *nocase_tag = lit.nocase ? " (nc)" : "";
        printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
               escapeString(lit.s).c_str(), nocase_tag);
    }
#endif
}
#ifndef NDEBUG
// Called by an assertion.
static
bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
for (const auto &lit : lits) {
if (!lit.groups) {
return false;
}
}
return true;
}
#endif
static
bool isNoodleable(const vector<hwlmLiteral> &lits,
const hwlmStreamingControl *stream_control,
const CompileContext &cc) {
if (!cc.grey.allowNoodle) {
return false;
}
if (lits.size() != 1) {
DEBUG_PRINTF("too many literals for noodle\n");
return false;
}
if (stream_control) { // nullptr if in block mode
if (lits.front().s.length() + 1 > stream_control->history_max) {
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
lits.front().s.length(),
stream_control->history_max);
return false;
}
}
if (!lits.front().msk.empty()) {
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
return false;
}
return true;
}
// Builds the HWLM runtime structure: an HWLM header followed, at a
// cache-line-rounded offset, by a copy of either a noodle table (single
// suitable literal) or an FDR table (everything else). See hwlm_build.h for
// the caller-facing description of the parameters.
//
// Throws ResourceLimitError when a grey-box limit is exceeded and
// CompileError if a literal uses the reserved ID 0xffffffff. Returns nullptr
// if the chosen engine could not be built.
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control,
bool make_small, const CompileContext &cc,
hwlm_group_t expected_groups) {
assert(!lits.empty());
dumpLits(lits);
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
}
// Check that we haven't exceeded the maximum number of literals.
if (lits.size() > cc.grey.limitLiteralCount) {
throw ResourceLimitError();
}
// Safety and resource limit checks.
u64a total_chars = 0;
for (const auto &lit : lits) {
assert(!lit.s.empty());
if (lit.s.length() > cc.grey.limitLiteralLength) {
throw ResourceLimitError();
}
total_chars += lit.s.length();
if (total_chars > cc.grey.limitLiteralMatcherChars) {
throw ResourceLimitError();
}
// We do not allow the all-ones ID, as we reserve that for internal use
// within literal matchers.
if (lit.id == 0xffffffffu) {
assert(!"reserved id 0xffffffff used");
throw CompileError("Internal error.");
}
}
u8 engType = 0;
size_t engSize = 0;
// Type-erased owner for whichever engine table gets built below.
shared_ptr<void> eng;
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
assert(everyoneHasGroups(lits));
if (isNoodleable(lits, stream_control, cc)) {
DEBUG_PRINTF("build noodle table\n");
engType = HWLM_ENGINE_NOOD;
const hwlmLiteral &lit = lits.front();
auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
lit.nocase, lit.id);
if (noodle) {
engSize = noodSize(noodle.get());
}
// Note: the streaming outputs are written even if the noodle build failed.
if (stream_control) {
// For now, a single literal still goes to noodle and asks
// for a great big history
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
stream_control->literal_stream_state_required = 0;
}
eng = move(noodle);
} else {
DEBUG_PRINTF("building a new deal\n");
engType = HWLM_ENGINE_FDR;
// stream_control is handed to the FDR build; presumably it fills in the
// streaming OUT fields itself -- confirm against fdrBuildTable.
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
stream_control);
if (fdr) {
engSize = fdrSize(fdr.get());
}
eng = move(fdr);
}
if (!eng) {
return nullptr;
}
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
}
// Zeroed allocation: header rounded up to a cache line, then the engine.
auto h = aligned_zmalloc_unique<HWLM>(ROUNDUP_CL(sizeof(HWLM)) + engSize);
h->type = engType;
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
// Forward acceleration is only built for FDR tables, and only when enabled.
if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
buildForwardAccel(h.get(), lits, expected_groups);
}
if (stream_control) {
DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
stream_control->literal_history_required,
stream_control->history_max);
assert(stream_control->literal_history_required
<= stream_control->history_max);
}
return h;
}
/**
 * Total size in bytes of an HWLM structure: the engine's own size plus the
 * cache-line-rounded size of the HWLM header. Yields zero when the engine
 * type is not recognised or the engine reports a size of zero.
 */
size_t hwlmSize(const HWLM *h) {
    size_t engine_bytes = 0;

    if (h->type == HWLM_ENGINE_NOOD) {
        engine_bytes = noodSize((const noodTable *)HWLM_C_DATA(h));
    } else if (h->type == HWLM_ENGINE_FDR) {
        engine_bytes = fdrSize((const FDR *)HWLM_C_DATA(h));
    }

    return engine_bytes ? engine_bytes + ROUNDUP_CL(sizeof(*h)) : 0;
}
/**
 * Estimate how many repeated trailing characters a literal may carry before
 * a literal set of \a numLiterals entries becomes flood prone.
 *
 * At most one literal with noodle allowed yields "no limit" (all bits set);
 * every other configuration currently yields 3.
 */
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
    const size_t NO_LIMIT = ~(size_t)0;
    const size_t SHORT_SUFFIX = 3;

    // NOTE: this function contains a number of magic numbers which are
    // conservative estimates of flood-proneness based on internal details of
    // the various literal engines that fall under the HWLM aegis. If you
    // change those engines, you might need to change this function too.

    DEBUG_PRINTF("%zu literals\n", numLiterals);

    if (cc.grey.allowNoodle && numLiterals <= 1) {
        DEBUG_PRINTF("noodle\n");
        return NO_LIMIT;
    }

    if (cc.grey.fdrAllowTeddy && numLiterals <= 48) {
        DEBUG_PRINTF("teddy\n");
        return SHORT_SUFFIX;
    }

    if (cc.grey.fdrAllowTeddy && cc.target_info.has_avx2()
        && numLiterals <= 96) {
        DEBUG_PRINTF("avx2 teddy\n");
        return SHORT_SUFFIX;
    }

    // TODO: we had thought we could push this value up to 9, but it seems that
    // hurts performance on floods in some FDR models. Super-conservative for
    // now.
    DEBUG_PRINTF("fdr\n");
    return SHORT_SUFFIX;
}
} // namespace ue2

104
src/hwlm/hwlm_build.h Normal file
View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: build API.
*/
#ifndef HWLM_BUILD_H
#define HWLM_BUILD_H
#include "hwlm.h"
#include "hwlm_literal.h"
#include "ue2common.h"
#include "util/alloc.h"
#include <memory>
#include <vector>
struct HWLM;
namespace ue2 {
struct CompileContext;
struct Grey;
struct target_t;
/** \brief Structure gathering together the input/output parameters related to
* streaming mode operation. */
struct hwlmStreamingControl {
/** \brief IN parameter: Upper limit on the amount of history that can be
* requested. */
size_t history_max;
/** \brief IN parameter: History already known to be used before literal
* analysis. */
size_t history_min;
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals. */
size_t literal_history_required;
/** \brief OUT parameter: Stream state required by literal matcher in bytes.
* Can be zero, and generally will be small (0-8 bytes). */
size_t literal_stream_state_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
 * literals.
 *
 * \param lits The group of literals.
 * \param stream_control Streaming control parameters. If the matcher will
 * operate in non-streaming (block) mode, this pointer should be NULL.
 * \param make_small Optimise matcher for small size.
 * \param cc Compile context.
 * \param expected_groups Group mask; defaults to \ref HWLM_ALL_GROUPS.
 * NOTE(review): presumably the set of groups expected to be switched on
 * when scanning starts -- the original comment was "FIXME: document me!",
 * so confirm against the implementation.
 *
 * \return Owning pointer to the built matcher.
 *
 * Build failures are generally a result of memory allocation failure. These
 * may result in a nullptr return value, or a std::bad_alloc exception being
 * thrown.
 */
aligned_unique_ptr<HWLM>
hwlmBuild(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control, bool make_small,
const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
/**
 * \brief Estimate the flood-prone suffix length for a literal set.
 *
 * Returns an estimate of the number of repeated characters on the end of a
 * literal that will make a literal set of size \a numLiterals suffer
 * performance degradation.
 *
 * \param numLiterals Number of literals in the set under consideration.
 * \param cc Compile context.
 */
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
/** \brief Return the size in bytes of an HWLM structure.
 *
 * \param h HWLM structure to measure.
 */
size_t hwlmSize(const HWLM *h);
} // namespace
#endif // HWLM_BUILD_H

70
src/hwlm/hwlm_dump.cpp Normal file
View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: dump code.
*/
#include "config.h"
#include "hwlm_dump.h"
#include "hwlm_internal.h"
#include "noodle_build.h"
#include "ue2common.h"
#include "fdr/fdr_dump.h"
#include "nfa/accel_dump.h"
#include <cstdio>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
namespace ue2 {
void hwlmPrintStats(const HWLM *h, FILE *f) {
switch (h->type) {
case HWLM_ENGINE_NOOD:
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
break;
case HWLM_ENGINE_FDR:
fdrPrintStats((const FDR *)HWLM_C_DATA(h), f);
break;
default:
fprintf(f, "<unknown hwlm subengine>\n");
}
fprintf(f, "accel1_groups: %016llx\n", h->accel1_groups);
fprintf(f, "accel1:");
dumpAccelInfo(f, h->accel1);
fprintf(f, "accel0:");
dumpAccelInfo(f, h->accel0);
}
} // namespace ue2

50
src/hwlm/hwlm_dump.h Normal file
View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: dump API.
*/
#ifndef HWLM_DUMP_H
#define HWLM_DUMP_H
#ifdef DUMP_SUPPORT
#include <cstdio>
struct HWLM;
namespace ue2 {
/** \brief Dump some information about the given HWLM structure.
 *
 * \param h Matcher to describe.
 * \param f Output stream the description is written to.
 */
void hwlmPrintStats(const HWLM *h, FILE *f);
} // namespace ue2
#endif
#endif

62
src/hwlm/hwlm_internal.h Normal file
View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: data structures.
*/
#ifndef HWLM_INTERNAL_H
#define HWLM_INTERNAL_H
#include "hwlm.h"
#include "ue2common.h"
#include "nfa/accel.h"
/** \brief Underlying engine is FDR. One of the values held in HWLM::type. */
#define HWLM_ENGINE_FDR 12
/** \brief Underlying engine is Noodle. One of the values held in
 * HWLM::type. */
#define HWLM_ENGINE_NOOD 16
/** \brief Main Hamster Wheel Literal Matcher header. Followed by
 * engine-specific structure.
 *
 * The engine-specific structure is located with \ref HWLM_DATA /
 * \ref HWLM_C_DATA, i.e. at ROUNDUP_CL(sizeof(struct HWLM)) bytes from the
 * start of this header. */
struct HWLM {
u8 type; /**< HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
hwlm_group_t accel1_groups; /**< accelerable groups. */
union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
union AccelAux accel0; /**< fallback accel scheme */
};
/** \brief Fetch a const pointer to the underlying engine.
 *
 * The result is \a p advanced by ROUNDUP_CL(sizeof(struct HWLM)) bytes and is
 * returned as an untyped pointer; the caller casts it to the structure that
 * matches HWLM::type. NOTE(review): ROUNDUP_CL presumably rounds up to the
 * cache line size -- it is defined outside this header. */
#define HWLM_C_DATA(p) ((const void *)((const char *)(p) \
+ ROUNDUP_CL(sizeof(struct HWLM))))
/** \brief Fetch a pointer to the underlying engine.
 *
 * Non-const counterpart of \ref HWLM_C_DATA; same offset computation. */
#define HWLM_DATA(p) ((void *)((char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))
#endif

111
src/hwlm/hwlm_literal.cpp Normal file
View File

@ -0,0 +1,111 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
*/
#include "hwlm_literal.h"
#include "util/bitutils.h" // for CASE_BIT
#include "util/compare.h" // for ourisalpha
#include "util/ue2string.h" // for escapeString
#include <iomanip>
#include <sstream>
#include <boost/algorithm/cxx11/all_of.hpp>
using namespace std;
using namespace boost::algorithm;
namespace ue2 {
#ifdef DEBUG
/** \brief Debug helper: render every byte of \a v as two hex digits,
 * concatenated with no separator. Only compiled in DEBUG builds. */
static UNUSED
std::string dumpMask(const vector<u8> &v) {
    ostringstream out;
    // Base and fill character are sticky; only the width needs re-arming.
    out << hex << setfill('0');
    for (const u8 byte : v) {
        out << setw(2) << static_cast<unsigned int>(byte);
    }
    return out.str();
}
#endif
/** \brief Check that a msk/cmp pair does not contradict the literal \a s.
 *
 * Walks \a s, \a msk and \a cmp in lock-step from their last elements
 * backwards and stops when either the string or the mask is exhausted. For
 * each position, the literal byte ANDed with the mask byte must equal the
 * compare byte. When \a nocase is set and the literal byte is alphabetic,
 * CASE_BIT is removed from both mask and compare bytes first, so the check
 * ignores case for that position.
 *
 * \param s      The literal.
 * \param nocase True if the literal is case-insensitive.
 * \param msk    Mask bytes; the last element lines up with the last byte of
 *               \a s.
 * \param cmp    Compare bytes; must supply an element for every mask byte
 *               that is examined.
 * \return false if some examined position can never match, true otherwise.
 */
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
                      const vector<u8> &cmp) {
    string::const_reverse_iterator si = s.rbegin();
    vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();

    for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
        // Validate the cmp iterator BEFORE reading through it; the check is
        // useless once *ci has already been evaluated.
        assert(ci != cmp.rend());

        u8 c = *si, m = *mi, v = *ci;
        if (nocase && ourisalpha(c)) {
            // Case is not significant here: drop the case bit from the test.
            m &= ~CASE_BIT;
            v &= ~CASE_BIT;
        }

        if ((c & m) != v) {
            DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
            DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
            return false;
        }
    }

    return true;
}
/** \brief Complete constructor, takes group information and msk/cmp.
 *
 * \a msk_in and \a cmp_in must be the same length, at most
 * \ref HWLM_MASKLEN, and must be consistent with \a s_in (see
 * maskIsConsistent()). A mask consisting solely of zero bytes carries no
 * information, so in that case both stored vectors are emptied. */
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
                         bool noruns_in, u32 id_in, hwlm_group_t groups_in,
                         const vector<u8> &msk_in, const vector<u8> &cmp_in)
    : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
      groups(groups_in), msk(msk_in), cmp(cmp_in) {
    // Size preconditions: bounded length, and msk/cmp are parallel arrays.
    assert(msk.size() <= HWLM_MASKLEN);
    assert(msk.size() == cmp.size());

    DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n",
                 escapeString(s).c_str(), dumpMask(msk).c_str(),
                 dumpMask(cmp).c_str());

    // The supplied msk/cmp has to be applicable to the literal itself.
    assert(maskIsConsistent(s, nocase, msk, cmp));

    // Good hygiene: an all-zero mask tests nothing, so store no mask at all.
    const bool mask_tests_nothing = all_of_equal(msk.begin(), msk.end(), 0);
    if (mask_tests_nothing) {
        msk.clear();
        cmp.clear();
    }
}
} // namespace ue2

121
src/hwlm/hwlm_literal.h Normal file
View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
*/
#ifndef HWLM_LITERAL_H
#define HWLM_LITERAL_H
#include "hwlm.h"
#include "ue2common.h"
#include <string>
#include <vector>
namespace ue2 {
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
struct hwlmLiteral {
std::string s; //!< \brief The literal itself.
/** \brief The ID to pass to the callback if this literal matches.
 *
 * Note that the special value 0xFFFFFFFF is reserved for internal use and
 * should not be used. */
u32 id;
bool nocase; //!< \brief True if literal is case-insensitive.
/** \brief Matches for runs of this literal can be quashed.
 *
 * Advisory flag meaning that there is no value in returning runs of
 * additional matches for a literal after the first one, so such matches
 * can be quashed by the literal matcher. */
bool noruns;
/** \brief Set of groups that literal belongs to.
 *
 * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
 * the groups that are switched on. */
hwlm_group_t groups;
/** \brief Supplementary comparison mask.
 *
 * These two values add a supplementary comparison that is done over the
 * final 8 bytes of the string -- if v is those bytes, then the string must
 * match as well as (v & msk) == cmp.
 *
 * An empty msk is the safe way of not adding any comparison to the string;
 * unnecessarily filling in msk may turn off optimizations.
 *
 * The msk/cmp mechanism must NOT place a value into the literal that
 * conflicts with the contents of the string, but can be allowed to add
 * additional power within the string -- for example, to allow some case
 * sensitivity within a case-insensitive string.
 *
 * Values are stored in memory order -- i.e. the last byte of the mask
 * corresponds to the last byte of the string. Both vectors must be the
 * same size, and must not exceed \ref HWLM_MASKLEN in length.
 */
std::vector<u8> msk;
/** \brief Supplementary comparison value.
 *
 * See documentation for \ref msk.
 */
std::vector<u8> cmp;
/** \brief Simple constructor: no group information, no msk/cmp.
 *
 * The literal belongs to \ref HWLM_ALL_GROUPS, \ref noruns is false and
 * both msk and cmp are left empty. */
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(false),
groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {}
/** \brief Complete constructor, takes group information and msk/cmp.
 *
 * This constructor takes a msk/cmp pair. Both must be vectors of length <=
 * \ref HWLM_MASKLEN. If every byte of \a msk_in is zero, the stored msk and
 * cmp are both cleared. */
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
u32 id_in, hwlm_group_t groups_in,
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
};
/**
 * \brief Consistency test for a msk/cmp pair against a literal.
 *
 * Returns false if the given msk/cmp test can never match the literal
 * string s.
 *
 * \param s      The literal string.
 * \param nocase True if the literal is case-insensitive.
 * \param msk    Supplementary comparison mask (see hwlmLiteral::msk).
 * \param cmp    Supplementary comparison value (see hwlmLiteral::cmp).
 */
bool maskIsConsistent(const std::string &s, bool nocase,
const std::vector<u8> &msk, const std::vector<u8> &cmp);
} // namespace ue2
#endif // HWLM_LITERAL_H

110
src/hwlm/noodle_build.cpp Normal file
View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: build code.
*/
#include <cstring> // for memcpy
#include "noodle_build.h"
#include "noodle_internal.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/verify_types.h"
namespace ue2 {
/** \brief Choose the offset of the two-byte fragment Noodle should key on.
 *
 * Returns the index of the first byte that differs from its successor
 * (compared via mytoupper() when \a nocase is set and the byte is
 * alphabetic). If every adjacent pair is equal, the index of the last pair
 * (len - 2) is returned; literals shorter than two bytes yield 0.
 */
static
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
    if (len < 2) {
        return 0; // no adjacent pair to inspect
    }

    const size_t last_pair = len - 2;
    size_t pos = 0;

    // The last pair is the answer whenever nothing earlier differs, so it
    // never needs to be compared itself.
    while (pos < last_pair) {
        const char cur = lit[pos];
        const char next = lit[pos + 1];

        bool differs;
        if (nocase && ourisalpha(cur)) {
            differs = mytoupper(cur) != mytoupper(next);
        } else {
            differs = cur != next;
        }

        if (differs) {
            break;
        }
        ++pos;
    }

    return pos;
}
/** \brief Construct a Noodle matcher for the given literal.
 *
 * Allocates a zeroed noodTable with room for \a len trailing bytes, fills in
 * the id, length, key offset and case flag, and copies the literal bytes
 * into the table's str member (no terminator is appended).
 */
aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
                                             bool nocase, u32 id) {
    // Fixed-size header plus the literal bytes stored directly after it.
    const size_t total_bytes = sizeof(noodTable) + len;
    auto table = aligned_zmalloc_unique<noodTable>(total_bytes);
    assert(table);

    table->id = id;
    table->len = verify_u32(len);
    table->key_offset = verify_u32(findNoodFragOffset(lit, len, nocase));
    table->nocase = nocase ? 1 : 0;
    memcpy(table->str, lit, len);

    return table;
}
size_t noodSize(const noodTable *n) {
assert(n); // shouldn't call with null
return sizeof(*n) + n->len;
}
} // namespace ue2
#ifdef DUMP_SUPPORT
#include <cctype>
namespace ue2 {
/** \brief Print the length, key offset and literal of a Noodle table to
 * \a f. Bytes that are not graphic characters, and the backslash itself, are
 * written as \\xNN escapes. */
void noodPrintStats(const noodTable *n, FILE *f) {
    fprintf(f, "Noodle table\n");
    fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);

    fprintf(f, "String: ");
    for (u32 i = 0; i != n->len; ++i) {
        const auto ch = n->str[i];
        const bool needs_escape = !isgraph(ch) || ch == '\\';
        if (needs_escape) {
            fprintf(f, "\\x%02hhx", ch);
        } else {
            fprintf(f, "%c", ch);
        }
    }
    fprintf(f, "\n");
}
} // namespace ue2
#endif

64
src/hwlm/noodle_build.h Normal file
View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: build code.
*/
#ifndef NOODLE_BUILD_H_048A1A6D585A9A
#define NOODLE_BUILD_H_048A1A6D585A9A
#include "ue2common.h"
#include "util/alloc.h"
struct noodTable;
namespace ue2 {
/** \brief Construct a Noodle matcher for the given literal.
 *
 * \param lit    Pointer to the literal bytes.
 * \param len    Number of bytes at \a lit.
 * \param nocase True if the literal should match case-insensitively.
 * \param id     ID stored in the table (reported on match).
 * \return Owning pointer to the newly built table.
 */
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
bool nocase, u32 id);
/** \brief Return the size in bytes of the Noodle table \a n (must not be
 * null). */
size_t noodSize(const noodTable *n);
} // namespace ue2
#ifdef DUMP_SUPPORT
#include <cstdio>
namespace ue2 {
/** \brief Print a human-readable description of the Noodle table \a n to
 * \a f. Only available when DUMP_SUPPORT is defined. */
void noodPrintStats(const noodTable *n, FILE *f);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif /* NOODLE_BUILD_H_048A1A6D585A9A */

364
src/hwlm/noodle_engine.c Normal file
View File

@ -0,0 +1,364 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: runtime.
*/
#include "hwlm.h"
#include "noodle_engine.h"
#include "noodle_internal.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/masked_move.h"
#include "util/simd_utils.h"
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
/** \brief Noodle runtime context.
 *
 * Bundles everything final() needs to report a match: the callback, the
 * literal ID and user context handed to it, and an adjustment that is added
 * to the match position before the callback is invoked. */
struct cb_info {
HWLMCallback cb; //!< callback function called on match
u32 id; //!< ID to pass to callback on match
void *ctx; //!< caller-supplied context to pass to callback
size_t offsetAdj; //!< used in streaming mode
};
/* Return HWLM_TERMINATED from the enclosing function if x equals
 * HWLM_TERMINATED; otherwise fall through. Note that the expansion is a bare
 * brace block, not a do/while(0) statement. */
#define RETURN_IF_TERMINATED(x) \
{ \
if ((x) == HWLM_TERMINATED) { \
return HWLM_TERMINATED; \
} \
}
/* Drain the candidate bitmask z for the single-byte scanner. Each set bit is
 * extracted with findAndClearLSB_32(), turned into a buffer offset
 * (d - buf + pos) and passed to final() with keyLen 1, keyOffset 0 and
 * is_double 0. The enclosing function returns HWLM_TERMINATED as soon as
 * final() does.
 *
 * Expects these names in scope at the expansion site: z, d, buf, len, key,
 * noCase, cbi. */
#define SINGLE_ZSCAN() \
do { \
while (unlikely(z)) { \
u32 pos = findAndClearLSB_32(&z); \
size_t matchPos = d - buf + pos; \
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
/* Drain the candidate bitmask z for the two-byte scanner. Each set bit is
 * extracted with findAndClearLSB_32() and converted to the offset
 * d - buf + pos - 1, which is passed to final() with is_double 1 so that the
 * whole key is verified before the callback fires. The enclosing function
 * returns HWLM_TERMINATED as soon as final() does.
 *
 * NOTE(review): the "- 1" presumably reflects that a set bit marks the second
 * byte of the two-byte fragment -- confirm against the scanDouble* helpers.
 *
 * Expects these names in scope at the expansion site: z, d, buf, len, key,
 * keyLen, keyOffset, noCase, cbi. */
#define DOUBLE_ZSCAN() \
do { \
while (unlikely(z)) { \
u32 pos = findAndClearLSB_32(&z); \
size_t matchPos = d - buf + pos - 1; \
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
noCase, cbi, matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
/** \brief Return \a x with bit 0x20 cleared (mask 0xdf) when \a noCase is
 * set; otherwise return \a x unchanged. */
static really_inline
u8 caseClear8(u8 x, bool noCase) {
    if (noCase) {
        return (u8)(x & (u8)0xdf);
    }
    return x;
}
// Confirm a candidate and report it. For the double scanner (is_double set)
// the whole key is compared against the buffer at the candidate position and
// candidates that would run past the end of the buffer are dropped. The
// single character scanner is used only for single chars with case
// insensitivity used correctly, so it goes straight to the callback.
//
// pos is the offset of the scanned key fragment; keyOffset is subtracted to
// obtain the start of the key. Returns HWLM_TERMINATED if the callback asked
// for matching to stop, HWLM_SUCCESS otherwise (including on a rejected
// candidate).
static really_inline
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                   size_t keyOffset, bool is_double, bool noCase,
                   const struct cb_info *cbi, size_t pos) {
    pos -= keyOffset;

    // cmpForward returns 1 on mismatch; it is only reached when the key fits.
    if (is_double && (pos + keyLen > len ||
                      cmpForward(buf + pos, key, keyLen, noCase))) {
        return HWLM_SUCCESS;
    }

    pos += cbi->offsetAdj;
    DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1));
    hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx);

    return rv == HWLM_TERMINATE_MATCHING ? HWLM_TERMINATED : HWLM_SUCCESS;
}
#if defined(__AVX2__)
#define CHUNKSIZE 32
#define MASK_TYPE m256
#include "noodle_engine_avx2.c"
#else
#define CHUNKSIZE 16
#define MASK_TYPE m128
#include "noodle_engine_sse.c"
#endif
/** \brief Single-byte scan driver: look for key[0] across the whole buffer.
 *
 * Picks a strategy from the buffer length:
 *  - len <  CHUNKSIZE: one call to scanSingleShort().
 *  - len == CHUNKSIZE: one call to scanSingleUnaligned() at offset 0.
 *  - otherwise three stages: an unaligned scan up to the first
 *    CHUNKSIZE-aligned address (s2Start), scanSingleFast() over the aligned
 *    middle [s2Start, s2End), and a final unaligned scan positioned at
 *    len - CHUNKSIZE (s3Start) to cover [s2End, end).
 *
 * All s2 / s3 values are offsets relative to buf. The scanSingle* helpers and
 * getMask()/getCaseMask() are not defined in this file; presumably they come
 * from the noodle_engine_avx2.c / noodle_engine_sse.c file included above.
 *
 * \return HWLM_TERMINATED as soon as any stage reports it, otherwise the
 * result of the last stage run (HWLM_SUCCESS when the aligned scan already
 * reached the end of the buffer).
 */
static really_inline
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
bool noCase, const struct cb_info *cbi) {
hwlm_error_t rv;
size_t end = len;
const MASK_TYPE mask1 = getMask(key[0], noCase);
const MASK_TYPE caseMask = getCaseMask();
// buffer smaller than one vector: dedicated short-buffer path
if (len < CHUNKSIZE) {
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
return rv;
}
// exactly one vector: a single unaligned scan covers everything
if (len == CHUNKSIZE) {
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, len);
return rv;
}
uintptr_t data = (uintptr_t)buf;
// offset of the first CHUNKSIZE-aligned address at or after buf
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
uintptr_t last = data + end;
// offset of the last CHUNKSIZE-aligned address at or before buf + len
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
// offset of the final full chunk, which ends exactly at buf + len
uintptr_t s3Start = len - CHUNKSIZE;
if (s2Start) {
// first scan out to the fast scan starting point
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, s2Start);
RETURN_IF_TERMINATED(rv);
}
if (likely(s2Start != s2End)) {
// scan as far as we can, bounded by the last point this key can
// possibly match
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
s2Start, s2End);
RETURN_IF_TERMINATED(rv);
}
// if we are done bail out
if (s2End == end) {
return HWLM_SUCCESS;
}
// tail: the chunk at s3Start overlaps already-scanned data; the s2End/end
// arguments bound the region still to be reported
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
cbi, s2End, end);
return rv;
}
/** \brief Two-byte scan driver: look for the fragment key[keyOffset],
 * key[keyOffset + 1] and let final() verify the full key on each candidate.
 *
 * The scan range starts at keyOffset and stops at
 * end = len - keyLen + keyOffset + 2. As in the single-byte driver, the work
 * is split by the size of that range:
 *  - shorter than CHUNKSIZE: scanDoubleShort().
 *  - exactly CHUNKSIZE: one scanDoubleUnaligned().
 *  - otherwise: an unaligned stage up to s1End (one byte past the first
 *    aligned address, to catch a fragment straddling the boundary),
 *    scanDoubleFast() over the aligned middle [s2Start, s2End), and an
 *    unaligned tail positioned at end - CHUNKSIZE (s3Start).
 *
 * `off` tracks the lower bound passed to the tail scan; it is advanced to
 * s1End after stage 1 and to s2End after the fast stage.
 *
 * The scanDouble* helpers and getMask()/getCaseMask() are not defined in this
 * file; presumably they come from the noodle_engine_avx2.c /
 * noodle_engine_sse.c file included above.
 *
 * NOTE(review): the "fast" debug message prints s3Start as the upper bound
 * although the call below is bounded by s2End.
 *
 * \return HWLM_TERMINATED as soon as any stage reports it, otherwise the
 * result of the last stage run.
 */
static really_inline
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
const struct cb_info *cbi) {
hwlm_error_t rv;
// we stop scanning for the key-fragment when the rest of the key can't
// possibly fit in the remaining buffer
size_t end = len - keyLen + keyOffset + 2;
const MASK_TYPE caseMask = getCaseMask();
// one mask per byte of the two-byte fragment
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
// scan range smaller than one vector: dedicated short path
if (end - keyOffset < CHUNKSIZE) {
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, keyOffset, end);
return rv;
}
// scan range is exactly one vector: a single unaligned scan covers it
if (end - keyOffset == CHUNKSIZE) {
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
noCase, caseMask, mask1, mask2, cbi, keyOffset,
end);
return rv;
}
uintptr_t data = (uintptr_t)buf;
// offset of the first CHUNKSIZE-aligned address at or after buf + keyOffset
uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
uintptr_t s1End = s2Start + 1;
uintptr_t last = data + end;
// offset of the last CHUNKSIZE-aligned address at or before buf + end
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
// offset of the final full chunk, which ends exactly at buf + end
uintptr_t s3Start = end - CHUNKSIZE;
uintptr_t off = keyOffset;
if (s2Start != keyOffset) {
// first scan out to the fast scan starting point plus one char past to
// catch the key on the overlap
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
noCase, caseMask, mask1, mask2, cbi, off,
s1End);
RETURN_IF_TERMINATED(rv);
}
off = s1End;
// the aligned start is already at or past the end of the scan range
if (s2Start >= end) {
DEBUG_PRINTF("s2 == mL %zu\n", end);
return HWLM_SUCCESS;
}
if (likely(s2Start != s2End)) {
// scan as far as we can, bounded by the last point this key can
// possibly match
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
mask1, mask2, cbi, s2Start, s2End);
RETURN_IF_TERMINATED(rv);
off = s2End;
}
// if there isn't enough data left to match the key, bail out
if (s2End == end) {
return HWLM_SUCCESS;
}
// tail: the chunk at s3Start overlaps already-scanned data; off/end bound
// the region still to be reported
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
caseMask, mask1, mask2, cbi, off, end);
return rv;
}
/** \brief Case-insensitive instantiation of scanSingleMain(): the noCase
 * argument is a literal constant so it can be propagated when inlined. */
static really_inline
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
                              const struct cb_info *cbi) {
    hwlm_error_t result = scanSingleMain(buf, len, key, true, cbi);
    return result;
}
/** \brief Case-sensitive instantiation of scanSingleMain(): the noCase
 * argument is a literal constant so it can be propagated when inlined. */
static really_inline
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
                            const struct cb_info *cbi) {
    hwlm_error_t result = scanSingleMain(buf, len, key, false, cbi);
    return result;
}
// Single-character specialisation, used when keyLen = 1.
// Case-insensitive scanning is only selected when it was requested AND the
// key byte is alphabetic; every other combination takes the case-sensitive
// path.
static really_inline
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
                        const struct cb_info *cbi) {
    const bool caseless = ourisalpha(key[0]) && noCase;

    // kinda ugly, but dispatching to the two wrappers forces constant
    // propagation of the case flag
    return caseless ? scanSingleNoCase(buf, len, key, cbi)
                    : scanSingleCase(buf, len, key, cbi);
}
/** \brief Case-insensitive instantiation of scanDoubleMain(): the noCase
 * argument is a literal constant so it can be propagated when inlined. */
static really_inline
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
                              size_t keyLen, size_t keyOffset,
                              const struct cb_info *cbi) {
    hwlm_error_t result =
        scanDoubleMain(buf, len, key, keyLen, keyOffset, true, cbi);
    return result;
}
/** \brief Case-sensitive instantiation of scanDoubleMain(): the noCase
 * argument is a literal constant so it can be propagated when inlined. */
static really_inline
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
                            size_t keyLen, size_t keyOffset,
                            const struct cb_info *cbi) {
    hwlm_error_t result =
        scanDoubleMain(buf, len, key, keyLen, keyOffset, false, cbi);
    return result;
}
/** \brief Multi-byte key scan: dispatch on \a noCase to the matching
 * constant-flag wrapper around scanDoubleMain(). */
static really_inline
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                        size_t keyOffset, bool noCase,
                        const struct cb_info *cbi) {
    // kinda ugly, but this forces constant propagation
    if (!noCase) {
        return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
    }
    return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi);
}
// main entry point for the scan code: rejects buffers shorter than the key,
// then routes one-byte keys to the single scanner and everything else to the
// double scanner.
static really_inline
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                  size_t keyOffset, bool noCase, const struct cb_info *cbi) {
    // can't find string of length keyLen in a shorter buffer
    if (keyLen > len) {
        return HWLM_SUCCESS;
    }

    if (keyLen != 1) {
        return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
    }

    // a one-byte key can only be keyed on its first (and only) byte
    assert(keyOffset == 0);
    return scanSingle(buf, len, key, noCase, cbi);
}
/** \brief Block-mode scanner.
 *
 * Scans \a buf for the literal described by \a n and invokes \a cb for each
 * match.
 *
 * \param n          Noodle table; must not be null.
 * \param buf        Buffer to scan; must not be null.
 * \param len        Length of \a buf in bytes.
 * \param offset_adj Value added to every match position before it is passed
 *                   to the callback.
 * \param cb         Match callback.
 * \param ctxt       Caller-supplied context handed to \a cb.
 * \return HWLM_TERMINATED if the callback halted matching, otherwise
 *         HWLM_SUCCESS.
 */
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
                      size_t offset_adj, HWLMCallback cb, void *ctxt) {
    assert(n && buf);

    struct cb_info cbi = { cb, n->id, ctxt, offset_adj };

    // The literal is stored as n->len raw bytes with no terminator, so the
    // print must be bounded with a precision ("%.*s"); a bare "%*s" would
    // only set a minimum field width and keep reading until a NUL byte.
    // The precision argument has to be an int and the string a char pointer.
    DEBUG_PRINTF("nood scan of %zu bytes for %.*s\n", len, (int)n->len,
                 (const char *)n->str);

    return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
}
/** \brief Streaming-mode scanner.
 *
 * When history is present, up to n->len - 1 bytes from the end of the history
 * buffer and up to n->len - 1 bytes from the start of the new data are copied
 * into \a temp_buf and scanned first, with the callback offset shifted back
 * by the number of history bytes copied. The new data in \a buf is then
 * scanned on its own with no offset adjustment.
 *
 * \a temp_buffer_size is only consulted by an assertion: the bytes copied
 * must total less than it.
 *
 * \return HWLM_TERMINATED if the callback halted matching during either
 *         scan, otherwise the result of the scan over \a buf.
 */
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
                               size_t hlen, const u8 *buf, size_t len,
                               HWLMCallback cb, void *ctxt, u8 *temp_buf,
                               UNUSED size_t temp_buffer_size) {
    assert(n);

    struct cb_info cbi = {cb, n->id, ctxt, 0};

    if (hlen) {
        assert(hbuf);

        // Tail of the history and head of the new data, each capped at one
        // byte less than the literal length.
        size_t hist_bytes = MIN(n->len - 1, hlen);
        size_t data_bytes = MIN(n->len - 1, len);
        size_t span_len = hist_bytes + data_bytes;
        assert(span_len < temp_buffer_size);

        memcpy(temp_buf, hbuf + hlen - hist_bytes, hist_bytes);
        memcpy(temp_buf + hist_bytes, buf, data_bytes);

        // Unsigned negation: adding this to a position subtracts hist_bytes.
        cbi.offsetAdj = -hist_bytes;
        hwlm_error_t boundary_rv = scan(temp_buf, span_len, n->str, n->len,
                                        n->key_offset, n->nocase, &cbi);
        if (boundary_rv == HWLM_TERMINATED) {
            return HWLM_TERMINATED;
        }
    }

    assert(buf);

    cbi.offsetAdj = 0;
    return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
}

Some files were not shown because too many files have changed in this diff Show More