mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Initial commit of Hyperscan
This commit is contained in:
commit
904e436f11
6
.clang-format
Normal file
6
.clang-format
Normal file
@ -0,0 +1,6 @@
|
||||
BasedOnStyle: LLVM
|
||||
IndentWidth: 4
|
||||
UseTab: false
|
||||
AllowShortIfStatementsOnASingleLine: false
|
||||
IndentCaseLabels: false
|
||||
AccessModifierOffset: -4
|
103
.gitignore
vendored
Normal file
103
.gitignore
vendored
Normal file
@ -0,0 +1,103 @@
|
||||
##
|
||||
## There are some more .gitignore files in subdirs, but this is the main place
|
||||
## to add new entries. These are mostly for the common case when ue2 is built
|
||||
## in place
|
||||
##
|
||||
|
||||
# Autogenerated stuff that we don't want to know about
|
||||
.deps
|
||||
autom4te.cache
|
||||
autojunk
|
||||
.dirstamp
|
||||
|
||||
# Temp and swap files
|
||||
*~
|
||||
.*.swp
|
||||
.sw?
|
||||
|
||||
# compiler output and binaries
|
||||
*.a
|
||||
*.o
|
||||
*.lo
|
||||
*.la
|
||||
*.so
|
||||
*.pyc
|
||||
.libs
|
||||
bin
|
||||
|
||||
# Merge files created by git.
|
||||
*.orig
|
||||
|
||||
# sigs dir is handled externally
|
||||
signatures
|
||||
|
||||
# ignore pcre symlink if it exists
|
||||
pcre
|
||||
# but not pcre subdirs!
|
||||
!pcre/
|
||||
|
||||
# ignore boost symlink if it exists
|
||||
include/boost
|
||||
|
||||
# ignore sqlite3 symlink if it exists
|
||||
sqlite3
|
||||
|
||||
# Generated files
|
||||
src/config.h
|
||||
src/config.h.in
|
||||
src/hs_version.h
|
||||
src/fdr/fdr_autogen.c
|
||||
src/fdr/fdr_autogen_compiler.cpp
|
||||
src/fdr/teddy_autogen.c
|
||||
src/fdr/teddy_autogen_compiler.cpp
|
||||
src/parser/Parser.cpp
|
||||
|
||||
# Generated PCRE files
|
||||
pcre/pcre_chartables.c
|
||||
pcre/pcregrep
|
||||
pcre/pcretest
|
||||
|
||||
# Autoconf/automake/libtool noise
|
||||
Makefile
|
||||
Makefile.in
|
||||
aclocal.m4
|
||||
config.cache
|
||||
config.log
|
||||
config.status
|
||||
configure
|
||||
libhs.pc
|
||||
libtool
|
||||
m4/libtool.m4
|
||||
m4/ltoptions.m4
|
||||
m4/ltsugar.m4
|
||||
m4/ltversion.m4
|
||||
m4/lt~obsolete.m4
|
||||
src/stamp-h1
|
||||
|
||||
# Docs
|
||||
# The dev-reference Makefile is not generated, so re-include it despite the
# blanket "Makefile" rule above. (gitignore has no trailing comments: a "#"
# after a pattern becomes part of the pattern.)
!doc/dev-reference/Makefile
|
||||
doc/dev-reference/doxygen_sqlite3.db
|
||||
doc/dev-reference/doxygen_xml/
|
||||
doc/dev-reference/_build/
|
||||
|
||||
# Autotools noise in pcre
|
||||
pcre/INSTALL
|
||||
pcre/Makefile
|
||||
pcre/Makefile.in
|
||||
pcre/aclocal.m4
|
||||
pcre/ar-lib
|
||||
pcre/compile
|
||||
pcre/config.*
|
||||
pcre/configure
|
||||
pcre/depcomp
|
||||
pcre/install-sh
|
||||
pcre/*.pc
|
||||
pcre/libtool
|
||||
pcre/ltmain.sh
|
||||
pcre/missing
|
||||
pcre/pcre-config
|
||||
pcre/pcre.h
|
||||
pcre/pcre_stringpiece.h
|
||||
pcre/pcrecpparg.h
|
||||
pcre/stamp-h1
|
||||
pcre/test-driver
|
944
CMakeLists.txt
Normal file
944
CMakeLists.txt
Normal file
@ -0,0 +1,944 @@
|
||||
# string(TIMESTAMP) and OBJECT libraries, both used further down in this file,
# are not available before CMake 2.8.11, so state that requirement up front
# instead of failing part-way through configuration.
cmake_minimum_required (VERSION 2.8.11)
project (Hyperscan C CXX)
|
||||
|
||||
# Library version components; HS_VERSION is the dotted major.minor.patch form.
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 0)
set (HS_PATCH_VERSION 0)
set (HS_VERSION "${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}")

# Configure-time date stamp (YYYY-MM-DD).
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
|
||||
|
||||
# Project-local CMake modules live in <source>/cmake.
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)

# Standard probe modules. CheckCSourceCompiles / CheckCXXSourceCompiles are
# included explicitly because check_c_source_compiles() and
# check_cxx_source_compiles() are called directly later in this file.
include (CheckCCompilerFlag)
include (CheckCXXCompilerFlag)
include (CheckCSourceCompiles)
include (CheckCXXSourceCompiles)
include (CheckFunctionExists)
include (CheckIncludeFiles)
include (CheckIncludeFileCXX)
include (CheckLibraryExists)
include (CheckSymbolExists)
include (CMakeDependentOption)

# Project modules (platform detection and the ragel helper).
include (${CMAKE_MODULE_PATH}/platform.cmake)
include (${CMAKE_MODULE_PATH}/ragel.cmake)

# pkg-config is optional; it is only needed to install libhs.pc.
find_package(PkgConfig QUIET)
|
||||
|
||||
# Normalise the build type to upper case, defaulting to RELWITHDEBINFO when
# the user did not pick one.
if (CMAKE_BUILD_TYPE)
    string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
    message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
else()
    message(STATUS "Default build type 'Release with debug info'")
    set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
endif()

# RELEASE_BUILD is TRUE for RELEASE and RELWITHDEBINFO, FALSE otherwise.
set(RELEASE_BUILD FALSE)
if (CMAKE_BUILD_TYPE MATCHES "RELEASE|RELWITHDEBINFO")
    set(RELEASE_BUILD TRUE)
endif()
|
||||
|
||||
# All executables go to <build>/bin, all libraries (shared and static) to
# <build>/lib.
set(BINDIR ${PROJECT_BINARY_DIR}/bin)
set(LIBDIR ${PROJECT_BINARY_DIR}/lib)

# First for the generic no-config case
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BINDIR})
foreach (KIND LIBRARY ARCHIVE)
    set(CMAKE_${KIND}_OUTPUT_DIRECTORY ${LIBDIR})
endforeach()

# Second, for multi-config builds (e.g. msvc): the per-configuration
# variables are suffixed with the upper-cased configuration name.
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
    string (TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${BINDIR})
    foreach (KIND LIBRARY ARCHIVE)
        set(CMAKE_${KIND}_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${LIBDIR})
    endforeach()
endforeach()
|
||||
|
||||
|
||||
# Remember whether we are generating an Xcode project; the Boost include
# handling later in this file depends on it.
if (CMAKE_GENERATOR STREQUAL Xcode)
    set(XCODE TRUE)
endif()

# Search order: src/, the source root, then the build root (generated
# headers). The in-tree include/ directory is treated as a system include.
include_directories(src . ${CMAKE_BINARY_DIR})
include_directories(SYSTEM include)
|
||||
|
||||
# FindBoost reads these settings under their mixed-case "Boost_" names and
# CMake variable names are case-sensitive, so the all-caps spellings were
# silently ignored.
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED OFF)
set(Boost_USE_STATIC_RUNTIME OFF)
set(BOOST_MINVERSION 1.57.0)
set(Boost_NO_BOOST_CMAKE ON)

# first check for Boost installed on the system
find_package(Boost ${BOOST_MINVERSION})
if(NOT Boost_FOUND)
    # we might have boost in tree, so provide a hint and try again
    message(STATUS "trying include dir for boost")
    set(BOOST_INCLUDEDIR ${CMAKE_SOURCE_DIR}/include)
    find_package(Boost ${BOOST_MINVERSION})
    if(NOT Boost_FOUND)
        message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available or extract Boost headers to ${CMAKE_SOURCE_DIR}/include")
    endif()
endif()
|
||||
|
||||
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
find_program(RAGEL ragel)

# A Python interpreter is mandatory; expose it as PYTHON.
if (NOT PYTHONINTERP_FOUND)
    message(FATAL_ERROR "No python interpreter found")
endif()
set(PYTHON ${PYTHON_EXECUTABLE})
|
||||
|
||||
# OPTIMISE selects optimised compiler flags (see the -O3/-O2 vs -O0 choice in
# the non-MSVC flag setup). The help text previously claimed the opposite.
option(OPTIMISE "Turns on compiler optimizations (on by default unless debug output enabled or coverage testing)" TRUE)

option(DEBUG_OUTPUT "Enable debug output (warning: very verbose)" FALSE)

# Debug output defines DEBUG for every target and forces optimisation off.
if(DEBUG_OUTPUT)
    add_definitions(-DDEBUG)
    set(OPTIMISE FALSE)
endif()
|
||||
|
||||
option(BUILD_SHARED_LIBS "Build shared libs instead of static" OFF)
option(BUILD_STATIC_AND_SHARED "Build shared libs as well as static" OFF)

# Any request for shared libraries is rejected outright on Windows.
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
    if (NOT WIN32)
        message(STATUS "Building shared libraries")
    else()
        message(FATAL_ERROR "Windows DLLs currently not supported")
    endif()
endif()
|
||||
|
||||
# for config: mirror the OPTIMISE setting into HS_OPTIMIZE (presumably
# consumed by the config.h.in template -- confirm there). The value must be
# expanded: assigning the bare word OPTIMISE stores a non-empty string that is
# always truthy, even when optimisation is turned off.
set(HS_OPTIMIZE ${OPTIMISE})
|
||||
|
||||
# DUMP_SUPPORT is user-selectable (default ON) only when this is not a
# release build; otherwise it is forced OFF.
cmake_dependent_option(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)

option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE)

# NDEBUG is only added explicitly for DEBUG builds that ask for asserts to be
# disabled.
if (DISABLE_ASSERTS AND CMAKE_BUILD_TYPE STREQUAL "DEBUG")
    add_definitions(-DNDEBUG)
endif()

option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
|
||||
|
||||
# TODO: per platform config files?
|
||||
|
||||
# TODO: windows generator on cmake always uses msvc, even if we plan to build with icc
|
||||
if(MSVC OR MSVC_IDE)
|
||||
message(STATUS "Building for Windows")
|
||||
# Reject toolchains older than MSVC_VERSION 1700 up front.
if (MSVC_VERSION LESS 1700)
    message(FATAL_ERROR "The project requires C++11 features.")
endif()

if (WINDOWS_ICC)
    # Intel compiler driven through the Visual Studio generator.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
else()
    #TODO: don't hardcode arch
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
endif()

# Strip /RTC1 from the debug flags of both languages.
foreach (LANG CXX C)
    string(REPLACE "/RTC1" "" CMAKE_${LANG}_FLAGS_DEBUG "${CMAKE_${LANG}_FLAGS_DEBUG}")
endforeach()
|
||||
|
||||
else()
|
||||
|
||||
# compiler version checks TODO: test more compilers
|
||||
# Enforce the minimum g++ release needed for C++11.
# The version comes from CMake's own compiler identification rather than from
# parsing `g++ --version`: the previous regex only accepted single-digit
# version components (so e.g. 10.2.0 was mis-parsed) and relied on the
# deprecated exec_program() command.
if (CMAKE_COMPILER_IS_GNUCXX)
    set (GNUCXX_MINVER "4.8.1")
    set (GNUCXX_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
    message(STATUS "g++ version ${GNUCXX_VERSION}")
    if (GNUCXX_VERSION VERSION_LESS ${GNUCXX_MINVER})
        message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support")
    endif()
endif()
|
||||
|
||||
# Baseline flags for non-MSVC compilers. They are accumulated in
# EXTRA_C_FLAGS / EXTRA_CXX_FLAGS; more are tested and appended later.
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Werror")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Werror -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")

# Target the build host unless the user-supplied flags already mention -march.
if (NOT CMAKE_C_FLAGS MATCHES "march")
    message(STATUS "Building for current host CPU")
    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES "march")
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -march=native -mtune=native")
endif()

# GCC-only adjustments.
if (CMAKE_COMPILER_IS_GNUCC)
    # spurious warnings?
    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-array-bounds -Wno-maybe-uninitialized")
endif()
if (CMAKE_COMPILER_IS_GNUCXX)
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
endif()

# The optimisation level is prepended: -O3 for C and -O2 for C++ when
# OPTIMISE is on, -O0 for both otherwise.
if (NOT OPTIMISE)
    set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
    set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
else()
    set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
    set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
endif()

# Non-release builds keep the frame pointer.
if (NOT RELEASE_BUILD)
    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
|
||||
|
||||
endif()
|
||||
|
||||
# Header availability probes (C and, where it matters, C++).
check_include_files(unistd.h HAVE_UNISTD_H)
check_include_files(intrin.h HAVE_C_INTRIN_H)
check_include_file_cxx(intrin.h HAVE_CXX_INTRIN_H)
check_include_files(tmmintrin.h HAVE_TMMINTRIN_H)
check_include_files(x86intrin.h HAVE_C_X86INTRIN_H)
check_include_file_cxx(x86intrin.h HAVE_CXX_X86INTRIN_H)

# Aligned allocation primitives.
check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
check_function_exists(_aligned_malloc HAVE__ALIGNED_MALLOC)

# these end up in the config file
check_c_compiler_flag(-fvisibility=hidden HAS_C_HIDDEN)
check_cxx_compiler_flag(-fvisibility=hidden HAS_CXX_HIDDEN)

# testing a builtin takes a little more work: compile a snippet that uses it
check_c_source_compiles("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
check_cxx_source_compiles("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
|
||||
|
||||
# Optional warning flags for non-Windows builds. Each flag is probed with the
# compiler and only appended to EXTRA_C_FLAGS / EXTRA_CXX_FLAGS if accepted.
if (NOT WIN32)
    set(C_FLAGS_TO_CHECK
        # Variable length arrays are way bad, most especially at run time
        "-Wvla"
        # Pointer arithmetic on void pointers is doing it wrong.
        "-Wpointer-arith"
        # Build our C code with -Wstrict-prototypes -Wmissing-prototypes
        "-Wstrict-prototypes"
        "-Wmissing-prototypes"
    )
    foreach (FLAG ${C_FLAGS_TO_CHECK})
        # munge the name so it doesn't break things
        string(REPLACE "-" "_" FNAME C_FLAG${FLAG})
        check_c_compiler_flag("${FLAG}" ${FNAME})
        if (${FNAME})
            set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} ${FLAG}")
        endif()
    endforeach()

    set(CXX_FLAGS_TO_CHECK
        "-Wvla"
        "-Wpointer-arith"
    )
    foreach (FLAG ${CXX_FLAGS_TO_CHECK})
        string(REPLACE "-" "_" FNAME CXX_FLAG${FLAG})
        check_cxx_compiler_flag("${FLAG}" ${FNAME})
        if (${FNAME})
            set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} ${FLAG}")
        endif()
    endforeach()

    # For the warnings below the positive form (-Wfoo) is probed and, when the
    # compiler knows it, the negative form (-Wno-foo) is what gets added.

    # self-assign should be thrown away, but clang whinges
    check_c_compiler_flag("-Wself-assign" CC_SELF_ASSIGN)
    if (CC_SELF_ASSIGN)
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-self-assign")
    endif()
    check_cxx_compiler_flag("-Wself-assign" CXX_SELF_ASSIGN)
    if (CXX_SELF_ASSIGN)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-self-assign")
    endif()

    # clang gets up in our face for going paren crazy with macros
    check_c_compiler_flag("-Wparentheses-equality" CC_PAREN_EQUALITY)
    if (CC_PAREN_EQUALITY)
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-parentheses-equality")
    endif()

    # clang complains about unused const vars in our Ragel-generated code.
    check_cxx_compiler_flag("-Wunused-const-variable" CXX_UNUSED_CONST_VAR)
    if (CXX_UNUSED_CONST_VAR)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
    endif()

    # note this for later
    # g++ doesn't have this flag but clang does
    check_cxx_compiler_flag("-Wweak-vtables" CXX_WEAK_VTABLES)
    if (CXX_WEAK_VTABLES)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wweak-vtables")
    endif()

    check_cxx_compiler_flag("-Wmissing-declarations" CXX_MISSING_DECLARATIONS)
    if (CXX_MISSING_DECLARATIONS)
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wmissing-declarations")
    endif()

    # gcc5 complains about this; only the probe result is recorded here
    check_cxx_compiler_flag("-Wunused-variable" CXX_WUNUSED_VARIABLE)
endif()
|
||||
|
||||
# Add the Boost headers as a system include directory.
if (XCODE)
    # cmake doesn't think Xcode supports isystem, so pass the flag by hand
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIR}")
else()
    include_directories(SYSTEM ${Boost_INCLUDE_DIR})
endif()
|
||||
|
||||
|
||||
# Convenience booleans for the host operating system.
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    set(LINUX TRUE)
endif()

if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
    set(FREEBSD true)
endif()
|
||||
|
||||
# Diagnostic controls for the Intel compiler on non-Windows hosts. These go
# straight into CMAKE_<LANG>_FLAGS rather than the EXTRA_* accumulators.
if (NOT WIN32)
    if (CMAKE_C_COMPILER_ID MATCHES "Intel")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
    endif()
    if (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
    endif()
endif()
|
||||
|
||||
# Sub-projects that are always present, in the original processing order.
foreach (SUBDIR util unit doc/dev-reference)
    add_subdirectory(${SUBDIR})
endforeach()

# tools/ is optional and only added when it exists in the source tree.
if (EXISTS ${CMAKE_SOURCE_DIR}/tools)
    add_subdirectory(tools)
endif()
|
||||
|
||||
# do substitutions: generate config.h and hs_version.h into the build tree
configure_file(${CMAKE_MODULE_PATH}/config.h.in ${CMAKE_BINARY_DIR}/config.h)
configure_file(src/hs_version.h.in hs_version.h)

if (PKG_CONFIG_FOUND)
    # we really only need to do this if we have pkg-config
    configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars
    # A relative DESTINATION is resolved against the install prefix at install
    # time, so it lands in the same place as before by default while still
    # honouring an install-time prefix override and relocatable packaging.
    install(FILES ${CMAKE_BINARY_DIR}/libhs.pc
            DESTINATION lib/pkgconfig)
endif()
|
||||
|
||||
# Fold the accumulated EXTRA_* flags into the real compiler flags. This is
# done only after all tests are done, so the probes above are not affected by
# them.
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
|
||||
|
||||
# include the autogen targets, and make their build-tree output directory
# visible as an include path
add_subdirectory(src/fdr)
include_directories(${CMAKE_BINARY_DIR}/src/fdr)

# Extra compile flags for the generated parser source (none on Windows).
if (NOT WIN32)
    set(RAGEL_C_FLAGS "-Wno-unused")
endif()

set_property(SOURCE ${CMAKE_BINARY_DIR}/src/parser/Parser.cpp
             PROPERTY COMPILE_FLAGS "${RAGEL_C_FLAGS}")

# ragelmaker() comes from the project's ragel.cmake module.
ragelmaker(src/parser/Parser.rl)
|
||||
|
||||
# Public API headers; these are the only headers installed (to include/hs).
set(hs_HEADERS
    src/hs.h
    src/hs_common.h
    src/hs_compile.h
    src/hs_runtime.h
)
install(FILES ${hs_HEADERS} DESTINATION include/hs)

# Targets that the runtime object library is made to depend on.
set(fdr_autogen_targets
    autogen_runtime
    autogen_teddy_runtime
)
|
||||
|
||||
# Sources of the hs_exec object library, grouped by source directory. Entry
# order is unchanged from the original list.
set (hs_exec_SRCS
    ${hs_HEADERS}
    src/hs_version.h
    src/ue2common.h
    src/alloc.c
    src/allocator.h
    src/runtime.c
    # fdr
    src/fdr/fdr.c
    src/fdr/fdr.h
    src/fdr/fdr_internal.h
    src/fdr/fdr_confirm.h
    src/fdr/fdr_confirm_runtime.h
    src/fdr/fdr_streaming_runtime.h
    src/fdr/flood_runtime.h
    src/fdr/fdr_loadval.h
    src/fdr/teddy.c
    src/fdr/teddy_internal.h
    # hwlm
    src/hwlm/hwlm.c
    src/hwlm/hwlm.h
    src/hwlm/hwlm_internal.h
    src/hwlm/noodle_engine.c
    src/hwlm/noodle_engine.h
    src/hwlm/noodle_internal.h
    # nfa
    src/nfa/accel.c
    src/nfa/accel.h
    src/nfa/castle.c
    src/nfa/castle.h
    src/nfa/castle_internal.h
    src/nfa/gough.c
    src/nfa/gough_internal.h
    src/nfa/lbr.c
    src/nfa/lbr.h
    src/nfa/lbr_common_impl.h
    src/nfa/lbr_internal.h
    src/nfa/mcclellan.c
    src/nfa/mcclellan.h
    src/nfa/mcclellan_common_impl.h
    src/nfa/mcclellan_internal.h
    src/nfa/limex_accel.c
    src/nfa/limex_accel.h
    src/nfa/limex_exceptional.h
    src/nfa/limex_native.c
    src/nfa/limex_ring.h
    src/nfa/limex_simd128.c
    src/nfa/limex_simd256.c
    src/nfa/limex_simd384.c
    src/nfa/limex_simd512a.c
    src/nfa/limex_simd512b.c
    src/nfa/limex_simd512c.c
    src/nfa/limex.h
    src/nfa/limex_common_impl.h
    src/nfa/limex_context.h
    src/nfa/limex_internal.h
    src/nfa/limex_runtime.h
    src/nfa/limex_runtime_impl.h
    src/nfa/limex_state_impl.h
    src/nfa/mpv.h
    src/nfa/mpv.c
    src/nfa/mpv_internal.h
    src/nfa/nfa_api.h
    src/nfa/nfa_api_dispatch.c
    src/nfa/nfa_internal.h
    src/nfa/nfa_rev_api.h
    src/nfa/repeat.c
    src/nfa/repeat.h
    src/nfa/repeat_internal.h
    src/nfa/shufti.c
    src/nfa/shufti.h
    src/nfa/truffle.c
    src/nfa/truffle.h
    src/nfa/vermicelli.h
    src/nfa/vermicelli_run.h
    src/nfa/vermicelli_sse.h
    # sidecar
    src/sidecar/sidecar.c
    src/sidecar/sidecar.h
    src/sidecar/sidecar_generic.h
    src/sidecar/sidecar_internal.h
    src/sidecar/sidecar_shufti.c
    src/sidecar/sidecar_shufti.h
    # som
    src/som/som.h
    src/som/som_runtime.h
    src/som/som_runtime.c
    src/som/som_stream.c
    src/som/som_stream.h
    # rose
    src/rose/block.c
    src/rose/catchup.h
    src/rose/catchup.c
    src/rose/eod.c
    src/rose/infix.h
    src/rose/init.h
    src/rose/init.c
    src/rose/stream.c
    src/rose/match.h
    src/rose/match.c
    src/rose/miracle.h
    src/rose/runtime.h
    src/rose/rose_sidecar_runtime.h
    src/rose/rose.h
    src/rose/rose_internal.h
    src/rose/rose_types.h
    src/rose/rose_common.h
    # util
    src/util/bitutils.h
    src/util/exhaust.h
    src/util/fatbit.h
    src/util/fatbit.c
    src/util/join.h
    src/util/masked_move.c
    src/util/masked_move.h
    src/util/multibit.h
    src/util/multibit_internal.h
    src/util/multibit.c
    src/util/pack_bits.h
    src/util/popcount.h
    src/util/pqueue.h
    src/util/scatter.h
    src/util/scatter_runtime.h
    src/util/shuffle.h
    src/util/shuffle_ssse3.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/state_compress.h
    src/util/state_compress.c
    src/util/unaligned.h
    src/util/uniform_ops.h
    # top-level src/
    src/scratch.h
    src/scratch.c
    src/crc32.c
    src/crc32.h
    src/database.c
    src/database.h
)
|
||||
|
||||
|
||||
# The hs_SRCS source list, grouped by source directory. Entry order is
# unchanged from the original list; the dump sources are appended to it later
# when DUMP_SUPPORT is on.
set (hs_SRCS
    ${hs_HEADERS}
    src/crc32.h
    src/database.h
    src/grey.cpp
    src/grey.h
    src/hs.cpp
    src/hs_internal.h
    src/hs_version.c
    src/hs_version.h
    src/scratch.h
    src/state.h
    src/ue2common.h
    # compiler
    src/compiler/asserts.cpp
    src/compiler/asserts.h
    src/compiler/compiler.cpp
    src/compiler/compiler.h
    src/compiler/error.cpp
    src/compiler/error.h
    # fdr
    src/fdr/engine_description.cpp
    src/fdr/engine_description.h
    src/fdr/fdr_compile.cpp
    src/fdr/fdr_compile.h
    src/fdr/fdr_compile_internal.h
    src/fdr/fdr_compile_util.cpp
    src/fdr/fdr_confirm_compile.cpp
    src/fdr/fdr_confirm.h
    src/fdr/fdr_engine_description.cpp
    src/fdr/fdr_engine_description.h
    src/fdr/fdr_internal.h
    src/fdr/fdr_streaming_compile.cpp
    src/fdr/fdr_streaming_internal.h
    src/fdr/flood_compile.cpp
    src/fdr/teddy_compile.cpp
    src/fdr/teddy_compile.h
    src/fdr/teddy_engine_description.cpp
    src/fdr/teddy_engine_description.h
    src/fdr/teddy_internal.h
    # hwlm
    src/hwlm/hwlm_build.cpp
    src/hwlm/hwlm_build.h
    src/hwlm/hwlm_internal.h
    src/hwlm/hwlm_literal.cpp
    src/hwlm/hwlm_literal.h
    src/hwlm/noodle_build.cpp
    src/hwlm/noodle_build.h
    src/hwlm/noodle_internal.h
    # nfa
    src/nfa/accel.h
    src/nfa/accelcompile.cpp
    src/nfa/accelcompile.h
    src/nfa/callback.h
    src/nfa/castlecompile.cpp
    src/nfa/castlecompile.h
    src/nfa/dfa_min.cpp
    src/nfa/dfa_min.h
    src/nfa/goughcompile.cpp
    src/nfa/goughcompile.h
    src/nfa/goughcompile_accel.cpp
    src/nfa/goughcompile_internal.h
    src/nfa/goughcompile_reg.cpp
    src/nfa/mcclellan.h
    src/nfa/mcclellan_internal.h
    src/nfa/mcclellancompile.cpp
    src/nfa/mcclellancompile.h
    src/nfa/mcclellancompile_util.cpp
    src/nfa/mcclellancompile_util.h
    src/nfa/limex_compile.cpp
    src/nfa/limex_compile.h
    src/nfa/limex_accel.h
    src/nfa/limex_internal.h
    src/nfa/mpv_internal.h
    src/nfa/mpvcompile.cpp
    src/nfa/mpvcompile.h
    src/nfa/nfa_api.h
    src/nfa/nfa_api_queue.h
    src/nfa/nfa_api_util.h
    src/nfa/nfa_build_util.cpp
    src/nfa/nfa_build_util.h
    src/nfa/nfa_internal.h
    src/nfa/nfa_kind.h
    src/nfa/rdfa.h
    src/nfa/rdfa_merge.cpp
    src/nfa/rdfa_merge.h
    src/nfa/repeat_internal.h
    src/nfa/repeatcompile.cpp
    src/nfa/repeatcompile.h
    src/nfa/shufticompile.cpp
    src/nfa/shufticompile.h
    src/nfa/trufflecompile.cpp
    src/nfa/trufflecompile.h
    # nfagraph
    src/nfagraph/ng.cpp
    src/nfagraph/ng.h
    src/nfagraph/ng_anchored_acyclic.cpp
    src/nfagraph/ng_anchored_acyclic.h
    src/nfagraph/ng_anchored_dots.cpp
    src/nfagraph/ng_anchored_dots.h
    src/nfagraph/ng_asserts.cpp
    src/nfagraph/ng_asserts.h
    src/nfagraph/ng_builder.cpp
    src/nfagraph/ng_builder.h
    src/nfagraph/ng_calc_components.cpp
    src/nfagraph/ng_calc_components.h
    src/nfagraph/ng_cyclic_redundancy.cpp
    src/nfagraph/ng_cyclic_redundancy.h
    src/nfagraph/ng_depth.cpp
    src/nfagraph/ng_depth.h
    src/nfagraph/ng_dominators.cpp
    src/nfagraph/ng_dominators.h
    src/nfagraph/ng_edge_redundancy.cpp
    src/nfagraph/ng_edge_redundancy.h
    src/nfagraph/ng_equivalence.cpp
    src/nfagraph/ng_equivalence.h
    src/nfagraph/ng_execute.cpp
    src/nfagraph/ng_execute.h
    src/nfagraph/ng_expr_info.cpp
    src/nfagraph/ng_expr_info.h
    src/nfagraph/ng_extparam.cpp
    src/nfagraph/ng_extparam.h
    src/nfagraph/ng_fixed_width.cpp
    src/nfagraph/ng_fixed_width.h
    src/nfagraph/ng_graph.h
    src/nfagraph/ng_haig.cpp
    src/nfagraph/ng_haig.h
    src/nfagraph/ng_holder.cpp
    src/nfagraph/ng_holder.h
    src/nfagraph/ng_is_equal.cpp
    src/nfagraph/ng_is_equal.h
    src/nfagraph/ng_lbr.cpp
    src/nfagraph/ng_lbr.h
    src/nfagraph/ng_literal_analysis.cpp
    src/nfagraph/ng_literal_analysis.h
    src/nfagraph/ng_literal_component.cpp
    src/nfagraph/ng_literal_component.h
    src/nfagraph/ng_literal_decorated.cpp
    src/nfagraph/ng_literal_decorated.h
    src/nfagraph/ng_mcclellan.cpp
    src/nfagraph/ng_mcclellan.h
    src/nfagraph/ng_mcclellan_internal.h
    src/nfagraph/ng_limex.cpp
    src/nfagraph/ng_limex.h
    src/nfagraph/ng_limex_accel.cpp
    src/nfagraph/ng_limex_accel.h
    src/nfagraph/ng_misc_opt.cpp
    src/nfagraph/ng_misc_opt.h
    src/nfagraph/ng_netflow.cpp
    src/nfagraph/ng_netflow.h
    src/nfagraph/ng_prefilter.cpp
    src/nfagraph/ng_prefilter.h
    src/nfagraph/ng_prune.cpp
    src/nfagraph/ng_prune.h
    src/nfagraph/ng_puff.cpp
    src/nfagraph/ng_puff.h
    src/nfagraph/ng_redundancy.cpp
    src/nfagraph/ng_redundancy.h
    src/nfagraph/ng_region.cpp
    src/nfagraph/ng_region.h
    src/nfagraph/ng_region_redundancy.cpp
    src/nfagraph/ng_region_redundancy.h
    src/nfagraph/ng_repeat.cpp
    src/nfagraph/ng_repeat.h
    src/nfagraph/ng_reports.cpp
    src/nfagraph/ng_reports.h
    src/nfagraph/ng_restructuring.cpp
    src/nfagraph/ng_restructuring.h
    src/nfagraph/ng_revacc.cpp
    src/nfagraph/ng_revacc.h
    src/nfagraph/ng_rose.cpp
    src/nfagraph/ng_rose.h
    src/nfagraph/ng_sep.cpp
    src/nfagraph/ng_sep.h
    src/nfagraph/ng_small_literal_set.cpp
    src/nfagraph/ng_small_literal_set.h
    src/nfagraph/ng_som.cpp
    src/nfagraph/ng_som.h
    src/nfagraph/ng_som_add_redundancy.cpp
    src/nfagraph/ng_som_add_redundancy.h
    src/nfagraph/ng_som_util.cpp
    src/nfagraph/ng_som_util.h
    src/nfagraph/ng_split.cpp
    src/nfagraph/ng_split.h
    src/nfagraph/ng_squash.cpp
    src/nfagraph/ng_squash.h
    src/nfagraph/ng_stop.cpp
    src/nfagraph/ng_stop.h
    src/nfagraph/ng_uncalc_components.cpp
    src/nfagraph/ng_uncalc_components.h
    src/nfagraph/ng_undirected.h
    src/nfagraph/ng_utf8.cpp
    src/nfagraph/ng_utf8.h
    src/nfagraph/ng_util.cpp
    src/nfagraph/ng_util.h
    src/nfagraph/ng_vacuous.cpp
    src/nfagraph/ng_vacuous.h
    src/nfagraph/ng_width.cpp
    src/nfagraph/ng_width.h
    # parser
    src/parser/AsciiComponentClass.cpp
    src/parser/AsciiComponentClass.h
    src/parser/Component.cpp
    src/parser/Component.h
    src/parser/ComponentAlternation.cpp
    src/parser/ComponentAlternation.h
    src/parser/ComponentAssertion.cpp
    src/parser/ComponentAssertion.h
    src/parser/ComponentAtomicGroup.cpp
    src/parser/ComponentAtomicGroup.h
    src/parser/ComponentBackReference.cpp
    src/parser/ComponentBackReference.h
    src/parser/ComponentBoundary.cpp
    src/parser/ComponentBoundary.h
    src/parser/ComponentByte.cpp
    src/parser/ComponentByte.h
    src/parser/ComponentClass.cpp
    src/parser/ComponentClass.h
    src/parser/ComponentCondReference.cpp
    src/parser/ComponentCondReference.h
    src/parser/ComponentEUS.cpp
    src/parser/ComponentEUS.h
    src/parser/ComponentEmpty.cpp
    src/parser/ComponentEmpty.h
    src/parser/ComponentRepeat.cpp
    src/parser/ComponentRepeat.h
    src/parser/ComponentSequence.cpp
    src/parser/ComponentSequence.h
    src/parser/ComponentVisitor.cpp
    src/parser/ComponentVisitor.h
    src/parser/ComponentWordBoundary.cpp
    src/parser/ComponentWordBoundary.h
    src/parser/ConstComponentVisitor.cpp
    src/parser/ConstComponentVisitor.h
    src/parser/Parser.cpp
    src/parser/Parser.h
    src/parser/Utf8ComponentClass.cpp
    src/parser/Utf8ComponentClass.h
    src/parser/buildstate.cpp
    src/parser/buildstate.h
    src/parser/check_refs.cpp
    src/parser/check_refs.h
    src/parser/parse_error.cpp
    src/parser/parse_error.h
    src/parser/parser_util.cpp
    src/parser/position.h
    src/parser/position_info.h
    src/parser/prefilter.cpp
    src/parser/prefilter.h
    src/parser/shortcut_literal.cpp
    src/parser/shortcut_literal.h
    src/parser/ucp_table.cpp
    src/parser/ucp_table.h
    src/parser/unsupported.cpp
    src/parser/unsupported.h
    src/parser/utf8_validate.h
    src/parser/utf8_validate.cpp
    # sidecar
    src/sidecar/sidecar_compile.cpp
    src/sidecar/sidecar_compile.h
    # smallwrite
    src/smallwrite/smallwrite_build.cpp
    src/smallwrite/smallwrite_build.h
    src/smallwrite/smallwrite_internal.h
    # som
    src/som/slot_manager.cpp
    src/som/slot_manager.h
    src/som/slot_manager_internal.h
    src/som/som.h
    # rose
    src/rose/rose_build.h
    src/rose/rose_build_add.cpp
    src/rose/rose_build_add_internal.h
    src/rose/rose_build_add_mask.cpp
    src/rose/rose_build_anchored.cpp
    src/rose/rose_build_anchored.h
    src/rose/rose_build_bytecode.cpp
    src/rose/rose_build_compile.cpp
    src/rose/rose_build_convert.cpp
    src/rose/rose_build_convert.h
    src/rose/rose_build_impl.h
    src/rose/rose_build_infix.cpp
    src/rose/rose_build_infix.h
    src/rose/rose_build_lookaround.cpp
    src/rose/rose_build_lookaround.h
    src/rose/rose_build_merge.cpp
    src/rose/rose_build_merge.h
    src/rose/rose_build_misc.cpp
    src/rose/rose_build_role_aliasing.cpp
    src/rose/rose_build_scatter.cpp
    src/rose/rose_build_scatter.h
    src/rose/rose_build_util.h
    src/rose/rose_build_width.cpp
    src/rose/rose_build_width.h
    src/rose/rose_graph.h
    src/rose/rose_in_graph.h
    src/rose/rose_in_util.cpp
    src/rose/rose_in_util.h
    # util
    src/util/alloc.cpp
    src/util/alloc.h
    src/util/bitfield.h
    src/util/boundary_reports.h
    src/util/charreach.cpp
    src/util/charreach.h
    src/util/charreach_util.h
    src/util/compare.h
    src/util/compile_context.cpp
    src/util/compile_context.h
    src/util/compile_error.cpp
    src/util/compile_error.h
    src/util/container.h
    src/util/cpuid_flags.c
    src/util/cpuid_flags.h
    src/util/depth.cpp
    src/util/depth.h
    src/util/determinise.h
    src/util/dump_mask.cpp
    src/util/dump_mask.h
    src/util/graph.h
    src/util/internal_report.h
    src/util/multibit_build.cpp
    src/util/multibit_build.h
    src/util/order_check.h
    src/util/partial_store.h
    src/util/partitioned_set.h
    src/util/popcount.h
    src/util/queue_index_factory.h
    src/util/report.cpp
    src/util/report.h
    src/util/report_manager.cpp
    src/util/report_manager.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/target_info.cpp
    src/util/target_info.h
    src/util/ue2_containers.h
    src/util/ue2string.cpp
    src/util/ue2string.h
    src/util/unaligned.h
    src/util/unicode_def.h
    src/util/unicode_set.h
    src/util/uniform_ops.h
    src/util/verify_types.h
)
|
||||
|
||||
set(hs_dump_SRCS
|
||||
src/scratch_dump.cpp
|
||||
src/scratch_dump.h
|
||||
src/fdr/fdr_dump.cpp
|
||||
src/hwlm/hwlm_dump.cpp
|
||||
src/hwlm/hwlm_dump.h
|
||||
src/nfa/accel_dump.cpp
|
||||
src/nfa/accel_dump.h
|
||||
src/nfa/castle_dump.cpp
|
||||
src/nfa/castle_dump.h
|
||||
src/nfagraph/ng_dump.cpp
|
||||
src/nfagraph/ng_dump.h
|
||||
src/nfa/goughcompile_dump.cpp
|
||||
src/nfa/goughcompile_dump.h
|
||||
src/nfa/goughdump.cpp
|
||||
src/nfa/goughdump.h
|
||||
src/nfa/lbr_dump.cpp
|
||||
src/nfa/limex_dump.cpp
|
||||
src/nfa/mcclellandump.cpp
|
||||
src/nfa/mcclellandump.h
|
||||
src/nfa/mpv_dump.cpp
|
||||
src/nfa/nfa_dump_api.h
|
||||
src/nfa/nfa_dump_dispatch.cpp
|
||||
src/nfa/nfa_dump_internal.cpp
|
||||
src/nfa/nfa_dump_internal.h
|
||||
src/parser/dump.cpp
|
||||
src/parser/dump.h
|
||||
src/parser/position_dump.h
|
||||
src/sidecar/sidecar_dump.cpp
|
||||
src/sidecar/sidecar_dump.h
|
||||
src/smallwrite/smallwrite_dump.cpp
|
||||
src/smallwrite/smallwrite_dump.h
|
||||
src/som/slot_manager_dump.cpp
|
||||
src/som/slot_manager_dump.h
|
||||
src/rose/rose_build_dump.cpp
|
||||
src/rose/rose_build_dump.h
|
||||
src/rose/rose_in_dump.cpp
|
||||
src/rose/rose_in_dump.h
|
||||
src/rose/rose_dump.cpp
|
||||
src/rose/rose_dump.h
|
||||
src/util/dump_charclass.cpp
|
||||
src/util/dump_charclass.h
|
||||
)
|
||||
|
||||
if (DUMP_SUPPORT)
|
||||
set(hs_SRCS ${hs_SRCS} ${hs_dump_SRCS})
|
||||
endif()
|
||||
|
||||
# we group things by sublibraries, specifying shared and static and then
|
||||
# choose which ones to build
|
||||
|
||||
set (LIB_VERSION ${HS_VERSION})
|
||||
set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})
|
||||
|
||||
add_library(hs_exec OBJECT ${hs_exec_SRCS})
|
||||
add_dependencies(hs_exec ${fdr_autogen_targets})
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
|
||||
add_dependencies(hs_exec_shared ${fdr_autogen_targets})
|
||||
set_target_properties(hs_exec_shared PROPERTIES
|
||||
POSITION_INDEPENDENT_CODE TRUE)
|
||||
endif()
|
||||
|
||||
# hs_version.c is added explicitly to avoid some build systems that refuse to
|
||||
# create a lib without any src (I'm looking at you Xcode)
|
||||
|
||||
add_library(hs_runtime STATIC src/hs_version.c $<TARGET_OBJECTS:hs_exec>)
|
||||
|
||||
set_target_properties(hs_runtime PROPERTIES
|
||||
LINKER_LANGUAGE C)
|
||||
if (NOT BUILD_SHARED_LIBS)
|
||||
install(TARGETS hs_runtime DESTINATION lib)
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
add_library(hs_runtime_shared SHARED src/hs_version.c $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
set_target_properties(hs_runtime_shared PROPERTIES
|
||||
VERSION ${LIB_VERSION}
|
||||
SOVERSION ${LIB_SOVERSION}
|
||||
OUTPUT_NAME hs_runtime
|
||||
MACOSX_RPATH ON
|
||||
LINKER_LANGUAGE C)
|
||||
install(TARGETS hs_runtime_shared DESTINATION lib)
|
||||
endif()
|
||||
|
||||
# we want the static lib for testing
|
||||
add_library(hs STATIC ${hs_SRCS} $<TARGET_OBJECTS:hs_exec>)
|
||||
|
||||
add_dependencies(hs ragel_Parser)
|
||||
add_dependencies(hs autogen_compiler autogen_teddy_compiler)
|
||||
|
||||
if (NOT BUILD_SHARED_LIBS)
|
||||
install(TARGETS hs DESTINATION lib)
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
add_library(hs_shared SHARED ${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
add_dependencies(hs_shared ragel_Parser)
|
||||
add_dependencies(hs_shared autogen_compiler autogen_teddy_compiler)
|
||||
set_target_properties(hs_shared PROPERTIES
|
||||
OUTPUT_NAME hs
|
||||
VERSION ${LIB_VERSION}
|
||||
SOVERSION ${LIB_SOVERSION}
|
||||
MACOSX_RPATH ON)
|
||||
install(TARGETS hs_shared DESTINATION lib)
|
||||
endif()
|
||||
|
||||
if(NOT WIN32)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
26
COPYING
Normal file
26
COPYING
Normal file
@ -0,0 +1,26 @@
|
||||
Copyright (c) 2015, Intel Corporation
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
118
LICENSE
Normal file
118
LICENSE
Normal file
@ -0,0 +1,118 @@
|
||||
Hyperscan is licensed under the BSD License.
|
||||
|
||||
Copyright (c) 2015, Intel Corporation
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
This product also contains code from third parties, under the following
|
||||
licenses:
|
||||
|
||||
Intel's Slicing-by-8 CRC32 implementation
|
||||
-----------------------------------------
|
||||
|
||||
Copyright (c) 2004-2006, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Boost C++ Headers Library
|
||||
-------------------------
|
||||
|
||||
Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
The Google C++ Testing Framework (Google Test)
|
||||
----------------------------------------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
22
README.md
Normal file
22
README.md
Normal file
@ -0,0 +1,22 @@
|
||||
# Hyperscan
|
||||
|
||||
Hyperscan is a high-performance multiple regex matching library. It follows the
|
||||
regular expression syntax of the commonly-used libpcre library, but is a
|
||||
standalone library with its own C API.
|
||||
|
||||
Hyperscan uses hybrid automata techniques to allow simultaneous matching of
|
||||
large numbers (up to tens of thousands) of regular expressions and for the
|
||||
matching of regular expressions across streams of data.
|
||||
|
||||
Hyperscan is typically used in a DPI (deep packet inspection) library stack.
|
||||
|
||||
# Documentation
|
||||
|
||||
Information on building the Hyperscan library and using its API is available in
|
||||
the [Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).
|
||||
|
||||
# License
|
||||
|
||||
Hyperscan is licensed under the BSD License. See the LICENSE file in the
|
||||
project repository.
|
||||
|
54
cmake/backtrace.cmake
Normal file
54
cmake/backtrace.cmake
Normal file
@ -0,0 +1,54 @@
|
||||
# The `backtrace' function is available on Linux via glibc, and on FreeBSD if
|
||||
# the 'libexecinfo' package is installed.
|
||||
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
"#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
|
||||
BACKTRACE_LIBC)
|
||||
|
||||
if(BACKTRACE_LIBC)
|
||||
set(HAVE_BACKTRACE TRUE)
|
||||
set(BACKTRACE_CFLAGS "")
|
||||
set(BACKTRACE_LDFLAGS "")
|
||||
endif()
|
||||
|
||||
if(NOT BACKTRACE_LIBC)
|
||||
# FreeBSD 10 has backtrace but requires libexecinfo
|
||||
list(INSERT CMAKE_REQUIRED_LIBRARIES 0 "-lexecinfo")
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
"#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
|
||||
BACKTRACE_LIBEXECINFO)
|
||||
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES "-lexecinfo")
|
||||
|
||||
if(BACKTRACE_LIBEXECINFO)
|
||||
set(HAVE_BACKTRACE TRUE)
|
||||
set(BACKTRACE_CFLAGS "")
|
||||
set(BACKTRACE_LDFLAGS "-lexecinfo")
|
||||
else()
|
||||
# older FreeBSD requires it from ports
|
||||
list(INSERT CMAKE_REQUIRED_INCLUDES 0 "/usr/local/include")
|
||||
list(INSERT CMAKE_REQUIRED_LIBRARIES 0 "-L/usr/local/lib -lexecinfo")
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
"#include <stdlib.h>\n#include <execinfo.h>\nint main () { backtrace(NULL, 0); }"
|
||||
BACKTRACE_LIBEXECINFO_LOCAL)
|
||||
list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES "/usr/local/include")
|
||||
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES "-L/usr/local/lib -lexecinfo")
|
||||
if(BACKTRACE_LIBEXECINFO_LOCAL)
|
||||
set(HAVE_BACKTRACE TRUE)
|
||||
set(BACKTRACE_CFLAGS "-I/usr/local/include")
|
||||
set(BACKTRACE_LDFLAGS "-L/usr/local/lib -lexecinfo")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(HAVE_BACKTRACE)
|
||||
CHECK_C_COMPILER_FLAG(-rdynamic HAS_RDYNAMIC)
|
||||
if(HAS_RDYNAMIC)
|
||||
list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic)
|
||||
endif()
|
||||
# export HAVE_BACKTRACE to the caller's scope as well
|
||||
set(HAVE_BACKTRACE ${HAVE_BACKTRACE} PARENT_SCOPE)
|
||||
else()
|
||||
set(BACKTRACE_CFLAGS "")
|
||||
set(BACKTRACE_LDFLAGS "")
|
||||
endif()
|
||||
|
101
cmake/config.h.in
Normal file
101
cmake/config.h.in
Normal file
@ -0,0 +1,101 @@
|
||||
/* used by cmake */
|
||||
|
||||
/* "Define if the build is 32 bit" */
|
||||
#cmakedefine ARCH_32_BIT
|
||||
|
||||
/* "Define if the build is 64 bit" */
|
||||
#cmakedefine ARCH_64_BIT
|
||||
|
||||
/* "Define if building for IA32" */
|
||||
#cmakedefine ARCH_IA32
|
||||
|
||||
/* "Define if building for EM64T" */
|
||||
#cmakedefine ARCH_X86_64
|
||||
|
||||
/* internal build, switch on dump support. */
|
||||
#cmakedefine DUMP_SUPPORT
|
||||
|
||||
/* Build tools with threading support */
|
||||
#cmakedefine ENABLE_TOOLS_THREADS
|
||||
|
||||
/* Define to 1 if `backtrace' works. */
|
||||
#cmakedefine HAVE_BACKTRACE
|
||||
|
||||
/* C compiler has __builtin_assume_aligned */
|
||||
#cmakedefine HAVE_CC_BUILTIN_ASSUME_ALIGNED
|
||||
|
||||
/* C++ compiler has __builtin_assume_aligned */
|
||||
#cmakedefine HAVE_CXX_BUILTIN_ASSUME_ALIGNED
|
||||
|
||||
/* C++ compiler has x86intrin.h */
|
||||
#cmakedefine HAVE_CXX_X86INTRIN_H
|
||||
|
||||
/* C compiler has x86intrin.h */
|
||||
#cmakedefine HAVE_C_X86INTRIN_H
|
||||
|
||||
/* C++ compiler has intrin.h */
|
||||
#cmakedefine HAVE_CXX_INTRIN_H
|
||||
|
||||
/* C compiler has intrin.h */
|
||||
#cmakedefine HAVE_C_INTRIN_H
|
||||
|
||||
/* Define to 1 if you have the declaration of `pthread_barrier_init', and to 0
|
||||
if you don't. */
|
||||
#cmakedefine HAVE_DECL_PTHREAD_BARRIER_INIT
|
||||
|
||||
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
|
||||
0 if you don't. */
|
||||
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
|
||||
/* Define to 1 if you have the `malloc_info' function. */
|
||||
#cmakedefine HAVE_MALLOC_INFO
|
||||
|
||||
/* Define to 1 if you have the `memmem' function. */
|
||||
#cmakedefine HAVE_MEMMEM
|
||||
|
||||
/* Define to 1 if you have a working `mmap' system call. */
|
||||
#cmakedefine HAVE_MMAP
|
||||
|
||||
/* Define to 1 if `posix_memalign' works. */
|
||||
#cmakedefine HAVE_POSIX_MEMALIGN
|
||||
|
||||
/* Define to 1 if you have the <pthread.h> header file. */
|
||||
#cmakedefine HAVE_PTHREAD_H
|
||||
|
||||
/* Define to 1 if you have the `setrlimit' function. */
|
||||
#cmakedefine HAVE_SETRLIMIT
|
||||
|
||||
/* Define to 1 if you have the `shmget' function. */
|
||||
#cmakedefine HAVE_SHMGET
|
||||
|
||||
/* Define to 1 if you have the `sigaction' function. */
|
||||
#cmakedefine HAVE_SIGACTION
|
||||
|
||||
/* Define to 1 if you have the `sigaltstack' function. */
|
||||
#cmakedefine HAVE_SIGALTSTACK
|
||||
|
||||
/* Define if the sqlite3_open_v2 call is available */
|
||||
#cmakedefine HAVE_SQLITE3_OPEN_V2
|
||||
|
||||
/* Define to 1 if you have the <tmmintrin.h> header file. */
|
||||
#cmakedefine HAVE_TMMINTRIN_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#cmakedefine HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the `_aligned_malloc' function. */
|
||||
#cmakedefine HAVE__ALIGNED_MALLOC
|
||||
|
||||
/* Optimize, inline critical functions */
|
||||
#cmakedefine HS_OPTIMIZE
|
||||
|
||||
#cmakedefine HS_VERSION
|
||||
#cmakedefine HS_MAJOR_VERSION
|
||||
#cmakedefine HS_MINOR_VERSION
|
||||
#cmakedefine HS_PATCH_VERSION
|
||||
|
||||
#cmakedefine BUILD_DATE
|
||||
|
||||
/* define if this is a release build. */
|
||||
#cmakedefine RELEASE_BUILD
|
||||
|
9
cmake/platform.cmake
Normal file
9
cmake/platform.cmake
Normal file
@ -0,0 +1,9 @@
|
||||
# determine the target arch
|
||||
|
||||
# really only interested in the preprocessor here
|
||||
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_64_BIT)
|
||||
|
||||
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)
|
||||
|
||||
set(ARCH_X86_64 ${ARCH_64_BIT})
|
||||
set(ARCH_IA32 ${ARCH_32_BIT})
|
16
cmake/ragel.cmake
Normal file
16
cmake/ragel.cmake
Normal file
@ -0,0 +1,16 @@
|
||||
# function for doing all the dirty work in turning a .rl into C++
|
||||
|
||||
function(ragelmaker src_rl)
|
||||
get_filename_component(src_dir ${src_rl} PATH) # old cmake needs PATH
|
||||
get_filename_component(src_file ${src_rl} NAME_WE)
|
||||
set(rl_out ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp)
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}
|
||||
COMMAND ${RAGEL} ${CMAKE_CURRENT_SOURCE_DIR}/${src_rl} -o ${rl_out}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_rl}
|
||||
)
|
||||
add_custom_target(ragel_${src_file} DEPENDS ${rl_out})
|
||||
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
|
||||
endfunction(ragelmaker)
|
||||
|
35
doc/dev-reference/CMakeLists.txt
Normal file
35
doc/dev-reference/CMakeLists.txt
Normal file
@ -0,0 +1,35 @@
|
||||
find_program(DOXYGEN doxygen)
|
||||
|
||||
if (NOT DOXYGEN)
|
||||
message(STATUS "Doxygen not found, unable to generate API reference")
|
||||
else()
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/hyperscan.doxyfile.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/hyperscan.doxyfile" @ONLY)
|
||||
|
||||
add_custom_target(dev-reference-doxygen
|
||||
${DOXYGEN} ${CMAKE_CURRENT_BINARY_DIR}/hyperscan.doxyfile
|
||||
COMMENT "Building doxygen XML for API reference")
|
||||
endif()
|
||||
|
||||
find_program(SPHINX_BUILD sphinx-build)
|
||||
|
||||
if (NOT SPHINX_BUILD)
|
||||
message(STATUS "Sphinx not found, unable to generate developer reference")
|
||||
else()
|
||||
set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
|
||||
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
|
||||
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
|
||||
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/conf.py" @ONLY)
|
||||
|
||||
add_custom_target(dev-reference
|
||||
${SPHINX_BUILD}
|
||||
-b html
|
||||
-c "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
-d "${SPHINX_CACHE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${SPHINX_HTML_DIR}"
|
||||
DEPENDS dev-reference-doxygen
|
||||
COMMENT "Building HTML dev reference with Sphinx")
|
||||
endif()
|
4
doc/dev-reference/_static/hyperscan.css
Normal file
4
doc/dev-reference/_static/hyperscan.css
Normal file
@ -0,0 +1,4 @@
|
||||
/* Differentiate the way we display regex fragments. */
|
||||
.regexp {
|
||||
color: darkred !important;
|
||||
}
|
53
doc/dev-reference/api_constants.rst
Normal file
53
doc/dev-reference/api_constants.rst
Normal file
@ -0,0 +1,53 @@
|
||||
.. _api_constants:
|
||||
|
||||
########################
|
||||
API Reference: Constants
|
||||
########################
|
||||
|
||||
***********
|
||||
Error Codes
|
||||
***********
|
||||
|
||||
.. doxygengroup:: HS_ERROR
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
*****************
|
||||
hs_expr_ext flags
|
||||
*****************
|
||||
|
||||
.. doxygengroup:: HS_EXT_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
*************
|
||||
Pattern flags
|
||||
*************
|
||||
|
||||
.. doxygengroup:: HS_PATTERN_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
*************************
|
||||
CPU feature support flags
|
||||
*************************
|
||||
|
||||
.. doxygengroup:: HS_CPU_FEATURES_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
****************
|
||||
CPU tuning flags
|
||||
****************
|
||||
|
||||
.. doxygengroup:: HS_TUNE_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
||||
|
||||
******************
|
||||
Compile mode flags
|
||||
******************
|
||||
|
||||
.. doxygengroup:: HS_MODE_FLAG
|
||||
:content-only:
|
||||
:no-link:
|
29
doc/dev-reference/api_files.rst
Normal file
29
doc/dev-reference/api_files.rst
Normal file
@ -0,0 +1,29 @@
|
||||
.. _api_files:
|
||||
|
||||
####################
|
||||
API Reference: Files
|
||||
####################
|
||||
|
||||
**********
|
||||
File: hs.h
|
||||
**********
|
||||
|
||||
.. doxygenfile:: hs.h
|
||||
|
||||
*****************
|
||||
File: hs_common.h
|
||||
*****************
|
||||
|
||||
.. doxygenfile:: hs_common.h
|
||||
|
||||
******************
|
||||
File: hs_compile.h
|
||||
******************
|
||||
|
||||
.. doxygenfile:: hs_compile.h
|
||||
|
||||
******************
|
||||
File: hs_runtime.h
|
||||
******************
|
||||
|
||||
.. doxygenfile:: hs_runtime.h
|
365
doc/dev-reference/compilation.rst
Normal file
365
doc/dev-reference/compilation.rst
Normal file
@ -0,0 +1,365 @@
|
||||
.. include:: <isonum.txt>
|
||||
.. _compilation:
|
||||
|
||||
##################
|
||||
Compiling Patterns
|
||||
##################
|
||||
|
||||
*******************
|
||||
Building a Database
|
||||
*******************
|
||||
|
||||
The Hyperscan compiler API accepts regular expressions and converts them into a
|
||||
compiled pattern database that can then be used to scan data.
|
||||
|
||||
The API provides three functions that compile regular expressions into
|
||||
databases:
|
||||
|
||||
#. :c:func:`hs_compile`: compiles a single expression into a pattern database.
|
||||
|
||||
#. :c:func:`hs_compile_multi`: compiles an array of expressions into a pattern
|
||||
database. All of the supplied patterns will be scanned for concurrently at
|
||||
scan time, with user-supplied identifiers returned when they match.
|
||||
|
||||
#. :c:func:`hs_compile_ext_multi`: compiles an array of expressions as above,
|
||||
but allows :ref:`extparam` to be specified for each expression.
|
||||
|
||||
Compilation allows the Hyperscan library to analyze the given pattern(s) and
|
||||
pre-determine how to scan for these patterns in an optimized fashion that would
|
||||
be far too expensive to compute at run-time.
|
||||
|
||||
When compiling expressions, a decision needs to be made whether the resulting
|
||||
compiled patterns are to be used in a streaming, block or vectored mode:
|
||||
|
||||
- **Streaming mode**: the target data to be scanned is a continuous stream, not
|
||||
all of which is available at once; blocks of data are scanned in sequence and
|
||||
matches may span multiple blocks in a stream. In streaming mode, each stream
|
||||
requires a block of memory to store its state between scan calls.
|
||||
|
||||
- **Block mode**: the target data is a discrete, contiguous block which can be
|
||||
scanned in one call and does not require state to be retained.
|
||||
|
||||
- **Vectored mode**: the target data consists of a list of non-contiguous
|
||||
blocks that are available all at once. As for block mode, no retention of
|
||||
state is required.
|
||||
|
||||
To compile patterns to be used in streaming mode, the ``mode`` parameter of
|
||||
:c:func:`hs_compile` must be set to :c:member:`HS_MODE_STREAM`; similarly,
|
||||
block mode requires the use of :c:member:`HS_MODE_BLOCK` and vectored mode
|
||||
requires the use of :c:member:`HS_MODE_VECTORED`. A pattern database compiled
|
||||
for one mode (streaming, block or vectored) can only be used in that mode. The
|
||||
version of Hyperscan used to produce a compiled pattern database must match the
|
||||
version of Hyperscan used to scan with it.
|
||||
|
||||
Hyperscan provides support for targeting a database at a particular CPU
|
||||
platform; see :ref:`instr_specialization` for details.
|
||||
|
||||
***************
|
||||
Pattern Support
|
||||
***************
|
||||
|
||||
Hyperscan supports the pattern syntax used by the PCRE library ("libpcre"),
|
||||
described at <http://www.pcre.org/>. However, not all constructs available in
|
||||
libpcre are supported. The use of unsupported constructs will result in
|
||||
compilation errors.
|
||||
|
||||
====================
|
||||
Supported Constructs
|
||||
====================
|
||||
|
||||
The following regex constructs are supported by Hyperscan:
|
||||
|
||||
* Literal characters and strings, with all libpcre quoting and character
|
||||
escapes.
|
||||
|
||||
* Character classes such as :regexp:`.` (dot), :regexp:`[abc]`, and
|
||||
:regexp:`[^abc]`, as well as the predefined character classes :regexp:`\\s`,
|
||||
:regexp:`\\d`, :regexp:`\\w`, :regexp:`\\v`, and :regexp:`\\h` and their
|
||||
negated counterparts (:regexp:`\\S`, :regexp:`\\D`, :regexp:`\\W`,
|
||||
:regexp:`\\V`, and :regexp:`\\H`).
|
||||
|
||||
* The POSIX named character classes :regexp:`[[:xxx:]]` and negated named
|
||||
character classes :regexp:`[[:^xxx:]]`.
|
||||
|
||||
* Unicode character properties, such as :regexp:`\\p{L}`, :regexp:`\\P{Sc}`,
|
||||
:regexp:`\\p{Greek}`.
|
||||
|
||||
* Quantifiers:
|
||||
|
||||
* Quantifiers such as :regexp:`?`, :regexp:`*` and :regexp:`+` are supported
|
||||
when applied to arbitrary supported sub-expressions.
|
||||
|
||||
* Bounded repeat quantifiers such as :regexp:`{n}`, :regexp:`{m,n}`,
|
||||
:regexp:`{n,}` are supported with limitations.
|
||||
|
||||
* For arbitrary repeated sub-patterns: *n* and *m* should be either small
|
||||
or infinite, e.g. :regexp:`(a|b){4}`, :regexp:`(ab?c?d){4,10}` or
|
||||
:regexp:`(ab(cd)*){6,}`.
|
||||
|
||||
* For single-character width sub-patterns such as :regexp:`[^\\a]` or
|
||||
:regexp:`.` or :regexp:`x`, nearly all repeat counts are supported, except
|
||||
where repeats are extremely large (maximum bound greater than 32767).
|
||||
Stream states may be very large for large bounded repeats, e.g.
|
||||
:regexp:`a.{2000}b`. Note: such sub-patterns may be considerably
|
||||
cheaper if at the beginning or end of patterns and especially if the
|
||||
:c:member:`HS_FLAG_SINGLEMATCH` flag is on for that pattern.
|
||||
|
||||
* Lazy modifiers (:regexp:`?` appended to another quantifier, e.g.
|
||||
:regexp:`\\w+?`) are supported but ignored (as Hyperscan reports all
|
||||
matches).
|
||||
|
||||
* Parenthesization, including the named and unnamed capturing and
|
||||
non-capturing forms. However, capturing is ignored.
|
||||
|
||||
* Alternation with the :regexp:`|` symbol, as in :regexp:`foo|bar`.
|
||||
|
||||
* The anchors :regexp:`^`, :regexp:`$`, :regexp:`\\A`, :regexp:`\\Z` and
|
||||
:regexp:`\\z`.
|
||||
|
||||
* Option modifiers for:
|
||||
|
||||
* Case-sensitivity: :regexp:`(?i)` and :regexp:`(?-i)`
|
||||
* Multi-line: :regexp:`(?m)` and :regexp:`(?-m)`
|
||||
* Dot-all: :regexp:`(?s)` and :regexp:`(?-s)`
|
||||
* Extended syntax: :regexp:`(?x)` and :regexp:`(?-x)`
|
||||
|
||||
* The :regexp:`\\b` and :regexp:`\\B` zero-width assertions (word boundary and
|
||||
'not word boundary', respectively).
|
||||
|
||||
* Comments in :regexp:`(?# comment)` syntax.
|
||||
|
||||
* The :regexp:`(*UTF8)` and :regexp:`(*UCP)` control verbs at the beginning of a
|
||||
pattern, used to enable UTF-8 and UCP mode.
|
||||
|
||||
.. note:: Bounded-repeat quantifiers with large repeat counts of arbitrary
|
||||
expressions (e.g. :regexp:`([a-z]|bc*d|xy?z){1000,5000}`) will result in a
|
||||
"Pattern too large" error at pattern compile time.
|
||||
|
||||
.. note:: At this time, not all patterns can be successfully compiled with the
|
||||
:c:member:`HS_FLAG_SOM_LEFTMOST` flag, which enables per-pattern support for
|
||||
:ref:`som`. The patterns that support this flag are a subset of patterns that
|
||||
can be successfully compiled with Hyperscan; notably, many bounded repeat
|
||||
forms that can be compiled with Hyperscan without the Start of Match flag
|
||||
enabled cannot be compiled with the flag enabled.
|
||||
|
||||
======================
|
||||
Unsupported Constructs
|
||||
======================
|
||||
|
||||
The following regex constructs are not supported by Hyperscan:
|
||||
|
||||
* Backreferences and capturing sub-expressions.
|
||||
* Arbitrary zero-width assertions.
|
||||
* Subroutine references and recursive patterns.
|
||||
* Conditional patterns.
|
||||
* Backtracking control verbs.
|
||||
* The :regexp:`\\C` "single-byte" directive (which breaks UTF-8 sequences).
|
||||
* The :regexp:`\\R` newline match.
|
||||
* The :regexp:`\\K` start of match reset directive.
|
||||
* Callouts and embedded code.
|
||||
* Atomic grouping and possessive quantifiers.
|
||||
|
||||
*********
|
||||
Semantics
|
||||
*********
|
||||
|
||||
While Hyperscan follows libpcre syntax, it provides different semantics. The
|
||||
major departures from libpcre semantics are motivated by the requirements of
|
||||
streaming and multiple simultaneous pattern matching.
|
||||
|
||||
The major departures from libpcre semantics are:
|
||||
|
||||
#. **Multiple pattern matching**: Hyperscan allows matches to be reported for
|
||||
several patterns simultaneously. This is not equivalent to separating the
|
||||
patterns by :regexp:`|` in libpcre, which evaluates alternations
|
||||
left-to-right.
|
||||
|
||||
#. **Lack of ordering**: the multiple matches that Hyperscan produces are not
|
||||
guaranteed to be ordered, although they will always fall within the bounds of
|
||||
the current scan.
|
||||
|
||||
#. **End offsets only**: Hyperscan's default behaviour is only to report the end
|
||||
offset of a match. Reporting of the start offset can be enabled with
|
||||
per-expression flags at pattern compile time. See :ref:`som` for details.
|
||||
|
||||
#. **"All matches" reported**: scanning :regexp:`/foo.*bar/` against
|
||||
``fooxyzbarbar`` will return two matches from Hyperscan -- at the points
|
||||
corresponding to the ends of ``fooxyzbar`` and ``fooxyzbarbar``. In contrast,
|
||||
libpcre semantics by default would report only one match at ``fooxyzbarbar``
|
||||
(greedy semantics) or, if non-greedy semantics were switched on, one match at
|
||||
``fooxyzbar``. This means that switching between greedy and non-greedy
|
||||
semantics is a no-op in Hyperscan.
|
||||
|
||||
Supporting libpcre quantifier semantics while accurately reporting streaming
|
||||
matches at the time they occur is impossible. For example, consider the pattern
|
||||
above, :regexp:`/foo.*bar/`, in streaming mode, against the following
|
||||
stream (three blocks scanned in sequence):
|
||||
|
||||
============= ======= ========
|
||||
block 1 block 2 block 3
|
||||
============= ======= ========
|
||||
``fooxyzbar`` ``baz`` ``qbar``
|
||||
============= ======= ========
|
||||
|
||||
Since the :regexp:`.*` repeat in the pattern is a *greedy* repeat in libpcre, it
|
||||
must match as much as possible without causing the rest of the pattern to fail.
|
||||
However, in streaming mode, this would require knowledge of data in the stream
|
||||
beyond the current block being scanned.
|
||||
|
||||
In this example, the match at offset 9 in the first block is only the correct
|
||||
match (under libpcre semantics) if there is no ``bar`` in a subsequent block --
|
||||
as in block 3 -- which would constitute a better match for the pattern.
|
||||
|
||||
.. _som:
|
||||
|
||||
==============
|
||||
Start of Match
|
||||
==============
|
||||
|
||||
In standard operation, Hyperscan will only provide the end offset of a match
|
||||
when the match callback is called. If the :c:member:`HS_FLAG_SOM_LEFTMOST` flag
|
||||
is specified for a particular pattern, then the same set of matches is
|
||||
returned, but each match will also provide the leftmost possible start offset
|
||||
corresponding to its end offset.
|
||||
|
||||
Using the SOM flag entails a number of trade-offs and limitations:
|
||||
|
||||
* Reduced pattern support: For many patterns, tracking SOM is complex and can
|
||||
result in Hyperscan failing to compile a pattern with a "Pattern too
|
||||
large" error, even if the pattern is supported in normal operation.
|
||||
* Increased stream state: At scan time, state space is required to track
|
||||
potential SOM offsets, and this must be stored in persistent stream state in
|
||||
streaming mode. Accordingly, SOM will generally increase the stream state
|
||||
required to match a pattern.
|
||||
* Performance overhead: Similarly, there is generally a performance cost
|
||||
associated with tracking SOM.
|
||||
* Incompatible features: Some other Hyperscan pattern flags (such as
|
||||
:c:member:`HS_FLAG_SINGLEMATCH` and :c:member:`HS_FLAG_PREFILTER`) cannot be
|
||||
used in combination with SOM. Specifying them together with
|
||||
:c:member:`HS_FLAG_SOM_LEFTMOST` will result in a compilation error.
|
||||
|
||||
In streaming mode, the amount of precision delivered by SOM can be controlled
|
||||
with the SOM horizon flags. These instruct Hyperscan to deliver accurate SOM
|
||||
information within a certain distance of the end offset, and return a special
|
||||
start offset of :c:member:`HS_OFFSET_PAST_HORIZON` otherwise. Specifying a
|
||||
small or medium SOM horizon will usually reduce the stream state required for a
|
||||
given database.
|
||||
|
||||
.. note:: In streaming mode, the start offset returned for a match may refer to
|
||||
a point in the stream *before* the current block being scanned. Hyperscan
|
||||
provides no facility for accessing earlier blocks; if the calling application
|
||||
needs to inspect historical data, then it must store it itself.
|
||||
|
||||
.. _extparam:
|
||||
|
||||
===================
|
||||
Extended Parameters
|
||||
===================
|
||||
|
||||
In some circumstances, more control over the matching behaviour of a pattern is
|
||||
required than can be specified easily using regular expression syntax. For
|
||||
these scenarios, Hyperscan provides the :c:func:`hs_compile_ext_multi` function
|
||||
that allows a set of "extended parameters" to be set on a per-pattern basis.
|
||||
|
||||
Extended parameters are specified using an :c:type:`hs_expr_ext_t` structure,
|
||||
which provides the following fields:
|
||||
|
||||
* ``flags``: Flags governing which of the other fields in the structure are
|
||||
used.
|
||||
* ``min_offset``: The minimum end offset in the data stream at which this
|
||||
expression should match successfully.
|
||||
* ``max_offset``: The maximum end offset in the data stream at which this
|
||||
expression should match successfully.
|
||||
* ``min_length``: The minimum match length (from start to end) required to
|
||||
successfully match this expression.
|
||||
|
||||
These parameters allow the set of matches produced by a pattern to be
|
||||
constrained at compile time, rather than relying on the application to process
|
||||
unwanted matches at runtime.
|
||||
|
||||
For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of 10
|
||||
and a ``max_offset`` of 15 will not produce matches when scanned against
|
||||
``foobar`` or ``foo0123456789bar`` but will produce a match against the data
|
||||
streams ``foo0123bar`` or ``foo0123456bar``.
|
||||
|
||||
=================
|
||||
Prefiltering Mode
|
||||
=================
|
||||
|
||||
Hyperscan provides a per-pattern flag, :c:member:`HS_FLAG_PREFILTER`, which can
|
||||
be used to implement a prefilter for a pattern that Hyperscan would not
|
||||
ordinarily support.
|
||||
|
||||
This flag instructs Hyperscan to compile an "approximate" version of this
|
||||
pattern for use in a prefiltering application, even if Hyperscan does not
|
||||
support the pattern in normal operation.
|
||||
|
||||
The set of matches returned when this flag is used is guaranteed to be a
|
||||
superset of the matches specified by the non-prefiltering expression.
|
||||
|
||||
If the pattern contains pattern constructs not supported by Hyperscan (such as
|
||||
zero-width assertions, back-references or conditional references) these
|
||||
constructs will be replaced internally with broader constructs that may match
|
||||
more often.
|
||||
|
||||
For example, the pattern :regexp:`/(\\w+) again \\1/` contains the
|
||||
back-reference :regexp:`\\1`. In prefiltering mode, this pattern might be
|
||||
approximated by having its back-reference replaced with its referent, forming
|
||||
:regexp:`/\\w+ again \\w+/`.
|
||||
|
||||
Furthermore, in prefiltering mode Hyperscan may simplify a pattern that would
|
||||
otherwise return a "Pattern too large" error at compile time, or for performance
|
||||
reasons (subject to the matching guarantee above).
|
||||
|
||||
It is generally expected that the application will subsequently confirm
|
||||
prefilter matches with another regular expression matcher that can provide exact
|
||||
matches for the pattern.
|
||||
|
||||
.. note:: The use of this flag in combination with Start of Match mode (using
|
||||
the :c:member:`HS_FLAG_SOM_LEFTMOST` flag) is not currently supported and
|
||||
will result in a pattern compilation error.
|
||||
|
||||
.. _instr_specialization:
|
||||
|
||||
******************************
|
||||
Instruction Set Specialization
|
||||
******************************
|
||||
|
||||
Hyperscan is able to make use of several modern instruction set features found
|
||||
on x86 processors to provide improvements in scanning performance.
|
||||
|
||||
Some of these features are selected when the library is built; for example,
|
||||
Hyperscan will use the native ``POPCNT`` instruction on processors where it is
|
||||
available and the library has been optimized for the host architecture.
|
||||
|
||||
.. note:: By default, the Hyperscan runtime is built with the ``-march=native``
|
||||
compiler flag and (where possible) will make use of all instructions known by
|
||||
the host's C compiler.
|
||||
|
||||
To use some instruction set features, however, Hyperscan must build a
|
||||
specialized database to support them. This means that the target platform must
|
||||
be specified at pattern compile time.
|
||||
|
||||
The Hyperscan compiler API functions all accept an optional
|
||||
:c:type:`hs_platform_info_t` argument, which describes the target platform
|
||||
for the database to be built. If this argument is NULL, the database will be
|
||||
targeted at the current host platform.
|
||||
|
||||
The :c:type:`hs_platform_info_t` structure has two fields:
|
||||
|
||||
#. ``tune``: This allows the application to specify information about the target
|
||||
platform which may be used to guide the optimisation process of the compile.
|
||||
Use of this field does not limit the processors that the resulting database
|
||||
can run on, but may impact the performance of the resulting database.
|
||||
|
||||
#. ``cpu_features``: This allows the application to specify a mask of CPU
|
||||
features that may be used on the target platform. For example,
|
||||
:c:member:`HS_CPU_FEATURES_AVX2` can be specified for Intel\ |reg| Advanced
|
||||
Vector Extensions 2 (Intel\ |reg| AVX2) instruction set support. If a flag
|
||||
for a particular CPU feature is specified, the database will not be usable on
|
||||
a CPU without that feature.
|
||||
|
||||
An :c:type:`hs_platform_info_t` structure targeted at the current host can be
|
||||
built with the :c:func:`hs_populate_platform` function.
|
||||
|
||||
See :ref:`api_constants` for the full list of CPU tuning and feature flags.
|
275
doc/dev-reference/conf.py.in
Normal file
275
doc/dev-reference/conf.py.in
Normal file
@ -0,0 +1,275 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Hyperscan documentation build configuration file, created by
|
||||
# sphinx-quickstart on Tue Sep 29 15:59:19 2015.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its
|
||||
# containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = ['breathe']
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'Hyperscan'
|
||||
copyright = u'2015, Intel Corporation'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '@HS_MAJOR_VERSION@.@HS_MINOR_VERSION@'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '@HS_VERSION@'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build']
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
#default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
#show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
#keep_warnings = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'alabaster'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
html_theme_options = {
|
||||
# Change some style colors; these are used for admonitions
|
||||
'pink_1' : '#e0f8ff',
|
||||
'pink_2' : '#e0f8ff'
|
||||
}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
#html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
#html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['@CMAKE_CURRENT_SOURCE_DIR@/_static']
|
||||
|
||||
# Add any extra paths that contain custom files (such as robots.txt or
|
||||
# .htaccess) here, relative to this directory. These files are copied
|
||||
# directly to the root of the documentation.
|
||||
#html_extra_path = []
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
#html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
html_sidebars = {
|
||||
'**': ['globaltoc.html', 'searchbox.html']
|
||||
}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
#html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
html_show_sourcelink = False
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
#html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
#html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
#html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'Hyperscandoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
('index', 'Hyperscan.tex', u'Hyperscan Documentation',
|
||||
u'Intel Corporation', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'hyperscan', u'Hyperscan Documentation',
|
||||
[u'Intel Corporation'], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'Hyperscan', u'Hyperscan Documentation',
|
||||
u'Intel Corporation', 'Hyperscan', 'High-performance regular expression matcher.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
#texinfo_no_detailmenu = False
|
||||
|
||||
# -- Options for Breathe doxygen import -----------------------------------
|
||||
|
||||
breathe_projects = { "hyperscan": "doxygen_xml" }
|
||||
breathe_default_project = "hyperscan"
|
||||
breathe_domain_by_extension = {"h" : "c"}
|
||||
|
||||
# -- Add some customisation -----------------------------------------------
|
||||
|
||||
def setup(app):
|
||||
app.add_stylesheet("hyperscan.css") # Custom stylesheet for e.g. :regex:
|
33
doc/dev-reference/copyright.rst
Normal file
33
doc/dev-reference/copyright.rst
Normal file
@ -0,0 +1,33 @@
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
#########
|
||||
Copyright
|
||||
#########
|
||||
|
||||
No license (express or implied, by estoppel or otherwise) to any intellectual
|
||||
property rights is granted by this document.
|
||||
|
||||
Intel disclaims all express and implied warranties, including without
|
||||
limitation, the implied warranties of merchantability, fitness for a particular
|
||||
purpose, and non-infringement, as well as any warranty arising from course of
|
||||
performance, course of dealing, or usage in trade.
|
||||
|
||||
This document contains information on products, services and/or processes in
|
||||
development. All information provided here is subject to change without
|
||||
notice. Contact your Intel representative to obtain the latest forecast,
|
||||
schedule, specifications and roadmaps.
|
||||
|
||||
The products and services described may contain defects or errors known as
|
||||
errata which may cause deviations from published specifications. Current
|
||||
characterized errata are available on request.
|
||||
|
||||
Copies of documents which have an order number and are referenced in this
|
||||
document, or other Intel literature, may be obtained by calling 1-800-548-4725,
|
||||
or go to: <http://www.intel.com/design/literature.htm>.
|
||||
|
||||
Intel, and the Intel logo, are trademarks of Intel Corporation in the U.S.
|
||||
and/or other countries.
|
||||
|
||||
\*Other names and brands may be claimed as the property of others.
|
||||
|
||||
Copyright |copy| 2015, Intel Corporation. All rights reserved.
|
211
doc/dev-reference/getting_started.rst
Normal file
211
doc/dev-reference/getting_started.rst
Normal file
@ -0,0 +1,211 @@
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
###############
|
||||
Getting Started
|
||||
###############
|
||||
|
||||
Very Quick Start
|
||||
****************
|
||||
|
||||
#. Clone Hyperscan ::
|
||||
|
||||
cd <where-you-want-hyperscan-source>
|
||||
git clone git://github.com/01org/hyperscan
|
||||
|
||||
#. Configure Hyperscan
|
||||
|
||||
Ensure that you have the correct :ref:`dependencies <software>` present,
|
||||
and then:
|
||||
|
||||
::
|
||||
|
||||
cd <where-you-want-to-build-hyperscan>
|
||||
mkdir <build-dir>
|
||||
cd <build-dir>
|
||||
cmake [-G <generator>] [options] <hyperscan-source-path>
|
||||
|
||||
Known working generators:
|
||||
* ``Unix Makefiles`` --- make-compatible makefiles (default on Linux/FreeBSD/Mac OS X)
|
||||
* ``Ninja`` --- `Ninja <http://martine.github.io/ninja/>`_ build files.
|
||||
|
||||
Generators that might work include:
|
||||
* ``Xcode`` --- OS X Xcode projects.
|
||||
* ``Visual Studio`` --- Visual Studio projects - very experimental
|
||||
|
||||
#. Build Hyperscan
|
||||
|
||||
Depending on the generator used:
|
||||
* ``cmake --build .`` --- will build everything
|
||||
* ``make -j<jobs>`` --- use makefiles in parallel
|
||||
* ``ninja`` --- use Ninja build
|
||||
* etc.
|
||||
|
||||
#. Check Hyperscan
|
||||
|
||||
Run the Hyperscan unit tests: ::
|
||||
|
||||
bin/unit-hyperscan
|
||||
|
||||
Requirements
|
||||
************
|
||||
|
||||
Hardware
|
||||
========
|
||||
|
||||
Hyperscan will run on x86 processors in 64-bit (Intel\ |reg| 64 Architecture) and
|
||||
32-bit (IA-32 Architecture) modes.
|
||||
|
||||
Hyperscan is a high performance software library that takes advantage of recent
|
||||
Intel architecture advances. At a minimum, support for Supplemental Streaming
|
||||
SIMD Extensions 3 (SSSE3) is required, which should be available on any modern
|
||||
x86 processor.
|
||||
|
||||
Additionally, Hyperscan can make use of:
|
||||
|
||||
* Intel Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||
* the POPCNT instruction
|
||||
* Bit Manipulation Instructions (BMI, BMI2)
|
||||
* Intel Advanced Vector Extensions 2 (Intel AVX2)
|
||||
|
||||
if present.
|
||||
|
||||
These can be determined at library compile time, see :ref:`target_arch`.
|
||||
|
||||
.. _software:
|
||||
|
||||
Software
|
||||
========
|
||||
|
||||
As a software library, Hyperscan doesn't impose any particular runtime
|
||||
software requirements; however, to build the Hyperscan library we require a
|
||||
modern C and C++ compiler -- in particular, Hyperscan requires C99 and C++11
|
||||
compiler support. The supported compilers are:
|
||||
|
||||
* GCC, v4.8.1 or higher
|
||||
* Clang, v3.4 or higher (with libstdc++ or libc++)
|
||||
* Intel C++ Compiler v15 or higher
|
||||
|
||||
Examples of operating systems that Hyperscan is known to work on include:
|
||||
|
||||
Linux:
|
||||
|
||||
* Ubuntu 14.04 LTS or newer
|
||||
* RedHat/CentOS 7 or newer
|
||||
|
||||
FreeBSD:
|
||||
|
||||
* 10.0 or newer
|
||||
|
||||
Mac OS X:
|
||||
|
||||
* 10.8 or newer, using Xcode/Clang
|
||||
|
||||
Hyperscan *may* compile and run on other platforms, but there is no guarantee.
|
||||
We currently have experimental support for Windows using Intel C++ Compiler
|
||||
or Visual Studio 2015.
|
||||
|
||||
In addition, the following software is required for compiling the Hyperscan library:
|
||||
|
||||
======================================================= =========== ======================================
|
||||
Dependency Version Notes
|
||||
======================================================= =========== ======================================
|
||||
`CMake <http://www.cmake.org/>`_ >=2.8.11
|
||||
`Ragel <http://www.colm.net/open-source/ragel/>`_ 6.9
|
||||
`Python <http://www.python.org/>`_ 2.7
|
||||
`Boost <http://boost.org/>`_ >=1.57 Boost headers required
|
||||
`Pcap <http://tcpdump.org>`_ >=0.8 Optional: needed for example code only
|
||||
======================================================= =========== ======================================
|
||||
|
||||
Most of these dependencies can be provided by the package manager on the build
|
||||
system (e.g. Debian/Ubuntu/RedHat packages, FreeBSD ports, etc). However,
|
||||
ensure that the correct version is present.
|
||||
|
||||
Boost Headers
|
||||
-------------
|
||||
|
||||
Compiling Hyperscan depends on a recent version of the Boost C++ header
|
||||
library. If the Boost libraries are installed on the build machine in the
|
||||
usual paths, CMake will find them. An alternative is to put a copy of (or a
|
||||
symlink to) the boost subdirectory in ``<hyperscan-source-path>/include/boost``.
|
||||
|
||||
For example: for the Boost-1.59.0 release: ::
|
||||
|
||||
ln -s boost_1_59_0/boost <hyperscan-source-path>/include/boost
|
||||
|
||||
As Hyperscan uses the header-only parts of Boost, it is not necessary to
|
||||
compile the Boost libraries.
|
||||
|
||||
CMake Configuration
|
||||
===================
|
||||
|
||||
When CMake is invoked, it generates build files using the given options.
|
||||
Options are passed to CMake in the form ``-D<variable name>=<value>``.
|
||||
Common options for CMake include:
|
||||
|
||||
+------------------------+----------------------------------------------------+
|
||||
| Variable | Description |
|
||||
+========================+====================================================+
|
||||
| CMAKE_C_COMPILER | C compiler to use. Default is /usr/bin/cc. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| CMAKE_CXX_COMPILER | C++ compiler to use. Default is /usr/bin/c++. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| CMAKE_INSTALL_PREFIX | Install directory for ``install`` target |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| CMAKE_BUILD_TYPE | Define which kind of build to generate. |
|
||||
| | Valid options are Debug, Release, RelWithDebInfo, |
|
||||
| | and MinSizeRel. Default is RelWithDebInfo. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_SHARED_LIBS | Build Hyperscan as a shared library instead of |
|
||||
| | the default static library. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| BUILD_STATIC_AND_SHARED| Build both static and shared Hyperscan libs. |
|
||||
| | Default off. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
| DEBUG_OUTPUT | Enable very verbose debug output. Default off. |
|
||||
+------------------------+----------------------------------------------------+
|
||||
|
||||
For example, to generate a ``Debug`` build: ::
|
||||
|
||||
cd <build-dir>
|
||||
cmake -DCMAKE_BUILD_TYPE=Debug <hyperscan-source-path>
|
||||
|
||||
|
||||
|
||||
Build Type
|
||||
----------
|
||||
|
||||
CMake determines a number of features for a build based on the Build Type.
|
||||
Hyperscan defaults to ``RelWithDebInfo``, i.e. "release with debugging
|
||||
information". This is a performance optimized build without runtime assertions
|
||||
but with debug symbols enabled.
|
||||
|
||||
The other types of builds are:
|
||||
|
||||
* ``Release``: as above, but without debug symbols
|
||||
* ``MinSizeRel``: a stripped release build
|
||||
* ``Debug``: used when developing Hyperscan. Includes runtime assertions
|
||||
(which have a large impact on runtime performance), and will also enable
|
||||
some other build features like building internal unit
|
||||
tests.
|
||||
|
||||
.. _target_arch:
|
||||
|
||||
Target Architecture
|
||||
-------------------
|
||||
|
||||
By default, Hyperscan will be compiled to target the instruction set of the
|
||||
processor of the machine that is being used for compilation. This is done via
|
||||
the use of ``-march=native``. As a result, a library built on
|
||||
one machine may not work on a different machine if they differ in supported
|
||||
instruction subsets.
|
||||
|
||||
To override the use of ``-march=native``, set appropriate flags for the
|
||||
compiler in ``CFLAGS`` and ``CXXFLAGS`` environment variables before invoking
|
||||
CMake, or ``CMAKE_C_FLAGS`` and ``CMAKE_CXX_FLAGS`` on the CMake command line. For
|
||||
example, to set the instruction subsets up to ``SSE4.2`` using GCC 4.8: ::
|
||||
|
||||
cmake -DCMAKE_C_FLAGS="-march=corei7" \
|
||||
-DCMAKE_CXX_FLAGS="-march=corei7" <hyperscan-source-path>
|
||||
|
||||
For more information, refer to :ref:`instr_specialization`.
|
||||
|
2383
doc/dev-reference/hyperscan.doxyfile.in
Normal file
2383
doc/dev-reference/hyperscan.doxyfile.in
Normal file
File diff suppressed because it is too large
Load Diff
20
doc/dev-reference/index.rst
Normal file
20
doc/dev-reference/index.rst
Normal file
@ -0,0 +1,20 @@
|
||||
###############################################
|
||||
Hyperscan |version| Developer's Reference Guide
|
||||
###############################################
|
||||
|
||||
-------
|
||||
|today|
|
||||
-------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
copyright
|
||||
preface
|
||||
intro
|
||||
getting_started
|
||||
compilation
|
||||
runtime
|
||||
performance
|
||||
api_constants
|
||||
api_files
|
78
doc/dev-reference/intro.rst
Normal file
78
doc/dev-reference/intro.rst
Normal file
@ -0,0 +1,78 @@
|
||||
.. include:: <isonum.txt>
|
||||
.. _intro:
|
||||
|
||||
############
|
||||
Introduction
|
||||
############
|
||||
|
||||
Hyperscan is a software regular expression matching engine designed with
|
||||
high performance and flexibility in mind. It is implemented as a library that
|
||||
exposes a straightforward C API.
|
||||
|
||||
The Hyperscan API itself is composed of two major components:
|
||||
|
||||
***********
|
||||
Compilation
|
||||
***********
|
||||
|
||||
These functions take a group of regular expressions, along with identifiers and
|
||||
option flags, and compile them into an immutable database that can be used by
|
||||
the Hyperscan scanning API. This compilation process performs considerable
|
||||
analysis and optimization work in order to build a database that will match the
|
||||
given expressions efficiently.
|
||||
|
||||
If a pattern cannot be built into a database for any reason (such as the use of
|
||||
an unsupported expression construct, or the overflowing of a resource limit),
|
||||
an error will be returned by the pattern compiler.
|
||||
|
||||
Compiled databases can be serialized and relocated, so that they can be stored
|
||||
to disk or moved between hosts. They can also be targeted to particular
|
||||
platform features (for example, the use of Intel\ |reg| Advanced Vector Extensions
|
||||
2 (Intel\ |reg| AVX2) instructions).
|
||||
|
||||
See :ref:`compilation` for more detail.
|
||||
|
||||
********
|
||||
Scanning
|
||||
********
|
||||
|
||||
Once a Hyperscan database has been created, it can be used to scan data in
|
||||
memory. Hyperscan provides several scanning modes, depending on whether the
|
||||
data to be scanned is available as a single contiguous block, whether it is
|
||||
distributed amongst several blocks in memory at the same time, or whether it is
|
||||
to be scanned as a sequence of blocks in a stream.
|
||||
|
||||
Matches are delivered to the application via a user-supplied callback function
|
||||
that is called synchronously for each match.
|
||||
|
||||
For a given database, Hyperscan provides several guarantees:
|
||||
|
||||
* No memory allocations occur at runtime with the exception of two
|
||||
fixed-size allocations, both of which should be done ahead of time for
|
||||
performance-critical applications:
|
||||
|
||||
- **Scratch space**: temporary memory used for internal data at scan time.
|
||||
Structures in scratch space do not persist beyond the end of a single scan
|
||||
call.
|
||||
- **Stream state**: in streaming mode only, some state space is required to
|
||||
store data that persists between scan calls for each stream. This allows
|
||||
Hyperscan to track matches that span multiple blocks of data.
|
||||
|
||||
* The sizes of the scratch space and stream state (in streaming mode) required
|
||||
for a given database are fixed and determined at database compile time. This
|
||||
means that the memory requirements of the application are known ahead of
|
||||
time, and these structures can be pre-allocated if required for performance
|
||||
reasons.
|
||||
|
||||
* Any pattern that has successfully been compiled by the Hyperscan compiler can
|
||||
be scanned against any input. There are no internal resource limits or other
|
||||
limitations at runtime that could cause a scan call to return an error.
|
||||
|
||||
See :ref:`runtime` for more detail.
|
||||
|
||||
************
|
||||
Example Code
|
||||
************
|
||||
|
||||
Some simple example code demonstrating the use of the Hyperscan API is
|
||||
available in the ``examples/`` subdirectory of the Hyperscan distribution.
|
335
doc/dev-reference/performance.rst
Normal file
335
doc/dev-reference/performance.rst
Normal file
@ -0,0 +1,335 @@
|
||||
.. _perf:
|
||||
|
||||
##########################
|
||||
Performance Considerations
|
||||
##########################
|
||||
|
||||
Hyperscan supports a wide range of patterns in all three scanning modes. It is
|
||||
capable of extremely high levels of performance, but certain patterns can
|
||||
reduce performance markedly.
|
||||
|
||||
The following guidelines will help construct patterns and pattern sets that
|
||||
will perform better:
|
||||
|
||||
*****************************
|
||||
Regular expression constructs
|
||||
*****************************
|
||||
|
||||
.. tip:: Do not hand-optimize regular expression constructs.
|
||||
|
||||
Quite a large number of regular expressions can be written in multiple ways.
|
||||
For example, caseless matching of :regexp:`/abc/` can be written as:
|
||||
|
||||
* :regexp:`/[Aa][Bb][Cc]/`
|
||||
* :regexp:`/(A|a)(B|b)(C|c)/`
|
||||
* :regexp:`/(?i)abc(?-i)/`
|
||||
* :regexp:`/abc/i`
|
||||
|
||||
Hyperscan is capable of handling all these constructs. Unless there is a
|
||||
specific reason otherwise, do not rewrite patterns from one form to another.
|
||||
|
||||
As another example, matching of :regexp:`/foo(bar|baz)(frotz)?/` can be
|
||||
equivalently written as:
|
||||
|
||||
* :regexp:`/foobarfrotz|foobazfrotz|foobar|foobaz/`
|
||||
|
||||
This change will not improve performance or reduce overheads.
|
||||
|
||||
*************
|
||||
Library usage
|
||||
*************
|
||||
|
||||
.. tip:: Do not hand-optimize library usage.
|
||||
|
||||
The Hyperscan library is capable of dealing with small writes, unusually large
|
||||
and small pattern sets, etc. Unless there is a specific performance problem
|
||||
with some usage of the library, it is best to use Hyperscan in a simple and
|
||||
direct fashion. For example, it is unlikely for there to be much benefit in
|
||||
buffering input to the library into larger blocks unless streaming writes are
|
||||
tiny (say, 1-2 bytes at a time).
|
||||
|
||||
Unlike many other pattern matching products, Hyperscan will run faster with
|
||||
small numbers of patterns and slower with large numbers of patterns in a smooth
|
||||
fashion (as opposed to, typically, running at a moderate speed up to some fixed
|
||||
limit then either breaking or running half as fast).
|
||||
|
||||
Hyperscan also provides high-throughput matching with a single thread of
|
||||
control per core; if a database runs at 3.0 Gbps in Hyperscan it means that a
|
||||
3000-bit block of data will be scanned in 1 microsecond in a single thread of
|
||||
control, not that it is required to scan 22 3000-bit blocks of data in 22
|
||||
microseconds. Thus, it is not usually necessary to buffer data to supply
|
||||
Hyperscan with available parallelism.
|
||||
|
||||
********************
|
||||
Block-based matching
|
||||
********************
|
||||
|
||||
.. tip:: Prefer block-based matching to streaming matching where possible.
|
||||
|
||||
Whenever input data appears in discrete records, or already requires some sort
|
||||
of transformation (e.g. URI normalization) that requires all the data to be
|
||||
accumulated before processing, it should be scanned in block rather than in
|
||||
streaming mode.
|
||||
|
||||
Unnecessary use of streaming mode reduces the number of optimizations that can
|
||||
be applied in Hyperscan and may make some patterns run slower.
|
||||
|
||||
If there is a mixture of 'block' and 'streaming' mode patterns, these should be
|
||||
scanned in separate databases except in the case that the streaming patterns
|
||||
vastly outnumber the block mode patterns.
|
||||
|
||||
*********************
|
||||
Unnecessary databases
|
||||
*********************
|
||||
|
||||
.. tip:: Avoid unnecessary 'union' databases.
|
||||
|
||||
If there are 5 different types of network traffic T1 through T5 that must
|
||||
be scanned against 5 different signature sets, it will be far more efficient to
|
||||
construct 5 separate databases and scan traffic against the appropriate one
|
||||
than it will be to merge all 5 signature sets and remove inappropriate matches
|
||||
after the fact.
|
||||
|
||||
This will be true even in the case where there is substantial overlap among the
|
||||
signatures. Only if the common subset of the signatures is overwhelmingly large
|
||||
(say, 90% of the signatures appear in all 5 traffic types) should a database
|
||||
that merges all 5 signature sets be considered, and only then if there are no
|
||||
performance issues with specific patterns that appear outside the common
|
||||
subset.
|
||||
|
||||
******************************
|
||||
Allocate scratch ahead of time
|
||||
******************************
|
||||
|
||||
.. tip:: Do not allocate scratch space for your pattern database just before
|
||||
calling a scan function. Instead, do it just after the pattern database is
|
||||
compiled or deserialized.
|
||||
|
||||
Scratch allocation is not necessarily a cheap operation. Since it is the first
|
||||
time (after compilation or deserialization) that a pattern database is used,
|
||||
Hyperscan performs some validation checks inside :c:func:`hs_alloc_scratch` and
|
||||
must also allocate memory.
|
||||
|
||||
Therefore, it is important to ensure that :c:func:`hs_alloc_scratch` is not
|
||||
called in the application's scanning path just before :c:func:`hs_scan` (for
|
||||
example).
|
||||
|
||||
Instead, scratch should be allocated immediately after a pattern database is
|
||||
compiled or deserialized, then retained for later scanning operations.
|
||||
|
||||
***********************************************
|
||||
Allocate one scratch space per scanning context
|
||||
***********************************************
|
||||
|
||||
.. tip:: A scratch space can be allocated so that it can be used with any one of
|
||||
a number of databases. Each concurrent scan operation (such as a thread)
|
||||
needs its own scratch space.
|
||||
|
||||
The :c:func:`hs_alloc_scratch` function can accept an existing scratch space and
|
||||
"grow" it to support scanning with another pattern database. This means that
|
||||
instead of allocating one scratch space for every database used by an
|
||||
application, one can call :c:func:`hs_alloc_scratch` with a pointer to the same
|
||||
:c:type:`hs_scratch_t` and it will be sized appropriately for use with any of
|
||||
the given databases. For example:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
hs_database_t *db1 = buildDatabaseOne();
|
||||
hs_database_t *db2 = buildDatabaseTwo();
|
||||
hs_database_t *db3 = buildDatabaseThree();
|
||||
|
||||
hs_error_t err;
|
||||
hs_scratch_t *scratch = NULL;
|
||||
err = hs_alloc_scratch(db1, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_alloc_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
err = hs_alloc_scratch(db2, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_alloc_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
err = hs_alloc_scratch(db3, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_alloc_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* scratch may now be used to scan against any of
|
||||
the databases db1, db2, db3. */
|
||||
|
||||
*****************
|
||||
Anchored patterns
|
||||
*****************
|
||||
|
||||
.. tip:: If a pattern is meant to appear at the start of data, be sure to
|
||||
anchor it.
|
||||
|
||||
Anchored patterns (:regexp:`/^.../`) are far simpler to match than other
|
||||
patterns, especially patterns anchored to the start of the buffer (or stream, in
|
||||
streaming mode). Anchoring patterns to the end of the buffer results in less of
|
||||
a performance gain, especially in streaming mode.
|
||||
|
||||
There are a variety of ways to anchor a pattern to a particular offset:
|
||||
|
||||
- The :regexp:`^` and :regexp:`\\A` constructs anchor the pattern to the start
|
||||
of the buffer. For example, :regexp:`/^foo/` can *only* match at offset 3.
|
||||
|
||||
- The :regexp:`$`, :regexp:`\\z` and :regexp:`\\Z` constructs anchor the pattern
|
||||
to the end of the buffer. For example, :regexp:`/foo\\z/` can only match when
|
||||
the data buffer being scanned ends in ``foo``. (It should be noted that
|
||||
:regexp:`$` and :regexp:`\\Z` will also match before a newline at the end of
|
||||
the buffer, so :regexp:`/foo\\Z/` would match against either ``abc foo`` or
|
||||
``abc foo\n``.)
|
||||
|
||||
- The ``min_offset`` and ``max_offset`` extended parameters may also be used to
|
||||
constrain where a pattern could match. For example, the pattern
|
||||
:regexp:`/foo/` with a ``max_offset`` of 10 will only match at offsets less
|
||||
than or equal to 10 in the buffer. (This pattern could also be written as
|
||||
:regexp:`/^.{0,7}foo/`, compiled with the :c:member:`HS_FLAG_DOTALL` flag).
|
||||
|
||||
|
||||
*******************
|
||||
Matching everywhere
|
||||
*******************
|
||||
|
||||
.. tip:: Avoid patterns that match everywhere, and remember that our semantics
|
||||
are 'match everywhere, end of match only'.
|
||||
|
||||
Patterns that match everywhere will run slowly due to the sheer number of
|
||||
matches that they return.
|
||||
|
||||
Patterns like :regexp:`/.*/` in an automata-based matcher will match before and
|
||||
after every single character position, so a buffer with 100 characters will
|
||||
return 101 matches. Greedy pattern matchers such as libpcre will return a
|
||||
single match in this case, but our semantics are to return all matches. This is
|
||||
likely to be very expensive for our code and for the client code of the
|
||||
library.
|
||||
|
||||
Another result of our semantics ("match everywhere") is that patterns that have
|
||||
optional start or ending sections -- for example :regexp:`/x?abcd*/` -- may not
|
||||
perform as expected.
|
||||
|
||||
Firstly, the :regexp:`x?` portion of the pattern is unnecessary, as it will not
|
||||
affect the match results.
|
||||
|
||||
Secondly, the above pattern will match 'more' than :regexp:`/abc/` but
|
||||
:regexp:`/abc/` will always detect any input data that will be matched by
|
||||
:regexp:`/x?abcd*/` -- it will just produce fewer matches.
|
||||
|
||||
For example, input data ``0123abcdddd`` will match :regexp:`/abc/` once but
|
||||
:regexp:`/abcd*/` five times (at ``abc``, ``abcd``, ``abcdd``, ``abcddd``, and
|
||||
``abcdddd``).
|
||||
|
||||
*********************************
|
||||
Bounded repeats in streaming mode
|
||||
*********************************
|
||||
|
||||
.. tip:: Bounded repeats are expensive in streaming mode.
|
||||
|
||||
A bounded repeat construction such as :regexp:`/X.{1000,1001}abcd/` is extremely
|
||||
expensive in streaming mode, of necessity. It requires us to take action on
|
||||
each ``X`` character (itself expensive, relative to searching for longer strings)
|
||||
and potentially record a history of hundreds of offsets where ``X`` occurred in
|
||||
case the ``X`` and ``abcd`` characters are separated by a stream boundary.
|
||||
|
||||
Heavy and unnecessary use of bounded repeats should be avoided, especially
|
||||
where other parts of a signature are quite specific. For example, a virus
|
||||
signature that matches a virus payload may be sufficient without including a
|
||||
prefix that includes, for example, a 2-character Windows executable prefix and
|
||||
a bounded repeat beforehand.
|
||||
|
||||
***************
|
||||
Prefer literals
|
||||
***************
|
||||
|
||||
.. tip:: Where possible, prefer patterns which 'require' literals, especially
|
||||
longer literals, and in streaming mode, prefer signatures that 'require'
|
||||
literals earlier in the pattern.
|
||||
|
||||
Patterns which must match on a literal will run faster than patterns that do
|
||||
not. For example:
|
||||
|
||||
- :regexp:`/\\wab\\d*\\w\\w\\w/` will run faster than
|
||||
- :regexp:`/\\w\\w\\d*\\w\\w/`, or, for that matter
|
||||
- :regexp:`/\\w(abc)?\\d*\\w\\w\\w/` (this contains a literal but it need
|
||||
not appear in the input).
|
||||
|
||||
Even implicit literals are better than none: :regexp:`/[0-2][3-5].*\\w\\w/`
|
||||
still effectively contains 9 2-character literals. No hand-optimization of this
|
||||
case is required; this pattern will not run faster if rewritten as:
|
||||
:regexp:`/(03|04|05|13|14|15|23|24|25).*\\w\\w/`.
|
||||
|
||||
Under all circumstances it is better to use longer literals than shorter ones.
|
||||
A database consisting of 100 14-character literals will scan considerably
|
||||
faster than one consisting of 100 4-character literals and return fewer
|
||||
positives.
|
||||
|
||||
Additionally, in streaming mode, a signature that contains a longer literal
|
||||
early in the pattern is preferred to one that does not.
|
||||
|
||||
For example: :regexp:`/b\\w*foobar/` is not as good a pattern as
|
||||
:regexp:`/blah\\w*foobar/`.
|
||||
|
||||
The disparity between these patterns is much smaller in block mode.
|
||||
|
||||
Longer literals anywhere in the pattern are still preferred in streaming mode.
|
||||
For example, both of the above patterns are stronger and will scan faster than
|
||||
:regexp:`/b\\w*fo/` even in streaming mode.
|
||||
|
||||
**************
|
||||
"Dot all" mode
|
||||
**************
|
||||
|
||||
.. tip:: Use "dot all" mode where possible.
|
||||
|
||||
Not using the :c:member:`HS_FLAG_DOTALL` pattern flag can be expensive, as
|
||||
implicitly, it means that patterns of the form :regexp:`/A.*B/` become
|
||||
:regexp:`/A[^\\n]*B/`.
|
||||
|
||||
It is likely that scanning tasks without the DOTALL flag are better done 'line
|
||||
at a time', with the newline sequences marking the beginning and end of each
|
||||
block.
|
||||
|
||||
This will be true in most use-cases (an exception being where the DOTALL flag
|
||||
is off but the pattern contains either explicit newlines or constructs such as
|
||||
:regexp:`\\s` that implicitly match a newline character).
|
||||
|
||||
*****************
|
||||
Single-match flag
|
||||
*****************
|
||||
|
||||
.. tip:: Consider using the single-match flag to limit matches to one match per
|
||||
pattern only if possible.
|
||||
|
||||
If only one match per pattern is required, use the flag provided to indicate
|
||||
this (:c:member:`HS_FLAG_SINGLEMATCH`). This flag can allow a number of
|
||||
optimizations to be applied, allowing both performance improvements and state
|
||||
space reductions when streaming.
|
||||
|
||||
However, there is some overhead associated with tracking whether each pattern in
|
||||
the pattern set has matched, and some applications with infrequent matches may
|
||||
see reduced performance when the single-match flag is used.
|
||||
|
||||
********************
|
||||
Start of Match flag
|
||||
********************
|
||||
|
||||
.. tip:: Do not request Start of Match information if it is not needed.
|
||||
|
||||
Start of Match (SOM) information can be expensive to gather and can require
|
||||
large amounts of stream state to store in streaming mode. As such, SOM
|
||||
information should only be requested with the :c:member:`HS_FLAG_SOM_LEFTMOST`
|
||||
flag for patterns that require it.
|
||||
|
||||
SOM information is not generally expected to be cheaper (in either performance
|
||||
terms or in stream state overhead) than the use of bounded repeats.
|
||||
Consequently, :regexp:`/foo.*bar/L` with a check on start of match values after
|
||||
the callback is considerably more expensive and general than
|
||||
:regexp:`/foo.{300}bar/`.
|
||||
|
||||
Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be
|
||||
used to specify a lower bound on the length of the matches for a pattern. Using
|
||||
this facility may be more lightweight in some circumstances than using the SOM
|
||||
flag and post-confirming match length in the calling application.
|
47
doc/dev-reference/preface.rst
Normal file
47
doc/dev-reference/preface.rst
Normal file
@ -0,0 +1,47 @@
|
||||
#######
|
||||
Preface
|
||||
#######
|
||||
|
||||
********
|
||||
Overview
|
||||
********
|
||||
|
||||
Hyperscan is a regular expression engine designed to offer high performance, the
|
||||
ability to match multiple expressions simultaneously and flexibility in
|
||||
scanning operation.
|
||||
|
||||
Patterns are provided to a compilation interface which generates an immutable
|
||||
pattern database. The scan interface then can be used to scan a target data
|
||||
buffer for the given patterns, returning any matching results from that data
|
||||
buffer. Hyperscan also provides a streaming mode, in which matches that span
|
||||
several blocks in a stream are detected.
|
||||
|
||||
This document is designed to facilitate code-level integration of the Hyperscan
|
||||
library with existing or new applications.
|
||||
|
||||
:ref:`intro` is a short overview of the Hyperscan library, with more detail on
|
||||
the Hyperscan API provided in the subsequent sections: :ref:`compilation` and
|
||||
:ref:`runtime`.
|
||||
|
||||
:ref:`perf` provides details on various factors which may impact the
|
||||
performance of a Hyperscan integration.
|
||||
|
||||
:ref:`api_constants` and :ref:`api_files` provide a detailed summary of the
|
||||
Hyperscan Application Programming Interface (API).
|
||||
|
||||
********
|
||||
Audience
|
||||
********
|
||||
|
||||
This guide is aimed at developers interested in integrating Hyperscan into an
|
||||
application. For information on building the Hyperscan library, see the Quick
|
||||
Start Guide.
|
||||
|
||||
***********
|
||||
Conventions
|
||||
***********
|
||||
|
||||
* Text in a ``fixed-width font`` refers to a code element, e.g. type name;
|
||||
function or method name.
|
||||
* Text in a :regexp:`coloured fixed-width font` refers to a regular
|
||||
expression or a part of a regular expression.
|
198
doc/dev-reference/runtime.rst
Normal file
198
doc/dev-reference/runtime.rst
Normal file
@ -0,0 +1,198 @@
|
||||
.. _runtime:
|
||||
|
||||
#####################
|
||||
Scanning for Patterns
|
||||
#####################
|
||||
|
||||
Hyperscan provides three different scanning modes, each with its own scan
|
||||
function beginning with ``hs_scan``. In addition, streaming mode has a number
|
||||
of other API functions for managing stream state.
|
||||
|
||||
****************
|
||||
Handling Matches
|
||||
****************
|
||||
|
||||
All of these functions will call a user-supplied callback function when a match
|
||||
is found. This function has the following signature:
|
||||
|
||||
.. doxygentypedef:: match_event_handler
|
||||
:outline:
|
||||
:no-link:
|
||||
|
||||
The *id* argument will be set to the identifier for the matching expression
|
||||
provided at compile time, and the *to* argument will be set to the end-offset
|
||||
of the match. If SOM was requested for the pattern (see :ref:`som`), the
|
||||
*from* argument will be set to the leftmost possible start-offset for the match.
|
||||
|
||||
The match callback function has the capability to halt scanning
|
||||
by returning a non-zero value.
|
||||
|
||||
See :c:type:`match_event_handler` for more information.
|
||||
|
||||
**************
|
||||
Streaming Mode
|
||||
**************
|
||||
|
||||
The streaming runtime API consists of functions to open, scan, and close
|
||||
Hyperscan data streams -- these functions being :c:func:`hs_open_stream`,
|
||||
:c:func:`hs_scan_stream`, and :c:func:`hs_close_stream`. Any matches detected
|
||||
in the written data are returned to the calling application via a function
|
||||
pointer callback.
|
||||
|
||||
The match callback function has the capability to halt scanning of the current
|
||||
data stream by returning a non-zero value. In streaming mode, the result of
|
||||
this is that the stream is then left in a state where no more data can be
|
||||
scanned, and any subsequent calls to :c:func:`hs_scan_stream` for that stream
|
||||
will return immediately with :c:member:`HS_SCAN_TERMINATED`. The caller must
|
||||
still call :c:func:`hs_close_stream` to complete the clean-up process for that
|
||||
stream.
|
||||
|
||||
Streams exist in the Hyperscan library so that pattern matching state can be
|
||||
maintained across multiple blocks of target data -- without maintaining this
|
||||
state, it would not be possible to detect patterns that span these blocks of
|
||||
data. This, however, does come at the cost of requiring an amount of storage
|
||||
per-stream (the size of this storage is fixed at compile time), and a slight
|
||||
performance penalty in some cases to manage the state.
|
||||
|
||||
While Hyperscan does always support a strict ordering of multiple matches,
|
||||
streaming matches will not be delivered at offsets before the current stream
|
||||
write, with the exception of zero-width asserts, where constructs such as
|
||||
:regexp:`\\b` and :regexp:`$` can cause a match on the final character of a
|
||||
stream write to be delayed until the next stream write or stream close
|
||||
operation.
|
||||
|
||||
=================
|
||||
Stream Management
|
||||
=================
|
||||
|
||||
In addition to :c:func:`hs_open_stream`, :c:func:`hs_scan_stream`, and
|
||||
:c:func:`hs_close_stream`, the Hyperscan API provides a number of other
|
||||
functions for the management of streams:
|
||||
|
||||
* :c:func:`hs_reset_stream`: resets a stream to its initial state; this is
|
||||
equivalent to calling :c:func:`hs_close_stream` but will not free the memory
|
||||
used for stream state.
|
||||
|
||||
* :c:func:`hs_copy_stream`: constructs a (newly allocated) duplicate of a
|
||||
stream.
|
||||
|
||||
* :c:func:`hs_reset_and_copy_stream`: constructs a duplicate of a stream into
|
||||
another, resetting the destination stream first. This call avoids the
|
||||
allocation done by :c:func:`hs_copy_stream`.
|
||||
|
||||
**********
|
||||
Block Mode
|
||||
**********
|
||||
|
||||
The block mode runtime API consists of a single function: :c:func:`hs_scan`. Using
|
||||
the compiled patterns this function identifies matches in the target data,
|
||||
using a function pointer callback to communicate with the application.
|
||||
|
||||
This single :c:func:`hs_scan` function is essentially equivalent to calling
|
||||
:c:func:`hs_open_stream`, making a single call to :c:func:`hs_scan_stream`, and
|
||||
then :c:func:`hs_close_stream`, except that block mode operation does not
|
||||
incur all the stream related overhead.
|
||||
|
||||
*************
|
||||
Vectored Mode
|
||||
*************
|
||||
|
||||
The vectored mode runtime API, like the block mode API, consists of a single
|
||||
function: :c:func:`hs_scan_vector`. This function accepts an array of data
|
||||
pointers and lengths, facilitating the scanning in sequence of a set of data
|
||||
blocks that are not contiguous in memory.
|
||||
|
||||
From the caller's perspective, this mode will produce the same matches as if
|
||||
the set of data blocks were (a) scanned in sequence with a series of streaming
|
||||
mode scans, or (b) copied in sequence into a single block of memory and then
|
||||
scanned in block mode.
|
||||
|
||||
*************
|
||||
Scratch Space
|
||||
*************
|
||||
|
||||
While scanning data, Hyperscan needs a small amount of temporary memory to store
|
||||
on-the-fly internal data. This amount is unfortunately too large to fit on the
|
||||
stack, particularly for embedded applications, and allocating memory dynamically
|
||||
is too expensive, so a pre-allocated "scratch" space must be provided to the
|
||||
scanning functions.
|
||||
|
||||
The function :c:func:`hs_alloc_scratch` allocates a large enough region of
|
||||
scratch space to support a given database. If the application uses multiple
|
||||
databases, only a single scratch region is necessary: in this case, calling
|
||||
:c:func:`hs_alloc_scratch` on each database (with the same ``scratch`` pointer)
|
||||
will ensure that the scratch space is large enough to support scanning against
|
||||
any of the given databases.
|
||||
|
||||
Importantly, only one such space is required per thread and can (and indeed
|
||||
should) be allocated before data scanning is to commence. In a scenario where a
|
||||
set of expressions are compiled by a single "master" thread and data will be
|
||||
scanned by multiple "worker" threads, the convenience function
|
||||
:c:func:`hs_clone_scratch` allows multiple copies of an existing scratch space
|
||||
to be made for each thread (rather than forcing the caller to pass all the
|
||||
compiled databases through :c:func:`hs_alloc_scratch` multiple times).
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
hs_error_t err;
|
||||
hs_scratch_t *scratch_prototype = NULL;
|
||||
err = hs_alloc_scratch(db, &scratch_prototype);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_alloc_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
hs_scratch_t *scratch_thread1 = NULL;
|
||||
hs_scratch_t *scratch_thread2 = NULL;
|
||||
|
||||
err = hs_clone_scratch(scratch_prototype, &scratch_thread1);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_clone_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
err = hs_clone_scratch(scratch_prototype, &scratch_thread2);
|
||||
if (err != HS_SUCCESS) {
|
||||
printf("hs_clone_scratch failed!");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
hs_free_scratch(scratch_prototype);
|
||||
|
||||
/* Now two threads can both scan against database db,
|
||||
each with its own scratch space. */
|
||||
|
||||
While the Hyperscan library is re-entrant, the use of scratch spaces is not.
|
||||
For example, if by design it is deemed necessary to run recursive or nested
|
||||
scanning (say, from the match callback function), then an additional scratch
|
||||
space is required for that context.
|
||||
|
||||
The easiest way to achieve this is to build up a single scratch space as a
|
||||
prototype, then clone it for each context, as shown in the example above.
|
||||
|
||||
*****************
|
||||
Custom Allocators
|
||||
*****************
|
||||
|
||||
By default, structures used by Hyperscan at runtime (scratch space, stream
|
||||
state, etc) are allocated with the default system allocators, usually
|
||||
``malloc()`` and ``free()``.
|
||||
|
||||
The Hyperscan API provides a facility for changing this behaviour to support
|
||||
applications that use custom memory allocators.
|
||||
|
||||
These functions are:
|
||||
|
||||
- :c:func:`hs_set_database_allocator`, which sets the allocate and free functions
|
||||
used for compiled pattern databases.
|
||||
- :c:func:`hs_set_scratch_allocator`, which sets the allocate and free
|
||||
functions used for scratch space.
|
||||
- :c:func:`hs_set_stream_allocator`, which sets the allocate and free functions
|
||||
used for stream state in streaming mode.
|
||||
- :c:func:`hs_set_misc_allocator`, which sets the allocate and free functions
|
||||
used for miscellaneous data, such as compile error structures and
|
||||
informational strings.
|
||||
|
||||
The :c:func:`hs_set_allocator` function can be used to set all of the custom
|
||||
allocators to the same allocate/free pair.
|
24
examples/CMakeLists.txt
Normal file
24
examples/CMakeLists.txt
Normal file
@ -0,0 +1,24 @@
|
||||
# Example programs shipped with Hyperscan.
#
# simplegrep only needs the Hyperscan library. pcapscan and patbench read
# PCAP files as well, so they are built only when libpcap can be located.
find_library(PCAP_LIBRARY pcap)

if (NOT PCAP_LIBRARY)
    message(STATUS "Could not find libpcap - some examples will not be built")
endif()

add_executable(simplegrep simplegrep.c)
set_source_files_properties(simplegrep.c PROPERTIES COMPILE_FLAGS
    "-Wall -Wno-unused-parameter")
target_link_libraries(simplegrep hs)

if (PCAP_LIBRARY)
    # Link against the library that find_library() actually located rather
    # than the bare name "pcap", so that a libpcap found outside the linker's
    # default search path (e.g. via CMAKE_PREFIX_PATH) is still usable.
    add_executable(pcapscan pcapscan.cc)
    set_source_files_properties(pcapscan.cc PROPERTIES COMPILE_FLAGS
        "-Wall -Wno-unused-parameter")
    target_link_libraries(pcapscan hs ${PCAP_LIBRARY})

    add_executable(patbench patbench.cc)
    set_source_files_properties(patbench.cc PROPERTIES COMPILE_FLAGS
        "-Wall -Wno-unused-parameter")
    target_link_libraries(patbench hs ${PCAP_LIBRARY})
endif()
|
155
examples/README.md
Normal file
155
examples/README.md
Normal file
@ -0,0 +1,155 @@
|
||||
Hyperscan Example Code
|
||||
======================
|
||||
|
||||
Copyright (C) 2015 Intel Corporation. All rights reserved.
|
||||
|
||||
The files in this directory contain example code demonstrating the use of the
|
||||
Hyperscan regular expression matching library. The examples have been
|
||||
constructed to be useful utility programs, but they have been simplified
|
||||
somewhat, so generally contain "shortcuts" that one would not take if building
|
||||
a "real" system.
|
||||
|
||||
The examples each contain a short description in a comment at the top of the
|
||||
file, including build instructions.
|
||||
|
||||
---
|
||||
|
||||
|
||||
Example 1: simplegrep
|
||||
---------------------
|
||||
|
||||
The first example program (`simplegrep.c`) is modelled on the ubiquitous grep
|
||||
tool to search a file for a single regular expression. 'simplegrep' does the
|
||||
same, but eschews a lot of grep's complexity: it is unable to read data from
|
||||
`stdin`, and doesn't support grep's plethora of command-line arguments.
|
||||
|
||||
This code is intended to be simple portable C99.
|
||||
|
||||
simplegrep demonstrates the following Hyperscan concepts:
|
||||
|
||||
- Single pattern compilation: As simplegrep can scan for one pattern only, it
|
||||
uses the `hs_compile` function instead of the multi-pattern variant:
|
||||
`hs_compile_multi`.
|
||||
|
||||
- Block mode pattern-matching: simplegrep will search a single data buffer
|
||||
for the given pattern, so it has no need to set up and tear down streams.
|
||||
(See the next section for a streaming mode example)
|
||||
|
||||
- Scratch space allocation and use: Hyperscan requires a small amount of
|
||||
temporary memory that is used in the `hs_scan` call. The caller needs to
|
||||
guarantee that only one instance of `hs_scan` is using the scratch space at a
|
||||
time, but there is no requirement that the same scratch area be used on
|
||||
consecutive calls to `hs_scan`. Given that it is expensive to allocate the
|
||||
scratch space, one would typically allocate all necessary scratch space at
|
||||
system startup and reuse it throughout execution of the program.
|
||||
|
||||
|
||||
Example 2: pcapscan
|
||||
-------------------
|
||||
|
||||
The second example program (`pcapscan.cc`) is a very simple packet scanning
|
||||
benchmark. It scans a given PCAP file full of network traffic against a group
|
||||
of regular expressions and returns some coarse performance measurements. This
|
||||
example provides a quick way to examine the performance achievable on a
|
||||
particular combination of platform, pattern set and input data.
|
||||
|
||||
In block mode, pcapscan scans each packet individually against a Hyperscan
|
||||
database. In streaming mode, pcapscan assigns packets to flows using a
|
||||
rudimentary connection tracker, then scans the packets in each flow with
|
||||
Hyperscan's streaming mode interface. This demonstrates the use of streaming
|
||||
mode operation to detect matches that straddle packet boundaries.
|
||||
|
||||
**Note**: the flow assignment implemented here is intended as a simple demo; it
|
||||
merely ensures that packets with the same 5-tuple are written to the same
|
||||
stream in the order in which they appear in the PCAP file. No packet
|
||||
re-ordering or connection state tracking (as you would expect to find in a real
|
||||
network scanning application) is done.
|
||||
|
||||
pcapscan introduces the following Hyperscan concepts:
|
||||
|
||||
- Multi-pattern compilation: Unlike simplegrep, pcapscan requires a file of
|
||||
expressions as input instead of a single pattern. pcapscan will read this
|
||||
file in, one pattern per line, and use it as input to the `hs_compile_multi`
|
||||
function. This function generates a pattern database that will match all the
|
||||
input patterns in parallel.
|
||||
|
||||
- Streamed pattern-matching: pcapscan uses the `hs_scan_stream` function
|
||||
(instead of the block-mode `hs_scan` call) to allow it to identify matches
|
||||
that occur in a stream of data, even if they straddle the boundaries between blocks.
|
||||
Streaming mode operation has a number of unique properties:
|
||||
|
||||
- Stream state that persists for the lifetime of the stream must be allocated
|
||||
with the `hs_open_stream` function before scanning can take place.
|
||||
Similarly, it must be freed with `hs_close_stream` after it is no longer
|
||||
needed. Each stream being scanned concurrently requires its own stream
|
||||
state.
|
||||
|
||||
- In streaming mode, a non-zero return from the user-specified event-handler
|
||||
function has consequences for the rest of that stream's lifetime: when a
|
||||
non-zero return occurs, it signals that no more of the stream should be
|
||||
scanned. Consequently if the user makes a subsequent call to
|
||||
`hs_scan_stream` on a stream whose processing was terminated in this way,
|
||||
`hs_scan_stream` will return `HS_SCAN_TERMINATED`. This case has not been
|
||||
demonstrated in pcapscan, as its callback always returns 0.
|
||||
|
||||
- Match handling during stream shutdown: As matches may occur when the
|
||||
`hs_close_stream` function is called, it too must be provided with scratch
|
||||
space in order to perform this match processing. Similarly, the user must
|
||||
be prepared to be issued match event callbacks during the `hs_close_stream`
|
||||
call. For this reason, we advise that stream shutdown be an integral part
|
||||
of the system design.
|
||||
|
||||
|
||||
Example 3: patbench
|
||||
-------------------
|
||||
|
||||
This program allows users to detect which signatures may be the most expensive
|
||||
in a set of patterns. It is designed for use with small to medium pattern set
|
||||
sizes (e.g. 5-500). If used with very large pattern sets it may take a very
|
||||
long time - the number of recompiles done is `g * O(lg2(n))` where `g` is the
|
||||
number of generations and `n` is the number of patterns (assuming that `n >>
|
||||
g`).
|
||||
|
||||
This utility will return a cumulative series of removed patterns. The first
|
||||
generation will find and remove a single pattern. The second generation will
|
||||
begin with the first pattern removed and find another pattern to remove, etc.
|
||||
So if we have 100 patterns and 15 generations, the final generation's score
|
||||
will be a run over 85 patterns.
|
||||
|
||||
This utility is probabilistic. It is possible that the pattern removed in a
|
||||
generation is not a particularly expensive pattern. To reduce noise in the
|
||||
results use 'taskset' and set the number of repeats to a level that still
|
||||
completes in reasonable time (this will reduce the effect of random measurement
|
||||
noise).
|
||||
|
||||
The criterion for performance can be altered by use of the `-C<x>` flag where
|
||||
`<x>` can be `t,r,s,c,b`, selecting pattern matching throughput, scratch size,
|
||||
stream state size (only available in streaming mode), compile time and bytecode
|
||||
size respectively.
|
||||
|
||||
This utility will also not produce good results if all the patterns are roughly
|
||||
equally expensive.
|
||||
|
||||
### Factor Group Size:
|
||||
|
||||
If there are multiple expensive patterns that are very similar on the
|
||||
left-hand-side or identical, this utility will typically not find these groups
|
||||
unless the `-F` flag is used to search for a group size that is equal to or
|
||||
larger than the size of the group of similar patterns.
|
||||
|
||||
Otherwise, removing a portion of the similar patterns will have no or almost no
|
||||
effect, and the search procedure used relies on the ability to remove all of
|
||||
the similar patterns in at least one search case, something which will only
|
||||
happen if the `factor_group_size` is large enough.
|
||||
|
||||
This alters the operation of the tool so that instead of trying to find the
|
||||
single pattern whose removal has the most effect by binary search (the default
|
||||
with `factor_group_size == 1`), we attempt to find the N patterns whose removal
|
||||
has the most effect by searching over `N + 1` evenly sized groups, removing
|
||||
only `1/(N + 1)` of the search signatures per iteration.
|
||||
|
||||
Note that the number of recompiles done greatly increases with increased factor
|
||||
group size. For example, with `factor_group_size = 1`, we do `g * 2 * lg2(n)`
|
||||
recompiles, while with `factor_group_size = 4`, we do `g * 4 * log(5/4)(n)`.
|
||||
Informally the number of generations we require goes up as we eliminate a
|
||||
smaller number of signatures and then we have to do more work per generation.
|
892
examples/patbench.cc
Normal file
892
examples/patbench.cc
Normal file
@ -0,0 +1,892 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Hyperscan pattern benchmarker.
|
||||
*
|
||||
* This program allows users to detect which signatures may be the most
|
||||
* expensive in a set of patterns. It is designed for use with small to medium
|
||||
* pattern set sizes (e.g. 5-500). If used with very large pattern sets it may
|
||||
* take a very long time - the number of recompiles done is g * O(lg2(n)) where
|
||||
* g is the number of generations and n is the number of patterns (assuming
|
||||
* that n >> g).
|
||||
*
|
||||
* This utility will return a cumulative series of removed patterns. The first
|
||||
* generation will find and remove a single pattern. The second generation will
|
||||
* begin with the first pattern removed and find another pattern to remove,
|
||||
* etc. So if we have 100 patterns and 15 generations, the final generation's
|
||||
* score will be a run over 85 patterns.
|
||||
*
|
||||
* This utility is probabilistic. It is possible that the pattern removed in a
|
||||
* generation is not a particularly expensive pattern. To reduce noise in the
|
||||
* results use 'taskset' and set the number of repeats to a level that still
|
||||
* completes in reasonable time (this will reduce the effect of random
|
||||
* measurement noise).
|
||||
*
|
||||
* The criterion for performance can be altered by use of the -C<x> flag where
|
||||
* <x> can be t,r,s,c,b, selecting pattern matching throughput, scratch size,
|
||||
* stream state size (only available in streaming mode), compile time and
|
||||
* bytecode size respectively.
|
||||
*
|
||||
* This utility will also not produce good results if all the patterns are
|
||||
* roughly equally expensive.
|
||||
*
|
||||
* Factor Group Size:
|
||||
*
|
||||
* If there are multiple expensive patterns that are very similar on the
|
||||
* left-hand-side or identical, this utility will typically not find these
|
||||
* groups unless the -F flag is used to search for a group size that is equal
|
||||
* to or larger than the size of the group of similar patterns.
|
||||
*
|
||||
* Otherwise, removing a portion of the similar patterns will have no or almost
|
||||
* no effect, and the search procedure used relies on the ability to remove all
|
||||
* of the similar patterns in at least one search case, something which will
|
||||
* only happen if the factor_group_size is large enough.
|
||||
*
|
||||
* This alters the operation of our tool so that instead of trying to find the
|
||||
* single pattern whose removal has the most effect by binary search (the
|
||||
* default with factor_group_size == 1), we attempt to find the N patterns
|
||||
* whose removal has the most effect by searching over N+1 evenly sized groups,
|
||||
* removing only 1/(N+1) of the search signatures per iteration.
|
||||
*
|
||||
* Note that the number of recompiles done greatly increases with increased
|
||||
* factor group size. For example, with factor_group_size = 1, we do g * 2 *
|
||||
* lg2(n) recompiles, while with factor_group_size = 4, we do g * 4 *
|
||||
* log(5/4)(n). Informally the number of generations we require goes up as we
|
||||
* eliminate a smaller number of signatures and then we have to do more work per
|
||||
* generation.
|
||||
*
|
||||
*
|
||||
* Build instructions:
|
||||
*
|
||||
* g++ -o patbench patbench.cc $(pkg-config --cflags --libs libhs) -lpcap
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* ./patbench [ -n repeats] [ -G generations] [ -C criterion ]
|
||||
* [ -F factor_group_size ] [ -N | -S ] <pattern file> <pcap file>
|
||||
*
|
||||
* -n repeats sets the number of times the PCAP is repeatedly scanned
|
||||
* with the pattern
|
||||
* -G generations sets the number of generations that the algorithm is
|
||||
* run for
|
||||
* -N sets non-streaming mode, -S sets streaming mode (default)
|
||||
* -F sets the factor group size (must be >0); this allows the detection
|
||||
* of multiple interacting factors
|
||||
*
|
||||
* -C sets the "criterion", which can be either:
|
||||
* t throughput (the default) - this requires a pcap file
|
||||
* r scratch size
|
||||
* s stream state size
|
||||
* c compile time
|
||||
* b bytecode size
|
||||
*
|
||||
* We recommend the use of a utility like 'taskset' on multiprocessor hosts to
|
||||
* lock execution to a single processor: this will remove processor migration
|
||||
* by the scheduler as a source of noise in the results.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
// We use the BSD primitives throughout as they exist on both BSD and Linux.
|
||||
#define __FAVOR_BSD
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/udp.h>
|
||||
#include <netinet/ip_icmp.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <pcap.h>
|
||||
|
||||
#include <hs.h>
|
||||
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::ifstream;
|
||||
using std::string;
|
||||
using std::unordered_map;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::min;
|
||||
using std::max;
|
||||
using std::copy;
|
||||
|
||||
// The metric by which a pattern set is scored. print_criterion() defines the
// unit each value is reported in, and higher_is_better() defines whether a
// larger or a smaller score counts as an improvement.
enum Criterion {
    CRITERION_THROUGHPUT,    // reported in Megabits/s; higher is better
    CRITERION_BYTECODE_SIZE, // reported in bytes; lower is better
    CRITERION_COMPILE_TIME,  // reported in seconds; lower is better
    CRITERION_STREAM_STATE,  // reported in bytes; lower is better
    CRITERION_SCRATCH_SIZE   // reported in bytes; lower is better
};
|
||||
|
||||
// Returns true when a larger score is an improvement for criterion c.
// Throughput is the only such criterion; every other criterion is a cost.
static bool higher_is_better(Criterion c) {
    switch (c) {
    case CRITERION_THROUGHPUT:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
// Writes val to stdout followed by the unit appropriate for criterion c.
// Throughput and compile time are printed as fixed-point numbers with three
// decimal places; every other criterion is truncated to a whole byte count.
static void print_criterion(Criterion c, double val) {
    if (c == CRITERION_THROUGHPUT || c == CRITERION_COMPILE_TIME) {
        const char *unit =
            (c == CRITERION_THROUGHPUT) ? " Megabits/s" : " seconds";
        cout << std::fixed << std::setprecision(3) << val << unit;
        return;
    }

    // Bytecode size, stream state size, scratch size and anything else.
    cout << static_cast<size_t>(val) << " bytes";
}
|
||||
|
||||
// Key for identifying a stream in our pcap input data, using data from its IP
|
||||
// headers.
|
||||
struct FiveTuple {
|
||||
unsigned int protocol;
|
||||
unsigned int srcAddr;
|
||||
unsigned int srcPort;
|
||||
unsigned int dstAddr;
|
||||
unsigned int dstPort;
|
||||
|
||||
// Construct a FiveTuple from a TCP or UDP packet.
|
||||
FiveTuple(const struct ip *iphdr) {
|
||||
// IP fields
|
||||
protocol = iphdr->ip_p;
|
||||
srcAddr = iphdr->ip_src.s_addr;
|
||||
dstAddr = iphdr->ip_dst.s_addr;
|
||||
|
||||
// UDP/TCP ports
|
||||
const struct udphdr *uh = (const struct udphdr *)
|
||||
(((const char *)iphdr) + (iphdr->ip_hl * 4));
|
||||
srcPort = uh->uh_sport;
|
||||
dstPort = uh->uh_dport;
|
||||
}
|
||||
|
||||
bool operator==(const FiveTuple &a) const {
|
||||
return protocol == a.protocol && srcAddr == a.srcAddr &&
|
||||
srcPort == a.srcPort && dstAddr == a.dstAddr &&
|
||||
dstPort == a.dstPort;
|
||||
}
|
||||
};
|
||||
|
||||
// A *very* simple hash function, used when we create an unordered_map of
|
||||
// FiveTuple objects.
|
||||
struct FiveTupleHash {
|
||||
size_t operator()(const FiveTuple &x) const {
|
||||
return x.srcAddr ^ x.dstAddr ^ x.protocol ^ x.srcPort ^ x.dstPort;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function. See end of file.
|
||||
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
|
||||
unsigned int *length);
|
||||
|
||||
// Match event handler: called every time Hyperscan finds a match.
|
||||
// Increments the match counter that ctx points to and asks for scanning to
// continue. The id, from, to and flags arguments are not used.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
            unsigned int flags, void *ctx) {
    // The caller passes the address of a size_t match counter as context.
    size_t &counter = *static_cast<size_t *>(ctx);
    ++counter;
    return 0; // zero means: keep scanning
}
|
||||
|
||||
// Simple timing class
|
||||
// Simple stopwatch: call start(), do the work, call stop(), then read the
// elapsed time with seconds().
//
// Intervals are measured with std::chrono::steady_clock. Unlike system_clock
// it is monotonic, so a wall-clock adjustment (NTP step, manual change) in
// the middle of a measurement cannot distort or negate the result.
class Clock {
public:
    // Record the beginning of the measured interval.
    void start() {
        time_start = std::chrono::steady_clock::now();
    }

    // Record the end of the measured interval.
    void stop() {
        time_end = std::chrono::steady_clock::now();
    }

    // Time between the last start() and the last stop(), in seconds.
    double seconds() const {
        std::chrono::duration<double> delta = time_end - time_start;
        return delta.count();
    }
private:
    std::chrono::time_point<std::chrono::steady_clock> time_start, time_end;
};
|
||||
|
||||
// Class wrapping all state associated with the benchmark
|
||||
class Benchmark {
|
||||
private:
|
||||
// Packet data to be scanned
|
||||
vector<string> packets;
|
||||
|
||||
// Stream ID for each packet
|
||||
vector<size_t> stream_ids;
|
||||
|
||||
// Map used to construct stream_ids
|
||||
unordered_map<FiveTuple, size_t, FiveTupleHash> stream_map;
|
||||
|
||||
// Hyperscan compiled database
|
||||
hs_database_t *db = nullptr;
|
||||
|
||||
// Hyperscan temporary scratch space
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
|
||||
// Vector of Hyperscan stream state
|
||||
vector<hs_stream_t *> streams;
|
||||
|
||||
// Count of matches found while scanning
|
||||
size_t matchCount = 0;
|
||||
public:
|
||||
~Benchmark() {
    // Release the scratch space and the compiled database owned by this
    // object. Note that any streams still held in 'streams' are not closed
    // here; closeStreams() is the only place that closes them.
    hs_free_scratch(scratch);
    hs_free_database(db);
}
|
||||
|
||||
// Initialisation; after this call, Benchmark owns the database and will
|
||||
// ensure it is freed.
|
||||
void setDatabase(hs_database_t *hs_db) {
    // Drop the database we owned previously (if any) and adopt the new one.
    hs_free_database(db);
    db = hs_db;

    // The existing scratch pointer is handed back to hs_alloc_scratch so
    // that the scratch space is (re)allocated to be large enough for the new
    // database.
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
        exit(-1);
    }
}
|
||||
const hs_database_t *getDatabase() const {
    // Read-only access to the current database; ownership stays with this
    // object. The pointer is nullptr until setDatabase() has been called.
    return db;
}
|
||||
|
||||
size_t getScratchSize() const {
|
||||
size_t scratch_size;
|
||||
hs_error_t err = hs_scratch_size(scratch, &scratch_size);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: could not query scratch space size. Exiting."
|
||||
<< endl;
|
||||
exit(-1);
|
||||
}
|
||||
return scratch_size;
|
||||
}
|
||||
|
||||
// Read a set of streams from a pcap file
|
||||
bool readStreams(const char *pcapFile) {
|
||||
// Open PCAP file for input
|
||||
char errbuf[PCAP_ERRBUF_SIZE];
|
||||
pcap_t *pcapHandle = pcap_open_offline(pcapFile, errbuf);
|
||||
if (pcapHandle == nullptr) {
|
||||
cerr << "ERROR: Unable to open pcap file \"" << pcapFile
|
||||
<< "\": " << errbuf << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct pcap_pkthdr pktHeader;
|
||||
const unsigned char *pktData;
|
||||
while ((pktData = pcap_next(pcapHandle, &pktHeader)) != nullptr) {
|
||||
unsigned int offset = 0, length = 0;
|
||||
if (!payloadOffset(pktData, &offset, &length)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Valid TCP or UDP packet
|
||||
const struct ip *iphdr = (const struct ip *)(pktData
|
||||
+ sizeof(struct ether_header));
|
||||
const char *payload = (const char *)pktData + offset;
|
||||
|
||||
size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
|
||||
stream_map.size())).first->second;
|
||||
|
||||
packets.push_back(string(payload, length));
|
||||
stream_ids.push_back(id);
|
||||
}
|
||||
pcap_close(pcapHandle);
|
||||
|
||||
return !packets.empty();
|
||||
}
|
||||
|
||||
// Return the number of bytes scanned
|
||||
size_t bytes() const {
|
||||
size_t sum = 0;
|
||||
for (const auto &packet : packets) {
|
||||
sum += packet.size();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Return the number of matches found.
|
||||
size_t matches() const {
|
||||
return matchCount;
|
||||
}
|
||||
|
||||
// Clear the number of matches found.
|
||||
void clearMatches() {
|
||||
matchCount = 0;
|
||||
}
|
||||
|
||||
// Open a Hyperscan stream for each stream in stream_ids
|
||||
void openStreams() {
|
||||
streams.resize(stream_map.size());
|
||||
for (auto &stream : streams) {
|
||||
hs_error_t err = hs_open_stream(db, 0, &stream);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to open stream. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close all open Hyperscan streams (potentially generating any
|
||||
// end-anchored matches)
|
||||
void closeStreams() {
|
||||
for (auto &stream : streams) {
|
||||
hs_error_t err =
|
||||
hs_close_stream(stream, scratch, onMatch, &matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to close stream. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan each packet (in the ordering given in the PCAP file) through
|
||||
// Hyperscan using the streaming interface.
|
||||
void scanStreams() {
|
||||
for (size_t i = 0; i != packets.size(); ++i) {
|
||||
const std::string &pkt = packets[i];
|
||||
hs_error_t err = hs_scan_stream(streams[stream_ids[i]],
|
||||
pkt.c_str(), pkt.length(), 0,
|
||||
scratch, onMatch, &matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to scan packet. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan each packet (in the ordering given in the PCAP file) through
|
||||
// Hyperscan using the block-mode interface.
|
||||
void scanBlock() {
|
||||
for (size_t i = 0; i != packets.size(); ++i) {
|
||||
const std::string &pkt = packets[i];
|
||||
hs_error_t err = hs_scan(db, pkt.c_str(), pkt.length(), 0,
|
||||
scratch, onMatch, &matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to scan packet. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// helper function - see end of file
|
||||
static void parseFile(const char *filename, vector<string> &patterns,
|
||||
vector<unsigned> &flags, vector<unsigned> &ids,
|
||||
vector<string> &originals);
|
||||
|
||||
class Sigdata {
|
||||
vector<unsigned> flags;
|
||||
vector<unsigned> ids;
|
||||
vector<string> patterns;
|
||||
vector<string> originals;
|
||||
|
||||
public:
|
||||
Sigdata() {}
|
||||
Sigdata(const char *filename) {
|
||||
parseFile(filename, patterns, flags, ids, originals);
|
||||
|
||||
}
|
||||
|
||||
const string &get_original(unsigned index) const {
|
||||
return originals[index];
|
||||
}
|
||||
|
||||
hs_database_t *compileDatabase(unsigned mode, double *compileTime) const {
|
||||
hs_database_t *db = nullptr;
|
||||
hs_compile_error_t *compileErr;
|
||||
|
||||
// Turn our vector of strings into a vector of char*'s to pass in to
|
||||
// hs_compile_multi. (This is just using the vector of strings as
|
||||
// dynamic storage.)
|
||||
vector<const char *> cstrPatterns;
|
||||
cstrPatterns.reserve(patterns.size());
|
||||
for (const auto &pattern : patterns) {
|
||||
cstrPatterns.push_back(pattern.c_str());
|
||||
}
|
||||
|
||||
Clock clock;
|
||||
clock.start();
|
||||
hs_error_t err = hs_compile_multi(cstrPatterns.data(), flags.data(),
|
||||
ids.data(), cstrPatterns.size(), mode,
|
||||
nullptr, &db, &compileErr);
|
||||
clock.stop();
|
||||
if (err != HS_SUCCESS) {
|
||||
if (compileErr->expression < 0) {
|
||||
// The error does not refer to a particular expression.
|
||||
cerr << "ERROR: " << compileErr->message << endl;
|
||||
} else {
|
||||
cerr << "ERROR: Pattern '"
|
||||
<< patterns[compileErr->expression]
|
||||
<< "' failed with error '" << compileErr->message << "'"
|
||||
<< endl;
|
||||
}
|
||||
// As the compileErr pointer points to dynamically allocated memory,
|
||||
// if we get an error, we must be sure to release it. This is not
|
||||
// necessary when no error is detected.
|
||||
hs_free_compile_error(compileErr);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
*compileTime = clock.seconds();
|
||||
return db;
|
||||
}
|
||||
|
||||
unsigned size() const {
|
||||
return patterns.size();
|
||||
}
|
||||
|
||||
Sigdata cloneExclude(const set<unsigned> &excludeIndexSet) const {
|
||||
Sigdata c;
|
||||
for (unsigned i = 0, e = size(); i != e; ++i) {
|
||||
if (excludeIndexSet.find(i) == excludeIndexSet.end()) {
|
||||
c.flags.push_back(flags[i]);
|
||||
c.ids.push_back(ids[i]);
|
||||
c.patterns.push_back(patterns[i]);
|
||||
c.originals.push_back(originals[i]);
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
static
void usage(const char *) {
    // Print the command-line help to stderr, one statement per output line.
    // The program-name argument is unused.

    // Synopsis.
    cerr << "Usage:" << endl << endl;
    cerr << " patbench [-n repeats] [ -G generations] [ -C criterion ]" << endl;
    cerr << " [ -F factor_group_size ] [ -N | -S ] "
         << "<pattern file> <pcap file>" << endl << endl;

    // Per-option descriptions.
    cerr << " -n repeats sets the number of times the PCAP is repeatedly "
            "scanned" << endl;
    cerr << " with the pattern." << endl;
    cerr << " -G generations sets the number of generations that the "
            "algorithm is" << endl;
    cerr << " run for." << endl;
    cerr << " -N sets non-streaming mode, -S sets streaming mode (default)."
         << endl;
    cerr << " -F sets the factor group size (must be >0); this "
            "allows the detection" << endl;
    cerr << " of multiple interacting factors." << endl;
    cerr << "" << endl;

    // Accepted values for -C.
    cerr << " -C sets the 'criterion', which can be either:" << endl;
    cerr << " t throughput (the default) - this requires a pcap file"
         << endl;
    cerr << " r scratch size" << endl;
    cerr << " s stream state size" << endl;
    cerr << " c compile time" << endl;
    cerr << " b bytecode size" << endl << endl;

    // Advice on getting stable measurements.
    cerr << "We recommend the use of a utility like 'taskset' on "
            "multiprocessor hosts to" << endl;
    cerr << "lock execution to a single processor: this will remove processor "
            "migration" << endl;
    cerr << "by the scheduler as a source of noise in the results." << endl;
}
|
||||
|
||||
static
|
||||
double measure_stream_time(Benchmark &bench, unsigned int repeatCount) {
|
||||
Clock clock;
|
||||
bench.clearMatches();
|
||||
clock.start();
|
||||
for (unsigned int i = 0; i < repeatCount; i++) {
|
||||
bench.openStreams();
|
||||
bench.scanStreams();
|
||||
bench.closeStreams();
|
||||
}
|
||||
clock.stop();
|
||||
double secsScan = clock.seconds();
|
||||
return secsScan;
|
||||
}
|
||||
|
||||
static
|
||||
double measure_block_time(Benchmark &bench, unsigned int repeatCount) {
|
||||
Clock clock;
|
||||
bench.clearMatches();
|
||||
clock.start();
|
||||
for (unsigned int i = 0; i < repeatCount; i++) {
|
||||
bench.scanBlock();
|
||||
}
|
||||
clock.stop();
|
||||
double secsScan = clock.seconds();
|
||||
return secsScan;
|
||||
}
|
||||
|
||||
static
|
||||
double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
|
||||
unsigned repeatCount, Criterion criterion,
|
||||
bool diagnose = true) {
|
||||
double compileTime = 0;
|
||||
bench.setDatabase(sigs.compileDatabase(mode, &compileTime));
|
||||
|
||||
switch (criterion) {
|
||||
case CRITERION_BYTECODE_SIZE: {
|
||||
size_t dbSize;
|
||||
hs_error_t err = hs_database_size(bench.getDatabase(), &dbSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: could not retrieve bytecode size" << endl;
|
||||
exit(1);
|
||||
}
|
||||
return dbSize;
|
||||
}
|
||||
case CRITERION_COMPILE_TIME:
|
||||
return compileTime;
|
||||
case CRITERION_STREAM_STATE: {
|
||||
size_t streamStateSize;
|
||||
hs_error_t err = hs_stream_size(bench.getDatabase(), &streamStateSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: could not retrieve stream state size" << endl;
|
||||
exit(1);
|
||||
}
|
||||
return streamStateSize;
|
||||
}
|
||||
case CRITERION_SCRATCH_SIZE:
|
||||
return bench.getScratchSize();
|
||||
case CRITERION_THROUGHPUT:
|
||||
default:
|
||||
break; // do nothing - we are THROUGHPUT
|
||||
}
|
||||
double scan_time;
|
||||
if (mode == HS_MODE_NOSTREAM) {
|
||||
scan_time = measure_block_time(bench, repeatCount);
|
||||
} else {
|
||||
scan_time = measure_stream_time(bench, repeatCount);
|
||||
}
|
||||
size_t bytes = bench.bytes();
|
||||
size_t matches = bench.matches();
|
||||
if (diagnose) {
|
||||
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
|
||||
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
|
||||
<< std::fixed << std::setprecision(3)
|
||||
<< (bytes * 8 * repeatCount) / (scan_time * 1000000)
|
||||
<< " Mbps, Matches " << matches << endl;
|
||||
}
|
||||
return (bytes * 8 * repeatCount) / (scan_time * 1000000);
|
||||
}
|
||||
|
||||
// Main entry point.
|
||||
int main(int argc, char **argv) {
|
||||
unsigned int repeatCount = 1;
|
||||
unsigned int mode = HS_MODE_STREAM;
|
||||
Criterion criterion = CRITERION_THROUGHPUT;
|
||||
unsigned int gen_max = 10;
|
||||
unsigned int factor_max = 1;
|
||||
// Process command line arguments.
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "SNn:G:F:C:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'F':
|
||||
factor_max = atoi(optarg);
|
||||
break;
|
||||
case 'G':
|
||||
gen_max = atoi(optarg);
|
||||
break;
|
||||
case 'S':
|
||||
mode = HS_MODE_STREAM;
|
||||
break;
|
||||
case 'N':
|
||||
mode = HS_MODE_NOSTREAM;
|
||||
break;
|
||||
case 'C':
|
||||
switch (optarg[0]) {
|
||||
case 't':
|
||||
criterion = CRITERION_THROUGHPUT;
|
||||
break;
|
||||
case 'b':
|
||||
criterion = CRITERION_BYTECODE_SIZE;
|
||||
break;
|
||||
case 'c':
|
||||
criterion = CRITERION_COMPILE_TIME;
|
||||
break;
|
||||
case 's':
|
||||
criterion = CRITERION_STREAM_STATE;
|
||||
break;
|
||||
case 'r':
|
||||
criterion = CRITERION_SCRATCH_SIZE;
|
||||
break;
|
||||
default:
|
||||
cerr << "Unrecognised criterion: " << optarg[0] << endl;
|
||||
usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
repeatCount = atoi(optarg);
|
||||
break;
|
||||
default:
|
||||
usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (argc - optind != ((criterion == CRITERION_THROUGHPUT) ? 2 : 1)) {
|
||||
usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
const char *patternFile = argv[optind];
|
||||
const char *pcapFile = argv[optind + 1];
|
||||
|
||||
// Read our input PCAP file in
|
||||
Benchmark bench;
|
||||
if (criterion == CRITERION_THROUGHPUT) {
|
||||
if (!bench.readStreams(pcapFile)) {
|
||||
cerr << "Unable to read packets from PCAP file. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((criterion == CRITERION_STREAM_STATE) && (mode != HS_MODE_STREAM)) {
|
||||
cerr << "Cannot evaluate stream state for block mode compile. Exiting."
|
||||
<< endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
cout << "Base signatures: " << patternFile;
|
||||
if (pcapFile) {
|
||||
cout << "\tPCAP input file: " << pcapFile
|
||||
<< "\tRepeat count: " << repeatCount;
|
||||
}
|
||||
if (mode == HS_MODE_STREAM) {
|
||||
cout << "\tMode: streaming";
|
||||
} else {
|
||||
cout << "\tMode: block";
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
Sigdata sigs(patternFile);
|
||||
|
||||
// calculate and show a baseline
|
||||
eval_set(bench, sigs, mode, repeatCount, criterion);
|
||||
|
||||
set<unsigned> work_sigs, exclude;
|
||||
|
||||
for (unsigned i = 0; i < sigs.size(); ++i) {
|
||||
work_sigs.insert(i);
|
||||
}
|
||||
|
||||
double score_base =
|
||||
eval_set(bench, sigs, mode, repeatCount, criterion, false);
|
||||
bool maximize = higher_is_better(criterion);
|
||||
cout << "Number of signatures: " << sigs.size() << endl;
|
||||
cout << "Base performance: ";
|
||||
print_criterion(criterion, score_base);
|
||||
cout << endl;
|
||||
|
||||
unsigned generations = min(gen_max, (sigs.size() - 1) / factor_max);
|
||||
|
||||
cout << "Cutting signatures cumulatively for " << generations
|
||||
<< " generations" << endl;
|
||||
for (unsigned gen = 0; gen < generations; ++gen) {
|
||||
cout << "Generation " << gen << " ";
|
||||
set<unsigned> s(work_sigs.begin(), work_sigs.end());
|
||||
double best = maximize ? 0 : 1000000000000.0;
|
||||
unsigned count = 0;
|
||||
while (s.size() > factor_max) {
|
||||
count++;
|
||||
cout << "." << std::flush;
|
||||
vector<unsigned> sv(s.begin(), s.end());
|
||||
random_shuffle(sv.begin(), sv.end());
|
||||
unsigned groups = factor_max + 1;
|
||||
for (unsigned current_group = 0; current_group < groups;
|
||||
current_group++) {
|
||||
unsigned sz = sv.size();
|
||||
unsigned lo = (current_group * sz) / groups;
|
||||
unsigned hi = ((current_group + 1) * sz) / groups;
|
||||
|
||||
set<unsigned> s_part1(sv.begin(), sv.begin() + lo);
|
||||
set<unsigned> s_part2(sv.begin() + hi, sv.end());
|
||||
set<unsigned> s_tmp = s_part1;
|
||||
s_tmp.insert(s_part2.begin(), s_part2.end());
|
||||
set<unsigned> tmp = s_tmp;
|
||||
tmp.insert(exclude.begin(), exclude.end());
|
||||
Sigdata sigs_tmp = sigs.cloneExclude(tmp);
|
||||
double score = eval_set(bench, sigs_tmp, mode, repeatCount,
|
||||
criterion, false);
|
||||
|
||||
if ((current_group == 0) ||
|
||||
(!maximize ? (score < best) : (score > best))) {
|
||||
s = s_tmp;
|
||||
best = score;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned i = count; i < 16; i++) {
|
||||
cout << " ";
|
||||
}
|
||||
cout << "Performance: ";
|
||||
print_criterion(criterion, best);
|
||||
cout << " (" << std::fixed << std::setprecision(3) << (best / score_base)
|
||||
<< "x) after cutting:" << endl;
|
||||
|
||||
// s now has factor_max signatures
|
||||
for (const auto &found : s) {
|
||||
exclude.insert(found);
|
||||
work_sigs.erase(found);
|
||||
cout << sigs.get_original(found) << endl;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Helper function to locate the offset of the first byte of the payload in the
 * given ethernet frame. Offset into the packet, and the length of the payload
 * are returned in the arguments @a offset and @a length.
 *
 * Returns true only for unfragmented IPv4 packets carrying TCP or UDP with a
 * non-empty payload. Packets whose header fields are inconsistent (an IP or
 * TCP header length below the minimum, or an IP total length too small to
 * cover the headers) are rejected rather than producing a wrapped-around
 * payload length.
 *
 * The caller must make sure @a pkt_data holds at least the Ethernet and IPv4
 * headers: this function is not told how many bytes were captured.
 */
static
bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                   unsigned int *length) {
    const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));
    const tcphdr *th = nullptr;

    // Ignore packets that aren't IPv4
    if (iph->ip_v != 4) {
        return false;
    }

    // Ignore fragmented packets.
    if (iph->ip_off & htons(IP_MF | IP_OFFMASK)) {
        return false;
    }

    // IP header length, and transport header length.
    unsigned int ihlen = iph->ip_hl * 4;
    unsigned int thlen = 0;

    // A header length smaller than the fixed IPv4 header is malformed.
    if (ihlen < sizeof(ip)) {
        return false;
    }

    switch (iph->ip_p) {
    case IPPROTO_TCP:
        th = (const tcphdr *)((const char *)iph + ihlen);
        thlen = th->th_off * 4;
        // Likewise for a data offset smaller than the fixed TCP header.
        if (thlen < sizeof(tcphdr)) {
            return false;
        }
        break;
    case IPPROTO_UDP:
        thlen = sizeof(udphdr);
        break;
    default:
        return false;
    }

    const unsigned int headerLen = ihlen + thlen;
    const unsigned int totalLen = ntohs(iph->ip_len);

    *offset = sizeof(ether_header) + headerLen;

    // The IP total length must cover both headers with at least one payload
    // byte left over. Checking before subtracting keeps the unsigned
    // arithmetic from wrapping to a huge length.
    if (totalLen <= headerLen) {
        *length = 0;
        return false;
    }

    *length = totalLen - headerLen;
    return true;
}
|
||||
|
||||
static unsigned parseFlags(const string &flagsStr) {
|
||||
unsigned flags = 0;
|
||||
for (const auto &c : flagsStr) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
flags |= HS_FLAG_CASELESS; break;
|
||||
case 'm':
|
||||
flags |= HS_FLAG_MULTILINE; break;
|
||||
case 's':
|
||||
flags |= HS_FLAG_DOTALL; break;
|
||||
case 'H':
|
||||
flags |= HS_FLAG_SINGLEMATCH; break;
|
||||
case 'V':
|
||||
flags |= HS_FLAG_ALLOWEMPTY; break;
|
||||
case '8':
|
||||
flags |= HS_FLAG_UTF8; break;
|
||||
case 'W':
|
||||
flags |= HS_FLAG_UCP; break;
|
||||
default:
|
||||
cerr << "Unsupported flag \'" << c << "\'" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
static void parseFile(const char *filename, vector<string> &patterns,
|
||||
vector<unsigned> &flags, vector<unsigned> &ids,
|
||||
vector<string> &originals) {
|
||||
ifstream inFile(filename);
|
||||
if (!inFile.good()) {
|
||||
cerr << "ERROR: Can't open pattern file \"" << filename << "\"" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
for (unsigned i = 1; !inFile.eof(); ++i) {
|
||||
string line;
|
||||
getline(inFile, line);
|
||||
|
||||
// if line is empty, or a comment, we can skip it
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// otherwise, it should be ID:PCRE, e.g.
|
||||
// 10001:/foobar/is
|
||||
|
||||
size_t colonIdx = line.find_first_of(':');
|
||||
if (colonIdx == string::npos) {
|
||||
cerr << "ERROR: Could not parse line " << i << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// we should have an unsigned int as an ID, before the colon
|
||||
unsigned id = std::stoi(line.substr(0, colonIdx).c_str());
|
||||
|
||||
// rest of the expression is the PCRE
|
||||
const string expr(line.substr(colonIdx + 1));
|
||||
|
||||
size_t flagsStart = expr.find_last_of('/');
|
||||
if (flagsStart == string::npos) {
|
||||
cerr << "ERROR: no trailing '/' char" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
string pcre(expr.substr(1, flagsStart - 1));
|
||||
string flagsStr(expr.substr(flagsStart + 1, expr.size() - flagsStart));
|
||||
unsigned flag = parseFlags(flagsStr);
|
||||
|
||||
originals.push_back(line);
|
||||
patterns.push_back(pcre);
|
||||
flags.push_back(flag);
|
||||
ids.push_back(id);
|
||||
}
|
||||
}
|
679
examples/pcapscan.cc
Normal file
679
examples/pcapscan.cc
Normal file
@ -0,0 +1,679 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Hyperscan example program 2: pcapscan
|
||||
*
|
||||
* This example is a very simple packet scanning benchmark. It scans a given
|
||||
* PCAP file full of network traffic against a group of regular expressions and
|
||||
* returns some coarse performance measurements. This example provides a quick
|
||||
* way to examine the performance achievable on a particular combination of
|
||||
* platform, pattern set and input data.
|
||||
*
|
||||
* Build instructions:
|
||||
*
|
||||
* g++ -std=c++11 -O2 -o pcapscan pcapscan.cc $(pkg-config --cflags --libs libhs) -lpcap
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* ./pcapscan [-n repeats] <pattern file> <pcap file>
|
||||
*
|
||||
* We recommend the use of a utility like 'taskset' on multiprocessor hosts to
|
||||
* pin execution to a single processor: this will remove processor migration
|
||||
* by the scheduler as a source of noise in the results.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
// We use the BSD primitives throughout as they exist on both BSD and Linux.
|
||||
#define __FAVOR_BSD
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <netinet/udp.h>
|
||||
#include <netinet/ip_icmp.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <pcap.h>
|
||||
|
||||
#include <hs.h>
|
||||
|
||||
using std::cerr;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::ifstream;
|
||||
using std::string;
|
||||
using std::unordered_map;
|
||||
using std::vector;
|
||||
|
||||
// Identifies a stream in the pcap input by protocol, source address/port
// and destination address/port, all copied from the packet headers exactly
// as stored there (no byte-order conversion is applied).
struct FiveTuple {
    unsigned int protocol;
    unsigned int srcAddr;
    unsigned int srcPort;
    unsigned int dstAddr;
    unsigned int dstPort;

    // Build the key from the IP header of a TCP or UDP packet.
    FiveTuple(const struct ip *iphdr)
        : protocol(iphdr->ip_p), srcAddr(iphdr->ip_src.s_addr), srcPort(0),
          dstAddr(iphdr->ip_dst.s_addr), dstPort(0) {
        // The transport header follows the IP header, whose length is
        // ip_hl 32-bit words. The ports are read through the UDP header
        // layout for both protocols.
        const char *transport = (const char *)iphdr + (iphdr->ip_hl * 4);
        const struct udphdr *uh = (const struct udphdr *)transport;
        srcPort = uh->uh_sport;
        dstPort = uh->uh_dport;
    }

    // Two keys are equal when all five fields match.
    bool operator==(const FiveTuple &a) const {
        if (protocol != a.protocol) {
            return false;
        }
        if (srcAddr != a.srcAddr || srcPort != a.srcPort) {
            return false;
        }
        return dstAddr == a.dstAddr && dstPort == a.dstPort;
    }
};
|
||||
|
||||
// A *very* simple hash function, used when we create an unordered_map of
|
||||
// FiveTuple objects.
|
||||
struct FiveTupleHash {
|
||||
size_t operator()(const FiveTuple &x) const {
|
||||
return x.srcAddr ^ x.dstAddr ^ x.protocol ^ x.srcPort ^ x.dstPort;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function. See end of file.
|
||||
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
|
||||
unsigned int *length);
|
||||
|
||||
// Match callback handed to Hyperscan: invoked once per match found.
// `ctx` must point at the size_t match counter, which is incremented.
// Always returns 0 so that matching continues.
static
int onMatch(unsigned int id, unsigned long long from, unsigned long long to,
            unsigned int flags, void *ctx) {
    size_t &counter = *static_cast<size_t *>(ctx);
    ++counter;
    return 0; // continue matching
}
|
||||
|
||||
// Simple timing class: call start(), do the work, call stop(), then read
// the elapsed time with seconds().
//
// Intervals are measured with std::chrono::steady_clock, which never goes
// backwards. The wall clock (system_clock) can be adjusted while a
// measurement is running, which would corrupt the result.
class Clock {
public:
    // Record the start of the interval.
    void start() {
        time_start = std::chrono::steady_clock::now();
    }

    // Record the end of the interval.
    void stop() {
        time_end = std::chrono::steady_clock::now();
    }

    // Length of the last start()/stop() interval, in seconds.
    double seconds() const {
        std::chrono::duration<double> delta = time_end - time_start;
        return delta.count();
    }
private:
    std::chrono::time_point<std::chrono::steady_clock> time_start, time_end;
};
|
||||
|
||||
// Class wrapping all state associated with the benchmark
|
||||
class Benchmark {
|
||||
private:
|
||||
// Packet data to be scanned.
|
||||
vector<string> packets;
|
||||
|
||||
// The stream ID to which each packet belongs
|
||||
vector<size_t> stream_ids;
|
||||
|
||||
// Map used to construct stream_ids
|
||||
unordered_map<FiveTuple, size_t, FiveTupleHash> stream_map;
|
||||
|
||||
// Hyperscan compiled database (streaming mode)
|
||||
const hs_database_t *db_streaming;
|
||||
|
||||
// Hyperscan compiled database (block mode)
|
||||
const hs_database_t *db_block;
|
||||
|
||||
// Hyperscan temporary scratch space (used in both modes)
|
||||
hs_scratch_t *scratch;
|
||||
|
||||
// Vector of Hyperscan stream state (used in streaming mode)
|
||||
vector<hs_stream_t *> streams;
|
||||
|
||||
// Count of matches found during scanning
|
||||
size_t matchCount;
|
||||
|
||||
public:
|
||||
Benchmark(const hs_database_t *streaming, const hs_database_t *block)
|
||||
: db_streaming(streaming), db_block(block), scratch(nullptr),
|
||||
matchCount(0) {
|
||||
// Allocate enough scratch space to handle either streaming or block
|
||||
// mode, so we only need the one scratch region.
|
||||
hs_error_t err = hs_alloc_scratch(db_streaming, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
// This second call will increase the scratch size if more is required
|
||||
// for block mode.
|
||||
err = hs_alloc_scratch(db_block, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: could not allocate scratch space. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
~Benchmark() {
|
||||
// Free scratch region
|
||||
hs_free_scratch(scratch);
|
||||
}
|
||||
|
||||
// Read a set of streams from a pcap file
|
||||
bool readStreams(const char *pcapFile) {
|
||||
// Open PCAP file for input
|
||||
char errbuf[PCAP_ERRBUF_SIZE];
|
||||
pcap_t *pcapHandle = pcap_open_offline(pcapFile, errbuf);
|
||||
if (pcapHandle == nullptr) {
|
||||
cerr << "ERROR: Unable to open pcap file \"" << pcapFile
|
||||
<< "\": " << errbuf << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct pcap_pkthdr pktHeader;
|
||||
const unsigned char *pktData;
|
||||
while ((pktData = pcap_next(pcapHandle, &pktHeader)) != nullptr) {
|
||||
unsigned int offset = 0, length = 0;
|
||||
if (!payloadOffset(pktData, &offset, &length)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Valid TCP or UDP packet
|
||||
const struct ip *iphdr = (const struct ip *)(pktData
|
||||
+ sizeof(struct ether_header));
|
||||
const char *payload = (const char *)pktData + offset;
|
||||
|
||||
size_t id = stream_map.insert(std::make_pair(FiveTuple(iphdr),
|
||||
stream_map.size())).first->second;
|
||||
|
||||
packets.push_back(string(payload, length));
|
||||
stream_ids.push_back(id);
|
||||
}
|
||||
pcap_close(pcapHandle);
|
||||
|
||||
return !packets.empty();
|
||||
}
|
||||
|
||||
// Return the number of bytes scanned
|
||||
size_t bytes() const {
|
||||
size_t sum = 0;
|
||||
for (const auto &packet : packets) {
|
||||
sum += packet.size();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Return the number of matches found.
|
||||
size_t matches() const {
|
||||
return matchCount;
|
||||
}
|
||||
|
||||
// Clear the number of matches found.
|
||||
void clearMatches() {
|
||||
matchCount = 0;
|
||||
}
|
||||
|
||||
// Open a Hyperscan stream for each stream in stream_ids
|
||||
void openStreams() {
|
||||
streams.resize(stream_map.size());
|
||||
for (auto &stream : streams) {
|
||||
hs_error_t err = hs_open_stream(db_streaming, 0, &stream);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to open stream. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close all open Hyperscan streams (potentially generating any
|
||||
// end-anchored matches)
|
||||
void closeStreams() {
|
||||
for (auto &stream : streams) {
|
||||
hs_error_t err = hs_close_stream(stream, scratch, onMatch,
|
||||
&matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to close stream. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan each packet (in the ordering given in the PCAP file) through
|
||||
// Hyperscan using the streaming interface.
|
||||
void scanStreams() {
|
||||
for (size_t i = 0; i != packets.size(); ++i) {
|
||||
const std::string &pkt = packets[i];
|
||||
hs_error_t err = hs_scan_stream(streams[stream_ids[i]],
|
||||
pkt.c_str(), pkt.length(), 0,
|
||||
scratch, onMatch, &matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to scan packet. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan each packet (in the ordering given in the PCAP file) through
|
||||
// Hyperscan using the block-mode interface.
|
||||
void scanBlock() {
|
||||
for (size_t i = 0; i != packets.size(); ++i) {
|
||||
const std::string &pkt = packets[i];
|
||||
hs_error_t err = hs_scan(db_block, pkt.c_str(), pkt.length(), 0,
|
||||
scratch, onMatch, &matchCount);
|
||||
if (err != HS_SUCCESS) {
|
||||
cerr << "ERROR: Unable to scan packet. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Display some information about the compiled database and scanned data.
|
||||
void displayStats() {
|
||||
size_t numPackets = packets.size();
|
||||
size_t numStreams = stream_map.size();
|
||||
size_t numBytes = bytes();
|
||||
hs_error_t err;
|
||||
|
||||
cout << numPackets << " packets in " << numStreams
|
||||
<< " streams, totalling " << numBytes << " bytes." << endl;
|
||||
cout << "Average packet length: " << numBytes / numPackets << " bytes."
|
||||
<< endl;
|
||||
cout << "Average stream length: " << numBytes / numStreams << " bytes."
|
||||
<< endl;
|
||||
cout << endl;
|
||||
|
||||
size_t dbStream_size = 0;
|
||||
err = hs_database_size(db_streaming, &dbStream_size);
|
||||
if (err == HS_SUCCESS) {
|
||||
cout << "Streaming mode Hyperscan database size : "
|
||||
<< dbStream_size << " bytes." << endl;
|
||||
} else {
|
||||
cout << "Error getting streaming mode Hyperscan database size"
|
||||
<< endl;
|
||||
}
|
||||
|
||||
size_t dbBlock_size = 0;
|
||||
err = hs_database_size(db_block, &dbBlock_size);
|
||||
if (err == HS_SUCCESS) {
|
||||
cout << "Block mode Hyperscan database size : "
|
||||
<< dbBlock_size << " bytes." << endl;
|
||||
} else {
|
||||
cout << "Error getting block mode Hyperscan database size"
|
||||
<< endl;
|
||||
}
|
||||
|
||||
size_t stream_size = 0;
|
||||
err = hs_stream_size(db_streaming, &stream_size);
|
||||
if (err == HS_SUCCESS) {
|
||||
cout << "Streaming mode Hyperscan stream state size: "
|
||||
<< stream_size << " bytes (per stream)." << endl;
|
||||
} else {
|
||||
cout << "Error getting stream state size" << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// helper function - see end of file
|
||||
static void parseFile(const char *filename, vector<string> &patterns,
|
||||
vector<unsigned> &flags, vector<unsigned> &ids);
|
||||
|
||||
// Compile `expressions`, with their parallel `flags` and `ids`, into a
// Hyperscan database for the given mode, and print how long compilation
// took. On a compile error the message is printed and the process exits.
//
// flags and ids are taken by const reference: they are only read, so there
// is no need to copy both vectors on every call.
static hs_database_t *buildDatabase(const vector<const char *> &expressions,
                                    const vector<unsigned> &flags,
                                    const vector<unsigned> &ids,
                                    unsigned int mode) {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compileErr;
    hs_error_t err;

    Clock clock;
    clock.start();

    err = hs_compile_multi(expressions.data(), flags.data(), ids.data(),
                           expressions.size(), mode, nullptr, &db, &compileErr);

    clock.stop();

    if (err != HS_SUCCESS) {
        if (compileErr->expression < 0) {
            // The error does not refer to a particular expression.
            cerr << "ERROR: " << compileErr->message << endl;
        } else {
            cerr << "ERROR: Pattern '" << expressions[compileErr->expression]
                 << "' failed compilation with error: " << compileErr->message
                 << endl;
        }
        // As the compileErr pointer points to dynamically allocated memory, if
        // we get an error, we must be sure to release it. This is not
        // necessary when no error is detected.
        hs_free_compile_error(compileErr);
        exit(-1);
    }

    cout << "Hyperscan " << (mode == HS_MODE_STREAM ? "streaming" : "block")
         << " mode database compiled in " << clock.seconds() << " seconds."
         << endl;

    return db;
}
|
||||
|
||||
/**
|
||||
* This function will read in the file with the specified name, with an
|
||||
* expression per line, ignoring lines starting with '#' and build a Hyperscan
|
||||
* database for it.
|
||||
*/
|
||||
static void databasesFromFile(const char *filename,
|
||||
hs_database_t **db_streaming,
|
||||
hs_database_t **db_block) {
|
||||
// hs_compile_multi requires three parallel arrays containing the patterns,
|
||||
// flags and ids that we want to work with. To achieve this we use
|
||||
// vectors and new entries onto each for each valid line of input from
|
||||
// the pattern file.
|
||||
vector<string> patterns;
|
||||
vector<unsigned> flags;
|
||||
vector<unsigned> ids;
|
||||
|
||||
// do the actual file reading and string handling
|
||||
parseFile(filename, patterns, flags, ids);
|
||||
|
||||
// Turn our vector of strings into a vector of char*'s to pass in to
|
||||
// hs_compile_multi. (This is just using the vector of strings as dynamic
|
||||
// storage.)
|
||||
vector<const char*> cstrPatterns;
|
||||
for (const auto &pattern : patterns) {
|
||||
cstrPatterns.push_back(pattern.c_str());
|
||||
}
|
||||
|
||||
cout << "Compiling Hyperscan databases with " << patterns.size()
|
||||
<< " patterns." << endl;
|
||||
|
||||
*db_streaming = buildDatabase(cstrPatterns, flags, ids, HS_MODE_STREAM);
|
||||
*db_block = buildDatabase(cstrPatterns, flags, ids, HS_MODE_BLOCK);
|
||||
}
|
||||
|
||||
/** Write the command-line synopsis for program @a prog to stderr. */
static void usage(const char *prog) {
    std::ostream &out = std::cerr;
    out << "Usage: " << prog;
    out << " [-n repeats] <pattern file> <pcap file>" << std::endl;
}
|
||||
|
||||
// Main entry point.
|
||||
int main(int argc, char **argv) {
|
||||
unsigned int repeatCount = 1;
|
||||
|
||||
// Process command line arguments.
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "n:")) != -1) {
|
||||
switch (opt) {
|
||||
case 'n':
|
||||
repeatCount = atoi(optarg);
|
||||
break;
|
||||
default:
|
||||
usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (argc - optind != 2) {
|
||||
usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
const char *patternFile = argv[optind];
|
||||
const char *pcapFile = argv[optind + 1];
|
||||
|
||||
// Read our pattern set in and build Hyperscan databases from it.
|
||||
cout << "Pattern file: " << patternFile << endl;
|
||||
hs_database_t *db_streaming, *db_block;
|
||||
databasesFromFile(patternFile, &db_streaming, &db_block);
|
||||
|
||||
// Read our input PCAP file in
|
||||
Benchmark bench(db_streaming, db_block);
|
||||
cout << "PCAP input file: " << pcapFile << endl;
|
||||
if (!bench.readStreams(pcapFile)) {
|
||||
cerr << "Unable to read packets from PCAP file. Exiting." << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (repeatCount != 1) {
|
||||
cout << "Repeating PCAP scan " << repeatCount << " times." << endl;
|
||||
}
|
||||
|
||||
bench.displayStats();
|
||||
|
||||
Clock clock;
|
||||
|
||||
// Streaming mode scans.
|
||||
double secsStreamingScan = 0.0, secsStreamingOpenClose = 0.0;
|
||||
for (unsigned int i = 0; i < repeatCount; i++) {
|
||||
// Open streams.
|
||||
clock.start();
|
||||
bench.openStreams();
|
||||
clock.stop();
|
||||
secsStreamingOpenClose += clock.seconds();
|
||||
|
||||
// Scan all our packets in streaming mode.
|
||||
clock.start();
|
||||
bench.scanStreams();
|
||||
clock.stop();
|
||||
secsStreamingScan += clock.seconds();
|
||||
|
||||
// Close streams.
|
||||
clock.start();
|
||||
bench.closeStreams();
|
||||
clock.stop();
|
||||
secsStreamingOpenClose += clock.seconds();
|
||||
}
|
||||
|
||||
// Collect data from streaming mode scans.
|
||||
size_t bytes = bench.bytes();
|
||||
double tputStreamScanning = (bytes * 8 * repeatCount) / secsStreamingScan;
|
||||
double tputStreamOverhead = (bytes * 8 * repeatCount) / (secsStreamingScan + secsStreamingOpenClose);
|
||||
size_t matchesStream = bench.matches();
|
||||
double matchRateStream = matchesStream / ((bytes * repeatCount) / 1024.0); // matches per kilobyte
|
||||
|
||||
// Scan all our packets in block mode.
|
||||
bench.clearMatches();
|
||||
clock.start();
|
||||
for (unsigned int i = 0; i < repeatCount; i++) {
|
||||
bench.scanBlock();
|
||||
}
|
||||
clock.stop();
|
||||
double secsScanBlock = clock.seconds();
|
||||
|
||||
// Collect data from block mode scans.
|
||||
double tputBlockScanning = (bytes * 8 * repeatCount) / secsScanBlock;
|
||||
size_t matchesBlock = bench.matches();
|
||||
double matchRateBlock = matchesBlock / ((bytes * repeatCount) / 1024.0); // matches per kilobyte
|
||||
|
||||
cout << endl << "Streaming mode:" << endl << endl;
|
||||
cout << " Total matches: " << matchesStream << endl;
|
||||
cout << std::fixed << std::setprecision(4);
|
||||
cout << " Match rate: " << matchRateStream << " matches/kilobyte" << endl;
|
||||
cout << std::fixed << std::setprecision(2);
|
||||
cout << " Throughput (with stream overhead): "
|
||||
<< tputStreamOverhead/1000000 << " megabits/sec" << endl;
|
||||
cout << " Throughput (no stream overhead): "
|
||||
<< tputStreamScanning/1000000 << " megabits/sec" << endl;
|
||||
|
||||
cout << endl << "Block mode:" << endl << endl;
|
||||
cout << " Total matches: " << matchesBlock << endl;
|
||||
cout << std::fixed << std::setprecision(4);
|
||||
cout << " Match rate: " << matchRateBlock << " matches/kilobyte" << endl;
|
||||
cout << std::fixed << std::setprecision(2);
|
||||
cout << " Throughput: "
|
||||
<< tputBlockScanning/1000000 << " megabits/sec" << endl;
|
||||
|
||||
cout << endl;
|
||||
if (bytes < (2*1024*1024)) {
|
||||
cout << endl << "WARNING: Input PCAP file is less than 2MB in size." << endl
|
||||
<< "This test may have been too short to calculate accurate results." << endl;
|
||||
}
|
||||
|
||||
// Close Hyperscan databases
|
||||
hs_free_database(db_streaming);
|
||||
hs_free_database(db_block);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Helper function to locate the offset of the first byte of the payload in the
 * given ethernet frame. Offset into the packet, and the length of the payload
 * are returned in the arguments @a offset and @a length.
 *
 * Returns false (leaving the result unusable) for packets that are not IPv4,
 * are fragmented, carry a protocol other than TCP or UDP, have an empty
 * payload, or whose header length fields are inconsistent with the IP total
 * length.
 *
 * NOTE(review): the captured length of the frame is not passed in, so the
 * header fields are trusted to lie within the buffer - confirm that callers
 * only hand in complete frames.
 */
static bool payloadOffset(const unsigned char *pkt_data, unsigned int *offset,
                          unsigned int *length) {
    const ip *iph = (const ip *)(pkt_data + sizeof(ether_header));

    // Ignore packets that aren't IPv4
    if (iph->ip_v != 4) {
        return false;
    }

    // Ignore fragmented packets.
    if (iph->ip_off & htons(IP_MF|IP_OFFMASK)) {
        return false;
    }

    // IP header length, and transport header length.
    const unsigned int ihlen = iph->ip_hl * 4;
    if (ihlen < sizeof(ip)) {
        // Header length field is smaller than the fixed IPv4 header.
        return false;
    }
    unsigned int thlen = 0;

    switch (iph->ip_p) {
    case IPPROTO_TCP: {
        const tcphdr *th = (const tcphdr *)((const char *)iph + ihlen);
        thlen = th->th_off * 4;
        if (thlen < sizeof(tcphdr)) {
            // Data offset is smaller than the fixed TCP header.
            return false;
        }
        break;
    }
    case IPPROTO_UDP:
        thlen = sizeof(udphdr);
        break;
    default:
        return false;
    }

    // A total length shorter than the headers would make the unsigned
    // subtraction below wrap around to a huge payload length.
    const unsigned int ipLen = ntohs(iph->ip_len);
    if (ipLen < ihlen + thlen) {
        return false;
    }

    *offset = sizeof(ether_header) + ihlen + thlen;
    *length = ipLen - ihlen - thlen;

    return *length != 0;
}
|
||||
|
||||
static unsigned parseFlags(const string &flagsStr) {
|
||||
unsigned flags = 0;
|
||||
for (const auto &c : flagsStr) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
flags |= HS_FLAG_CASELESS; break;
|
||||
case 'm':
|
||||
flags |= HS_FLAG_MULTILINE; break;
|
||||
case 's':
|
||||
flags |= HS_FLAG_DOTALL; break;
|
||||
case 'H':
|
||||
flags |= HS_FLAG_SINGLEMATCH; break;
|
||||
case 'V':
|
||||
flags |= HS_FLAG_ALLOWEMPTY; break;
|
||||
case '8':
|
||||
flags |= HS_FLAG_UTF8; break;
|
||||
case 'W':
|
||||
flags |= HS_FLAG_UCP; break;
|
||||
default:
|
||||
cerr << "Unsupported flag \'" << c << "\'" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
static void parseFile(const char *filename, vector<string> &patterns,
|
||||
vector<unsigned> &flags, vector<unsigned> &ids) {
|
||||
ifstream inFile(filename);
|
||||
if (!inFile.good()) {
|
||||
cerr << "ERROR: Can't open pattern file \"" << filename << "\"" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
for (unsigned i = 1; !inFile.eof(); ++i) {
|
||||
string line;
|
||||
getline(inFile, line);
|
||||
|
||||
// if line is empty, or a comment, we can skip it
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// otherwise, it should be ID:PCRE, e.g.
|
||||
// 10001:/foobar/is
|
||||
|
||||
size_t colonIdx = line.find_first_of(':');
|
||||
if (colonIdx == string::npos) {
|
||||
cerr << "ERROR: Could not parse line " << i << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// we should have an unsigned int as an ID, before the colon
|
||||
unsigned id = std::stoi(line.substr(0, colonIdx).c_str());
|
||||
|
||||
// rest of the expression is the PCRE
|
||||
const string expr(line.substr(colonIdx + 1));
|
||||
|
||||
size_t flagsStart = expr.find_last_of('/');
|
||||
if (flagsStart == string::npos) {
|
||||
cerr << "ERROR: no trailing '/' char" << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
string pcre(expr.substr(1, flagsStart - 1));
|
||||
string flagsStr(expr.substr(flagsStart + 1, expr.size() - flagsStart));
|
||||
unsigned flag = parseFlags(flagsStr);
|
||||
|
||||
patterns.push_back(pcre);
|
||||
flags.push_back(flag);
|
||||
ids.push_back(id);
|
||||
}
|
||||
}
|
||||
|
221
examples/simplegrep.c
Normal file
221
examples/simplegrep.c
Normal file
@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Hyperscan example program 1: simplegrep
|
||||
*
|
||||
* This is a simple example of Hyperscan's most basic functionality: it will
|
||||
* search a given input file for a pattern supplied as a command-line argument.
|
||||
* It is intended to demonstrate correct usage of the hs_compile and hs_scan
|
||||
* functions of Hyperscan.
|
||||
*
|
||||
* Patterns are scanned in 'DOTALL' mode, which is equivalent to PCRE's '/s'
|
||||
* modifier. This behaviour can be changed by modifying the "flags" argument to
|
||||
* hs_compile.
|
||||
*
|
||||
* Build instructions:
|
||||
*
|
||||
* gcc -o simplegrep simplegrep.c $(pkg-config --cflags --libs libhs)
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* ./simplegrep <pattern> <input file>
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* ./simplegrep int simplegrep.c
|
||||
*
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <hs.h>
|
||||
|
||||
/**
 * Match callback handed to hs_scan; invoked once per match. @a ctx carries
 * application-specific state: in this example it is the pattern string that
 * was searched for, which is printed together with the match end offset
 * @a to. The remaining arguments are not used. Always returns 0.
 */
static int eventHandler(unsigned int id, unsigned long long from,
                        unsigned long long to, unsigned int flags, void *ctx) {
    char *pattern = (char *)ctx;

    (void)id;
    (void)from;
    (void)flags;

    printf("Match for pattern \"%s\" at offset %llu\n", pattern, to);
    return 0;
}
|
||||
|
||||
/**
 * Fill a data buffer from the given filename, returning it and filling @a
 * length with its length. Returns NULL on failure (file cannot be opened,
 * sized or read, is empty, or memory cannot be allocated); a message is
 * printed to stderr in each case. Files larger than UINT_MAX bytes are
 * clipped to UINT_MAX bytes with a warning. The returned buffer is allocated
 * with malloc() and must be released by the caller with free().
 */
static char *readInputData(const char *inputFN, unsigned int *length) {
    /* Open in binary mode: the size obtained from ftell() below is only
     * guaranteed to match the number of bytes fread() delivers for binary
     * streams. */
    FILE *f = fopen(inputFN, "rb");
    if (!f) {
        fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
                strerror(errno));
        return NULL;
    }

    /* We use fseek/ftell to get our data length, in order to keep this example
     * code as portable as possible. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }
    long dataLen = ftell(f);
    if (dataLen < 0) {
        fprintf(stderr, "ERROR: ftell() failed: %s\n", strerror(errno));
        fclose(f);
        return NULL;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "ERROR: unable to seek file \"%s\": %s\n", inputFN,
                strerror(errno));
        fclose(f);
        return NULL;
    }

    /* Hyperscan's hs_scan function accepts length as an unsigned int, so we
     * limit the size of our buffer appropriately. */
    if ((unsigned long)dataLen > UINT_MAX) {
        dataLen = UINT_MAX;
        printf("WARNING: clipping data to %ld bytes\n", dataLen);
    } else if (dataLen == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        fclose(f);
        return NULL;
    }

    char *inputData = malloc((size_t)dataLen);
    if (!inputData) {
        fprintf(stderr, "ERROR: unable to malloc %ld bytes\n", dataLen);
        fclose(f);
        return NULL;
    }

    char *p = inputData;
    size_t bytesLeft = (size_t)dataLen;
    while (bytesLeft) {
        size_t bytesRead = fread(p, 1, bytesLeft, f);
        bytesLeft -= bytesRead;
        p += bytesRead;
        if (ferror(f) != 0) {
            fprintf(stderr, "ERROR: fread() failed\n");
            free(inputData);
            fclose(f);
            return NULL;
        }
        if (bytesRead == 0) {
            /* End of file reached before dataLen bytes were delivered (for
             * example the file shrank after it was sized). Stop here rather
             * than spinning forever on a zero-byte read. */
            break;
        }
    }

    fclose(f);

    /* Report only what was actually read. */
    size_t bytesGot = (size_t)dataLen - bytesLeft;
    if (bytesGot == 0) {
        fprintf(stderr, "ERROR: input file \"%s\" is empty\n", inputFN);
        free(inputData);
        return NULL;
    }

    *length = (unsigned int)bytesGot;
    return inputData;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "Usage: %s <pattern> <input file>\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *pattern = argv[1];
|
||||
char *inputFN = argv[2];
|
||||
|
||||
/* First, we attempt to compile the pattern provided on the command line.
|
||||
* We assume 'DOTALL' semantics, meaning that the '.' meta-character will
|
||||
* match newline characters. The compiler will analyse the given pattern and
|
||||
* either return a compiled Hyperscan database, or an error message
|
||||
* explaining why the pattern didn't compile.
|
||||
*/
|
||||
hs_database_t *database;
|
||||
hs_compile_error_t *compile_err;
|
||||
if (hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &database,
|
||||
&compile_err) != HS_SUCCESS) {
|
||||
fprintf(stderr, "ERROR: Unable to compile pattern \"%s\": %s\n",
|
||||
pattern, compile_err->message);
|
||||
hs_free_compile_error(compile_err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Next, we read the input data file into a buffer. */
|
||||
unsigned int length;
|
||||
char *inputData = readInputData(inputFN, &length);
|
||||
if (!inputData) {
|
||||
hs_free_database(database);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Finally, we issue a call to hs_scan, which will search the input buffer
|
||||
* for the pattern represented in the bytecode. Note that in order to do
|
||||
* this, scratch space needs to be allocated with the hs_alloc_scratch
|
||||
* function. In typical usage, you would reuse this scratch space for many
|
||||
* calls to hs_scan, but as we're only doing one, we'll be allocating it
|
||||
* and deallocating it as soon as our matching is done.
|
||||
*
|
||||
* When matches occur, the specified callback function (eventHandler in
|
||||
* this file) will be called. Note that although it is reminiscent of
|
||||
* asynchronous APIs, Hyperscan operates synchronously: all matches will be
|
||||
* found, and all callbacks issued, *before* hs_scan returns.
|
||||
*
|
||||
* In this example, we provide the input pattern as the context pointer so
|
||||
* that the callback is able to print out the pattern that matched on each
|
||||
* match event.
|
||||
*/
|
||||
hs_scratch_t *scratch = NULL;
|
||||
if (hs_alloc_scratch(database, &scratch) != HS_SUCCESS) {
|
||||
fprintf(stderr, "ERROR: Unable to allocate scratch space. Exiting.\n");
|
||||
free(inputData);
|
||||
hs_free_database(database);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Scanning %u bytes with Hyperscan\n", length);
|
||||
|
||||
if (hs_scan(database, inputData, length, 0, scratch, eventHandler,
|
||||
pattern) != HS_SUCCESS) {
|
||||
fprintf(stderr, "ERROR: Unable to scan input buffer. Exiting.\n");
|
||||
hs_free_scratch(scratch);
|
||||
free(inputData);
|
||||
hs_free_database(database);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Scanning is complete, any matches have been handled, so now we just
|
||||
* clean up and exit.
|
||||
*/
|
||||
hs_free_scratch(scratch);
|
||||
free(inputData);
|
||||
hs_free_database(database);
|
||||
return 0;
|
||||
}
|
501
include/boost-patched/graph/dominator_tree.hpp
Normal file
501
include/boost-patched/graph/dominator_tree.hpp
Normal file
@ -0,0 +1,501 @@
|
||||
//=======================================================================
|
||||
// Copyright (C) 2005-2009 Jongsoo Park <jongsoo.park -at- gmail.com>
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
//=======================================================================
|
||||
|
||||
#ifndef BOOST_GRAPH_DOMINATOR_HPP
|
||||
#define BOOST_GRAPH_DOMINATOR_HPP
|
||||
|
||||
#include <boost/config.hpp>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/concept/assert.hpp>
|
||||
|
||||
// Dominator tree computation
|
||||
|
||||
// NOTE: This file contains modifications from the distributed Boost version to
|
||||
// correctly support supplying a vertex index map to the algorithm. To
|
||||
// differentiate it, it has been moved into the boost_ue2 namespace.
|
||||
|
||||
namespace boost_ue2 {
|
||||
|
||||
using namespace boost;
|
||||
|
||||
namespace detail {
|
||||
/**
|
||||
* An extended time_stamper which also records vertices for each dfs number
|
||||
*/
|
||||
template<class TimeMap, class VertexVector, class TimeT, class Tag>
|
||||
class time_stamper_with_vertex_vector
|
||||
: public base_visitor<
|
||||
time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag> >
|
||||
{
|
||||
public :
|
||||
typedef Tag event_filter;
|
||||
time_stamper_with_vertex_vector(TimeMap timeMap, VertexVector& v,
|
||||
TimeT& t)
|
||||
: timeStamper_(timeMap, t), v_(v) { }
|
||||
|
||||
template<class Graph>
|
||||
void
|
||||
operator()(const typename property_traits<TimeMap>::key_type& v,
|
||||
const Graph& g)
|
||||
{
|
||||
timeStamper_(v, g);
|
||||
v_[timeStamper_.m_time] = v;
|
||||
}
|
||||
|
||||
private :
|
||||
time_stamper<TimeMap, TimeT, Tag> timeStamper_;
|
||||
VertexVector& v_;
|
||||
};
|
||||
|
||||
/**
|
||||
* A convenient way to create a time_stamper_with_vertex_vector
|
||||
*/
|
||||
template<class TimeMap, class VertexVector, class TimeT, class Tag>
|
||||
time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag>
|
||||
stamp_times_with_vertex_vector(TimeMap timeMap, VertexVector& v, TimeT& t,
|
||||
Tag)
|
||||
{
|
||||
return time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT,
|
||||
Tag>(timeMap, v, t);
|
||||
}
|
||||
|
||||
template<class Graph, class IndexMap, class TimeMap, class PredMap,
|
||||
class DomTreePredMap>
|
||||
class dominator_visitor
|
||||
{
|
||||
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
|
||||
|
||||
public :
|
||||
/**
|
||||
* @param g [in] the target graph of the dominator tree
|
||||
* @param entry [in] the entry node of g
|
||||
* @param indexMap [in] the vertex index map for g
|
||||
* @param domTreePredMap [out] the immediate dominator map
|
||||
* (parent map in dominator tree)
|
||||
*/
|
||||
dominator_visitor(const Graph& g, const Vertex& entry,
|
||||
const IndexMap& indexMap,
|
||||
DomTreePredMap domTreePredMap)
|
||||
: semi_(num_vertices(g)),
|
||||
ancestor_(num_vertices(g), graph_traits<Graph>::null_vertex()),
|
||||
samedom_(ancestor_),
|
||||
best_(semi_),
|
||||
semiMap_(make_iterator_property_map(semi_.begin(),
|
||||
indexMap)),
|
||||
ancestorMap_(make_iterator_property_map(ancestor_.begin(),
|
||||
indexMap)),
|
||||
bestMap_(make_iterator_property_map(best_.begin(),
|
||||
indexMap)),
|
||||
buckets_(num_vertices(g)),
|
||||
bucketMap_(make_iterator_property_map(buckets_.begin(),
|
||||
indexMap)),
|
||||
entry_(entry),
|
||||
domTreePredMap_(domTreePredMap),
|
||||
numOfVertices_(num_vertices(g)),
|
||||
samedomMap(make_iterator_property_map(samedom_.begin(),
|
||||
indexMap))
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
operator()(const Vertex& n, const TimeMap& dfnumMap,
|
||||
const PredMap& parentMap, const Graph& g)
|
||||
{
|
||||
if (n == entry_) return;
|
||||
|
||||
const Vertex p(get(parentMap, n));
|
||||
Vertex s(p);
|
||||
|
||||
// 1. Calculate the semidominator of n,
|
||||
// based on the semidominator thm.
|
||||
// * Semidominator thm. : To find the semidominator of a node n,
|
||||
// consider all predecessors v of n in the CFG (Control Flow Graph).
|
||||
// - If v is a proper ancestor of n in the spanning tree
|
||||
// (so dfnum(v) < dfnum(n)), then v is a candidate for semi(n)
|
||||
// - If v is a non-ancestor of n (so dfnum(v) > dfnum(n))
|
||||
// then for each u that is an ancestor of v (or u = v),
|
||||
// Let semi(u) be a candidate for semi(n)
|
||||
// of all these candidates, the one with lowest dfnum is
|
||||
// the semidominator of n.
|
||||
|
||||
// For each predecessor of n
|
||||
typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
|
||||
for (boost::tie(inItr, inEnd) = in_edges(n, g); inItr != inEnd; ++inItr)
|
||||
{
|
||||
const Vertex v = source(*inItr, g);
|
||||
// To deal with unreachable nodes
|
||||
if (get(dfnumMap, v) < 0 || get(dfnumMap, v) >= numOfVertices_)
|
||||
continue;
|
||||
|
||||
Vertex s2;
|
||||
if (get(dfnumMap, v) <= get(dfnumMap, n))
|
||||
s2 = v;
|
||||
else
|
||||
s2 = get(semiMap_, ancestor_with_lowest_semi_(v, dfnumMap));
|
||||
|
||||
if (get(dfnumMap, s2) < get(dfnumMap, s))
|
||||
s = s2;
|
||||
}
|
||||
put(semiMap_, n, s);
|
||||
|
||||
// 2. Calculation of n's dominator is deferred until
|
||||
// the path from s to n has been linked into the forest
|
||||
get(bucketMap_, s).push_back(n);
|
||||
get(ancestorMap_, n) = p;
|
||||
get(bestMap_, n) = n;
|
||||
|
||||
// 3. Now that the path from p to v has been linked into
|
||||
// the spanning forest, these lines calculate the dominator of v,
|
||||
// based on the dominator thm., or else defer the calculation
|
||||
// until y's dominator is known
|
||||
// * Dominator thm. : On the spanning-tree path below semi(n) and
|
||||
// above or including n, let y be the node
|
||||
// with the smallest-numbered semidominator. Then,
|
||||
//
|
||||
// idom(n) = semi(n) if semi(y)=semi(n) or
|
||||
// idom(y) if semi(y) != semi(n)
|
||||
typename std::deque<Vertex>::iterator buckItr;
|
||||
for (buckItr = get(bucketMap_, p).begin();
|
||||
buckItr != get(bucketMap_, p).end();
|
||||
++buckItr)
|
||||
{
|
||||
const Vertex v(*buckItr);
|
||||
const Vertex y(ancestor_with_lowest_semi_(v, dfnumMap));
|
||||
if (get(semiMap_, y) == get(semiMap_, v))
|
||||
put(domTreePredMap_, v, p);
|
||||
else
|
||||
put(samedomMap, v, y);
|
||||
}
|
||||
|
||||
get(bucketMap_, p).clear();
|
||||
}
|
||||
|
||||
protected :
|
||||
|
||||
/**
|
||||
* Evaluate function in Tarjan's path compression
|
||||
*/
|
||||
const Vertex
|
||||
ancestor_with_lowest_semi_(const Vertex& v, const TimeMap& dfnumMap)
|
||||
{
|
||||
const Vertex a(get(ancestorMap_, v));
|
||||
|
||||
if (get(ancestorMap_, a) != graph_traits<Graph>::null_vertex())
|
||||
{
|
||||
const Vertex b(ancestor_with_lowest_semi_(a, dfnumMap));
|
||||
|
||||
put(ancestorMap_, v, get(ancestorMap_, a));
|
||||
|
||||
if (get(dfnumMap, get(semiMap_, b)) <
|
||||
get(dfnumMap, get(semiMap_, get(bestMap_, v))))
|
||||
put(bestMap_, v, b);
|
||||
}
|
||||
|
||||
return get(bestMap_, v);
|
||||
}
|
||||
|
||||
std::vector<Vertex> semi_, ancestor_, samedom_, best_;
|
||||
PredMap semiMap_, ancestorMap_, bestMap_;
|
||||
std::vector< std::deque<Vertex> > buckets_;
|
||||
|
||||
iterator_property_map<typename std::vector<std::deque<Vertex> >::iterator,
|
||||
IndexMap> bucketMap_;
|
||||
|
||||
const Vertex& entry_;
|
||||
DomTreePredMap domTreePredMap_;
|
||||
const VerticesSizeType numOfVertices_;
|
||||
|
||||
public :
|
||||
|
||||
PredMap samedomMap;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* @brief Build dominator tree using Lengauer-Tarjan algorithm.
|
||||
* It takes O((V+E)log(V+E)) time.
|
||||
*
|
||||
* @pre dfnumMap, parentMap and verticesByDFNum have dfs results corresponding
|
||||
* indexMap.
|
||||
* If dfs has already run before,
|
||||
* this function would be good for saving computations.
|
||||
* @pre Unreachable nodes must be masked as
|
||||
* graph_traits<Graph>::null_vertex in parentMap.
|
||||
* @pre Unreachable nodes must be masked as
|
||||
* (std::numeric_limits<VerticesSizeType>::max)() in dfnumMap.
|
||||
*
|
||||
* @param domTreePredMap [out] : immediate dominator map (parent map
|
||||
* in dom. tree)
|
||||
*
|
||||
* @note reference Appel. p. 452~453. algorithm 19.9, 19.10.
|
||||
*
|
||||
* @todo : Optimization in Finding Dominators in Practice, Loukas Georgiadis
|
||||
*/
|
||||
template<class Graph, class IndexMap, class TimeMap, class PredMap,
|
||||
class VertexVector, class DomTreePredMap>
|
||||
void
|
||||
lengauer_tarjan_dominator_tree_without_dfs
|
||||
(const Graph& g,
|
||||
const typename graph_traits<Graph>::vertex_descriptor& entry,
|
||||
const IndexMap& indexMap,
|
||||
TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
|
||||
DomTreePredMap domTreePredMap)
|
||||
{
|
||||
// Typedefs and concept check
|
||||
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
|
||||
|
||||
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
|
||||
|
||||
const VerticesSizeType numOfVertices = num_vertices(g);
|
||||
if (numOfVertices == 0) return;
|
||||
|
||||
// 1. Visit each vertex in reverse post order and calculate sdom.
|
||||
detail::dominator_visitor<Graph, IndexMap, TimeMap, PredMap, DomTreePredMap>
|
||||
visitor(g, entry, indexMap, domTreePredMap);
|
||||
|
||||
VerticesSizeType i;
|
||||
for (i = 0; i < numOfVertices; ++i)
|
||||
{
|
||||
const Vertex u(verticesByDFNum[numOfVertices - 1 - i]);
|
||||
if (u != graph_traits<Graph>::null_vertex())
|
||||
visitor(u, dfnumMap, parentMap, g);
|
||||
}
|
||||
|
||||
// 2. Now all the deferred dominator calculations,
|
||||
// based on the second clause of the dominator thm., are performed
|
||||
for (i = 0; i < numOfVertices; ++i)
|
||||
{
|
||||
const Vertex n(verticesByDFNum[i]);
|
||||
|
||||
if (n == entry || n == graph_traits<Graph>::null_vertex())
|
||||
continue;
|
||||
|
||||
Vertex u = get(visitor.samedomMap, n);
|
||||
if (u != graph_traits<Graph>::null_vertex())
|
||||
{
|
||||
put(domTreePredMap, n, get(domTreePredMap, u));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unlike lengauer_tarjan_dominator_tree_without_dfs,
|
||||
* dfs is run in this function and
|
||||
* the result is written to dfnumMap, parentMap, vertices.
|
||||
*
|
||||
* If the result of dfs required after this algorithm,
|
||||
* this function can eliminate the need of rerunning dfs.
|
||||
*/
|
||||
template<class Graph, class IndexMap, class TimeMap, class PredMap,
|
||||
class VertexVector, class DomTreePredMap>
|
||||
void
|
||||
lengauer_tarjan_dominator_tree
|
||||
(const Graph& g,
|
||||
const typename graph_traits<Graph>::vertex_descriptor& entry,
|
||||
const IndexMap& indexMap,
|
||||
TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
|
||||
DomTreePredMap domTreePredMap)
|
||||
{
|
||||
// Typedefs and concept check
|
||||
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
|
||||
|
||||
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
|
||||
|
||||
// 1. Depth first visit
|
||||
const VerticesSizeType numOfVertices = num_vertices(g);
|
||||
if (numOfVertices == 0) return;
|
||||
|
||||
VerticesSizeType time =
|
||||
(std::numeric_limits<VerticesSizeType>::max)();
|
||||
std::vector<default_color_type>
|
||||
colors(numOfVertices, color_traits<default_color_type>::white());
|
||||
depth_first_visit
|
||||
(g, entry,
|
||||
make_dfs_visitor
|
||||
(make_pair(record_predecessors(parentMap, on_tree_edge()),
|
||||
detail::stamp_times_with_vertex_vector
|
||||
(dfnumMap, verticesByDFNum, time, on_discover_vertex()))),
|
||||
make_iterator_property_map(colors.begin(), indexMap));
|
||||
|
||||
// 2. Run main algorithm.
|
||||
lengauer_tarjan_dominator_tree_without_dfs(g, entry, indexMap, dfnumMap,
|
||||
parentMap, verticesByDFNum,
|
||||
domTreePredMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use vertex_index as IndexMap and make dfnumMap, parentMap, verticesByDFNum
|
||||
* internally.
|
||||
* If we don't need the result of dfs (dfnumMap, parentMap, verticesByDFNum),
|
||||
* this function would be more convenient one.
|
||||
*/
|
||||
template<class Graph, class DomTreePredMap>
|
||||
void
|
||||
lengauer_tarjan_dominator_tree
|
||||
(const Graph& g,
|
||||
const typename graph_traits<Graph>::vertex_descriptor& entry,
|
||||
DomTreePredMap domTreePredMap)
|
||||
{
|
||||
// typedefs
|
||||
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
|
||||
typedef typename property_map<Graph, vertex_index_t>::const_type IndexMap;
|
||||
typedef
|
||||
iterator_property_map<typename std::vector<VerticesSizeType>::iterator,
|
||||
IndexMap> TimeMap;
|
||||
typedef
|
||||
iterator_property_map<typename std::vector<Vertex>::iterator, IndexMap>
|
||||
PredMap;
|
||||
|
||||
// Make property maps
|
||||
const VerticesSizeType numOfVertices = num_vertices(g);
|
||||
if (numOfVertices == 0) return;
|
||||
|
||||
const IndexMap indexMap = get(vertex_index, g);
|
||||
|
||||
std::vector<VerticesSizeType> dfnum(numOfVertices, 0);
|
||||
TimeMap dfnumMap(make_iterator_property_map(dfnum.begin(), indexMap));
|
||||
|
||||
std::vector<Vertex> parent(numOfVertices,
|
||||
graph_traits<Graph>::null_vertex());
|
||||
PredMap parentMap(make_iterator_property_map(parent.begin(), indexMap));
|
||||
|
||||
std::vector<Vertex> verticesByDFNum(parent);
|
||||
|
||||
// Run main algorithm
|
||||
lengauer_tarjan_dominator_tree(g, entry,
|
||||
indexMap, dfnumMap, parentMap,
|
||||
verticesByDFNum, domTreePredMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Muchnick. p. 182, 184
|
||||
*
|
||||
* using iterative bit vector analysis
|
||||
*/
|
||||
template<class Graph, class IndexMap, class DomTreePredMap>
|
||||
void
|
||||
iterative_bit_vector_dominator_tree
|
||||
(const Graph& g,
|
||||
const typename graph_traits<Graph>::vertex_descriptor& entry,
|
||||
const IndexMap& indexMap,
|
||||
DomTreePredMap domTreePredMap)
|
||||
{
|
||||
typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
|
||||
typedef typename graph_traits<Graph>::vertex_iterator vertexItr;
|
||||
typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
|
||||
typedef
|
||||
iterator_property_map<typename std::vector< std::set<Vertex> >::iterator,
|
||||
IndexMap> vertexSetMap;
|
||||
|
||||
BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
|
||||
|
||||
// 1. Finding dominator
|
||||
// 1.1. Initialize
|
||||
const VerticesSizeType numOfVertices = num_vertices(g);
|
||||
if (numOfVertices == 0) return;
|
||||
|
||||
vertexItr vi, viend;
|
||||
boost::tie(vi, viend) = vertices(g);
|
||||
const std::set<Vertex> N(vi, viend);
|
||||
|
||||
bool change = true;
|
||||
|
||||
std::vector< std::set<Vertex> > dom(numOfVertices, N);
|
||||
vertexSetMap domMap(make_iterator_property_map(dom.begin(), indexMap));
|
||||
get(domMap, entry).clear();
|
||||
get(domMap, entry).insert(entry);
|
||||
|
||||
while (change)
|
||||
{
|
||||
change = false;
|
||||
for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
|
||||
{
|
||||
if (*vi == entry) continue;
|
||||
|
||||
std::set<Vertex> T(N);
|
||||
|
||||
typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
|
||||
for (boost::tie(inItr, inEnd) = in_edges(*vi, g); inItr != inEnd; ++inItr)
|
||||
{
|
||||
const Vertex p = source(*inItr, g);
|
||||
|
||||
std::set<Vertex> tempSet;
|
||||
std::set_intersection(T.begin(), T.end(),
|
||||
get(domMap, p).begin(),
|
||||
get(domMap, p).end(),
|
||||
std::inserter(tempSet, tempSet.begin()));
|
||||
T.swap(tempSet);
|
||||
}
|
||||
|
||||
T.insert(*vi);
|
||||
if (T != get(domMap, *vi))
|
||||
{
|
||||
change = true;
|
||||
get(domMap, *vi).swap(T);
|
||||
}
|
||||
} // end of for (boost::tie(vi, viend) = vertices(g)
|
||||
} // end of while(change)
|
||||
|
||||
// 2. Build dominator tree
|
||||
for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
|
||||
get(domMap, *vi).erase(*vi);
|
||||
|
||||
Graph domTree(numOfVertices);
|
||||
|
||||
for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
|
||||
{
|
||||
if (*vi == entry) continue;
|
||||
|
||||
// We have to iterate through copied dominator set
|
||||
const std::set<Vertex> tempSet(get(domMap, *vi));
|
||||
typename std::set<Vertex>::const_iterator s;
|
||||
for (s = tempSet.begin(); s != tempSet.end(); ++s)
|
||||
{
|
||||
typename std::set<Vertex>::iterator t;
|
||||
for (t = get(domMap, *vi).begin(); t != get(domMap, *vi).end(); )
|
||||
{
|
||||
typename std::set<Vertex>::iterator old_t = t;
|
||||
++t; // Done early because t may become invalid
|
||||
if (*old_t == *s) continue;
|
||||
if (get(domMap, *s).find(*old_t) != get(domMap, *s).end())
|
||||
get(domMap, *vi).erase(old_t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
|
||||
{
|
||||
if (*vi != entry && get(domMap, *vi).size() == 1)
|
||||
{
|
||||
Vertex temp = *get(domMap, *vi).begin();
|
||||
put(domTreePredMap, *vi, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Graph, class DomTreePredMap>
|
||||
void
|
||||
iterative_bit_vector_dominator_tree
|
||||
(const Graph& g,
|
||||
const typename graph_traits<Graph>::vertex_descriptor& entry,
|
||||
DomTreePredMap domTreePredMap)
|
||||
{
|
||||
typename property_map<Graph, vertex_index_t>::const_type
|
||||
indexMap = get(vertex_index, g);
|
||||
|
||||
iterative_bit_vector_dominator_tree(g, entry, indexMap, domTreePredMap);
|
||||
}
|
||||
} // namespace boost
|
||||
|
||||
#endif // BOOST_GRAPH_DOMINATOR_HPP
|
10
libhs.pc.in
Normal file
10
libhs.pc.in
Normal file
@ -0,0 +1,10 @@
|
||||
# pkg-config template for libhs; @VAR@ placeholders are substituted at
# configure time. All directories derive from ${prefix} so that the package
# can be relocated with `pkg-config --define-variable=prefix=...`.
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include

Name: libhs
Description: Intel(R) Hyperscan Library
Version: @HS_VERSION@
Libs: -L${libdir} -lhs
Cflags: -I${includedir}/hs
|
109
src/alloc.c
Normal file
109
src/alloc.c
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions for setting custom allocators.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
|
||||
#define default_malloc malloc
|
||||
#define default_free free
|
||||
|
||||
hs_alloc_t hs_database_alloc = default_malloc;
|
||||
hs_alloc_t hs_misc_alloc = default_malloc;
|
||||
hs_alloc_t hs_scratch_alloc = default_malloc;
|
||||
hs_alloc_t hs_stream_alloc = default_malloc;
|
||||
|
||||
hs_free_t hs_database_free = default_free;
|
||||
hs_free_t hs_misc_free = default_free;
|
||||
hs_free_t hs_scratch_free = default_free;
|
||||
hs_free_t hs_stream_free = default_free;
|
||||
|
||||
static
|
||||
hs_alloc_t normalise_alloc(hs_alloc_t a) {
|
||||
if (!a) {
|
||||
return default_malloc;
|
||||
} else {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
hs_free_t normalise_free(hs_free_t f) {
|
||||
if (!f) {
|
||||
return default_free;
|
||||
} else {
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_set_database_allocator(allocfunc, freefunc);
|
||||
hs_set_misc_allocator(allocfunc, freefunc);
|
||||
hs_set_stream_allocator(allocfunc, freefunc);
|
||||
hs_set_scratch_allocator(allocfunc, freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_database_alloc = normalise_alloc(allocfunc);
|
||||
hs_database_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_misc_alloc = normalise_alloc(allocfunc);
|
||||
hs_misc_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_scratch_alloc = normalise_alloc(allocfunc);
|
||||
hs_scratch_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_stream_alloc = normalise_alloc(allocfunc);
|
||||
hs_stream_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
66
src/allocator.h
Normal file
66
src/allocator.h
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ALLOCATOR_H
|
||||
#define ALLOCATOR_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
extern hs_alloc_t hs_database_alloc;
|
||||
extern hs_alloc_t hs_misc_alloc;
|
||||
extern hs_alloc_t hs_scratch_alloc;
|
||||
extern hs_alloc_t hs_stream_alloc;
|
||||
|
||||
extern hs_free_t hs_database_free;
|
||||
extern hs_free_t hs_misc_free;
|
||||
extern hs_free_t hs_scratch_free;
|
||||
extern hs_free_t hs_stream_free;
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
/** \brief Check the results of an alloc done with hs_alloc for alignment.
|
||||
*
|
||||
* If we have incorrect alignment, return an error. Caller should free the
|
||||
* offending block. */
|
||||
static really_inline
|
||||
hs_error_t hs_check_alloc(const void *mem) {
|
||||
hs_error_t ret = HS_SUCCESS;
|
||||
if (!mem) {
|
||||
ret = HS_NOMEM;
|
||||
} else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
|
||||
ret = HS_BAD_ALLOC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
310
src/compiler/asserts.cpp
Normal file
310
src/compiler/asserts.cpp
Normal file
@ -0,0 +1,310 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*
|
||||
* This pass converts the temporary assert vertices created by the Glushkov
|
||||
* construction process above (vertices with special assertions flags) into
|
||||
* edges between those vertices' neighbours in the graph.
|
||||
*
|
||||
* These edges have the appropriate flags applied to them -- a path (u,t,v)
|
||||
* through an assert vertex t will be replaced with the edge (u,v) with the
|
||||
* assertion flags from t.
|
||||
*
|
||||
* Edges with mutually incompatible flags (such as the conjunction of
|
||||
* word-to-word and word-to-nonword) are dropped.
|
||||
*/
|
||||
#include "asserts.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_prune.h"
|
||||
#include "nfagraph/ng_redundancy.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/compile_error.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <queue>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Hard limit on the maximum number of edges we'll clone before we throw up
|
||||
* our hands and report 'Pattern too large.' */
|
||||
static const size_t MAX_ASSERT_EDGES = 300000;
|
||||
|
||||
/** Flags representing the word-boundary assertions, \\b or \\B. */
|
||||
static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
|
||||
| POS_FLAG_ASSERT_WORD_TO_NONWORD
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_WORD
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD
|
||||
| POS_FLAG_ASSERT_WORD_TO_WORD_UCP
|
||||
| POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
|
||||
|
||||
#define OPEN_EDGE 0U
|
||||
#define DEAD_EDGE (~0U)
|
||||
|
||||
static
|
||||
u32 disjunct(u32 flags1, u32 flags2) {
|
||||
/* from two asserts in parallel */
|
||||
DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);
|
||||
u32 rv;
|
||||
if (flags1 == DEAD_EDGE) {
|
||||
rv = flags2;
|
||||
} else if (flags2 == DEAD_EDGE) {
|
||||
rv = flags1;
|
||||
} else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
|
||||
rv = OPEN_EDGE;
|
||||
} else {
|
||||
rv = flags1 | flags2;
|
||||
}
|
||||
DEBUG_PRINTF("--> %x\n", rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
u32 conjunct(u32 flags1, u32 flags2) {
|
||||
/* from two asserts in series */
|
||||
DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);
|
||||
u32 rv;
|
||||
if (flags1 == OPEN_EDGE) {
|
||||
rv = flags2;
|
||||
} else if (flags2 == OPEN_EDGE) {
|
||||
rv = flags1;
|
||||
} else if (flags1 & flags2) {
|
||||
rv = flags1 & flags2;
|
||||
} else {
|
||||
rv = DEAD_EDGE; /* the conjunction of two different word boundary
|
||||
* assertion is impassable */
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("--> %x\n", rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
|
||||
|
||||
static
|
||||
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
|
||||
u32 &assert_edge_count) {
|
||||
DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index);
|
||||
|
||||
const u32 flags = g[t].assert_flags;
|
||||
DEBUG_PRINTF("consider assert vertex %u with flags %u\n",
|
||||
g[t].index, flags);
|
||||
|
||||
// Wire up all the predecessors to all the successors.
|
||||
|
||||
for (const auto &inEdge : in_edges_range(t, g)) {
|
||||
NFAVertex u = source(inEdge, g);
|
||||
if (u == t) {
|
||||
continue; // ignore self-loops
|
||||
}
|
||||
|
||||
const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
|
||||
flags);
|
||||
if (flags_inc_in == DEAD_EDGE) {
|
||||
DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
|
||||
g[inEdge].assert_flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &outEdge : out_edges_range(t, g)) {
|
||||
NFAVertex v = target(outEdge, g);
|
||||
|
||||
DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index,
|
||||
g[t].index, g[v].index);
|
||||
|
||||
if (v == t) {
|
||||
continue; // ignore self-loops
|
||||
}
|
||||
|
||||
const u32 flags_final = conjunct(g[outEdge].assert_flags,
|
||||
flags_inc_in);
|
||||
|
||||
if (flags_final == DEAD_EDGE) {
|
||||
DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
|
||||
g[outEdge].assert_flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
|
||||
&& v == g.acceptEod) {
|
||||
DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Replace path (u,t,v) with direct edge (u,v), unless the edge
|
||||
* already exists, in which case we just need to edit its
|
||||
* properties.
|
||||
*
|
||||
* Use edge_cache to prevent us going O(N).
|
||||
*/
|
||||
auto cache_key = make_pair(u, v);
|
||||
auto ecit = edge_cache.find(cache_key);
|
||||
if (ecit == edge_cache.end()) {
|
||||
DEBUG_PRINTF("adding edge %u %u\n", g[u].index,
|
||||
g[v].index);
|
||||
NFAEdge e = add_edge(u, v, g).first;
|
||||
edge_cache.emplace(cache_key, e);
|
||||
g[e].assert_flags = flags;
|
||||
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
||||
throw CompileError(g.expressionIndex,
|
||||
"Pattern is too large.");
|
||||
}
|
||||
} else {
|
||||
NFAEdge e = ecit->second;
|
||||
DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index,
|
||||
g[v].index, g[t].index);
|
||||
// Edge already exists.
|
||||
u32 &e_flags = g[e].assert_flags;
|
||||
e_flags = disjunct(e_flags, flags_final);
|
||||
assert(e_flags != DEAD_EDGE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clear vertex t to remove all the old edges.
|
||||
/* no need to clear the cache, as we will never look up its edge as it is
|
||||
* unreachable */
|
||||
clear_vertex(t, g);
|
||||
}
|
||||
|
||||
static
|
||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
||||
// Don't try and set the report ID of a special vertex.
|
||||
assert(!is_special(v, g));
|
||||
|
||||
// There should be no reports set already.
|
||||
assert(g[v].reports.empty());
|
||||
|
||||
Report r = rm.getBasicInternalReport(g, adj);
|
||||
|
||||
g[v].reports.insert(rm.getInternalId(r));
|
||||
DEBUG_PRINTF("set report id for vertex %u, adj %d\n",
|
||||
g[v].index, adj);
|
||||
}
|
||||
|
||||
static
|
||||
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
|
||||
vector<NFAEdge> dead;
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("mls %u %08x\n", g[v].index,
|
||||
g[v].assert_flags);
|
||||
|
||||
/* we have found a multi-line start (maybe more than one) */
|
||||
|
||||
/* we need to interpose a dummy dot vertex between v and accept if
|
||||
* required so that ^ doesn't match trailing \n */
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (target(e, g) == g.accept) {
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
/* assert has been resolved; clear flag */
|
||||
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
|
||||
}
|
||||
|
||||
for (const auto &e : dead) {
|
||||
NFAVertex dummy = add_vertex(g);
|
||||
g[dummy].char_reach.setall();
|
||||
setReportId(rm, g, dummy, -1);
|
||||
add_edge(source(e, g), dummy, g[e], g);
|
||||
add_edge(dummy, g.accept, g);
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
}
|
||||
|
||||
static
|
||||
bool hasAssertVertices(const NGHolder &g) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
int flags = g[v].assert_flags;
|
||||
if (flags & WORDBOUNDARY_FLAGS) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*
|
||||
* Remove the horrors that are the temporary assert vertices which arise from
|
||||
* our construction method. Allows the rest of our code base to live in
|
||||
* blissful ignorance of their existence. */
|
||||
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
|
||||
size_t num = 0;
|
||||
|
||||
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
|
||||
|
||||
// Sweep over the graph and ascertain that we do actually have vertices
|
||||
// with assertion flags set. Otherwise, we're done.
|
||||
if (!hasAssertVertices(g)) {
|
||||
DEBUG_PRINTF("no assert vertices, done\n");
|
||||
return;
|
||||
}
|
||||
|
||||
u32 assert_edge_count = 0;
|
||||
|
||||
// Build a cache of (u, v) vertex pairs to edge descriptors.
|
||||
edge_cache_t edge_cache;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
edge_cache[make_pair(source(e, g), target(e, g))] = e;
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
|
||||
replaceAssertVertex(g, v, edge_cache, assert_edge_count);
|
||||
num++;
|
||||
}
|
||||
}
|
||||
|
||||
checkForMultilineStart(rm, g);
|
||||
|
||||
if (num) {
|
||||
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
|
||||
pruneUseless(g);
|
||||
pruneEmptyVertices(g);
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
|
||||
assert(!hasAssertVertices(g));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
51
src/compiler/asserts.h
Normal file
51
src/compiler/asserts.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*/
|
||||
#ifndef ASSERTS_H
|
||||
#define ASSERTS_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ReportManager;
|
||||
class NGWrapper;
|
||||
|
||||
/** \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*
|
||||
* Remove the horrors that are the temporary assert vertices which arise from
|
||||
* our construction method. Allows the rest of our code base to live in
|
||||
* blissful ignorance of their existence. */
|
||||
void removeAssertVertices(ReportManager &rm, NGWrapper &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ASSERTS_H
|
459
src/compiler/compiler.cpp
Normal file
459
src/compiler/compiler.cpp
Normal file
@ -0,0 +1,459 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end interface.
|
||||
*/
|
||||
#include "asserts.h"
|
||||
#include "compiler.h"
|
||||
#include "database.h"
|
||||
#include "grey.h"
|
||||
#include "hs_internal.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfagraph/ng_builder.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/buildstate.h"
|
||||
#include "parser/dump.h"
|
||||
#include "parser/Component.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h" // for flags
|
||||
#include "parser/position.h"
|
||||
#include "parser/position_dump.h"
|
||||
#include "parser/position_info.h"
|
||||
#include "parser/prefilter.h"
|
||||
#include "parser/shortcut_literal.h"
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/utf8_validate.h"
|
||||
#include "smallwrite/smallwrite_build.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_build_dump.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
|
||||
static
|
||||
void validateExt(const hs_expr_ext &ext) {
|
||||
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
|
||||
HS_EXT_FLAG_MAX_OFFSET |
|
||||
HS_EXT_FLAG_MIN_LENGTH;
|
||||
if (ext.flags & ~ALL_EXT_FLAGS) {
|
||||
throw CompileError("Invalid hs_expr_ext flag set.");
|
||||
}
|
||||
|
||||
if ((ext.flags & HS_EXT_FLAG_MIN_OFFSET) &&
|
||||
(ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
|
||||
(ext.min_offset > ext.max_offset)) {
|
||||
throw CompileError("In hs_expr_ext, min_offset must be less than or "
|
||||
"equal to max_offset.");
|
||||
}
|
||||
|
||||
if ((ext.flags & HS_EXT_FLAG_MIN_LENGTH) &&
|
||||
(ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
|
||||
(ext.min_length > ext.max_offset)) {
|
||||
throw CompileError("In hs_expr_ext, min_length must be less than or "
|
||||
"equal to max_offset.");
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Parse \a expression and record its flags and extended parameters.
 *
 * \param index_in index stored in the \c index member
 * \param expression pattern text handed to parse()
 * \param flags HS_FLAG_* bits for this expression
 * \param actionId ID stored in the \c id member
 * \param ext optional extended parameters; may be null
 *
 * Throws ParseError if parsing fails or a UTF-8 mode expression is not valid
 * UTF-8. Throws CompileError for unrecognised flags, for unsupported flag
 * combinations with HS_FLAG_SOM_LEFTMOST, and for inconsistent extended
 * parameters. */
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
                                   unsigned flags, ReportID actionId,
                                   const hs_expr_ext *ext)
    : utf8(false),
      allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
      highlander(flags & HS_FLAG_SINGLEMATCH),
      prefilter(flags & HS_FLAG_PREFILTER),
      som(SOM_NONE),
      index(index_in),
      id(actionId),
      min_offset(0),
      max_offset(MAX_OFFSET),
      min_length(0) {
    ParseMode mode(flags);
    component = parse(expression, mode);

    // parse() may have changed mode.utf8, so pick it up afterwards.
    utf8 = mode.utf8;
    if (utf8 && !isValidUtf8(expression)) {
        throw ParseError("Expression is not valid UTF-8.");
    }

    if (!component) {
        assert(0); // parse() should have thrown a ParseError.
        throw ParseError("Parse error.");
    }

    if (flags & ~HS_FLAG_ALL) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    const bool want_som_leftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;
    const bool want_prefilter = (flags & HS_FLAG_PREFILTER) != 0;

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if ((flags & HS_FLAG_SINGLEMATCH) && want_som_leftmost) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // FIXME: we disallow prefilter + SOM, see UE-1899.
    if (want_prefilter && want_som_leftmost) {
        throw CompileError("HS_FLAG_PREFILTER is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Set SOM type.
    if (want_som_leftmost) {
        som = SOM_LEFT;
    }

    // Apply extended parameters, if present; only fields whose flag bit is
    // set override the defaults from the initialiser list.
    if (ext) {
        // Ensure that the given parameters make sense.
        validateExt(*ext);

        const auto ext_flags = ext->flags;
        if (ext_flags & HS_EXT_FLAG_MIN_OFFSET) {
            min_offset = ext->min_offset;
        }
        if (ext_flags & HS_EXT_FLAG_MAX_OFFSET) {
            max_offset = ext->max_offset;
        }
        if (ext_flags & HS_EXT_FLAG_MIN_LENGTH) {
            min_length = ext->min_length;
        }
    }

    // These are validated in validateExt, so an error will already have been
    // thrown if these conditions don't hold.
    assert(max_offset >= min_offset);
    assert(max_offset >= min_length);

    // Since prefiltering and SOM aren't supported together, we must squash any
    // min_length constraint as well.
    if (want_prefilter && min_length) {
        DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
        min_length = 0;
    }
}
|
||||
|
||||
#if defined(DUMP_SUPPORT) || defined(DEBUG)
/**
 * \brief Emit the component tree of \a expr for inspection.
 *
 * With DEBUG defined, the tree is printed to stdout. With DUMP_SUPPORT
 * defined and Grey::DUMP_PARSE set in grey.dumpFlags, the tree is written to
 * "<dumpPath>Expr_<index>_componenttree_<stage>.txt".
 */
void dumpExpression(UNUSED const ParsedExpression &expr,
                    UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
    DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
                 expr.index);
    ostringstream rendered;
    dumpTree(rendered, expr.component.get());
    printf("%s\n", rendered.str().c_str());
#endif // DEBUG

#if defined(DUMP_SUPPORT)
    // Nothing to write unless parse dumping was requested.
    if (!(grey.dumpFlags & Grey::DUMP_PARSE)) {
        return;
    }

    stringstream path;
    path << grey.dumpPath << "Expr_" << expr.index;
    path << "_componenttree_" << stage << ".txt";

    ofstream out(path.str().c_str());
    out << "Component Tree for " << expr.id << endl;
    dumpTree(out, expr.component.get());
    if (expr.utf8) {
        out << "UTF8 mode" << endl;
    }
#endif // DUMP_SUPPORT
}
#endif
|
||||
|
||||
/** \brief Run Component tree optimisations on \a expr. */
|
||||
static
|
||||
void optimise(ParsedExpression &expr) {
|
||||
if (expr.min_length || expr.som) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("optimising\n");
|
||||
expr.component->optimise(true /* root is connected to sds */);
|
||||
}
|
||||
|
||||
/**
 * \brief Parse one expression and hand it to \a ng.
 *
 * The expression is parsed, optionally prefiltered and optimised, and then
 * either fed to the literal shortcut or built into a graph that is added to
 * \a ng. Failures are reported by throwing (CompileError here; the parsing
 * and checking helpers may throw ParseError).
 */
void addExpression(NG &ng, unsigned index, const char *expression,
                   unsigned flags, const hs_expr_ext *ext, ReportID id) {
    assert(expression);
    const CompileContext &ctx = ng.cc;
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
                 expression);

    // Reject patterns that are too long (measured in characters).
    const bool tooLong = strlen(expression) > ctx.grey.limitPatternLength;
    if (tooLong) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // Per-expression processing: any error in here propagates to our caller
    // as an exception.
    ParsedExpression expr(index, expression, flags, id, ext);
    dumpExpression(expr, "orig", ctx.grey);

    // Prefiltering transformations are applied only on request.
    if (expr.prefilter) {
        prefilterTree(expr.component, ParseMode(flags));
        dumpExpression(expr, "prefiltered", ctx.grey);
    }

    // Zero-width assertions and other extended pcre constructs are not
    // supported yet; this throws a ParseError if the tree contains one.
    checkUnsupported(*expr.component);

    expr.component->checkEmbeddedStartAnchor(true);
    expr.component->checkEmbeddedEndAnchor(true);

    if (ctx.grey.optimiseComponentTree) {
        optimise(expr);
        dumpExpression(expr, "opt", ctx.grey);
    }

    DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
                 expr.component.get(), expr.index, expr.id);

    // SOM flags in streaming mode are only usable together with an SOM
    // precision mode.
    const bool somWithoutPrecision =
        expr.som != SOM_NONE && ctx.streaming && !ng.ssm.somPrecision();
    if (somWithoutPrecision) {
        throw CompileError("To use a SOM expression flag in streaming mode, "
                           "an SOM precision mode (e.g. "
                           "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
    }

    // A literal expression can go straight to Rose, skipping the NFA graph.
    if (shortcutLiteral(ng, expr)) {
        DEBUG_PRINTF("took literal short cut\n");
        return;
    }

    auto graph = buildWrapper(ng.rm, ctx, expr);
    if (!graph) {
        DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
                     "thrown.\n", expr.id);
        throw CompileError("Internal error.");
    }

    if (!expr.allow_vacuous && matches_everywhere(*graph)) {
        throw CompileError("Pattern matches empty buffer; use "
                           "HS_FLAG_ALLOWEMPTY to enable support.");
    }

    const bool added = ng.addGraph(*graph);
    if (!added) {
        DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
        throw CompileError("Error compiling expression.");
    }
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||
const u32 minWidth =
|
||||
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
|
||||
auto rose = ng.rose->buildRose(minWidth);
|
||||
|
||||
if (!rose) {
|
||||
DEBUG_PRINTF("error building rose\n");
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* avoid building a smwr if just a pure floating case. */
|
||||
if (!roseIsPureLiteral(rose.get())) {
|
||||
u32 qual = roseQuality(rose.get());
|
||||
auto smwr = ng.smwr->build(qual);
|
||||
if (smwr) {
|
||||
rose = roseAddSmallWrite(rose.get(), smwr.get());
|
||||
}
|
||||
}
|
||||
|
||||
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
|
||||
dumpReportManager(ng.rm, ng.cc.grey);
|
||||
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
||||
dumpSmallWrite(rose.get(), ng.cc.grey);
|
||||
|
||||
return rose;
|
||||
}
|
||||
|
||||
/**
 * \brief Build a platform_t out of \a target_info.
 *
 * The result is 0, with HS_PLATFORM_NOAVX2 set when the target reports no
 * AVX2 support.
 */
platform_t target_to_platform(const target_t &target_info) {
    platform_t platform = 0;
    if (target_info.has_avx2()) {
        return platform;
    }

    platform |= HS_PLATFORM_NOAVX2;
    return platform;
}
|
||||
|
||||
/**
 * \brief Generate the Rose engine for \a ng and wrap it in a database.
 *
 * On success \a length receives the engine size reported by roseSize().
 * Throws CompileError if no engine could be generated, if the engine has
 * zero length, or if the database could not be created.
 */
struct hs_database *build(NG &ng, unsigned int *length) {
    assert(length);

    auto engine = generateRoseEngine(ng);
    if (!engine) {
        throw CompileError("Unable to generate bytecode.");
    }

    *length = roseSize(engine.get());
    if (*length == 0) {
        DEBUG_PRINTF("RoseEngine has zero length\n");
        assert(0);
        throw CompileError("Internal error.");
    }

    // The engine is handed to dbCreate() as a raw byte buffer.
    const char *bytes = reinterpret_cast<const char *>(engine.get());
    const platform_t platform = target_to_platform(ng.cc.target_info);

    struct hs_database *db = dbCreate(bytes, *length, platform);
    if (db == nullptr) {
        throw CompileError("Could not allocate memory for bytecode.");
    }
    return db;
}
|
||||
|
||||
static
|
||||
void stripFromPositions(vector<PositionInfo> &v, Position pos) {
|
||||
auto removed = remove(v.begin(), v.end(), PositionInfo(pos));
|
||||
v.erase(removed, v.end());
|
||||
}
|
||||
|
||||
static
|
||||
void connectInitialStates(GlushkovBuildState &bs,
|
||||
const ParsedExpression &expr) {
|
||||
vector<PositionInfo> initials = expr.component->first();
|
||||
const NFABuilder &builder = bs.getBuilder();
|
||||
const Position startState = builder.getStart();
|
||||
const Position startDotStarState = builder.getStartDotStar();
|
||||
|
||||
DEBUG_PRINTF("wiring initials = %s\n",
|
||||
dumpPositions(initials.begin(), initials.end()).c_str());
|
||||
|
||||
vector<PositionInfo> starts = {startState, startDotStarState};
|
||||
|
||||
// strip start and startDs, which can be present due to boundaries
|
||||
stripFromPositions(initials, startState);
|
||||
stripFromPositions(initials, startDotStarState);
|
||||
|
||||
// replace epsilons with accepts
|
||||
for (const auto &s : initials) {
|
||||
if (s.pos != GlushkovBuildState::POS_EPSILON) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(starts.size() == 2); /* start, startds */
|
||||
vector<PositionInfo> starts_temp = starts;
|
||||
starts_temp[0].flags = s.flags;
|
||||
starts_temp[1].flags = s.flags;
|
||||
bs.connectAccepts(starts_temp);
|
||||
}
|
||||
|
||||
if (!initials.empty()) {
|
||||
bs.connectRegions(starts, initials);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
|
||||
vector<PositionInfo> finals = expr.component->last();
|
||||
|
||||
DEBUG_PRINTF("wiring finals = %s\n",
|
||||
dumpPositions(finals.begin(), finals.end()).c_str());
|
||||
|
||||
bs.connectAccepts(finals);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
static
|
||||
bool isSupported(const Component &c) {
|
||||
try {
|
||||
checkUnsupported(c);
|
||||
return true;
|
||||
}
|
||||
catch (ParseError &) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * \brief Construct a graph from the component tree of \a expr.
 *
 * The tree's positions are registered with a Glushkov build state, the start
 * states are wired to the first positions, the follow set is built, the last
 * positions are wired to the accepts, and the edges are created. Assert
 * vertices are removed from the graph before it is returned.
 */
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
                                   const ParsedExpression &expr) {
    assert(isSupported(*expr.component));

    const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
    assert(builder);

    // Creates the START and ACCEPT states and gives access to the specials.
    const auto buildState = makeGlushkovBuildState(*builder, expr.prefilter);

    // Associate position IDs with characters/components.
    expr.component->notePositions(*buildState);

    // Hook the start dotstar state up to the firsts.
    connectInitialStates(*buildState, expr);

    DEBUG_PRINTF("wire up body of expr\n");
    // Construct the remainder of the FOLLOW set.
    vector<PositionInfo> startPositions = {builder->getStartDotStar(),
                                           builder->getStart()};
    expr.component->buildFollowSet(*buildState, startPositions);

    // Hook the lasts up to the accept state.
    connectFinalStates(*buildState, expr);

    // Materialise the edges.
    buildState->buildEdges();

    auto graph = builder->getGraph();
    assert(graph);

    dumpDotWrapper(*graph, "00_before_asserts", cc.grey);
    removeAssertVertices(rm, *graph);

    return graph;
}
|
||||
|
||||
} // namespace ue2
|
152
src/compiler/compiler.h
Normal file
152
src/compiler/compiler.h
Normal file
@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end interface
|
||||
*/
|
||||
|
||||
#ifndef COMPILER_H
|
||||
#define COMPILER_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "database.h"
|
||||
#include "parser/Component.h"
|
||||
#include "som/som.h"
|
||||
|
||||
#include <memory>
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
struct hs_database;
|
||||
struct hs_expr_ext;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
class NG;
|
||||
class ReportManager;
|
||||
class NGWrapper;
|
||||
|
||||
/** Class gathering together the pieces of a parsed expression.
|
||||
* Note: Owns the provided component.
|
||||
*/
|
||||
class ParsedExpression : boost::noncopyable {
|
||||
public:
|
||||
ParsedExpression(unsigned index, const char *expression, unsigned flags,
|
||||
ReportID actionId, const hs_expr_ext *ext = nullptr);
|
||||
|
||||
bool utf8; //!< UTF-8 mode flag specified
|
||||
|
||||
/** \brief root node of parsed component tree. */
|
||||
std::unique_ptr<ue2::Component> component;
|
||||
|
||||
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
|
||||
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
|
||||
const bool prefilter; //!< HS_FLAG_PREFILTER specified
|
||||
som_type som; //!< chosen SOM mode, or SOM_NONE
|
||||
|
||||
/** \brief index in expressions array passed to \ref hs_compile_multi */
|
||||
const unsigned index;
|
||||
|
||||
const ReportID id; //!< user-specified pattern ID
|
||||
u64a min_offset; //!< 0 if not used
|
||||
u64a max_offset; //!< MAX_OFFSET if not used
|
||||
u64a min_length; //!< 0 if not used
|
||||
};
|
||||
|
||||
/**
|
||||
* Add an expression to the compiler.
|
||||
*
|
||||
* @param ng
|
||||
* The global NG object.
|
||||
* @param index
|
||||
* The index of the expression (used for errors)
|
||||
* @param expression
|
||||
* NULL-terminated PCRE expression
|
||||
* @param flags
|
||||
* The full set of Hyperscan flags associated with this rule.
|
||||
* @param ext
|
||||
* Struct containing extra parameters for this expression, or NULL if
|
||||
* none.
|
||||
* @param actionId
|
||||
* The identifier to associate with the expression; returned by engine on
|
||||
* match.
|
||||
*/
|
||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
|
||||
|
||||
/**
|
||||
* Build a Hyperscan database out of the expressions we've been given. A
|
||||
* fatal error will result in an exception being thrown.
|
||||
*
|
||||
* @param ng
|
||||
* The global NG object.
|
||||
* @param[out] length
|
||||
* The number of bytes occupied by the compiled structure.
|
||||
* @return
|
||||
* The compiled structure. Should be deallocated with the
|
||||
* hs_database_free() function.
|
||||
*/
|
||||
struct hs_database *build(NG &ng, unsigned int *length);
|
||||
|
||||
/**
|
||||
* Constructs an NFA graph from the given expression tree.
|
||||
*
|
||||
* @param rm
|
||||
* Global ReportManager for this compile.
|
||||
* @param cc
|
||||
* Global compile context for this compile.
|
||||
* @param expr
|
||||
* ParsedExpression object.
|
||||
* @return
|
||||
* nullptr on error.
|
||||
*/
|
||||
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
|
||||
const CompileContext &cc,
|
||||
const ParsedExpression &expr);
|
||||
|
||||
/**
|
||||
* Build a platform_t out of a target_t.
|
||||
*/
|
||||
platform_t target_to_platform(const target_t &target_info);
|
||||
|
||||
#if defined(DUMP_SUPPORT) || defined(DEBUG)
|
||||
void dumpExpression(const ParsedExpression &expr, const char *stage,
|
||||
const Grey &grey);
|
||||
#else
|
||||
static really_inline
|
||||
void dumpExpression(UNUSED const ParsedExpression &expr,
|
||||
UNUSED const char *stage, UNUSED const Grey &grey) {
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // COMPILER_H
|
95
src/compiler/error.cpp
Normal file
95
src/compiler/error.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compile-time error utils.
|
||||
*/
|
||||
#include "allocator.h"
|
||||
#include "error.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_compile.h"
|
||||
#include "util/compile_error.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
|
||||
static const char failureNoMemory[] = "Unable to allocate memory.";
|
||||
static const char failureInternal[] = "Internal error.";
|
||||
|
||||
extern const hs_compile_error_t hs_enomem = {
|
||||
const_cast<char *>(failureNoMemory), 0
|
||||
};
|
||||
extern const hs_compile_error_t hs_einternal = {
|
||||
const_cast<char *>(failureInternal), 0
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Allocate a compile error carrying a copy of \a err.
 *
 * Both the error object and its message come from hs_misc_alloc(). If either
 * allocation fails, the statically allocated hs_enomem object is returned
 * instead.
 */
hs_compile_error_t *generateCompileError(const string &err, int expression) {
    hs_compile_error_t *const outOfMemory =
        const_cast<hs_compile_error_t *>(&hs_enomem);

    hs_compile_error_t *error =
        (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
    if (!error) {
        return outOfMemory;
    }

    // Copy the text including its terminating NUL.
    const auto bytes = err.size() + 1;
    char *text = (char *)hs_misc_alloc(bytes);
    if (!text) {
        // Do not leak the half-built error object.
        hs_misc_free(error);
        return outOfMemory;
    }
    memcpy(text, err.c_str(), bytes);

    error->message = text;
    error->expression = expression;
    return error;
}
|
||||
|
||||
/**
 * \brief Convert a CompileError exception into a compile error object.
 *
 * The expression index is taken from \a e when it has one, and is -1
 * otherwise.
 */
hs_compile_error_t *generateCompileError(const CompileError &e) {
    const int expression = e.hasIndex ? (int)e.index : -1;
    return generateCompileError(e.reason, expression);
}
|
||||
|
||||
/**
 * \brief Release a compile error and its message.
 *
 * Null pointers and the static hs_enomem / hs_einternal objects are ignored,
 * since those were never allocated.
 */
void freeCompileError(hs_compile_error_t *error) {
    const bool isStatic = (error == &hs_enomem) || (error == &hs_einternal);
    if (!error || isStatic) {
        return;
    }

    hs_misc_free(error->message);
    hs_misc_free(error);
}
|
||||
|
||||
} // namespace ue2
|
55
src/compiler/error.h
Normal file
55
src/compiler/error.h
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compile-time error utils.
|
||||
*/
|
||||
|
||||
#ifndef COMPILE_ERROR_H
|
||||
#define COMPILE_ERROR_H
|
||||
|
||||
#include <string>
|
||||
|
||||
struct hs_compile_error;
|
||||
|
||||
// Special errors that aren't allocated with hs_alloc/hs_free.
|
||||
extern const hs_compile_error hs_enomem;
|
||||
extern const hs_compile_error hs_einternal;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CompileError;
|
||||
|
||||
hs_compile_error *generateCompileError(const std::string &err, int expression);
|
||||
hs_compile_error *generateCompileError(const CompileError &e);
|
||||
|
||||
void freeCompileError(hs_compile_error *error);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
652
src/crc32.c
Normal file
652
src/crc32.c
Normal file
@ -0,0 +1,652 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crc32.h"
|
||||
#include "config.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#if defined(HAVE_C_X86INTRIN_H)
|
||||
#include <x86intrin.h>
|
||||
#elif defined(HAVE_C_INTRIN_H)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
|
||||
/***
|
||||
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
|
||||
*** licensed and available from http://sourceforge.net/projects/slicing-by-8/
|
||||
***/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
|
||||
*
|
||||
*
|
||||
* This software program is licensed subject to the BSD License,
|
||||
* available at http://www.opensource.org/licenses/bsd-license.html.
|
||||
*
|
||||
* Abstract:
|
||||
*
|
||||
* Tables for software CRC generation
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o32[256] =
|
||||
{
|
||||
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
|
||||
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
|
||||
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
|
||||
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
|
||||
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
|
||||
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
|
||||
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
|
||||
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
|
||||
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
|
||||
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
|
||||
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
|
||||
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
|
||||
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
|
||||
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
|
||||
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
|
||||
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
|
||||
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
|
||||
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
|
||||
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
|
||||
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
|
||||
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
|
||||
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
|
||||
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
|
||||
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
|
||||
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
|
||||
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
|
||||
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
|
||||
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
|
||||
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
|
||||
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
|
||||
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
|
||||
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o32
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o40[256] =
|
||||
{
|
||||
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
|
||||
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
|
||||
0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
|
||||
0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
|
||||
0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
|
||||
0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
|
||||
0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
|
||||
0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
|
||||
0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
|
||||
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
|
||||
0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
|
||||
0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
|
||||
0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
|
||||
0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
|
||||
0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
|
||||
0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
|
||||
0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
|
||||
0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
|
||||
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
|
||||
0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
|
||||
0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
|
||||
0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
|
||||
0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
|
||||
0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
|
||||
0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
|
||||
0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
|
||||
0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
|
||||
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
|
||||
0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
|
||||
0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
|
||||
0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
|
||||
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o40
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o48[256] =
|
||||
{
|
||||
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
|
||||
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
|
||||
0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
|
||||
0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
|
||||
0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
|
||||
0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
|
||||
0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
|
||||
0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
|
||||
0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
|
||||
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
|
||||
0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
|
||||
0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
|
||||
0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
|
||||
0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
|
||||
0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
|
||||
0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
|
||||
0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
|
||||
0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
|
||||
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
|
||||
0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
|
||||
0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
|
||||
0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
|
||||
0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
|
||||
0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
|
||||
0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
|
||||
0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
|
||||
0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
|
||||
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
|
||||
0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
|
||||
0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
|
||||
0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
|
||||
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o48
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o56[256] =
|
||||
{
|
||||
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
|
||||
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
|
||||
0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
|
||||
0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
|
||||
0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
|
||||
0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
|
||||
0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
|
||||
0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
|
||||
0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
|
||||
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
|
||||
0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
|
||||
0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
|
||||
0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
|
||||
0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
|
||||
0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
|
||||
0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
|
||||
0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
|
||||
0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
|
||||
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
|
||||
0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
|
||||
0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
|
||||
0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
|
||||
0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
|
||||
0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
|
||||
0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
|
||||
0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
|
||||
0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
|
||||
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
|
||||
0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
|
||||
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
|
||||
0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
|
||||
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o56
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o64[256] =
|
||||
{
|
||||
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
|
||||
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
|
||||
0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
|
||||
0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
|
||||
0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
|
||||
0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
|
||||
0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
|
||||
0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
|
||||
0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
|
||||
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
|
||||
0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
|
||||
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
|
||||
0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
|
||||
0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
|
||||
0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
|
||||
0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
|
||||
0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
|
||||
0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
|
||||
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
|
||||
0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
|
||||
0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
|
||||
0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
|
||||
0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
|
||||
0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
|
||||
0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
|
||||
0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
|
||||
0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
|
||||
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
|
||||
0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
|
||||
0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
|
||||
0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
|
||||
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o64
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o72[256] =
|
||||
{
|
||||
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
|
||||
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
|
||||
0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
|
||||
0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
|
||||
0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
|
||||
0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
|
||||
0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
|
||||
0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
|
||||
0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
|
||||
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
|
||||
0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
|
||||
0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
|
||||
0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
|
||||
0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
|
||||
0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
|
||||
0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
|
||||
0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
|
||||
0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
|
||||
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
|
||||
0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
|
||||
0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
|
||||
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
|
||||
0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
|
||||
0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
|
||||
0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
|
||||
0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
|
||||
0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
|
||||
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
|
||||
0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
|
||||
0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
|
||||
0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
|
||||
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o72
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o80[256] =
|
||||
{
|
||||
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
|
||||
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
|
||||
0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
|
||||
0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
|
||||
0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
|
||||
0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
|
||||
0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
|
||||
0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
|
||||
0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
|
||||
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
|
||||
0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
|
||||
0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
|
||||
0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
|
||||
0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
|
||||
0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
|
||||
0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
|
||||
0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
|
||||
0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
|
||||
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
|
||||
0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
|
||||
0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
|
||||
0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
|
||||
0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
|
||||
0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
|
||||
0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
|
||||
0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
|
||||
0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
|
||||
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
|
||||
0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
|
||||
0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
|
||||
0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
|
||||
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o80
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o88[256] =
|
||||
{
|
||||
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
|
||||
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
|
||||
0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
|
||||
0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
|
||||
0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
|
||||
0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
|
||||
0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
|
||||
0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
|
||||
0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
|
||||
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
|
||||
0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
|
||||
0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
|
||||
0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
|
||||
0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
|
||||
0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
|
||||
0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
|
||||
0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
|
||||
0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
|
||||
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
|
||||
0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
|
||||
0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
|
||||
0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
|
||||
0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
|
||||
0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
|
||||
0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
|
||||
0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
|
||||
0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
|
||||
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
|
||||
0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
|
||||
0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
|
||||
0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
|
||||
0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o88
|
||||
*/
|
||||
|
||||
//#define VERIFY_ASSERTION
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
|
||||
// Trivial byte-by-byte version: you can switch on the assertion in the
|
||||
// Crc32_ComputeBuf function (by defining VERIFY_ASSERTION) to check this
|
||||
// against the slicing variant.
|
||||
static really_inline
|
||||
u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
|
||||
u32 crc = running_crc;
|
||||
while (length--) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
#endif // VERIFY_ASSERTION
|
||||
|
||||
// Slicing-by-8 approach, which is much faster. Derived from Intel's
|
||||
// BSD-licensed code, with additions to handled aligned case automatically.
|
||||
static really_inline
|
||||
u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
|
||||
const size_t length) {
|
||||
u32 crc = running_crc;
|
||||
|
||||
// Process byte-by-byte until p_buf is aligned
|
||||
|
||||
const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
|
||||
size_t init_bytes = aligned_buf - p_buf;
|
||||
size_t running_length = ((length - init_bytes)/8)*8;
|
||||
size_t end_bytes = length - init_bytes - running_length;
|
||||
|
||||
while (p_buf < aligned_buf) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
|
||||
// Main aligned loop, processes eight bytes at a time.
|
||||
|
||||
u32 term1, term2;
|
||||
for (size_t li = 0; li < running_length/8; li++) {
|
||||
u32 block = *(const u32 *)p_buf;
|
||||
crc ^= block;
|
||||
p_buf += 4;
|
||||
term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
||||
crc_tableil8_o80[(crc >> 8) & 0x000000FF];
|
||||
term2 = crc >> 16;
|
||||
crc = term1 ^
|
||||
crc_tableil8_o72[term2 & 0x000000FF] ^
|
||||
crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
|
||||
|
||||
|
||||
block = *(const u32 *)p_buf;
|
||||
|
||||
term1 = crc_tableil8_o56[block & 0x000000FF] ^
|
||||
crc_tableil8_o48[(block >> 8) & 0x000000FF];
|
||||
|
||||
term2 = block >> 16;
|
||||
crc = crc ^
|
||||
term1 ^
|
||||
crc_tableil8_o40[term2 & 0x000000FF] ^
|
||||
crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
|
||||
p_buf += 4;
|
||||
}
|
||||
|
||||
// Remaining bytes
|
||||
|
||||
for(size_t li = 0; li < end_bytes; li++) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
#else // __SSE4_2__
|
||||
|
||||
#ifdef ARCH_64_BIT
|
||||
#define CRC_WORD 8
|
||||
#define CRC_TYPE u64a
|
||||
#define CRC_FUNC _mm_crc32_u64
|
||||
#else
|
||||
#define CRC_WORD 4
|
||||
#define CRC_TYPE u32
|
||||
#define CRC_FUNC _mm_crc32_u32
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use the crc32 instruction from SSE4.2 to compute our checksum - same
|
||||
* polynomial as the above function.
|
||||
*/
|
||||
static really_inline
|
||||
u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
|
||||
const size_t length) {
|
||||
u32 crc = running_crc;
|
||||
|
||||
// Process byte-by-byte until p_buf is aligned
|
||||
|
||||
const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
|
||||
size_t init_bytes = aligned_buf - p_buf;
|
||||
size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
|
||||
size_t end_bytes = length - init_bytes - running_length;
|
||||
|
||||
while (p_buf < aligned_buf) {
|
||||
crc = _mm_crc32_u8(crc, *p_buf++);
|
||||
}
|
||||
|
||||
// Main aligned loop, processes a word at a time.
|
||||
|
||||
for (size_t li = 0; li < running_length/CRC_WORD; li++) {
|
||||
CRC_TYPE block = *(const CRC_TYPE *)p_buf;
|
||||
crc = CRC_FUNC(crc, block);
|
||||
p_buf += CRC_WORD;
|
||||
}
|
||||
|
||||
// Remaining bytes
|
||||
|
||||
for(size_t li = 0; li < end_bytes; li++) {
|
||||
crc = _mm_crc32_u8(crc, *p_buf++);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
// Externally visible function
|
||||
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
|
||||
#ifdef __SSE4_2__
|
||||
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
|
||||
#else
|
||||
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
assert(crc == crc32c(inCrc32, (const unsigned char *)buf, bufLen));
|
||||
#endif
|
||||
|
||||
return crc;
|
||||
}
|
46
src/crc32.h
Normal file
46
src/crc32.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CRC32_H_36A5015B5840C1
|
||||
#define CRC32_H_36A5015B5840C1
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CRC32_H_36A5015B5840C1 */
|
||||
|
507
src/database.c
Normal file
507
src/database.c
Normal file
@ -0,0 +1,507 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for hs_database manipulation.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "hs_version.h"
|
||||
#include "ue2common.h"
|
||||
#include "database.h"
|
||||
#include "crc32.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
static really_inline
|
||||
int db_correctly_aligned(const void *db) {
|
||||
return ISALIGNED_N(db, alignof(unsigned long long));
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_free_database(hs_database_t *db) {
|
||||
if (db && db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
hs_database_free(db);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||
size_t *serialized_length) {
|
||||
if (!db || !bytes || !serialized_length) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (!db_correctly_aligned(db)) {
|
||||
return HS_BAD_ALIGN;
|
||||
}
|
||||
|
||||
hs_error_t ret = validDatabase(db);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t length = sizeof(struct hs_database) + db->length;
|
||||
|
||||
char *out = hs_misc_alloc(length);
|
||||
ret = hs_check_alloc(out);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_misc_free(out);
|
||||
return ret;
|
||||
}
|
||||
|
||||
memset(out, 0, length);
|
||||
|
||||
u32 *buf = (u32 *)out;
|
||||
*buf = db->magic;
|
||||
buf++;
|
||||
*buf = db->version;
|
||||
buf++;
|
||||
*buf = db->length;
|
||||
buf++;
|
||||
memcpy(buf, &db->platform, sizeof(u64a));
|
||||
buf += 2;
|
||||
*buf = db->crc32;
|
||||
buf++;
|
||||
*buf = db->reserved0;
|
||||
buf++;
|
||||
*buf = db->reserved1;
|
||||
buf++;
|
||||
|
||||
const char *bytecode = hs_get_bytecode(db);
|
||||
memcpy(buf, bytecode, db->length);
|
||||
|
||||
*bytes = out;
|
||||
*serialized_length = length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
// check that the database header's platform is compatible with the current
|
||||
// runtime platform.
|
||||
static
|
||||
hs_error_t db_check_platform(const u64a p) {
|
||||
if (p != hs_current_platform
|
||||
&& p != hs_current_platform_no_avx2) {
|
||||
return HS_DB_PLATFORM_ERROR;
|
||||
}
|
||||
// passed all checks
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
// Decode and check the database header, returning appropriate errors or
|
||||
// HS_SUCCESS if it's OK. The header should be allocated on the stack
|
||||
// and later copied into the deserialized database.
|
||||
static
|
||||
hs_error_t db_decode_header(const char **bytes, const size_t length,
|
||||
struct hs_database *header) {
|
||||
if (!*bytes) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (length < sizeof(struct hs_database)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
// There's no requirement, really, that the serialized stream of bytes
|
||||
// we've been given is 4-byte aligned, so we use unaligned loads here.
|
||||
|
||||
const u32 *buf = (const u32 *)*bytes;
|
||||
|
||||
// Zero header so that none of it (e.g. its padding) is uninitialized.
|
||||
memset(header, 0, sizeof(struct hs_database));
|
||||
|
||||
header->magic = unaligned_load_u32(buf++);
|
||||
if (header->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
header->version = unaligned_load_u32(buf++);
|
||||
if (header->version != HS_DB_VERSION) {
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
header->length = unaligned_load_u32(buf++);
|
||||
if (length != sizeof(struct hs_database) + header->length) {
|
||||
DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
|
||||
sizeof(struct hs_database) + header->length);
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
header->platform = unaligned_load_u64a(buf);
|
||||
buf += 2;
|
||||
header->crc32 = unaligned_load_u32(buf++);
|
||||
header->reserved0 = unaligned_load_u32(buf++);
|
||||
header->reserved1 = unaligned_load_u32(buf++);
|
||||
|
||||
*bytes = (const char *)buf;
|
||||
|
||||
return HS_SUCCESS; // Header checks out
|
||||
}
|
||||
|
||||
// Check the CRC on a database
|
||||
static
|
||||
hs_error_t db_check_crc(const hs_database_t *db) {
|
||||
const char *bytecode = hs_get_bytecode(db);
|
||||
u32 crc = Crc32c_ComputeBuf(0, bytecode, db->length);
|
||||
if (crc != db->crc32) {
|
||||
DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", crc, db->crc32);
|
||||
return HS_INVALID;
|
||||
}
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
static
|
||||
void db_copy_bytecode(const char *serialized, hs_database_t *db) {
|
||||
// we need to align things manually
|
||||
uintptr_t shift = (uintptr_t)db->bytes & 0x3f;
|
||||
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
||||
char *bytecode = (char *)db + db->bytecode;
|
||||
|
||||
// Copy the bytecode into place
|
||||
memcpy(bytecode, serialized, db->length);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
||||
hs_database_t *db) {
|
||||
if (!bytes || !db) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
// We require the user to deserialize into an 8-byte aligned region.
|
||||
if (!ISALIGNED_N(db, 8)) {
|
||||
return HS_BAD_ALIGN;
|
||||
}
|
||||
|
||||
// Decode the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Make sure the serialized database is for our platform
|
||||
ret = db_check_platform(header.platform);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Zero new space for safety
|
||||
size_t dblength = sizeof(struct hs_database) + header.length;
|
||||
memset(db, 0, dblength);
|
||||
|
||||
// Copy the decoded header into place
|
||||
memcpy(db, &header, sizeof(header));
|
||||
|
||||
// Copy the bytecode into the correctly-aligned location, set offsets
|
||||
db_copy_bytecode(bytes, db);
|
||||
|
||||
if (db_check_crc(db) != HS_SUCCESS) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
||||
hs_database_t **db) {
|
||||
if (!bytes || !db) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*db = NULL;
|
||||
|
||||
// Decode and check the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Make sure the serialized database is for our platform
|
||||
ret = db_check_platform(header.platform);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Allocate space for new database
|
||||
size_t dblength = sizeof(struct hs_database) + header.length;
|
||||
struct hs_database *tempdb = hs_database_alloc(dblength);
|
||||
ret = hs_check_alloc(tempdb);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_database_free(tempdb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Zero new space for safety
|
||||
memset(tempdb, 0, dblength);
|
||||
|
||||
// Copy the decoded header into place
|
||||
memcpy(tempdb, &header, sizeof(header));
|
||||
|
||||
// Copy the bytecode into the correctly-aligned location, set offsets
|
||||
db_copy_bytecode(bytes, tempdb);
|
||||
|
||||
if (db_check_crc(tempdb) != HS_SUCCESS) {
|
||||
hs_database_free(tempdb);
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*db = tempdb;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
|
||||
if (!size) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
hs_error_t ret = validDatabase(db);
|
||||
if (unlikely(ret != HS_SUCCESS)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
*size = sizeof(struct hs_database) + db->length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
||||
size_t *size) {
|
||||
// Decode and check the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*size = sizeof(struct hs_database) + header.length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
hs_error_t dbIsValid(const hs_database_t *db) {
|
||||
if (db->magic != HS_DB_MAGIC) {
|
||||
DEBUG_PRINTF("bad magic\n");
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (db->version != HS_DB_VERSION) {
|
||||
DEBUG_PRINTF("bad version\n");
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
if (db_check_platform(db->platform) != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("bad platform\n");
|
||||
return HS_DB_PLATFORM_ERROR;
|
||||
}
|
||||
|
||||
if (!ISALIGNED_16(hs_get_bytecode(db))) {
|
||||
DEBUG_PRINTF("bad alignment\n");
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
hs_error_t rv = db_check_crc(db);
|
||||
if (rv != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("bad crc\n");
|
||||
return rv;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
|
||||
* \ref hs_database, ensuring that it is padded correctly to give cacheline
|
||||
* alignment. */
|
||||
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
||||
size_t db_len = sizeof(struct hs_database) + len;
|
||||
DEBUG_PRINTF("db size %zu\n", db_len);
|
||||
DEBUG_PRINTF("db platform %llx\n", platform);
|
||||
|
||||
struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
|
||||
if (hs_check_alloc(db) != HS_SUCCESS) {
|
||||
hs_database_free(db);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// So that none of our database is uninitialized
|
||||
memset(db, 0, db_len);
|
||||
|
||||
// we need to align things manually
|
||||
size_t shift = (uintptr_t)db->bytes & 0x3f;
|
||||
DEBUG_PRINTF("shift is %zu\n", shift);
|
||||
|
||||
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
||||
char *bytecode = (char *)db + db->bytecode;
|
||||
assert(ISALIGNED_CL(bytecode));
|
||||
|
||||
db->magic = HS_DB_MAGIC;
|
||||
db->version = HS_DB_VERSION;
|
||||
db->length = len;
|
||||
db->platform = platform;
|
||||
|
||||
// Copy bytecode
|
||||
memcpy(bytecode, in_bytecode, len);
|
||||
|
||||
db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
|
||||
return db;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define SNPRINTF_COMPAT _snprintf
|
||||
#else
|
||||
#define SNPRINTF_COMPAT snprintf
|
||||
#endif
|
||||
|
||||
/** Allocate a buffer and prints the database info into it. Returns an
|
||||
* appropriate error code on failure, or HS_SUCCESS on success. */
|
||||
static
|
||||
hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
||||
u32 raw_mode) {
|
||||
assert(s);
|
||||
*s = NULL;
|
||||
|
||||
u8 release = (version >> 8) & 0xff;
|
||||
u8 minor = (version >> 16) & 0xff;
|
||||
u8 major = (version >> 24) & 0xff;
|
||||
|
||||
const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2";
|
||||
|
||||
const char *mode = NULL;
|
||||
|
||||
if (raw_mode == HS_MODE_STREAM) {
|
||||
mode = "STREAM";
|
||||
} else if (raw_mode == HS_MODE_VECTORED) {
|
||||
mode = "VECTORED";
|
||||
} else {
|
||||
assert(raw_mode == HS_MODE_BLOCK);
|
||||
mode = "BLOCK";
|
||||
}
|
||||
|
||||
// Initial allocation size, which should be large enough to print our info.
|
||||
// If it isn't, snprintf will tell us and we can resize appropriately.
|
||||
size_t len = 256;
|
||||
|
||||
while (1) {
|
||||
char *buf = hs_misc_alloc(len);
|
||||
hs_error_t ret = hs_check_alloc(buf);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_misc_free(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Note: SNPRINTF_COMPAT is a macro defined above, to cope with systems
|
||||
// that don't have snprintf but have a workalike.
|
||||
int p_len = SNPRINTF_COMPAT(
|
||||
buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
|
||||
major, minor, release, avx2, mode);
|
||||
if (p_len < 0) {
|
||||
DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
|
||||
hs_misc_free(buf);
|
||||
break;
|
||||
} else if ((size_t)p_len < len) { // output fit within buffer.
|
||||
assert(buf[p_len] == '\0');
|
||||
*s = buf;
|
||||
return HS_SUCCESS;
|
||||
} else { // output didn't fit: resize and reallocate.
|
||||
len = (size_t)p_len + 1; // must add one for null terminator.
|
||||
hs_misc_free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
return HS_NOMEM;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
||||
char **info) {
|
||||
if (!info) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!bytes || length < sizeof(struct hs_database)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
const u32 *buf = (const u32 *)bytes;
|
||||
|
||||
u32 magic = unaligned_load_u32(buf++);
|
||||
if (magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
u32 version = unaligned_load_u32(buf++);
|
||||
|
||||
buf++; /* length */
|
||||
|
||||
platform_t plat;
|
||||
plat = unaligned_load_u64a(buf);
|
||||
buf += 2;
|
||||
|
||||
buf++; /* crc */
|
||||
buf++; /* reserved 0 */
|
||||
buf++; /* reserved 1 */
|
||||
|
||||
const char *t_raw = (const char *)buf;
|
||||
u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));
|
||||
|
||||
return print_database_string(info, version, plat, mode);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_database_info(const hs_database_t *db, char **info) {
|
||||
if (!info) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!db || !db_correctly_aligned(db) || db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
platform_t plat;
|
||||
plat = db->platform;
|
||||
|
||||
const struct RoseEngine *rose = hs_get_bytecode(db);
|
||||
|
||||
return print_database_string(info, db->version, plat, rose->mode);
|
||||
}
|
119
src/database.h
Normal file
119
src/database.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for hs_database manipulation.
|
||||
*/
|
||||
|
||||
#ifndef DATABASE_H_D467FD6F343DDE
|
||||
#define DATABASE_H_D467FD6F343DDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include "hs_compile.h" // for HS_MODE_ flags
|
||||
#include "hs_version.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#define HS_DB_VERSION HS_VERSION_32BIT
|
||||
#define HS_DB_MAGIC (0xdbdbdbdbU)
|
||||
|
||||
// Values in here cannot (easily) change - add new ones!
|
||||
|
||||
// CPU type is the low 6 bits (we can't need more than 64, surely!)
|
||||
|
||||
#define HS_PLATFORM_INTEL 1
|
||||
#define HS_PLATFORM_CPU_MASK 0x3F
|
||||
|
||||
#define HS_PLATFORM_NOAVX2 (4<<13)
|
||||
|
||||
/** \brief Platform features bitmask. */
|
||||
typedef u64a platform_t;
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform = {
|
||||
#if !defined(__AVX2__)
|
||||
HS_PLATFORM_NOAVX2 |
|
||||
#endif
|
||||
0,
|
||||
};
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform_no_avx2 = {
|
||||
HS_PLATFORM_NOAVX2 |
|
||||
0,
|
||||
};
|
||||
|
||||
/*
 * a header to enclose the actual bytecode - useful for keeping info about the
 * compiled data.
 *
 * The field order and types define the in-memory layout; the first eight
 * 32-bit words (magic .. reserved1) are also what gets written out when a
 * database is serialized.
 */
struct hs_database {
    u32 magic;      // HS_DB_MAGIC for a valid database
    u32 version;    // HS_DB_VERSION of the library that built it
    u32 length;     // size of the bytecode in bytes (header not included)
    u64a platform;  // platform features bitmask (see platform_t)
    u32 crc32;      // CRC32C of the bytecode
    u32 reserved0;  // copied through (de)serialization
    u32 reserved1;  // copied through (de)serialization
    u32 bytecode; // offset relative to db start
    u32 padding[16]; // 64 bytes of slack so the bytecode start can be moved
                     // back from bytes[] onto an aligned address
    char bytes[];    // storage for the bytecode
};
|
||||
|
||||
static really_inline
|
||||
const void *hs_get_bytecode(const struct hs_database *db) {
|
||||
return ((const char *)db + db->bytecode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cheap database sanity checks used in block mode scan calls and streaming
|
||||
* mode open calls.
|
||||
*/
|
||||
static really_inline
|
||||
hs_error_t validDatabase(const hs_database_t *db) {
|
||||
if (!db || db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
if (db->version != HS_DB_VERSION) {
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
hs_error_t dbIsValid(const struct hs_database *db);
|
||||
struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* DATABASE_H_D467FD6F343DDE */
|
39
src/fdr/CMakeLists.txt
Normal file
39
src/fdr/CMakeLists.txt
Normal file
@ -0,0 +1,39 @@
|
||||
# The set of rules and other nastiness for generating FDR/Teddy source
|
||||
|
||||
# Python sources behind the generator. They are listed explicitly so that the
# custom commands created by fdr_autogen() can name them in DEPENDS and the
# generated files are rebuilt whenever any of these scripts changes.
set(AUTOGEN_PY_FILES
    arch.py
    autogen.py
    autogen_utils.py
    base_autogen.py
    fdr_autogen.py
    teddy_autogen.py
)
|
||||
|
||||
# fdr_autogen(<type> <out>)
#
# Adds a rule that runs autogen.py with the single argument <type> and
# redirects its standard output to <out> in the current binary directory, and
# a custom target autogen_<type> that depends on that generated file.
function(fdr_autogen type out)
    set(generated_file ${CMAKE_CURRENT_BINARY_DIR}/${out})

    add_custom_command(
        OUTPUT ${generated_file}
        COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${generated_file}
        DEPENDS ${AUTOGEN_PY_FILES}
        COMMENT "AUTOGEN ${out}"
    )

    add_custom_target(autogen_${type} DEPENDS ${generated_file})
endfunction()
|
||||
|
||||
# Instantiate the generator rules: for each argument understood by autogen.py,
# name the file its output is written to. This also creates the targets
# autogen_runtime, autogen_compiler, autogen_teddy_runtime and
# autogen_teddy_compiler.
fdr_autogen(runtime fdr_autogen.c)
fdr_autogen(compiler fdr_autogen_compiler.cpp)
fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
|
||||
|
||||
# Paths of the generated FDR/Teddy sources, exported to the parent scope.
#
# set(... PARENT_SCOPE) assigns the variable in the parent scope only and
# leaves the current scope untouched. The list is therefore defined locally
# first and then exported; otherwise ${fdr_GENERATED_SRC} would expand to
# nothing in the set_source_files_properties() call below and no file would be
# marked GENERATED.
set(fdr_GENERATED_SRC
    ${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c
    ${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
    ${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c
    ${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
)
set(fdr_GENERATED_SRC ${fdr_GENERATED_SRC} PARENT_SCOPE)

set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)

# Make the current binary directory available on the include path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
58
src/fdr/arch.py
Executable file
58
src/fdr/arch.py
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import autogen_utils
|
||||
|
||||
# wrapper for architectures

class Arch:
    """Description of a target architecture used by the code generators.

    name        -- architecture name
    extensions  -- list of extension names; a new empty list when omitted
    target      -- None here; subclasses may replace it
    guard_list  -- preprocessor conditions joined by get_guard()
    """

    def __init__(self, name, extensions=None):
        self.name = name
        # A fresh list per instance: a mutable default argument would be
        # shared by every Arch created without an explicit list.
        self.extensions = [] if extensions is None else extensions
        self.target = None
        # Defined on the base class so that get_guard() also works on a plain
        # Arch; subclasses are free to overwrite it.
        self.guard_list = []

    def get_guard(self):
        """Return the '#if ...' line guarding code for this architecture."""
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if not self.guard_list:
            return "#if 1"

        return "#if " + " && ".join(self.guard_list)
|
||||
|
||||
class X86Arch(Arch):
    """x86 architecture description.

    target starts as "0" and guard_list as empty; when "AVX2" is among the
    extensions, the AVX2 feature flag is OR-ed into target and a
    defined(__AVX2__) condition is added to guard_list.
    """

    def __init__(self, name, extensions=None):
        # Avoid a mutable default argument: the list is stored on the
        # instance, so a shared default could leak between instances.
        if extensions is None:
            extensions = []
        Arch.__init__(self, name, extensions)
        self.guard_list = []
        self.target = "0"

        if "AVX2" in extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += ["defined(__AVX2__)"]
|
||||
|
||||
|
||||
# The two architecture descriptions offered by this module: x86_64 with no
# extensions, and x86_64 with AVX2.
arch_x86_64 = X86Arch("x86_64", extensions=[])
arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions=["AVX2"])
|
159
src/fdr/autogen.py
Executable file
159
src/fdr/autogen.py
Executable file
@ -0,0 +1,159 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from fdr_autogen import *
|
||||
from teddy_autogen import *
|
||||
from arch import *
|
||||
|
||||
# FDR setup
|
||||
|
||||
# these are either produced - if the guard succeeds, or #defined to zeroes.
|
||||
# either the function or the zero is fine in our array of function pointers
|
||||
|
||||
def produce_fdr_runtimes(l):
    """Ask every matcher in ``l`` to emit its runtime code, in list order.

    Each element only needs to provide a ``produce_code()`` method.
    """
    for matcher in l:
        matcher.produce_code()
|
||||
|
||||
def produce_fdr_compiles(l):
    """Print the C++ function getFdrDescriptions() to stdout.

    The function body is a static table of FDREngineDef entries; each matcher
    in ``l`` contributes its entry through ``produce_compile_call()``.

    Single-argument ``print(...)`` is used so that the script produces the
    same output under Python 2 and Python 3.
    """
    print("void getFdrDescriptions(vector<FDREngineDescription> *out) {")
    print(" static const FDREngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(FDREngineDescription(defns[i]));")
    print(" }")
    print("}")
|
||||
|
||||
def build_fdr_matchers():
    """Build the list of FDR matcher configurations.

    For each of the domains 8, 10, 11, 12 and 13 three M3 matchers are
    created (stride 1, 2 and 4, in that order); the big domains 14 and 15 get
    a stride-1 matcher only.  All matchers share the same state width, bucket
    count, extract frequency and target architecture.
    """
    shared_args = {
        "state_width" : 128,
        "num_buckets" : 8,
        "extract_frequency" : 8,
        "arch" : arch_x86_64,
    }

    matchers = []
    for domain in (8, 10, 11, 12, 13):
        for stride in (1, 2, 4):
            matchers.append(M3(stride = stride, domain = domain, **shared_args))
    for domain in (14, 15):
        matchers.append(M3(stride = 1, domain = domain, **shared_args))

    return matchers
|
||||
|
||||
# teddy setup
|
||||
|
||||
def build_teddy_matchers():
    """Build the list of Teddy matcher configurations.

    Order of the returned list:
      1. AVX2 "fast" matchers (unpacked, then packed);
      2. AVX2 "fat" matchers with 16 buckets, for 1..4 masks
         (unpacked then packed for each mask count);
      3. SSE/SSE2/SSSE3 matchers with 8 buckets, for 1..4 masks
         (unpacked then packed for each mask count).
    """
    packings = (False, True)

    # AVX2
    matchers = [ MTFast(arch = arch_x86_64_avx2, packed = p) for p in packings ]
    for masks in range(1, 5):
        for p in packings:
            matchers.append(MTFat(arch = arch_x86_64_avx2, packed = p,
                                  num_masks = masks, num_buckets = 16))

    # SSE/SSE2/SSSE3
    for masks in range(1, 5):
        for p in packings:
            matchers.append(MT(arch = arch_x86_64, packed = p,
                               num_masks = masks, num_buckets = 8))

    return matchers
|
||||
|
||||
def produce_teddy_compiles(l):
    """Print the C++ function getTeddyDescriptions() to stdout.

    The function body is a static table of TeddyEngineDef entries; each
    matcher in ``l`` contributes its entry through ``produce_compile_call()``.

    Single-argument ``print(...)`` is used so that the script produces the
    same output under Python 2 and Python 3.
    """
    print("void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {")
    print(" static const TeddyEngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(TeddyEngineDescription(defns[i]));")
    print(" }")
    print("}")
|
||||
|
||||
# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
|
||||
# are linked. So we either generate the function or we don't - then at the point of the
|
||||
# header in fdr_autogen.c we either generate the header or we #define the zero.
|
||||
|
||||
def produce_teddy_runtimes(l):
    """Print guarded prototypes, then guarded bodies, for all Teddy matchers.

    Two passes are made over ``l``: the first prints each matcher's prototype
    inside its guard, the second emits each matcher's code inside its guard.

    Single-argument ``print(...)`` is used so that the script produces the
    same output under Python 2 and Python 3.
    """
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.close_guard()

    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()
|
||||
|
||||
# see produce_teddy_runtimes() comment for the rationale
|
||||
|
||||
def produce_teddy_headers(l):
    """Print, for every Teddy matcher, a guarded prototype plus its fallback.

    For each matcher the guard is opened, the prototype is printed, and
    ``produce_zero_alternative()`` then emits the alternative branch and
    closes the guard.

    Single-argument ``print(...)`` is used so that the script produces the
    same output under Python 2 and Python 3.
    """
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.produce_zero_alternative()
|
||||
|
||||
# general utilities
|
||||
|
||||
def make_fdr_function_pointers(matcher_list):
    """Print the C table ``funcs[]`` of matcher entry points to stdout.

    The table holds ``get_name()`` of every matcher in ``matcher_list``, in
    list order, so a matcher's position in the table equals its position in
    the list.

    Single-argument ``print(...)`` is used so that the script produces the
    same output under Python 2 and Python 3.
    """
    print("""
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
""")
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print(all_funcs)
    print("""
};
""")
|
||||
|
||||
def assign_ids(matcher_list, next_id):
    """Number the matchers consecutively, starting at ``next_id``.

    Sets ``.id`` on every element of ``matcher_list`` (in order) and returns
    the first id that was not used, so calls can be chained.
    """
    assigned = 0
    for assigned, matcher in enumerate(matcher_list, 1):
        matcher.id = next_id + assigned - 1
    return next_id + assigned
|
||||
|
||||
# Main entry point
|
||||
|
||||
# FDR matchers are numbered first (from 0); Teddy matchers continue the same
# id sequence, so an id doubles as an index into the combined list m + tm.
m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)

# The first command-line argument selects which generated file to print;
# any other value prints nothing.
mode = sys.argv[1]
if mode == "compiler":
    produce_fdr_compiles(m)
elif mode == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif mode == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif mode == "teddy_compiler":
    produce_teddy_compiles(tm)
|
285
src/fdr/autogen_utils.py
Executable file
285
src/fdr/autogen_utils.py
Executable file
@ -0,0 +1,285 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
|
||||
def fail_out(msg = ""):
    """Report an internal generator error on stderr and exit with status 1.

    ``sys.stderr.write`` replaces the Python-2-only ``print >>sys.stderr``
    form; the text written (message plus trailing newline) is the same.
    """
    sys.stderr.write("Internal failure in autogen.py: " + msg + "\n")
    sys.exit(1)
|
||||
|
||||
class IntegerType:
    """A fixed-width integer type used when generating C expressions.

    ``size`` is the width in bits.  The methods return strings containing C
    source text (type names, constants, masks, shifts and load calls).
    """

    def __init__(self, size):
        self.size = size

    def get_name(self):
        """Return the C type name for this width (KeyError if unsupported)."""
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        # floor division keeps the result an int under Python 3 as well
        return self.size // 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        """Format ``n``, truncated to this type's width, as a C hex literal."""
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        """Integer with the low ``n`` bits set."""
        return (1 << n) - 1

    def highbits(self, n):
        """Complement of the low ``size - n`` bits (negative until masked by
        constant_to_string)."""
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        """C expression extracting bits [low, high) of ``expr_string``."""
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            # shifting by the full width or more yields zero
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    # code is:
    # "normal" (always between buf and len) - the default
    # "aligned" (means normal + aligned to a natural boundary)
    # "cautious_forward" (means may go off the end of buf+len)
    # "cautious_backward" (means may go off the start of buf)
    # "cautious_everywhere" (means may go off both)
    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        """Return a C call to the ``lv_<type>[_suffix]`` load helper.

        ``code`` selects the helper variant (see the comment above).  The
        selection uses value equality; comparing strings and ints with ``is``
        tests identity and only works when the interpreter happens to share
        the objects.  An unknown ``code`` is reported through fail_out()
        instead of silently returning None.
        """
        suffixes = {
            "normal" : "",
            "aligned" : "_a",
            "cautious_forward" : "_cf",
            "cautious_backward" : "_cb",
            "cautious_everywhere" : "_ce",
        }
        if code not in suffixes:
            fail_out("unknown load code '%s'" % code)
        if code == "aligned" and self.size == 8:
            fail_out("no aligned byte loads")
        return "lv_%s%s(%s + %d, %s, %s)" % (self.get_name(), suffixes[code],
                                              base_string, offset, bounds_lo, bounds_hi)
|
||||
|
||||
|
||||
class SIMDIntegerType(IntegerType):
    """IntegerType variant whose expressions use 128-bit SIMD helpers.

    Shifts and masks are only available at byte granularity; high-bit and
    arbitrary bit extraction are not implemented and abort via fail_out().
    """

    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        """Extract the low ``n`` (<= 64) bits via a move to a scalar."""
        if (n <= 32):
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif (32 < n <= 64):
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        else:
            # previously fell through and hit unbound tmpType/tmpExpr
            fail_out("Unimplemented low bit extract of more than 64 bits on m128")
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    # 'flip' is optional so the base-class call form (expr, low, high) reaches
    # the fail_out() below instead of raising a TypeError.
    def bit_extract_expr(self, expr_string, low, high, flip = False):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        """Byte-granular shift; +ve ``n`` shifts left, -ve shifts right."""
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")

        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            # n is a multiple of 8 here; floor division keeps the operand an int
            return "_mm_slli_si128(%s, %s)" % (expr_string, n // 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, -n // 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        """All-ones value shifted right so that only the low ``n`` bits remain."""
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        return self.shift_expr("ones128()", -(128 - n))
|
||||
|
||||
def getRequiredType(bits):
    """Pick the type object able to hold ``bits`` bits.

    Exactly 128 bits maps to SIMDIntegerType; otherwise the narrowest of the
    8/16/32/64-bit IntegerTypes that fits is returned, or None if none fits.
    """
    if bits == 128:
        return SIMDIntegerType(bits)
    fitting = [ width for width in (8, 16, 32, 64) if bits <= width ]
    if fitting:
        return IntegerType(fitting[0])
    return None
|
||||
|
||||
class IntegerVariable:
    """A named variable of a given type in the generated C code."""

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        """Return the C declaration of this variable.

        A truthy ``initialization_string`` is used as the initializer;
        otherwise a plain declaration is produced.
        """
        type_name = self.type.get_name()
        if not initialization_string:
            return "%s %s;" % (type_name, self.name)
        return "%s %s = %s;" % (type_name, self.name, initialization_string)
|
||||
|
||||
|
||||
class Step:
    """One unit of generated code, registered with a code-generation context.

    Subclasses are expected to set ``self.val`` (the code text); the context
    sets ``self.debug_step`` when the step is added.
    """

    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []   # list of (step, latency) pairs
        self.latest = None          # filled in by scheduling
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        pad = " " * (self.offset*2 + self.matcher.default_body_indent)
        text = "\n".join([ pad + line for line in self.val.split("\n") ])
        if not self.latest:
            return text
        # scheduling information is appended as a trailing C++-style comment
        text += " // %s L%s LTST:%d" % (self.debug_step, self.latency, self.latest)
        if self.dependency_list:
            text += " Derps: " + "".join([ "%d/%d " % (dep.debug_step, lat)
                                           for (dep, lat) in self.dependency_list ])
        return text

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        """Record that this step depends on ``step``.

        Anti- and output dependencies are recorded with latency 1; any other
        dependency is recorded with ``step.latency``.
        """
        cost = 1 if (anti_dependency or output_dependency) else step.latency
        self.dependency_list.append((step, cost))

    def nv(self, type, var_name):
        """Create a new variable in the context, with this step as creator."""
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        """Look up a variable in the context on behalf of this step."""
        return self.context.get_var(self, var_name, reader = reader, writer = writer)
|
||||
|
||||
# utility steps, generic
|
||||
|
||||
class LabelStep(Step):
    """Step whose code is a C label named ``<label_prefix><offset>``."""

    def __init__(self, context, offset = 0, label_prefix = "off"):
        Step.__init__(self, context, offset)
        label = "%s%d" % (label_prefix, offset)
        self.val = label + ": UNUSED;"
|
||||
|
||||
class OpenScopeStep(Step):
    """Step whose code is an opening brace."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "{"
|
||||
|
||||
class CloseScopeStep(Step):
    """Step whose code is a closing brace."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "}"
|
||||
|
||||
|
||||
class CodeGenContext:
    """Collects steps and variables for one matcher and tracks dependencies.

    For every variable the context remembers its most recent writer step and
    the steps that have read it since that write; get_var() turns this into
    dependencies on the requesting step.
    """

    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {} # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        """Create variable ``var_name`` and record ``step`` as its writer."""
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        """Return variable ``var_name`` and record how ``step`` uses it.

        Reading adds a dependency on the current writer; writing adds an
        output dependency on the previous writer (write-only access) and
        anti-dependencies on every other step that read the previous value,
        then makes ``step`` the current writer.
        """
        if reader:
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            # 'in' replaces the Python-2-only dict.has_key()
            if var_name in self.var_readers:
                # distinct loop name: do not shadow the 'reader' parameter
                for prior_reader in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(prior_reader, anti_dependency = True)
                self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        """Append ``step`` and give it the next sequential debug number."""
        self.steps += [ step ]
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        """Emit all steps in creation order; ``finals`` is ignored."""
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        """Compute ``latest`` for every step reachable from ``finals`` and
        emit all steps sorted by descending ``latest``."""
        for f in finals:
            f.latest = f.latency
        worklist = list(finals)
        while worklist:
            current = worklist.pop(0)
            for (dep, lat) in current.dependency_list:
                # NOTE(review): the test uses dep.latency while the assignment
                # uses the edge latency 'lat' - confirm this is intended.
                if dep.latest is None or dep.latest < (current.latest + dep.latency):
                    dep.latest = current.latest + lat
                    if dep not in worklist:
                        worklist += [ dep ]
        self.steps.sort(reverse = True, key = lambda s : s.latest)
        return "\n".join( [ s.emit() for s in self.steps ] )
|
167
src/fdr/base_autogen.py
Normal file
167
src/fdr/base_autogen.py
Normal file
@ -0,0 +1,167 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MatcherBase:
    """Shared C-code templates for the generated matcher functions.

    The methods read ``self.id``, ``self.arch``, ``self.num_buckets``,
    ``self.conf_top_level_split`` and ``self.conf_pull_back``; none of them is
    set here, so they are presumably supplied by subclasses or callers.

    The printing methods use single-argument ``print(...)`` so that they
    produce the same output under Python 2 and Python 3.
    """

    def __init__(self):
        pass

    def get_name(self):
        """Name of the generated C function, derived from ``self.id``."""
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        """Return the C function header as a string.

        A non-visible function is prefixed with "static never_inline".  With
        ``header_only`` the text ends in ";" (a prototype), otherwise in "{"
        (the start of a definition).
        """
        s = ""
        if not visible:
            s += "static never_inline"
        s += """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        if header_only:
            s += ";"
        else:
            s += "{"
        s += "\n"
        return s

    def produce_guard(self):
        """Print the guard text provided by ``self.arch``."""
        print(self.arch.get_guard())

    def produce_zero_alternative(self):
        """Print the #else branch defining the function name as 0, and close
        the guard."""
        print("""
#else
#define %s 0
#endif
""" % self.get_name())

    # trivial function for documentation/modularity
    def close_guard(self):
        print("#endif")

    def produce_common_declarations(self):
        """Return the local variable declarations used by the function body."""
        return """
const u8 * buf = a->buf;
const size_t len = a->len;
const u8 * ptr = buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t * control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 * tryFloodDetect = a->firstFloodDetect;
UNUSED u32 bit, bitRem, confSplit, idx;
u32 byte, cf;
const struct FDRConfirm *fdrc;
u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        """Return C code that stores the groups and returns HWLM_TERMINATED
        once controlVal is HWLM_TERMINATE_MATCHING."""
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        """Return C code that calls floodDetect() once ptr has passed
        tryFloodDetect, followed by the termination check."""
        return """
if (P0(ptr > tryFloodDetect)) {
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}
"""

    def produce_footer(self):
        """Return the C code that ends the function with HWLM_SUCCESS."""
        return """
*a->groups = controlVal;
return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        """Return the C confirm loop for the variable ``conf_var_name``.

        conf_var_size      -- suffix selecting the findAndClearLSB_* helper
        offset             -- constant added to the computed byte position
        cautious           -- pass VECTORING (else NOT_CAUTIOUS) to confWithBit
        enable_confirmless -- add the quick path for confirms with !mult
        do_bailout         -- skip positions outside [start_offset, len)
        """
        if cautious:
            caution_string = "VECTORING"
        else:
            caution_string = "NOT_CAUTIOUS"
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)
        if enable_confirmless:
            quick_check_string = """
if (!fdrc->mult) {
u32 id = fdrc->nBitsOrSoleID;
if ((last_match == id) && (fdrc->flags & NoRepeat))
continue;
last_match = id;
controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
continue;
} """
        else:
            quick_check_string = ""
        if do_bailout:
            bailout_string = """
if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""
        else:
            bailout_string = ""

        return Template("""
if (P0(!!$CONFVAR)) {
do {
bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
byte = bit / $NUM_BUCKETS + $OFFSET;
bitRem = bit % $NUM_BUCKETS;
$BAILOUT_STRING
confSplit = *(ptr+byte) & $SPLIT_MASK;
idx = confSplit * $NUM_BUCKETS + bitRem;
cf = confBase[idx];
if (!cf)
continue;
fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
if (!(fdrc->groups & *control))
continue;
$QUICK_CHECK_STRING
confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
} while(P0(!!$CONFVAR));
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}""").substitute(CONFVAR = conf_var_name,
                 CONFVAR_SIZE = conf_var_size,
                 NUM_BUCKETS = self.num_buckets,
                 OFFSET = offset,
                 SPLIT_MASK = conf_split_mask,
                 QUICK_CHECK_STRING = quick_check_string,
                 BAILOUT_STRING = bailout_string,
                 CAUTION_STRING = caution_string,
                 CONF_PULL_BACK = self.conf_pull_back)
|
||||
|
||||
|
||||
def indent(block, depth):
    """Prefix every line of ``block`` with ``4 * depth`` spaces.

    Lines are split with str.splitlines(), so a trailing newline is dropped.
    """
    prefix = " " * (4*depth)
    shifted = [ prefix + line for line in block.splitlines() ]
    return "\n".join(shifted)
|
49
src/fdr/engine_description.cpp
Normal file
49
src/fdr/engine_description.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "hs_compile.h" // for hs_platform_info
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
EngineDescription::~EngineDescription() {}
|
||||
|
||||
bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
|
||||
return target_in.can_run_on_code_built_for(code_target);
|
||||
}
|
||||
|
||||
target_t targetByArchFeatures(u64a cpu_features) {
|
||||
hs_platform_info p;
|
||||
p.tune = HS_TUNE_FAMILY_GENERIC;
|
||||
p.cpu_features = cpu_features;
|
||||
|
||||
return target_t(p);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
70
src/fdr/engine_description.h
Normal file
70
src/fdr/engine_description.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_DESCRIPTION_H
|
||||
#define ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class EngineDescription {
|
||||
u32 id;
|
||||
target_t code_target; // the target that we built this code for
|
||||
u32 numBuckets;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
|
||||
public:
|
||||
EngineDescription(u32 id_in, const target_t &code_target_in,
|
||||
u32 numBuckets_in, u32 confirmPullBackDistance_in,
|
||||
u32 confirmTopLevelSplit_in)
|
||||
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
|
||||
confirmPullBackDistance(confirmPullBackDistance_in),
|
||||
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
|
||||
|
||||
virtual ~EngineDescription();
|
||||
|
||||
u32 getID() const { return id; }
|
||||
u32 getNumBuckets() const { return numBuckets; }
|
||||
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
|
||||
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
|
||||
|
||||
bool isValidOnTarget(const target_t &target_in) const;
|
||||
virtual u32 getDefaultFloodSuffixLength() const = 0;
|
||||
|
||||
virtual bool typicallyHoldsOneCharLits() const { return true; }
|
||||
};
|
||||
|
||||
/** Returns a target given a CPU feature set value. */
|
||||
target_t targetByArchFeatures(u64a cpu_features);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
126
src/fdr/fdr.c
Normal file
126
src/fdr/fdr.c
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
#include "fdr_streaming_runtime.h"
|
||||
#include "fdr_loadval.h"
|
||||
|
||||
static really_inline UNUSED
|
||||
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
|
||||
u32 r = 0;
|
||||
if (a->start_offset == 0) {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
} else {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
r |= (a->buf[0] << 8);
|
||||
}
|
||||
} else {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf[a->start_offset - 1];
|
||||
} else {
|
||||
r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
|
||||
}
|
||||
}
|
||||
return r & ((1 << numBits) - 1);
|
||||
}
|
||||
|
||||
#include "fdr_autogen.c"
|
||||
|
||||
#define FAKE_HISTORY_SIZE 16
|
||||
static const u8 fake_history[FAKE_HISTORY_SIZE];
|
||||
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
|
||||
HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
|
||||
|
||||
const struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
fake_history,
|
||||
0,
|
||||
fake_history, // nocase
|
||||
0,
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
0
|
||||
};
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
return HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
return funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 * stream_state) {
|
||||
struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
hbuf,
|
||||
hlen,
|
||||
hbuf, // nocase - start same as caseful, override later if needed
|
||||
hlen, // nocase
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
|
||||
: (u64a)0
|
||||
|
||||
};
|
||||
fdrUnpackState(fdr, &a, stream_state);
|
||||
|
||||
hwlm_error_t ret;
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
ret = HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
ret = funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
|
||||
fdrPackState(fdr, &a, stream_state);
|
||||
return ret;
|
||||
}
|
91
src/fdr/fdr.h
Normal file
91
src/fdr/fdr.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_H
|
||||
#define FDR_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
// C linkage in the API
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct FDR;
|
||||
|
||||
/** \brief Returns size in bytes of the given FDR engine. */
|
||||
size_t fdrSize(const struct FDR *fdr);
|
||||
|
||||
/** \brief Returns non-zero if the contents of the stream state indicate that
|
||||
* there is active FDR history beyond the regularly used history. */
|
||||
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
|
||||
|
||||
/**
|
||||
* \brief Block-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan.
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
*/
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/**
|
||||
* \brief Streaming-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param hbuf History buffer.
|
||||
* \param hlen Length of history buffer (hbuf).
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan (buf).
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
* \param stream_state Persistent stream state for use by FDR.
|
||||
*/
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 *stream_state);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // FDR_H
|
574
src/fdr/fdr_autogen.py
Executable file
574
src/fdr/fdr_autogen.py
Executable file
@ -0,0 +1,574 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class OrStep(Step):
    """Emit the C statement that ORs the variable st<offset> into "s"."""

    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        table_value = self.gv("st%d" % offset)
        # Widths of 128 bits and above go through the or<width>() helper in
        # the generated code; narrower widths use the plain C "|=" operator.
        if width >= 128:
            self.val = "s = or%d(s, %s);" % (width, table_value.name)
        else:
            self.val = "s |= %s;" % table_value.name
|
||||
|
||||
class ShiftStateStep(Step):
    """Emit the C statement that shifts the matcher's state variable.

    The shift distance handed to the state type's shift_expr() is the negated
    product of stride_used and the matcher's bucket count.
    """

    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_var = matcher.state_variable
        distance = -(stride_used * matcher.num_buckets)
        shifted = state_var.type.shift_expr(state_var.name, distance)
        self.val = "%s = %s;" % (state_var.name, shifted)
|
||||
|
||||
class BulkLoadStep(Step):
    """Emit the load of current_data_<offset> using the bulk load type.

    aligned selects the "aligned" load code; otherwise load_expr_data() is
    called with its default code. define_var chooses between a defining
    initializer statement for a new variable and a plain assignment to an
    already registered one. The size argument is accepted but not used here.
    """

    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        load_type = matcher.bulk_load_type
        if aligned:
            load_expr = load_type.load_expr_data(self.offset, code = "aligned")
        else:
            load_expr = load_type.load_expr_data(self.offset)

        name = "current_data_%d" % offset
        if not define_var:
            # Register this step as a writer (not a reader) of the variable.
            self.gv(name, reader = False, writer = True)
            self.val = "%s = %s;" % (name, load_expr)
        else:
            new_var = self.nv(load_type, name)
            self.val = new_var.gen_initializer_stmt(load_expr)
|
||||
|
||||
class ValueExtractStep(Step):
    """Emit the definition of v<offset>, the masked value for this offset.

    The value comes either from a separate single load or from shifting the
    bulk-loaded current_data_<n> variable that covers this offset; in every
    case it is finally ANDed with the matcher's reach_mask.
    """

    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 2
        load_bytes = m.datasize_bytes
        pos_in_load = offset % load_bytes

        if m.domain > 8 and pos_in_load == load_bytes - 1:
            # Case 1: reading more than one byte over the end of the bulk load
            self.latency = 4
            mode = "cautious_forward" if sub_load_cautious else "normal"
            loaded = m.single_load_type.load_expr_data(self.offset, mode)
            shifted = "(%s << %d)" % (loaded, m.reach_shift_adjust)
        elif m.fdr2_force_naive_load:
            # Case 2 with naive loads forced: always do a separate single load
            loaded = m.single_load_type.load_expr_data(self.offset, "normal")
            shifted = "(%s << %d)" % (loaded, m.reach_shift_adjust)
        else:
            # Case 2: the value can be found entirely in the current register
            bulk_var = self.gv("current_data_%d" % (offset - pos_in_load))
            if pos_in_load == 0:
                # Case 2a: value is at the LSB end of the register and must be
                # left-shifted into place if a "reach_shift_adjust" is required
                shifted = "(%s << %d)" % (bulk_var.name, m.reach_shift_adjust)
            else:
                # Case 2b: value is in the middle of the register and is
                # right-shifted into place (adjusted by "reach_shift_adjust")
                shifted = "(%s >> %d)" % (bulk_var.name, pos_in_load*8 - m.reach_shift_adjust)

        masked = "(%s) & 0x%x" % (shifted, m.reach_mask)
        value_var = self.nv(m.value_extract_type, "v%d" % offset)
        self.val = value_var.gen_initializer_stmt(masked)
|
||||
|
||||
class TableLookupStep(Step):
    """Emit the definition of st<offset> as a load from the table "ft".

    The generated expression indexes ft by v<offset> multiplied by
    reach_multiplier and reads one value of the matcher's state type.
    """

    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        index_var = self.gv("v%d" % offset)
        result_var = self.nv(m.state_type, "st%d" % offset)
        type_name = m.state_type.get_name()
        lookup = "*(const %s *)(ft + %s*%dU)" % (type_name, index_var.name,
                                                 reach_multiplier)
        self.val = result_var.gen_initializer_stmt(lookup)
|
||||
|
||||
class ShiftReachMaskStep(Step):
    """Emit the in-place shift of st<offset>.

    The distance is the offset's position within its extract group (offset
    modulo extract_frequency) multiplied by the matcher's bucket count.
    """

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        pos_in_group = offset % m.extract_frequency
        table_value = self.gv("st%d" % offset, writer = True)
        shifted = table_value.type.shift_expr(table_value.name,
                                              pos_in_group * m.num_buckets)
        self.val = "%s = %s;" % (table_value.name, shifted)
|
||||
|
||||
class ConfExtractStep(Step):
    """Emit the definition of extr<offset> from the low bits of state "s".

    The number of bits extracted is the matcher's extract_size. The step's
    latency is set to 2 only when the state type reports isSIMDOnIntel().
    """

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        state_type = m.state_type
        if state_type.isSIMDOnIntel():
            self.latency = 2
        low_bits = state_type.lowbit_extract_expr("s", m.extract_size)
        extracted = self.nv(m.extr_type, "extr%d" % offset)
        self.val = extracted.gen_initializer_stmt(low_bits)
|
||||
|
||||
class ConfAccumulateStep(Step):
    """Emit the statement that folds extr<extract_offset> into conf<conf_offset>.

    When both offsets are equal the cast extract value is copied into the
    confirm variable (defining it if define_var is set); otherwise it is
    shifted by the offset difference times the bucket count and ORed in.
    """

    def __init__(self, context, extract_offset, conf_offset, define_var = True):
        Step.__init__(self, context, extract_offset)
        m = self.matcher
        extracted = self.gv("extr%d" % extract_offset)
        cast_expr = "((%s)%s)" % (m.conf_type.get_name(), extracted.name)
        conf_name = "conf%d" % conf_offset

        if extract_offset != conf_offset:
            # shift the extract value and insert/OR it into the confirm variable
            target = self.gv(conf_name, writer = True, reader = True)
            distance = (extract_offset - conf_offset) * m.num_buckets
            self.val = "%s |= %s;" % (target.name,
                                      m.conf_type.shift_expr(cast_expr, distance))
            self.latency = 2
        elif define_var:
            # define the confirm variable as a straight copy of the extract
            target = self.nv(m.conf_type, conf_name)
            self.val = target.gen_initializer_stmt(cast_expr)
        else:
            # assign a straight copy to an already defined confirm variable
            target = self.gv(conf_name, writer = True, reader = True)
            self.val = "%s = %s;" % (target.name, cast_expr)
|
||||
|
||||
class ConfirmFlipStep(Step):
    """Emit the statement that flips the low bits of conf<offset> in place.

    The number of low bits flipped is confirm_frequency times the bucket
    count, as passed to the confirm type's flip_lowbits_expr().
    """

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        target = self.gv("conf%d" % self.offset, writer = True)
        bit_count = self.matcher.confirm_frequency * m.num_buckets
        flipped = target.type.flip_lowbits_expr(target.name, bit_count)
        self.val = "%s = %s;" % (target.name, flipped)
|
||||
|
||||
class ConfirmStep(Step):
    """Emit the confirm code for conf<offset>.

    The code itself is produced by the matcher's produce_confirm_base();
    confirmless handling is enabled only when the matcher's stride is 1, and
    bailout is always disabled.
    """

    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        target = self.gv("conf%d" % offset, writer = True)
        confirmless = m.stride == 1
        self.val = m.produce_confirm_base(target.name, target.type.size,
                                          offset, cautious,
                                          enable_confirmless = confirmless,
                                          do_bailout = False)
|
||||
|
||||
class M3(MatcherBase):
|
||||
def get_hash_safety_parameters(self):
|
||||
h_size = self.single_load_type.size_in_bytes()
|
||||
return (0, h_size - 1)
|
||||
|
||||
def produce_compile_call(self):
|
||||
print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.state_width, self.num_buckets,
|
||||
self.stride, self.domain,
|
||||
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def produce_main_loop(self, switch_variant = False):
|
||||
stride_offsets = xrange(0, self.loop_bytes, self.stride)
|
||||
stride_offsetSet = set(stride_offsets)
|
||||
so_steps_last_block = []
|
||||
sh = None
|
||||
last_confirm = None
|
||||
ctxt = CodeGenContext(self)
|
||||
|
||||
if switch_variant:
|
||||
print " ptr -= (iterBytes - dist);"
|
||||
print " { " # need an extra scope around switch variant to stop its globals escaping
|
||||
else:
|
||||
print " if (doMainLoop) {"
|
||||
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
|
||||
print self.produce_flood_check()
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print " assert(((size_t)ptr % START_MOD) == 0);"
|
||||
|
||||
|
||||
# just do globally for now
|
||||
if switch_variant:
|
||||
subsidiary_load_cautious = True
|
||||
confirm_cautious = True
|
||||
else:
|
||||
subsidiary_load_cautious = False
|
||||
confirm_cautious = False
|
||||
|
||||
if not self.fdr2_force_naive_load:
|
||||
bulk_load_steps = [ off for off in range(self.loop_bytes)
|
||||
if off % self.datasize_bytes == 0 and
|
||||
(set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)]
|
||||
else:
|
||||
bulk_load_steps = []
|
||||
|
||||
confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]
|
||||
|
||||
for off in bulk_load_steps:
|
||||
lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
|
||||
print " " + lb_var.gen_initializer_stmt()
|
||||
|
||||
|
||||
for off in confirm_steps:
|
||||
var_name = "conf%d" % off
|
||||
conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
|
||||
if switch_variant:
|
||||
init_string = "(%s)-1" % self.conf_type.get_name()
|
||||
else:
|
||||
init_string = ""
|
||||
print " " + conf_def_var.gen_initializer_stmt(init_string)
|
||||
|
||||
if switch_variant:
|
||||
print " switch(iterBytes - dist) {"
|
||||
for i in range(0, self.loop_bytes):
|
||||
print " case %d:" % i
|
||||
|
||||
# init and poison conf; over-precise but harmless
|
||||
conf_id = (i / self.confirm_frequency) * self.confirm_frequency
|
||||
if i % self.confirm_frequency:
|
||||
conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
|
||||
print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
|
||||
else:
|
||||
print " conf%d = 0;" % conf_id
|
||||
|
||||
# init state
|
||||
state_fixup = i % self.extract_frequency
|
||||
state = self.state_variable
|
||||
shift_distance = self.num_buckets * state_fixup
|
||||
if state_fixup:
|
||||
print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
|
||||
if self.state_width < 128:
|
||||
print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
|
||||
else:
|
||||
print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))
|
||||
|
||||
if not self.fdr2_force_naive_load:
|
||||
# init current_data (could poison it in some cases)
|
||||
load_mod = i % self.datasize_bytes
|
||||
load_offset = i - load_mod
|
||||
if load_mod:
|
||||
# not coming in on an even boundary means having to do a load var
|
||||
# actually, there are a bunch of things we can do on this bulk load
|
||||
# to avoid having to be 'cautious_backwards' but I'm not completely
|
||||
# sure they are good ideas
|
||||
init_string = self.bulk_load_type.load_expr_data(load_offset,
|
||||
code = "cautious_backward")
|
||||
var_name = "current_data_%d" % load_offset
|
||||
lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
|
||||
print " %s = %s;" % (lb_var.name, init_string)
|
||||
|
||||
print " goto off%d;" % i
|
||||
print " case %d: goto skipSwitch;" % self.loop_bytes
|
||||
print " }"
|
||||
print " {"
|
||||
|
||||
|
||||
for off in range(self.loop_bytes):
|
||||
# X_mod is the offset we're up to relative to the last X operation
|
||||
# X_offset is which of the last X operations matches this iteration
|
||||
|
||||
if (switch_variant):
|
||||
LabelStep(ctxt, off)
|
||||
|
||||
if off in bulk_load_steps:
|
||||
if not self.fdr2_force_naive_load:
|
||||
BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)
|
||||
|
||||
if off in stride_offsets:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
|
||||
TableLookupStep(ctxt, self.reach_mult, off)
|
||||
if off % self.extract_frequency:
|
||||
ShiftReachMaskStep(ctxt, off)
|
||||
so = OrStep(ctxt, off, self.state_width)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
if sh != None:
|
||||
so.add_dependency(sh)
|
||||
so_steps_last_block += [ so ]
|
||||
|
||||
extract_mod = off % self.extract_frequency
|
||||
extract_offset = off - extract_mod
|
||||
extract_ready = extract_mod == self.extract_frequency - 1
|
||||
if extract_ready:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ex = ConfExtractStep(ctxt, extract_offset)
|
||||
ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
|
||||
for so_step in so_steps_last_block:
|
||||
ex.add_dependency(so_step)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
so_steps_last_block = []
|
||||
sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
|
||||
sh.add_dependency(ex)
|
||||
|
||||
confirm_mod = off % self.confirm_frequency
|
||||
confirm_offset = off - confirm_mod
|
||||
confirm_ready = confirm_mod == self.confirm_frequency - 1
|
||||
if confirm_ready:
|
||||
cflip = ConfirmFlipStep(ctxt, confirm_offset)
|
||||
cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious )
|
||||
if last_confirm:
|
||||
cf.add_dependency(last_confirm)
|
||||
last_confirm = cf
|
||||
|
||||
|
||||
if not switch_variant:
|
||||
print ctxt.schedule([ last_confirm, sh ])
|
||||
else:
|
||||
print ctxt.dontschedule([ last_confirm, sh ])
|
||||
|
||||
if switch_variant:
|
||||
print "skipSwitch:;"
|
||||
print " ptr += iterBytes;"
|
||||
print " }" # close extra scope around switch variant
|
||||
print " }"
|
||||
|
||||
|
||||
def produce_init_state(self):
|
||||
state = self.state_variable
|
||||
s_type = self.state_type
|
||||
shift_distance = -1 * self.num_buckets
|
||||
shift_expr = "%s = %s" % (state.name, state.type.shift_expr(state.name, shift_distance))
|
||||
|
||||
s = Template("""
|
||||
$TYPENAME s;
|
||||
if (a->len_history) {
|
||||
u32 tmp = getPreStartVal(a, $DOMAIN);
|
||||
s = *((const $TYPENAME *)ft + tmp);
|
||||
$SHIFT_EXPR;
|
||||
} else {
|
||||
s = *(const $TYPENAME *)&fdr->start;
|
||||
}
|
||||
""").substitute(TYPENAME = s_type.get_name(),
|
||||
ZERO_EXPR = s_type.zero_expression(),
|
||||
DOMAIN = self.domain,
|
||||
SHIFT_EXPR = shift_expr)
|
||||
return s
|
||||
|
||||
def produce_code(self):
|
||||
|
||||
(behind, ahead) = self.get_hash_safety_parameters()
|
||||
loop_read_behind = behind
|
||||
loop_read_ahead = self.loop_bytes + ahead
|
||||
|
||||
# we set up mask and shift stuff for extracting our masks from registers
|
||||
#
|
||||
# we have a choice as to whether to mask out the value early or
|
||||
# extract the value (shift first) then mask it
|
||||
#
|
||||
# Intel has a free scaling factor from 1/2/4/8 so we want to combine
|
||||
# the extra needed shift for SSE registers with the mask operation
|
||||
|
||||
ssb = self.state_type.size / 8 # state size in bytes
|
||||
|
||||
# Intel path
|
||||
if ssb == 16 and self.domain == 16:
|
||||
# obscure corner - we don't have the room in the register to
|
||||
# do this for all values so we don't. domain==16 is pretty
|
||||
# bad anyhow, of course
|
||||
self.reach_mult = 8
|
||||
else:
|
||||
self.reach_mult = ssb
|
||||
|
||||
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
|
||||
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
|
||||
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
|
||||
|
||||
print self.produce_header(visible = False)
|
||||
|
||||
print "// ",
|
||||
print " Arch: " + self.arch.name,
|
||||
print " State type: " + self.state_type.get_name(),
|
||||
print " Num buckets: %d" % self.num_buckets,
|
||||
print " Domain: %d" % self.domain,
|
||||
print " Stride: %d" % self.stride
|
||||
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
print "\tconst size_t tabSize = %d;" % self.table_size
|
||||
print """
|
||||
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
|
||||
const u32 * confBase = (const u32 *)(ft + tabSize);
|
||||
"""
|
||||
print self.produce_init_state()
|
||||
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
|
||||
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
|
||||
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
|
||||
|
||||
print """
|
||||
while (ptr < buf + len) {
|
||||
|
||||
u8 doMainLoop = 1;
|
||||
size_t remaining = len - (ptr - buf);
|
||||
size_t dist;
|
||||
if (remaining <= iterBytes) {
|
||||
dist = remaining; // once through the switch and we're done
|
||||
} else if (remaining < 2 * iterBytes) {
|
||||
// nibble some stuff off the front, skip the main loop,
|
||||
// then come back here
|
||||
dist = iterBytes; // maybe could be cleverer
|
||||
} else {
|
||||
// now, we need to see if we can make it to a main loop iteration
|
||||
// if so, we need to ensure that the main loop iteration is aligned
|
||||
// to a START_MOD boundary and i >= 8 so we can read ptr + i - 8
|
||||
|
||||
// see if we can do it - if not, just switch the main loop off,
|
||||
// eat iterBytes in cautious mode, and come back to this loop
|
||||
|
||||
const u8 * target = MAX(buf + 8, ptr);
|
||||
target = ROUNDUP_PTR(target, START_MOD);
|
||||
dist = target - ptr;
|
||||
if (dist > iterBytes) {
|
||||
doMainLoop = 0;
|
||||
dist = iterBytes;
|
||||
}
|
||||
}
|
||||
"""
|
||||
self.produce_main_loop(switch_variant = True)
|
||||
self.produce_main_loop(switch_variant = False)
|
||||
print """
|
||||
}
|
||||
"""
|
||||
print self.produce_footer()
|
||||
|
||||
def get_name(self):
|
||||
return "fdr_exec_%s_d%d_s%d_w%d" % (self.arch.name, self.domain, self.stride, self.state_width)
|
||||
|
||||
def __init__(self, state_width, domain, stride,
|
||||
arch,
|
||||
table_state_width = None,
|
||||
num_buckets = 8,
|
||||
extract_frequency = None,
|
||||
confirm_frequency = None):
|
||||
|
||||
# First - set up the values that are fundamental to how this matcher will operate
|
||||
self.arch = arch
|
||||
|
||||
# get the width of the state width on which we operate internally
|
||||
if state_width not in [ 128 ]:
|
||||
fail_out("Unknown state width: %d" % state_width)
|
||||
self.state_width = state_width
|
||||
self.state_type = getRequiredType(self.state_width)
|
||||
self.state_variable = IntegerVariable("s", self.state_type)
|
||||
|
||||
table_state_width = state_width
|
||||
self.table_state_width = state_width
|
||||
self.table_state_type = getRequiredType(self.table_state_width)
|
||||
|
||||
# domain is the number of bits that we draw from our input to
|
||||
# index our 'reach' table
|
||||
if not 8 <= domain <= 16:
|
||||
fail_out("Unsupported domain: %d" % domain)
|
||||
self.domain = domain
|
||||
# this is the load type required for this domain if we want to
|
||||
# load it one at a time
|
||||
self.single_load_type = getRequiredType(self.domain)
|
||||
|
||||
# table size
|
||||
self.table_size = 2**domain * table_state_width // 8
|
||||
|
||||
# stride is the frequency with which we make data-driven
|
||||
# accesses to our reach table
|
||||
if stride not in [ 1, 2, 4, 8]:
|
||||
fail_out("Unsupported stride: %d" % stride)
|
||||
if stride * num_buckets > state_width:
|
||||
fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
|
||||
self.stride = stride
|
||||
|
||||
if num_buckets != 8:
|
||||
fail_out("Unsupported number of buckets: %d" % num_buckets)
|
||||
if state_width % num_buckets and state_width == 128:
|
||||
fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
|
||||
self.num_buckets = num_buckets
|
||||
|
||||
# Second - set up derived or optimization values - these can be
|
||||
# overridden by arguments that are passed in
|
||||
|
||||
self.datasize = 64
|
||||
self.bulk_load_type = IntegerType(self.datasize)
|
||||
self.datasize_bytes = self.datasize/8
|
||||
|
||||
self.value_extract_type = IntegerType(self.datasize)
|
||||
|
||||
self.fdr2_force_naive_load = False # disable everywhere for trunk
|
||||
|
||||
# extract frequency is how frequently (in bytes) we destructively shift
|
||||
# our state value after having pulled out that many bytes into a
|
||||
# confirm register (of one sort or another).
|
||||
# none means a default value - datasize, our biggest easily available GPR
|
||||
if extract_frequency is None:
|
||||
extract_frequency = self.datasize_bytes
|
||||
self.extract_frequency = extract_frequency
|
||||
self.extract_size = self.extract_frequency*self.num_buckets
|
||||
if extract_frequency < stride:
|
||||
fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
|
||||
if extract_frequency not in [ None, 1, 2, 4, 8, 16]:
|
||||
fail_out("Weird extract frequency: %d" % extract_frequency)
|
||||
|
||||
if self.extract_size <= 32:
|
||||
self.extr_type = IntegerType(32)
|
||||
elif self.extract_size <= 64:
|
||||
self.extr_type = IntegerType(64)
|
||||
else:
|
||||
fail_out("Implausible size %d required for confirm extract step" % size)
|
||||
|
||||
# extract_frequency is how often we pull out our state and place
|
||||
# it somewhere in a lossless fashion
|
||||
# confirm_frequency, on the other hand, is how frequently we
|
||||
# take the state extracted by extract_frequency and cobble it
|
||||
# together into a matching loop
|
||||
# confirm_frequency must be a multiple of extract_frequency
|
||||
# and must fit into a fast register; for now; we're going to
|
||||
# stay in the GPR domain
|
||||
if confirm_frequency is None:
|
||||
confirm_frequency = self.extract_frequency
|
||||
self.confirm_frequency = confirm_frequency
|
||||
if confirm_frequency % self.extract_frequency:
|
||||
fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))
|
||||
|
||||
self.conf_size = self.confirm_frequency * self.num_buckets
|
||||
if self.conf_size <= 32:
|
||||
self.conf_type = IntegerType(32)
|
||||
elif self.conf_size <= 64:
|
||||
self.conf_type = IntegerType(64)
|
||||
else:
|
||||
fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)
|
||||
|
||||
# how many bytes in flight at once
|
||||
self.loop_bytes = 16
|
||||
|
||||
# confirm configuration
|
||||
|
||||
# how many entries in the top-level confirm table - 256 means
|
||||
# complete split on the last character
|
||||
self.conf_top_level_split = 256
|
||||
|
||||
# how much we 'pull back' in confirm - this is obviously related
|
||||
# to the first level conf but we will keep two separate paramters
|
||||
# for this to avoid the risk of conflating these
|
||||
self.conf_pull_back = 1
|
||||
|
||||
if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
|
||||
fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))
|
||||
|
||||
# minor stuff
|
||||
self.default_body_indent = 8
|
562
src/fdr/fdr_compile.cpp
Normal file
562
src/fdr/fdr_compile.cpp
Normal file
@ -0,0 +1,562 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
class FDRCompiler : boost::noncopyable {
|
||||
private:
|
||||
const FDREngineDescription ŋ
|
||||
vector<u8> tab;
|
||||
const vector<hwlmLiteral> &lits;
|
||||
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
||||
bool make_small;
|
||||
|
||||
u8 *tabIndexToMask(u32 indexInTable);
|
||||
void assignStringToBucket(LiteralIndex l, BucketIndex b);
|
||||
void assignStringsToBuckets();
|
||||
#ifdef DEBUG
|
||||
void dumpMasks(const u8 *defaultMask);
|
||||
#endif
|
||||
void setupTab();
|
||||
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
|
||||
void createInitialState(FDR *fdr);
|
||||
|
||||
public:
|
||||
FDRCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const FDREngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
||||
make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
};
|
||||
|
||||
/**
 * Returns a pointer into the table bytes for entry indexInTable, where each
 * entry occupies (scheme width / 8) bytes.
 */
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    assert(indexInTable < tab.size());
    const auto bytes_per_entry = eng.getSchemeWidth() / 8;
    return tab.data() + (indexInTable * bytes_per_entry);
}
|
||||
|
||||
static
|
||||
void setbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] |= 1U << (bit % 8);
|
||||
}
|
||||
|
||||
static
|
||||
void clearbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] &= ~(1U << (bit % 8));
|
||||
}
|
||||
|
||||
static
|
||||
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
|
||||
for (u32 i = 0; i < num_bytes; i++) {
|
||||
dest[i] = a[i] & b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Fill in the initial state stored at fdr->start.
 *
 * For each bucket, the bits for positions [0, min_len - 1) are set, where
 * min_len is the length of the shortest literal assigned to that bucket;
 * the remaining positions are left untouched.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Shortest literal in this bucket; stays at ~0U for an empty bucket.
        const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex &lit_idx : bucket_lits) {
            min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        // Positions at or beyond min_len - 1 keep their zero bit.
        const u32 limit = min_len - 1;
        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            if (i >= limit) {
                continue;
            }
            setbit(start, eng.getSchemeBit(b, i));
        }
    }
}
|
||||
|
||||
/**
 * \brief Allocate the FDR structure and lay out its components.
 *
 * The layout is: FDR header (rounded up to 16 bytes), the table, the
 * confirm structures, the flood control structures and finally the optional
 * \p link buffer. The temporary confirm/flood buffers and \p link.first are
 * released with aligned_free() after being copied.
 */
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
    const size_t tabSize = eng.getTabSizeBytes();

    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);

    pair<u8 *, size_t> confirmTmp =
        setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    // Every component is expected to be a multiple of 16 bytes long.
    assert(ISALIGNED_16(tabSize));
    assert(ISALIGNED_16(confirmTmp.second));
    assert(ISALIGNED_16(floodControlTmp.second));
    assert(ISALIGNED_16(link.second));
    const size_t headerSize = ROUNDUP_16(sizeof(FDR));
    const size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second +
                                   floodControlTmp.second + link.second);

    DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
                 "total=%zu\n",
                 headerSize, tabSize, confirmTmp.second, floodControlTmp.second,
                 size);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc

    fdr->size = size;
    fdr->engineID = eng.getID();
    fdr->maxStringLen = verify_u32(maxLen(lits));
    createInitialState(fdr.get());

    u8 *const fdr_base = (u8 *)fdr.get();

    // Table goes directly after the (padded) header.
    u8 *cursor = fdr_base + ROUNDUP_16(sizeof(FDR));
    copy(tab.begin(), tab.end(), cursor);
    cursor += tabSize;

    // Confirm structures.
    memcpy(cursor, confirmTmp.first, confirmTmp.second);
    cursor += confirmTmp.second;
    aligned_free(confirmTmp.first);

    // Flood control structures; their offset is recorded in the header.
    fdr->floodOffset = verify_u32(cursor - fdr_base);
    memcpy(cursor, floodControlTmp.first, floodControlTmp.second);
    cursor += floodControlTmp.second;
    aligned_free(floodControlTmp.first);

    // Optional linked buffer; an offset of zero means "no link".
    if (!link.first) {
        fdr->link = 0;
    } else {
        fdr->link = verify_u32(cursor - fdr_base);
        memcpy(cursor, link.first, link.second);
        aligned_free(link.first);
    }

    return fdr;
}
|
||||
|
||||
/** \brief Append literal index \p l to the list of literals in bucket \p b. */
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
    vector<LiteralIndex> &members = bucketToLits[b];
    members.push_back(l);
}
|
||||
|
||||
struct LitOrder {
|
||||
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
|
||||
bool operator()(const u32 &i1, const u32 &i2) const {
|
||||
const string &i1s = vl[i1].s;
|
||||
const string &i2s = vl[i2].s;
|
||||
|
||||
size_t len1 = i1s.size(), len2 = i2s.size();
|
||||
|
||||
if (len1 != len2) {
|
||||
return len1 < len2;
|
||||
} else {
|
||||
string::const_reverse_iterator it1, it2;
|
||||
tie(it1, it2) =
|
||||
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
||||
if (it1 == i1s.rend()) {
|
||||
return false;
|
||||
}
|
||||
return *it1 < *it2;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const vector<hwlmLiteral> &vl;
|
||||
};
|
||||
|
||||
/**
 * \brief Score for a bucket whose shortest literal has length \p len and
 * which holds \p count literals.
 *
 * The length is capped at 128 and enters the score as (128 / len)^3 using
 * integer division of the cubes. A zero length yields the maximum score.
 */
static u64a getScoreUtil(u32 len, u32 count) {
    if (!len) {
        return (u64a)-1;
    }

    const u32 LEN_THRESH = 128;
    const u32 elen = (len < LEN_THRESH) ? len : LEN_THRESH;
    const u32 threshCubed = LEN_THRESH * LEN_THRESH * LEN_THRESH;
    const u32 elenCubed = elen * elen * elen;
    const u64a lenScore = threshCubed / elenCubed;

    // deemphasize count - possibly more than needed
    // this might be overkill in the other direction
    return count * lenScore;
}
|
||||
|
||||
//#define DEBUG_ASSIGNMENT
|
||||
void FDRCompiler::assignStringsToBuckets() {
|
||||
typedef u64a SCORE; // 'Score' type
|
||||
const SCORE MAX_SCORE = (SCORE)-1;
|
||||
const u32 CHUNK_MAX = 512;
|
||||
const u32 BUCKET_MAX = 16;
|
||||
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
||||
|
||||
u32 ls = verify_u32(lits.size());
|
||||
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
||||
vector<LiteralIndex> vli;
|
||||
vli.resize(ls);
|
||||
map<u32, u32> lenCounts;
|
||||
for (LiteralIndex l = 0; l < ls; l++) {
|
||||
vli[l] = l;
|
||||
lenCounts[lits[l].s.size()]++;
|
||||
}
|
||||
// sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
|
||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
|
||||
i != e; ++i) {
|
||||
printf("l<%d>:%d ", i->first, i->second);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
// TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
|
||||
// yes - after we factor this out and merge in the Teddy style of building we can look
|
||||
// at this, although the teddy merge modelling is quite different. It's still probably
|
||||
// adaptable to some extent for this class of problem
|
||||
|
||||
u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound)
|
||||
u32 count[CHUNK_MAX]; // how many are in this chunk
|
||||
u32 length[CHUNK_MAX]; // how long things in the chunk are
|
||||
|
||||
const u32 MAX_CONSIDERED_LENGTH = 16;
|
||||
u32 currentChunk = 0;
|
||||
u32 currentSize = 0;
|
||||
u32 chunkStartID = 0;
|
||||
u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
|
||||
|
||||
for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
|
||||
LiteralIndex l = vli[i];
|
||||
if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
|
||||
(currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
|
||||
currentSize = lits[l].s.size();
|
||||
if (currentChunk) {
|
||||
count[currentChunk - 1 ] = i - chunkStartID;
|
||||
}
|
||||
chunkStartID = firstIds[currentChunk] = i;
|
||||
length[currentChunk] = currentSize;
|
||||
currentChunk++;
|
||||
}
|
||||
}
|
||||
count[currentChunk - 1] = ls - chunkStartID;
|
||||
// close off chunks with an empty row
|
||||
firstIds[currentChunk] = ls;
|
||||
length[currentChunk] = 0;
|
||||
count[currentChunk] = 0;
|
||||
u32 nChunks = currentChunk + 1;
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
|
||||
}
|
||||
#endif
|
||||
|
||||
SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
|
||||
u32 nb = eng.getNumBuckets();
|
||||
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
u32 cnt = 0;
|
||||
for (u32 k = j; k < nChunks; ++k) {
|
||||
cnt += count[k];
|
||||
}
|
||||
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < nb; i++) {
|
||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
||||
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
|
||||
u32 cnt = count[j];
|
||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
||||
SCORE score = getScoreUtil(length[j], cnt);
|
||||
if (score > best.first) {
|
||||
break; // if we're now worse locally than our best score, give up
|
||||
}
|
||||
score += t[k][i-1].first;
|
||||
if (score < best.first) {
|
||||
best = make_pair(score, k);
|
||||
}
|
||||
}
|
||||
t[j][i] = best;
|
||||
}
|
||||
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
for (u32 i = 0; i < nb; i++) {
|
||||
SCORE_INDEX_PAIR v = t[j][i];
|
||||
printf("<%7lld,%3d>", v.first, v.second);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// our best score is in best[0][N_BUCKETS-1] and we can follow the links
|
||||
// to find where our buckets should start and what goes into them
|
||||
for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
|
||||
u32 j = t[i][n - 1].second;
|
||||
if (j == 0) {
|
||||
j = nChunks - 1;
|
||||
}
|
||||
// put chunks between i - j into bucket (NBUCKETS-1) - n
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
|
||||
nb - n);
|
||||
#endif
|
||||
for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
|
||||
assignStringToBucket((LiteralIndex)vli[k], nb - n);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
|
||||
const size_t width = eng.getSchemeWidth();
|
||||
printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
if (memcmp(m, defaultMask, width / 8)) {
|
||||
printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
||||
const vector<LiteralIndex> &vl,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
SuffixPositionInString pos,
|
||||
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
||||
u32 distance = 0;
|
||||
if (eng.bits <= 8) {
|
||||
distance = 1;
|
||||
} else if (eng.bits <= 16) {
|
||||
distance = 2;
|
||||
} else if (eng.bits <= 32) {
|
||||
distance = 4;
|
||||
}
|
||||
|
||||
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
|
||||
i != e; ++i) {
|
||||
if (e - i > 5) {
|
||||
__builtin_prefetch(&lits[*(i + 5)]);
|
||||
}
|
||||
const hwlmLiteral &lit = lits[*i];
|
||||
const size_t sz = lit.s.size();
|
||||
u32 mask = 0;
|
||||
u32 dontCares = 0;
|
||||
for (u32 cnt = 0; cnt < distance; cnt++) {
|
||||
int newPos = pos - cnt;
|
||||
u8 dontCareByte = 0x0;
|
||||
u8 maskByte = 0x0;
|
||||
if (newPos < 0 || ((u32)newPos >= sz)) {
|
||||
dontCareByte = 0xff;
|
||||
} else {
|
||||
u8 c = lit.s[sz - newPos - 1];
|
||||
maskByte = c;
|
||||
u32 remainder = eng.bits - cnt * 8;
|
||||
assert(remainder != 0);
|
||||
if (remainder < 8) {
|
||||
u8 cmask = (1U << remainder) - 1;
|
||||
maskByte &= cmask;
|
||||
dontCareByte |= ~cmask;
|
||||
}
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
maskByte &= 0xdf;
|
||||
dontCareByte |= 0x20;
|
||||
}
|
||||
}
|
||||
u32 loc = cnt * 8;
|
||||
mask |= maskByte << loc;
|
||||
dontCares |= dontCareByte << loc;
|
||||
}
|
||||
|
||||
// truncate m and dc down to nBits
|
||||
mask &= (1U << eng.bits) - 1;
|
||||
dontCares &= (1U << eng.bits) - 1;
|
||||
if (dontCares == ((1U << eng.bits) - 1)) {
|
||||
return true;
|
||||
}
|
||||
m2[dontCares].insert(mask);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void FDRCompiler::setupTab() {
|
||||
const size_t mask_size = eng.getSchemeWidth() / 8;
|
||||
assert(mask_size);
|
||||
|
||||
vector<u8> defaultMask(mask_size, 0xff);
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
||||
}
|
||||
|
||||
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
|
||||
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
const vector<LiteralIndex> &vl = bucketToLits[b];
|
||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||
u32 bit = eng.getSchemeBit(b, pos);
|
||||
M2SET m2;
|
||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||
if (done) {
|
||||
clearbit(&defaultMask[0], bit);
|
||||
continue;
|
||||
}
|
||||
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
|
||||
++i) {
|
||||
u32 dc = i->first;
|
||||
const ue2::unordered_set<u32> &mskSet = i->second;
|
||||
u32 v = ~dc;
|
||||
do {
|
||||
u32 b2 = v & dc;
|
||||
for (ue2::unordered_set<u32>::const_iterator
|
||||
i2 = mskSet.begin(),
|
||||
e2 = mskSet.end();
|
||||
i2 != e2; ++i2) {
|
||||
u32 val = (*i2 & ~dc) | b2;
|
||||
clearbit(tabIndexToMask(val), bit);
|
||||
}
|
||||
v = (v + (dc & -dc)) | ~dc;
|
||||
} while (v != ~dc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
andMask(m, m, &defaultMask[0], mask_size);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
dumpMasks(&defaultMask[0]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * \brief Run the three build stages in order: bucket assignment, table
 * construction and final layout (which consumes \p link).
 */
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
    assignStringsToBuckets();
    setupTab();
    aligned_unique_ptr<FDR> fdr = setupFDR(link);
    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
aligned_unique_ptr<FDR>
|
||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey, u32 hint,
|
||||
hwlmStreamingControl *stream_control) {
|
||||
pair<u8 *, size_t> link(nullptr, 0);
|
||||
if (stream_control) {
|
||||
link = fdrBuildTableStreaming(lits, stream_control);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||
|
||||
if (grey.fdrAllowTeddy) {
|
||||
aligned_unique_ptr<FDR> fdr
|
||||
= teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||
if (fdr) {
|
||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||
return fdr;
|
||||
} else {
|
||||
DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
|
||||
}
|
||||
}
|
||||
|
||||
const unique_ptr<FDREngineDescription> des =
|
||||
(hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
|
||||
: getFdrDescription(hint);
|
||||
|
||||
if (!des) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FDRCompiler fc(lits, *des, make_small);
|
||||
return fc.build(link);
|
||||
}
|
||||
|
||||
/**
 * \brief Build an FDR matcher for \p lits, letting the compiler choose the
 * engine (no hint).
 */
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
                                      bool make_small, const target_t &target,
                                      const Grey &grey,
                                      hwlmStreamingControl *stream_control) {
    const u32 no_hint = HINT_INVALID;
    return fdrBuildTableInternal(lits, make_small, target, grey, no_hint,
                                 stream_control);
}
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
/**
 * \brief Build an FDR matcher for \p lits using the engine selected by
 * \p hint. Only available in non-release builds.
 */
aligned_unique_ptr<FDR>
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
                    const target_t &target, const Grey &grey,
                    hwlmStreamingControl *stream_control) {
    // The streaming link is created inside fdrBuildTableInternal(); no local
    // link is needed here.
    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
                                 stream_control);
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
// FIXME: should be compile-time only
|
||||
size_t fdrSize(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
return fdr->size;
|
||||
}
|
66
src/fdr/fdr_compile.h
Normal file
66
src/fdr/fdr_compile.h
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_H
|
||||
#define FDR_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
struct hwlmStreamingControl;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
/**
 * \brief Build an FDR literal matcher for the literals in \p lits.
 *
 * \param lits           literals to match
 * \param make_small     passed on to the table/confirm builders
 * \param target         target platform description
 * \param grey           grey box settings
 * \param stream_control optional streaming control; may be null
 */
ue2::aligned_unique_ptr<FDR> fdrBuildTable(
    const std::vector<hwlmLiteral> &lits, bool make_small,
    const target_t &target, const Grey &grey,
    hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
/**
 * \brief As fdrBuildTable(), but with an explicit engine \p hint.
 *
 * Declared only when RELEASE_BUILD is not defined.
 */
ue2::aligned_unique_ptr<FDR> fdrBuildTableHinted(
    const std::vector<hwlmLiteral> &lits, bool make_small, u32 hint,
    const target_t &target, const Grey &grey,
    hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
88
src/fdr/fdr_compile_internal.h
Normal file
88
src/fdr/fdr_compile_internal.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_INTERNAL_H
|
||||
#define FDR_COMPILE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
struct FDRConfirm;
|
||||
struct LitInfo;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// a pile of decorative typedefs
|
||||
// good for documentation purposes more than anything else
|
||||
typedef u32 LiteralIndex;
|
||||
typedef u32 ConfirmIndex;
|
||||
typedef u32 SuffixPositionInString; // zero is last byte, counting back
|
||||
// into the string
|
||||
typedef u32 BucketIndex;
|
||||
typedef u32 SchemeBitIndex;
|
||||
typedef u32 PositionInBucket; // zero is 'we are matching right now!",
|
||||
// counting towards future matches
|
||||
|
||||
class EngineDescription;
|
||||
class FDREngineDescription;
|
||||
struct hwlmStreamingControl;
|
||||
|
||||
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool make_small);
|
||||
|
||||
std::pair<u8 *, size_t> setupFullMultiConfs(
|
||||
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
||||
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small);
|
||||
|
||||
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||
// state; if we don't have a flood that's long enough we won't be in the
|
||||
// right state yet to allow blindly advancing
|
||||
std::pair<u8 *, size_t>
|
||||
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng);
|
||||
|
||||
std::pair<u8 *, size_t>
|
||||
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control);
|
||||
|
||||
static constexpr u32 HINT_INVALID = 0xffffffff;
|
||||
|
||||
// fdr_compile_util.cpp utilities
|
||||
size_t maxLen(const std::vector<hwlmLiteral> &lits);
|
||||
size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
|
||||
u32 absdiff(u32 i, u32 j);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
65
src/fdr/fdr_compile_util.cpp
Normal file
65
src/fdr/fdr_compile_util.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
size_t maxLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
for (const auto &lit : lits) {
|
||||
rv = max(rv, lit.s.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
|
||||
size_t rv = (size_t)-1;
|
||||
*count = 0;
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() < rv) {
|
||||
rv = lit.s.size();
|
||||
*count = 1;
|
||||
} else if (lit.s.size() == rv) {
|
||||
(*count)++;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** \brief Absolute difference of two unsigned values. */
u32 absdiff(u32 i, u32 j) {
    if (i > j) {
        return i - j;
    }
    return j - i;
}
|
||||
|
||||
} // namespace ue2
|
100
src/fdr/fdr_confirm.h
Normal file
100
src/fdr/fdr_confirm.h
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_H
|
||||
#define FDR_CONFIRM_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
static really_inline
|
||||
u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
|
||||
return ((lv & andmsk) * mult) >> (sizeof(u64a)*8 - nBits);
|
||||
}
|
||||
|
||||
// data structures
|
||||
// TODO: fix this hard-coding
|
||||
#define CONF_TYPE u64a
|
||||
#define CONF_HASH_CALL mul_hash_64
|
||||
|
||||
/** Bit flags stored in LitInfo::flags. */
typedef enum LitInfoFlags {
    NoFlags = 0,
    Caseless = 1 << 0,       /* literal is matched case-insensitively */
    NoRepeat = 1 << 1,       /* set for literals marked 'noruns' */
    ComplexConfirm = 1 << 2  /* literal has a mask longer than its string */
} LitInfoFlags;
|
||||
|
||||
/**
|
||||
* \brief Structure describing a literal, linked to by FDRConfirm.
|
||||
*
|
||||
* This structure is followed in memory by a variable-sized string prefix at
|
||||
* LitInfo::s, for strings that are longer than CONF_TYPE.
|
||||
*/
|
||||
struct LitInfo {
|
||||
CONF_TYPE v;
|
||||
CONF_TYPE msk;
|
||||
hwlm_group_t groups;
|
||||
u32 size;
|
||||
u32 id; // literal ID as passed in
|
||||
u8 flags; /* LitInfoFlags */
|
||||
u8 next;
|
||||
u8 extended_size;
|
||||
u8 s[1]; // literal prefix, which continues "beyond" this struct.
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
|
||||
/**
|
||||
* \brief FDR confirm header.
|
||||
*
|
||||
* This structure is followed in memory by:
|
||||
*
|
||||
* -# lit index mapping (array of u32)
|
||||
* -# list of LitInfo structures
|
||||
*/
|
||||
struct FDRConfirm {
|
||||
CONF_TYPE andmsk;
|
||||
CONF_TYPE mult;
|
||||
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
|
||||
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
|
||||
hwlm_group_t groups;
|
||||
u32 soleLitSize;
|
||||
u32 soleLitCmp;
|
||||
u32 soleLitMsk;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
|
||||
const u8 *base = (const u8 *)fdrc;
|
||||
const u32 *litIndex =
|
||||
(const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
|
||||
assert(ISALIGNED(litIndex));
|
||||
return litIndex;
|
||||
}
|
||||
|
||||
#endif // FDR_CONFIRM_H
|
479
src/fdr/fdr_confirm_compile.cpp
Normal file
479
src/fdr/fdr_confirm_compile.cpp
Normal file
@ -0,0 +1,479 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef u8 ConfSplitType;
|
||||
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
|
||||
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
|
||||
|
||||
// return the number of bytes beyond a length threshold in all strings in lits
|
||||
static
|
||||
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
|
||||
size_t tot = 0;
|
||||
for (const auto &lit : lits) {
|
||||
size_t sz = lit.s.size();
|
||||
if (sz > threshold) {
|
||||
tot += ROUNDUP_N(sz - threshold, 8);
|
||||
}
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
static
|
||||
u64a make_u64a_mask(const vector<u8> &v) {
|
||||
assert(v.size() <= sizeof(u64a));
|
||||
if (v.size() > sizeof(u64a)) {
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
u64a mask = 0;
|
||||
size_t vlen = v.size();
|
||||
size_t len = std::min(vlen, sizeof(mask));
|
||||
unsigned char *m = (unsigned char *)&mask;
|
||||
memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a temporary vector of LitInfo structures (without the corresponding
|
||||
* pointers to the actual strings; these cannot be laid out yet). These
|
||||
* stay in 1:1 correspondence with the lits[] vector as that's the only
|
||||
* place we have to obtain our full strings.
|
||||
*/
|
||||
static
|
||||
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
CONF_TYPE &andmsk) {
|
||||
const CONF_TYPE all_ones = ~(u64a)0;
|
||||
andmsk = all_ones; // fill in with 'and' of all literal masks
|
||||
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
const hwlmLiteral &lit = lits[i];
|
||||
LitInfo &info = tmpLitInfo[i];
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.id = lit.id;
|
||||
u8 flags = NoFlags;
|
||||
if (lit.nocase) {
|
||||
flags |= Caseless;
|
||||
}
|
||||
if (lit.noruns) {
|
||||
flags |= NoRepeat;
|
||||
}
|
||||
if (lit.msk.size() > lit.s.size()) {
|
||||
flags |= ComplexConfirm;
|
||||
info.extended_size = verify_u8(lit.msk.size());
|
||||
}
|
||||
info.flags = flags;
|
||||
info.size = verify_u32(lit.s.size());
|
||||
info.groups = lit.groups;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
CONF_TYPE msk = all_ones;
|
||||
CONF_TYPE val = 0;
|
||||
for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
|
||||
u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
|
||||
if (j >= lit.s.size()) {
|
||||
msk &= ~((CONF_TYPE)0xff << shiftLoc);
|
||||
} else {
|
||||
u8 c = lit.s[lit.s.size() - j - 1];
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
|
||||
val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
|
||||
} else {
|
||||
val |= (CONF_TYPE)c << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info.v = val;
|
||||
info.msk = msk;
|
||||
if (!lit.msk.empty()) {
|
||||
u64a l_msk = make_u64a_mask(lit.msk);
|
||||
u64a l_cmp = make_u64a_mask(lit.cmp);
|
||||
|
||||
// test for consistency - if there's intersection, then v and msk
|
||||
// values must line up
|
||||
UNUSED u64a intersection = l_msk & info.msk;
|
||||
assert((info.v & intersection) == (l_cmp & intersection));
|
||||
|
||||
// incorporate lit.msk, lit.cmp into v and msk
|
||||
info.msk |= l_msk;
|
||||
info.v |= l_cmp;
|
||||
}
|
||||
|
||||
andmsk &= info.msk;
|
||||
}
|
||||
}
|
||||
|
||||
//#define FDR_CONFIRM_DUMP 1
|
||||
|
||||
static
|
||||
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool applyOneCharOpt, bool make_small, bool make_confirm) {
|
||||
vector<LitInfo> tmpLitInfo(lits.size());
|
||||
CONF_TYPE andmsk;
|
||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
printf("-------------------\n");
|
||||
#endif
|
||||
|
||||
// just magic numbers and crude measures for now
|
||||
u32 nBits;
|
||||
if (make_small) {
|
||||
nBits = min(10U, lg2(lits.size()) + 1);
|
||||
} else {
|
||||
nBits = min(13U, lg2(lits.size()) + 4);
|
||||
}
|
||||
|
||||
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
||||
u32 flags = 0;
|
||||
// we use next three variables for 'confirmless' case to speed-up
|
||||
// confirmation process
|
||||
u32 soleLitSize = 0;
|
||||
u32 soleLitCmp = 0;
|
||||
u32 soleLitMsk = 0;
|
||||
|
||||
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
|
||||
lits[0].msk.empty()) || make_confirm == false) {
|
||||
flags = FDRC_FLAG_NO_CONFIRM;
|
||||
if (lits[0].noruns) {
|
||||
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
||||
}
|
||||
mult = 0;
|
||||
soleLitSize = lits[0].s.size() - 1;
|
||||
// we can get to this point only in confirmless case;
|
||||
// it means that we have only one literal per FDRConfirm (no packing),
|
||||
// with no literal mask and size of literal is less or equal
|
||||
// to the number of masks of Teddy engine;
|
||||
// maximum number of masks for Teddy is 4, so the size of
|
||||
// literal is definitely less or equal to size of u32
|
||||
assert(lits[0].s.size() <= sizeof(u32));
|
||||
for (u32 i = 0; i < lits[0].s.size(); i++) {
|
||||
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
|
||||
u8 c = lits[0].s[lits[0].s.size() - i - 1];
|
||||
if (lits[0].nocase && ourisalpha(c)) {
|
||||
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
|
||||
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
|
||||
}
|
||||
else {
|
||||
soleLitCmp |= (u32)c << shiftLoc;
|
||||
soleLitMsk |= (u32)0xff << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we can walk the vector and assign elements from the vectors to a
|
||||
// map by hash value
|
||||
map<u32, vector<LiteralIndex> > res2lits;
|
||||
hwlm_group_t gm = 0;
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
LitInfo & li = tmpLitInfo[i];
|
||||
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
|
||||
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
|
||||
res2lits[hash].push_back(i);
|
||||
gm |= li.groups;
|
||||
}
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
// print out the literals reversed - makes it easier to line up analyses
|
||||
// that are end-offset based
|
||||
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
|
||||
e = res2lits.end(); i != e; ++i) {
|
||||
u32 hash = i->first;
|
||||
vector<LiteralIndex> & vlidx = i->second;
|
||||
if (vlidx.size() > 1) {
|
||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||
u32 min_len = lits[vlidx.front()].s.size();
|
||||
vector<set<u8> > vsl; // contains the set of chars at each location
|
||||
// reversed from the end
|
||||
vsl.resize(1024);
|
||||
u32 total_string_size = 0;
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
total_string_size += lits[litIdx].s.size();
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
|
||||
}
|
||||
min_len = MIN(min_len, lits[litIdx].s.size());
|
||||
}
|
||||
printf("common ");
|
||||
for (u32 j = 0; j < min_len; j++) {
|
||||
if (vsl[j].size() == 1) {
|
||||
printf("%02x", (u32)*vsl[j].begin());
|
||||
} else {
|
||||
printf("__");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
u32 dist_from_end = lits[litIdx].s.size() - j;
|
||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||
printf("__");
|
||||
} else {
|
||||
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
u32 total_compares = 0;
|
||||
for (u32 j = 0; j < 1024; j++) { // naughty
|
||||
total_compares += vsl[j].size();
|
||||
}
|
||||
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
|
||||
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
|
||||
|
||||
// this size can now be a worst-case as we can always be a bit smaller
|
||||
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
|
||||
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
|
||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||
|
||||
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
|
||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
fdrc->andmsk = andmsk;
|
||||
fdrc->mult = mult;
|
||||
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
|
||||
fdrc->flags = flags;
|
||||
fdrc->soleLitSize = soleLitSize;
|
||||
fdrc->soleLitCmp = soleLitCmp;
|
||||
fdrc->soleLitMsk = soleLitMsk;
|
||||
|
||||
fdrc->groups = gm;
|
||||
|
||||
// After the FDRConfirm, we have the lit index array.
|
||||
u8 *fdrc_base = (u8 *)fdrc;
|
||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
||||
ptr += bitsToLitIndexSize;
|
||||
|
||||
// After the lit index array, we have the LitInfo structures themselves,
|
||||
// which vary in size (as each may have a variable-length string after it).
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
|
||||
// Walk the map by hash value assigning indexes and laying out the
|
||||
// elements (and their associated string confirm material) in memory.
|
||||
for (std::map<u32, vector<LiteralIndex> >::const_iterator
|
||||
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
|
||||
const u32 hash = i->first;
|
||||
const vector<LiteralIndex> &vlidx = i->second;
|
||||
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
|
||||
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
|
||||
// Write LitInfo header.
|
||||
u8 *oldPtr = ptr;
|
||||
LitInfo &finalLI = *(LitInfo *)ptr;
|
||||
finalLI = tmpLitInfo[litIdx];
|
||||
|
||||
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
||||
|
||||
// Write literal prefix (everything before the last N characters,
|
||||
// as the last N are already confirmed).
|
||||
const string &t = lits[litIdx].s;
|
||||
if (t.size() > sizeof(CONF_TYPE)) {
|
||||
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
|
||||
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
|
||||
ptr = &finalLI.s[0] + prefix_len;
|
||||
}
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
if (i2 + 1 == e2) {
|
||||
finalLI.next = 0x0;
|
||||
} else {
|
||||
// our next field represents an adjustment on top of
|
||||
// current address + the actual size of the literal
|
||||
// so we track any rounding up done for alignment and
|
||||
// add this in - that way we don't have to use bigger
|
||||
// than a u8 (for now)
|
||||
assert((size_t)(ptr - oldPtr) > t.size());
|
||||
finalLI.next = verify_u8(ptr - oldPtr - t.size());
|
||||
}
|
||||
}
|
||||
assert((size_t)(ptr - fdrc_base) <= size);
|
||||
}
|
||||
|
||||
*fdrc_p = fdrc;
|
||||
|
||||
// Return actual used size, not worst-case size. Must be rounded up to
|
||||
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||
alignof(FDRConfirm));
|
||||
assert(actual_size <= size);
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
static
|
||||
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng, BC2CONF &bc2Conf,
|
||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small) {
|
||||
u32 pullBack = eng.getConfirmPullBackDistance();
|
||||
u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
|
||||
bool splitHasCase = splitMask & 0x20;
|
||||
|
||||
bool makeConfirm = true;
|
||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
||||
getTeddyDescription(eng.getID());
|
||||
if (teddyDescr) {
|
||||
makeConfirm = teddyDescr->needConfirm(lits);
|
||||
}
|
||||
|
||||
u32 totalConfirmSize = 0;
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
|
||||
for (vector<LiteralIndex>::const_iterator
|
||||
i = bucketToLits[b].begin(),
|
||||
e = bucketToLits[b].end();
|
||||
i != e; ++i) {
|
||||
hwlmLiteral lit = lits[*i]; // copy
|
||||
// c is last char of this literal
|
||||
u8 c = *(lit.s.rbegin());
|
||||
|
||||
bool suppressSplit = false;
|
||||
if (pullBack) {
|
||||
// make a shorter string to work over if we're pulling back
|
||||
// getFDRConfirm doesn't know about that stuff
|
||||
assert(lit.s.size() >= pullBack);
|
||||
lit.s.resize(lit.s.size() - pullBack);
|
||||
|
||||
u8 c_sub, c_sub_msk;
|
||||
if (lit.msk.empty()) {
|
||||
c_sub = 0;
|
||||
c_sub_msk = 0;
|
||||
} else {
|
||||
c_sub = *(lit.cmp.rbegin());
|
||||
c_sub_msk = *(lit.msk.rbegin());
|
||||
size_t len = lit.msk.size() -
|
||||
min(lit.msk.size(), (size_t)pullBack);
|
||||
lit.msk.resize(len);
|
||||
lit.cmp.resize(len);
|
||||
}
|
||||
|
||||
// if c_sub_msk is 0xff and lit.nocase
|
||||
// resteer 'c' to an exact value and set suppressSplit
|
||||
if ((c_sub_msk == 0xff) && (lit.nocase)) {
|
||||
suppressSplit = true;
|
||||
c = c_sub;
|
||||
}
|
||||
}
|
||||
|
||||
if (!suppressSplit && splitHasCase && lit.nocase &&
|
||||
ourisalpha(c)) {
|
||||
vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
|
||||
vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
|
||||
} else {
|
||||
vl[c & splitMask].push_back(lit);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
||||
if (!vl[c].empty()) {
|
||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||
FDRConfirm *fdrc;
|
||||
size_t size = getFDRConfirm(vl[c], &fdrc,
|
||||
eng.typicallyHoldsOneCharLits(),
|
||||
make_small, makeConfirm);
|
||||
BucketSplitPair p = make_pair(b, c);
|
||||
bc2Conf[p] = make_pair(fdrc, size);
|
||||
totalConfirmSize += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalConfirmSize;
|
||||
}
|
||||
|
||||
/**
 * Build the complete confirm region: a table of u32 offsets (one slot per
 * split value and bucket, indexed as split * numBuckets + bucket) followed by
 * the FDRConfirm structures produced by setupMultiConfirms().
 *
 * Each per-(bucket, split) FDRConfirm is copied into the buffer and its
 * temporary allocation released; table slots with no FDRConfirm stay zero
 * because the buffer comes from aligned_zmalloc().
 *
 * \return the buffer (allocated with aligned_zmalloc()) and its total size.
 */
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
        const EngineDescription &eng,
        map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
        bool make_small) {
    BC2CONF bc2Conf;
    const u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf,
                                                    bucketToLits, make_small);

    const u32 primarySwitch = eng.getConfirmTopLevelSplit();
    const u32 nBuckets = eng.getNumBuckets();
    const u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
    const u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);

    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf;
    u8 *ptr = buf + totalConfSwitchSize; // confirms follow the offset table

    for (const auto &entry : bc2Conf) {
        FDRConfirm *fdrc = entry.second.first;
        const size_t fdrcSize = entry.second.second;

        // confirm offset is relative to the base of this structure, now
        const u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
        memcpy(ptr, fdrc, fdrcSize);
        ptr += fdrcSize;
        aligned_free(fdrc);

        const BucketIndex b = entry.first.first;
        const u8 c = entry.first.second;
        confBase[c * nBuckets + b] = confirm_offset;
    }
    return make_pair(buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
244
src/fdr/fdr_confirm_runtime.h
Normal file
244
src/fdr/fdr_confirm_runtime.h
Normal file
@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_RUNTIME_H
|
||||
#define FDR_CONFIRM_RUNTIME_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
|
||||
#define CONF_LOADVAL_CALL lv_u64a
|
||||
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce
|
||||
|
||||
// this is ordinary confirmation function which runs through
|
||||
// the whole confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
u32 pullBackAmount,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
|
||||
CONF_TYPE v;
|
||||
const u8 * confirm_loc = buf + i - pullBackAmount - 7;
|
||||
if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
||||
v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
|
||||
} else { // r == VECTORING, confirm_loc < buf
|
||||
u64a histBytes = a->histBytes;
|
||||
v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
|
||||
// stitch together v (which doesn't move) and history (which does)
|
||||
u32 overhang = buf - confirm_loc;
|
||||
histBytes >>= 64 - (overhang * 8);
|
||||
v |= histBytes;
|
||||
}
|
||||
|
||||
u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
|
||||
u32 start = getConfirmLitIndex(fdrc)[c];
|
||||
if (P0(start)) {
|
||||
const struct LitInfo *l =
|
||||
(const struct LitInfo *)((const u8 *)fdrc + start);
|
||||
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(l));
|
||||
|
||||
if (P0( (v & l->msk) != l->v)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((*last_match == l->id) && (l->flags & NoRepeat)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
|
||||
|
||||
u8 caseless = l->flags & Caseless;
|
||||
if (loc < buf) {
|
||||
u32 full_overhang = buf - loc;
|
||||
|
||||
const u8 * history = (caseless) ?
|
||||
a->buf_history_nocase : a->buf_history;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
|
||||
// can't do a vectored confirm either if we don't have
|
||||
// the bytes
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
// as for the regular case, no need to do a full confirm if
|
||||
// we're a short literal
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
const u8 * s1 = l->s;
|
||||
const u8 * s2 = s1 + full_overhang;
|
||||
const u8 * loc1 = history + len_history - full_overhang;
|
||||
const u8 * loc2 = buf;
|
||||
size_t size1 = MIN(full_overhang,
|
||||
l->size - sizeof(CONF_TYPE));
|
||||
size_t wind_size2_back = sizeof(CONF_TYPE) +
|
||||
full_overhang;
|
||||
size_t size2 = wind_size2_back > l->size ?
|
||||
0 : l->size - wind_size2_back;
|
||||
|
||||
if (cmpForward(loc1, s1, size1, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
if (cmpForward(loc2, s2, size2, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else { // NON-VECTORING PATH
|
||||
|
||||
// if string < conf_type we don't need regular string cmp
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (P0(!(l->groups & *control))) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(l->flags & ComplexConfirm)) {
|
||||
const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
|
||||
if (loc2 < buf) {
|
||||
u32 full_overhang = buf - loc2;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*last_match = l->id;
|
||||
*control = a->cb(loc - buf, i, l->id, a->ctxt);
|
||||
out:
|
||||
oldNext = l->next; // oldNext is either 0 or an 'adjust' value
|
||||
l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
|
||||
} while (oldNext);
|
||||
}
|
||||
}
|
||||
|
||||
// 'light-weight' confirmation function which is used by 1-mask Teddy;
|
||||
// in the 'confirmless' case it simply calls callback function,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit1(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
u32 id = fdrc->nBitsOrSoleID;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy
|
||||
// In the 'confirmless' case it makes fast 32-bit comparison,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBitMany(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (i < a->start_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
const u32 id = fdrc->nBitsOrSoleID;
|
||||
const u32 len = fdrc->soleLitSize;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (r == VECTORING && len > i - a->start_offset) {
|
||||
if (len > (i + a->len_history)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 cmp = (u32)a->buf[i] << 24;
|
||||
|
||||
if (len <= i) {
|
||||
for (u32 j = 1; j <= len; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
} else {
|
||||
for (u32 j = 1; j <= i; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
cmp |= (u32)(a->histBytes >> (40 + i * 8));
|
||||
}
|
||||
|
||||
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i - len, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
98
src/fdr/fdr_dump.cpp
Normal file
98
src/fdr/fdr_dump.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_dump.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool fdrIsTeddy(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
u32 engine = fdr->engineID;
|
||||
|
||||
/* teddys don't have an fdr engine description (which is why the dump code
|
||||
* is so broken). */
|
||||
|
||||
return !getFdrDescription(engine);
|
||||
}
|
||||
|
||||
void fdrPrintStats(const FDR *fdr, FILE *f) {
|
||||
const bool isTeddy = fdrIsTeddy(fdr);
|
||||
|
||||
if (isTeddy) {
|
||||
fprintf(f, "TEDDY: %u\n", fdr->engineID);
|
||||
} else {
|
||||
fprintf(f, "FDR: %u\n", fdr->engineID);
|
||||
}
|
||||
|
||||
if (isTeddy) {
|
||||
unique_ptr<TeddyEngineDescription> des =
|
||||
getTeddyDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " masks %u\n", des->numMasks);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
} else {
|
||||
unique_ptr<FDREngineDescription> des =
|
||||
getFdrDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " stride %u\n", des->stride);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " width %u\n", des->schemeWidth);
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, " strings ???\n");
|
||||
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
|
||||
fprintf(f, " max length %u\n", fdr->maxStringLen);
|
||||
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
49
src/fdr/fdr_dump.h
Normal file
49
src/fdr/fdr_dump.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: dump API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_DUMP_H
|
||||
#define FDR_DUMP_H
|
||||
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void fdrPrintStats(const struct FDR *fdr, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
#endif // FDR_DUMP_H
|
216
src/fdr/fdr_engine_description.cpp
Normal file
216
src/fdr/fdr_engine_description.cpp
Normal file
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "hs_compile.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/compare.h" // for ourisalpha()
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#include "fdr_autogen_compiler.cpp"
|
||||
|
||||
/**
 * Construct an engine description from a static engine definition: the common
 * EngineDescription fields are taken from \a def (with the target derived via
 * targetByArchFeatures()), followed by the FDR-specific scheme width, stride
 * and bits.
 */
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
    : EngineDescription(def.id,
                        targetByArchFeatures(def.cpu_features),
                        def.numBuckets,
                        def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      schemeWidth(def.schemeWidth),
      stride(def.stride),
      bits(def.bits) {
}
|
||||
|
||||
/**
 * Default flood suffix length: the scheme width divided by the number of
 * buckets, rounded up, plus one.
 */
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    const auto width = getSchemeWidth();
    const auto buckets = getNumBuckets();
    // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
    const auto perBucket = (width + buckets - 1) / buckets;
    // the +1 avoids pain due to various reach choices
    return perBucket + 1;
}
|
||||
|
||||
static
|
||||
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
|
||||
u32 desiredStride = 1; // always our safe fallback
|
||||
if (min_len > 1) {
|
||||
if (num_lits < 250) {
|
||||
// small cases we just go for it
|
||||
desiredStride = min_len;
|
||||
} else if (num_lits < 800) {
|
||||
// intermediate cases
|
||||
desiredStride = min_len - 1;
|
||||
} else if (num_lits < 5000) {
|
||||
// for larger but not huge sizes, go to stride 2 only if we have at
|
||||
// least minlen 3
|
||||
desiredStride = MIN(min_len - 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
// patch if count is quite large - a ton of length 2 literals can
|
||||
// break things
|
||||
#ifdef TRY_THIS_LATER
|
||||
if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) {
|
||||
desiredStride = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// patch stuff just for the stride 4 case; don't let min_len=4,
|
||||
// desiredStride=4 through as even a few length 4 literals can break things
|
||||
// (far more fragile)
|
||||
if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) {
|
||||
desiredStride = 2;
|
||||
}
|
||||
|
||||
return desiredStride;
|
||||
}
|
||||
|
||||
/**
 * \brief Picks the FDR engine variant best suited to a literal set.
 *
 * Every engine produced by getFdrDescriptions() that is valid on \a target
 * and whose stride does not exceed the shortest literal length is scored, and
 * the highest scoring engine is returned (the first one wins ties).
 *
 * Scoring starts at 100, subtracts the distance between the engine stride and
 * the desired stride (adding the stride back when the engine does not
 * overshoot), and subtracts the distance between the engine's table bits and
 * an "ideal" bit count derived from the number of literals.
 *
 * \param target     platform the engine must be valid on
 * \param vl         literals to be matched
 * \param make_small ask for a smaller table (ideal bit count reduced by two)
 * \return copy of the chosen engine description, or nullptr if no engine
 *         qualifies
 */
unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
                                              const vector<hwlmLiteral> &vl,
                                              bool make_small) {
    vector<FDREngineDescription> allDescs;
    getFdrDescriptions(&allDescs);

    // find desired stride
    size_t count;
    size_t msl = minLenCount(vl, &count);
    u32 desiredStride = findDesiredStride(vl.size(), msl, count);

    DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
                 desiredStride);

    // The effective literal count does not depend on the engine.
    const u32 effLits = vl.size();

    const FDREngineDescription *best = nullptr;
    u32 best_score = 0;

    for (const FDREngineDescription &eng : allDescs) {
        if (!eng.isValidOnTarget(target)) {
            continue;
        }
        if (msl < eng.stride) {
            // the engine would stride past our shortest literal
            continue;
        }

        u32 score = 100;

        score -= absdiff(desiredStride, eng.stride);

        if (eng.stride <= desiredStride) {
            score += eng.stride;
        }

        // Ideal number of table bits for this many literals.
        u32 ideal;
        if (effLits < eng.getNumBuckets()) {
            if (eng.stride == 1) {
                ideal = 8;
            } else {
                ideal = 10;
            }
        } else if (effLits < 20) {
            ideal = 10;
        } else if (effLits < 100) {
            ideal = 11;
        } else if (effLits < 1000) {
            ideal = 12;
        } else if (effLits < 10000) {
            ideal = 13;
        } else {
            ideal = 15;
        }

        if (ideal != 8 && eng.schemeWidth == 32) {
            ideal += 1;
        }

        if (make_small) {
            ideal -= 2;
        }

        if (eng.stride > 1) {
            ideal++;
        }

        DEBUG_PRINTF("effLits %u\n", effLits);

        if (target.is_atom_class() && !make_small && effLits < 4000) {
            /* Unless it is a very heavy case, we want to build smaller tables
             * on lightweight machines due to their small caches. */
            ideal -= 2;
        }

        score -= absdiff(ideal, eng.bits);

        DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.schemeWidth, eng.bits,
                     eng.getNumBuckets(), eng.stride, score);

        if (!best || score > best_score) {
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<FDREngineDescription>(*best);
}
|
||||
|
||||
/**
 * Maps (bucket, position within bucket) to a scheme bit index: positions are
 * interleaved across buckets, i.e. index = p * numBuckets + b.
 */
SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
                                                  PositionInBucket p) const {
    assert(p < getBucketWidth(b));
    const SchemeBitIndex bit = p * getNumBuckets() + b;
    assert(bit < getSchemeWidth());
    return bit;
}
|
||||
|
||||
/**
 * Width of a bucket in scheme bits. Every bucket has the same width, so the
 * bucket index argument is ignored; the scheme width must divide evenly.
 */
u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
    const u32 width = getSchemeWidth();
    const u32 buckets = getNumBuckets();
    assert(width % buckets == 0);
    return width / buckets;
}
|
||||
|
||||
/**
 * Returns a copy of the engine description at index \a engineID in the list
 * produced by getFdrDescriptions(), or nullptr if the index is out of range.
 */
unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
    vector<FDREngineDescription> descs;
    getFdrDescriptions(&descs);

    if (engineID < descs.size()) {
        return ue2::make_unique<FDREngineDescription>(descs[engineID]);
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
80
src/fdr/fdr_engine_description.h
Normal file
80
src/fdr/fdr_engine_description.h
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_ENGINE_DESCRIPTION_H
|
||||
#define FDR_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct FDREngineDef {
|
||||
u32 id;
|
||||
u32 schemeWidth;
|
||||
u32 numBuckets;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
u64a cpu_features;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
};
|
||||
|
||||
class FDREngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 schemeWidth;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
|
||||
u32 getSchemeWidth() const { return schemeWidth; }
|
||||
u32 getBucketWidth(BucketIndex b) const;
|
||||
SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
|
||||
u32 getNumTableEntries() const { return 1 << bits; }
|
||||
u32 getTabSizeBytes() const {
|
||||
return schemeWidth / 8 * getNumTableEntries();
|
||||
}
|
||||
|
||||
explicit FDREngineDescription(const FDREngineDef &def);
|
||||
|
||||
u32 getDefaultFloodSuffixLength() const override;
|
||||
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
|
||||
};
|
||||
|
||||
std::unique_ptr<FDREngineDescription>
|
||||
chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
|
||||
bool make_small);
|
||||
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
|
||||
void getFdrDescriptions(std::vector<FDREngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
111
src/fdr/fdr_internal.h
Normal file
111
src/fdr/fdr_internal.h
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: data structures.
|
||||
*/
|
||||
|
||||
#ifndef FDR_INTERNAL_H
|
||||
#define FDR_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
|
||||
|
||||
/** \brief Caution classification; see the per-enumerator notes. */
typedef enum {
    NOT_CAUTIOUS, //!< not near a boundary (quantify?)
    VECTORING     //!< potentially vectoring
} CautionReason;
|
||||
|
||||
/** \brief number of different ids that can be triggered by floods of any given
|
||||
* character. */
|
||||
#define FDR_FLOOD_MAX_IDS 16
|
||||
|
||||
struct FDRFlood {
|
||||
hwlm_group_t allGroups; //!< all the groups or'd together
|
||||
u32 suffix;
|
||||
|
||||
/** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
|
||||
* a flood.
|
||||
* If larger we won't handle this through the flood path at all. */
|
||||
u16 idCount;
|
||||
|
||||
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
|
||||
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
|
||||
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
|
||||
};
|
||||
|
||||
/** \brief FDR structure.
|
||||
*
|
||||
* 1. struct as-is
|
||||
* 2. primary matching table
|
||||
* 3. confirm stuff
|
||||
*/
|
||||
struct FDR {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 floodOffset;
|
||||
|
||||
/** link is the relative offset of a secondary included FDR table for
|
||||
* stream handling if we're a primary FDR table or the subsidiary tertiary
|
||||
* structures (spillover strings and hash table) if we're a secondary
|
||||
* structure. */
|
||||
u32 link;
|
||||
u32 pad1;
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
|
||||
union {
|
||||
u32 s_u32;
|
||||
u64a s_u64a;
|
||||
m128 s_m128;
|
||||
} start;
|
||||
};
|
||||
|
||||
/** \brief FDR runtime arguments.
|
||||
*
|
||||
* This structure handles read-only things that are passed extensively around
|
||||
* the FDR run-time functions. They are set by the API, passed by value into
|
||||
* the main function, then a pointer is passed around to all the various
|
||||
* sub-functions (confirm & flood). */
|
||||
struct FDR_Runtime_Args {
|
||||
const u8 *buf;
|
||||
size_t len;
|
||||
const u8 *buf_history;
|
||||
size_t len_history;
|
||||
const u8 *buf_history_nocase;
|
||||
size_t len_history_nocase;
|
||||
size_t start_offset;
|
||||
HWLMCallback cb;
|
||||
void *ctxt;
|
||||
hwlm_group_t *groups;
|
||||
const u8 *firstFloodDetect;
|
||||
const u64a histBytes;
|
||||
};
|
||||
|
||||
#endif
|
216
src/fdr/fdr_loadval.h
Normal file
216
src/fdr/fdr_loadval.h
Normal file
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_LOADVAL_H
|
||||
#define FDR_LOADVAL_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
/* Opens the definition of a loadval function called 'name' returning 'type'.
 * ptr is the address to load from; lo and hi delimit the readable region
 * [lo, hi) and are only consulted by the bounds checks below. */
#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)

/* Whole value must lie inside [lo, hi). */
#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)
/* As NORMAL_SAFE, and ptr must be aligned to sizeof(type). The trailing
 * semicolon is kept so existing uses expand exactly as before. */
#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0);
// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)

/* Per-byte bounds tests used inside MAKE_LOOP (i is the byte index). */
#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
/* Fully parenthesised so the macro stays a single operand wherever it is
 * expanded. */
#define CE_INDEX_CHECK ((lo <= ptr + i) && (ptr + i < hi))

/* Assembles a TYPE byte by byte; bytes failing COND contribute zero.
 * SHIFT_FIDDLE gives the destination byte position for source byte i. */
#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE)                                    \
    TYPE v = 0;                                                                \
    for (TYPE i = 0; i < sizeof(TYPE); i++) {                                  \
        if (COND) {                                                            \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8);                           \
        }                                                                      \
    }                                                                          \
    return v;

/* Big-endian assembly: first byte lands in the most significant position. */
#define MAKE_LOOP_BE(TYPE, COND)                                               \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

/* Little-endian assembly: first byte lands in the least significant position. */
#define MAKE_LOOP_LE(TYPE, COND)                                               \
    MAKE_LOOP(TYPE, COND, i)


#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)
|
||||
|
||||
// no suffix = normal (unaligned)
|
||||
// _a = aligned
|
||||
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
|
||||
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
|
||||
// _ce = cautious everywhere (in both directions); test against hi and lo
|
||||
|
||||
// u8 loadvals
|
||||
MAKE_LOADVAL(u8, lv_u8) {
|
||||
NORMAL_SAFE(u8);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(u8);
|
||||
if (ptr < hi) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(u8);
|
||||
if (lo <= ptr) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_ce) {
|
||||
if ((lo <= ptr) && (ptr < hi)) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16) {
|
||||
NORMAL_SAFE(u16);
|
||||
return unaligned_load_u16(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_a) {
|
||||
ALIGNED_SAFE(u16);
|
||||
return *(const u16 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u32(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_a) {
|
||||
ALIGNED_SAFE(u32);
|
||||
return *(const u32 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u64a(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_a) {
|
||||
ALIGNED_SAFE(u64a);
|
||||
return *(const u64a *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128) {
|
||||
NORMAL_SAFE(m128);
|
||||
return loadu128(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_a) {
|
||||
ALIGNED_SAFE(m128);
|
||||
assert((size_t)ptr % sizeof(m128) == 0);
|
||||
return *(const m128 *)ptr;
|
||||
}
|
||||
|
||||
// m128 cases need to be manually created
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (ptr + i < hi) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (lo <= ptr + i) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_ce) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if ((lo <= ptr + i) && (ptr + i < hi)) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
#endif
|
445
src/fdr/fdr_streaming_compile.cpp
Normal file
445
src/fdr/fdr_streaming_compile.cpp
Normal file
@ -0,0 +1,445 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct LongLitOrder {
|
||||
bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
|
||||
if (i1.nocase != i2.nocase) {
|
||||
return i1.nocase < i2.nocase;
|
||||
} else {
|
||||
return i1.s < i2.s;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
|
||||
return l1.s == l2.s && l1.nocase == l2.nocase;
|
||||
}
|
||||
|
||||
static
|
||||
u32 roundUpToPowerOfTwo(u32 x) {
|
||||
x -= 1;
|
||||
x |= (x >> 1);
|
||||
x |= (x >> 2);
|
||||
x |= (x >> 4);
|
||||
x |= (x >> 8);
|
||||
x |= (x >> 16);
|
||||
return x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates a long literals vector containing all literals of length > max_len.
|
||||
*
|
||||
* The last char of each literal is trimmed as we're not interested in full
|
||||
* matches, only partial matches.
|
||||
*
|
||||
* Literals are sorted (by caseful/caseless, then lexicographical order) and
|
||||
* made unique.
|
||||
*
|
||||
* The ID of each literal is set to its position in the vector.
|
||||
*
|
||||
* \return False if there aren't any long literals.
|
||||
*/
|
||||
static
|
||||
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
||||
long_lits.reserve(lits.size());
|
||||
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
|
||||
it != lits.end(); ++it) {
|
||||
if (it->s.length() > max_len) {
|
||||
hwlmLiteral tmp = *it; // copy
|
||||
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
|
||||
tmp.id = 0; // recalc later
|
||||
tmp.groups = 0; // filled in later by hash bucket(s)
|
||||
long_lits.push_back(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (long_lits.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// sort long_literals by caseful/caseless and in lexicographical order,
|
||||
// remove duplicates
|
||||
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
||||
vector<hwlmLiteral>::iterator new_end =
|
||||
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||
long_lits.erase(new_end, long_lits.end());
|
||||
|
||||
// fill in ids; not currently used
|
||||
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
i->id = i - long_lits.begin();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// boundaries are the 'start' boundaries for each 'mode'
|
||||
// so boundary[CASEFUL] is the index one above the largest caseful index
|
||||
// positions[CASEFUL] is the # of positions in caseful strings (stream)
|
||||
// hashedPositions[CASEFUL] is the # of positions in caseful strings
|
||||
// (not returned - a temporary)
|
||||
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
|
||||
// (rounded up to the nearest power of two)
|
||||
static
|
||||
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
u32 *boundaries, u32 *positions, u32 *hashEntries) {
|
||||
u32 hashedPositions[MAX_MODES];
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
||||
boundaries[m] = verify_u32(long_lits.size());
|
||||
positions[m] = 0;
|
||||
hashedPositions[m] = 0;
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
if (i->nocase) {
|
||||
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
MODES m = i->nocase ? CASELESS : CASEFUL;
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
hashedPositions[m]++;
|
||||
}
|
||||
positions[m] += i->s.size();
|
||||
}
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
||||
hashEntries[m] = hashedPositions[m]
|
||||
? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
|
||||
: 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPILE
|
||||
printf("analyzeLits:\n");
|
||||
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
|
||||
printf("mode %s boundary %d positions %d hashedPositions %d "
|
||||
"hashEntries %d\n",
|
||||
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
||||
positions[m], hashedPositions[m], hashEntries[m]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static
|
||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
|
||||
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
||||
}
|
||||
|
||||
// sort by 'distance from start'
|
||||
namespace {
|
||||
struct OffsetIDFromEndOrder {
|
||||
const vector<hwlmLiteral> &lits; // not currently used
|
||||
explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
|
||||
: lits(lits_in) {}
|
||||
bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
|
||||
if (i1.second != i2.second) {
|
||||
// longest is 'first', so > not <
|
||||
return i1.second > i2.second;
|
||||
}
|
||||
return i1.first < i2.first;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
FDRSHashEntry *tab, size_t numEntries, MODES m,
|
||||
map<u32, u32> &litToOffsetVal) {
|
||||
const u32 nbits = lg2(numEntries);
|
||||
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
||||
map<u32, u64a> bucketToBitfield;
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
const hwlmLiteral &l = *i;
|
||||
if ((m == CASELESS) != i->nocase) {
|
||||
continue;
|
||||
}
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
u32 h = hashLit(l, j, max_len, m);
|
||||
u32 h_ent = h & ((1U << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
|
||||
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
||||
}
|
||||
}
|
||||
|
||||
// this used to be a set<u32>, but a bitset is much much faster given that
|
||||
// we're using it only for membership testing.
|
||||
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
|
||||
|
||||
// sweep out bitfield entries and save the results swapped accordingly
|
||||
// also, anything with bitfield entries is put in filledBuckets
|
||||
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
|
||||
e = bucketToBitfield.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
u64a contents = i->second;
|
||||
tab[bucket].bitfield = contents;
|
||||
filledBuckets.set(bucket);
|
||||
}
|
||||
|
||||
// store out all our chains based on free values in our hash table.
|
||||
// find nearest free locations that are empty (there will always be more
|
||||
// entries than strings, at present)
|
||||
for (map<u32, deque<pair<u32, u32> > >::iterator
|
||||
i = bucketToLitOffPairs.begin(),
|
||||
e = bucketToLitOffPairs.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
deque<pair<u32, u32> > &d = i->second;
|
||||
|
||||
// sort d by distance of the residual string (len minus our depth into
|
||||
// the string). We need to put the 'furthest back' string first...
|
||||
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
|
||||
|
||||
while (1) {
|
||||
// first time through is always at bucket, then we fill in links
|
||||
filledBuckets.set(bucket);
|
||||
FDRSHashEntry *ent = &tab[bucket];
|
||||
u32 lit_id = d.front().first;
|
||||
u32 offset = d.front().second;
|
||||
|
||||
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
|
||||
ent->link = (u32)LINK_INVALID;
|
||||
|
||||
d.pop_front();
|
||||
if (d.empty()) {
|
||||
break;
|
||||
}
|
||||
// now, if there is another value
|
||||
// find a bucket for it and put in 'bucket' and repeat
|
||||
// all we really need to do is find something not in filledBuckets,
|
||||
// ideally something close to bucket
|
||||
// we search backward and forward from bucket, trying to stay as
|
||||
// close as possible.
|
||||
UNUSED bool found = false;
|
||||
int bucket_candidate = 0;
|
||||
for (u32 k = 1; k < numEntries * 2; k++) {
|
||||
bucket_candidate = bucket + (((k & 1) == 0)
|
||||
? (-(int)k / 2) : (k / 2));
|
||||
if (bucket_candidate < 0 ||
|
||||
(size_t)bucket_candidate >= numEntries) {
|
||||
continue;
|
||||
}
|
||||
if (!filledBuckets.test(bucket_candidate)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(found);
|
||||
bucket = bucket_candidate;
|
||||
ent->link = bucket;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
vector<hwlmLiteral>::const_iterator it, ite;
|
||||
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
|
||||
rv = max(rv, it->msk.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Builds the streaming long-literal table for lits.
//
// Returns the allocated table and its size in bytes, or (nullptr, 0) when
// setupLongLits() reports that no table is needed. In both cases the
// literal_history_required and literal_stream_state_required fields of
// stream_control are filled in.
//
// Throws std::logic_error if stream_control->history_max is smaller than the
// 32 bytes of history that long-literal support requires.
//
// Table layout, in order (each region padded as computed below):
//   FDRSTableHeader | FDRSLiteral[long_lits.size() + 1] | packed literal
//   strings | caseful hash table | caseless hash table
pair<u8 *, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
                       hwlmStreamingControl *stream_control) {
    // Refuse to compile if we would be forced below the minimum history
    // needed for long-literal support. Otherwise use the larger of that
    // minimum and the history already in use, capped by history_max.
    const size_t MIN_HISTORY_REQUIRED = 32;

    if (stream_control->history_max < MIN_HISTORY_REQUIRED) {
        throw std::logic_error("Cannot set history to minimum history required");
    }

    size_t max_len =
        MIN(stream_control->history_max,
            MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
    assert(max_len >= MIN_HISTORY_REQUIRED);
    const size_t max_mask_len = maxMaskLen(lits);

    vector<hwlmLiteral> long_lits;
    const bool need_table = setupLongLits(lits, long_lits, max_len);
    if (!need_table) {
        // "Don't need to do anything" path, not really a fail
        DEBUG_PRINTF("Streaming literal path produces no table\n");

        // History must still cover the longest literal and the longest mask.
        stream_control->literal_history_required =
            max(maxLen(lits), max_mask_len) - 1;
        stream_control->literal_stream_state_required = 0;
        return make_pair(nullptr, size_t{0});
    }

    // Ensure that we have enough room for the longest mask.
    if (max_mask_len) {
        max_len = max(max_len, max_mask_len - 1);
    }

    u32 boundary[MAX_MODES];
    u32 positions[MAX_MODES];
    u32 hashEntries[MAX_MODES];

    analyzeLits(long_lits, max_len, boundary, positions, hashEntries);

    // --- Size every region of the table. ---
    const size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));

    const size_t litTabOffset = headerSize;
    // One extra entry acts as the end marker for the last literal.
    const size_t litTabNumEntries = long_lits.size() + 1;
    const size_t litTabSize =
        ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));

    const size_t wholeLitTabOffset = litTabOffset + litTabSize;
    const size_t totalWholeLitTabSize =
        ROUNDUP_16(positions[CASEFUL] + positions[CASELESS]);

    size_t htOffset[MAX_MODES];
    size_t htSize[MAX_MODES];

    htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
    htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
    htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
    htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);

    const size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);

    // Stream state width per mode. The +2 leaves room for the largest
    // position itself and for the +1 bias applied when a position is stored,
    // which lets zero mean "no stream state value".
    u8 streamBits[MAX_MODES];
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        streamBits[m] = lg2(roundUpToPowerOfTwo(positions[m] + 2));
    }
    const u32 tot_state_bytes =
        (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;

    u8 *table = (u8 *)aligned_zmalloc(tabSize);
    assert(table); // otherwise would have thrown std::bad_alloc

    // --- Header. ---
    u8 *cursor = table;
    FDRSTableHeader *header = (FDRSTableHeader *)cursor;
    header->pseudoEngineID = (u32)0xffffffff;
    header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        header->boundary[m] = boundary[m];
        header->hashOffset[m] = verify_u32(htOffset[m]);
        header->hashNBits[m] = lg2(hashEntries[m]);
        header->streamStateBits[m] = streamBits[m];
    }
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8

    cursor += headerSize;

    // --- Literal table and packed literal strings. ---
    FDRSLiteral *litTab = (FDRSLiteral *)cursor;
    cursor += litTabSize;

    map<u32, u32> litToOffsetVal;
    for (size_t idx = 0; idx < long_lits.size(); ++idx) {
        const auto &text = long_lits[idx].s;
        const u32 entry = verify_u32(idx);
        const u32 offset = verify_u32(cursor - table);

        // Point the table entry at the string and remember where it went.
        litTab[entry].offset = offset;
        litToOffsetVal[entry] = offset;

        // Copy the string bytes in and advance past them.
        memcpy(cursor, text.c_str(), text.size());
        cursor += text.size();
    }

    // The final entry records where the last string ends.
    litTab[long_lits.size()].offset = verify_u32(cursor - table);

    // --- Hash tables, caseful first. ---
    cursor = table + htOffset[CASEFUL];
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        fillHashes(long_lits, max_len, (FDRSHashEntry *)cursor, hashEntries[m],
                   (MODES)m, litToOffsetVal);
        cursor += htSize[m];
    }

    // Tell the world what we did.
    stream_control->literal_history_required = max_len;
    stream_control->literal_stream_state_required = tot_state_bytes;
    return make_pair(table, tabSize);
}
|
||||
|
||||
} // namespace ue2
|
152
src/fdr/fdr_streaming_internal.h
Normal file
152
src/fdr/fdr_streaming_internal.h
Normal file
@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_INTERNAL_H
|
||||
#define FDR_STREAMING_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
// tertiary table:
|
||||
// a header (FDRSTableHeader)
|
||||
// long_lits.size()+1 entries holding an offset to the string in the
|
||||
// 'whole literal table' (FDRSLiteral structure)
|
||||
// the whole literal table - every string packed in (freeform)
|
||||
// hash table (caseful) (FDRSHashEntry)
|
||||
// hash table (caseless) (FDRSHashEntry)
|
||||
|
||||
// Literal matching modes. The values are used directly as indices into the
// per-mode arrays in this file, and MAX_MODES is the size of those arrays.
typedef enum {
    CASEFUL   = 0,
    CASELESS  = 1,
    MAX_MODES = 2
} MODES;
|
||||
|
||||
// We have one of these structures hanging off the 'link' of our secondary
|
||||
// FDR table that handles streaming strings
|
||||
struct FDRSTableHeader {
|
||||
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
|
||||
|
||||
// string id one beyond the maximum entry for this type of literal
|
||||
// boundary[CASEFUL] is the end of the caseful literals
|
||||
// boundary[CASELESS] is the end of the caseless literals and one beyond
|
||||
// the largest literal id (the size of the littab)
|
||||
u32 boundary[MAX_MODES];
|
||||
|
||||
// offsets are 0 if no such table exists
|
||||
// offset from the base of the tertiary structure to the hash table
|
||||
u32 hashOffset[MAX_MODES];
|
||||
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
|
||||
|
||||
u8 streamStateBits[MAX_MODES];
|
||||
u8 streamStateBytes; // total size of packed stream state in bytes
|
||||
u8 N; // prefix lengths
|
||||
u16 pad;
|
||||
};
|
||||
|
||||
// One of these structures per literal entry in our secondary FDR table.
|
||||
struct FDRSLiteral {
|
||||
u32 offset;
|
||||
// potentially - another u32 to point to the 'next lesser included literal'
|
||||
// which would be a literal that overlaps this one in such a way that a
|
||||
// failure to match _this_ literal can leave us in a state that we might
|
||||
// still match that literal. Offset information might also be called for,
|
||||
// in which case we might be wanting to use a FDRSLiteralOffset
|
||||
};
|
||||
|
||||
typedef u32 FDRSLiteralOffset;
|
||||
|
||||
#define LINK_INVALID 0xffffffff
|
||||
|
||||
// One of these structures per hash table entry in our secondary FDR table
|
||||
struct FDRSHashEntry {
|
||||
u64a bitfield;
|
||||
FDRSLiteralOffset state;
|
||||
u32 link;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return m == CASEFUL ? 0 : h->boundary[m-1];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return h->boundary[m];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
|
||||
return (const struct FDRSLiteral *) (((const u8 *)h) +
|
||||
ROUNDUP_16(sizeof(struct FDRSTableHeader)));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
|
||||
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v - getBaseOffsetOfLits(h, m) + 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v + getBaseOffsetOfLits(h, m) - 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
|
||||
return (ent->bitfield >> bit) & 0x1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
|
||||
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
|
||||
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
|
||||
assert(len >= 32);
|
||||
|
||||
u64a v1 = unaligned_load_u64a(ptr);
|
||||
u64a v2 = unaligned_load_u64a(ptr + 8);
|
||||
u64a v3 = unaligned_load_u64a(ptr + 16);
|
||||
if (mode == CASELESS) {
|
||||
v1 &= CASEMASK;
|
||||
v2 &= CASEMASK;
|
||||
v3 &= CASEMASK;
|
||||
}
|
||||
v1 *= MULTIPLIER;
|
||||
v2 *= (MULTIPLIER*MULTIPLIER);
|
||||
v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
|
||||
v1 >>= 32;
|
||||
v2 >>= 32;
|
||||
v3 >>= 32;
|
||||
return v1 ^ v2 ^ v3;
|
||||
}
|
||||
|
||||
#endif
|
365
src/fdr/fdr_streaming_runtime.h
Normal file
365
src/fdr/fdr_streaming_runtime.h
Normal file
@ -0,0 +1,365 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_RUNTIME_H
|
||||
#define FDR_STREAMING_RUNTIME_H
|
||||
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "util/partial_store.h"
|
||||
|
||||
static really_inline
|
||||
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
|
||||
const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
|
||||
// test if it's not really a engineID, but a 'pseudo engine id'
|
||||
assert(*(const u32 *)linkPtr == 0xffffffff);
|
||||
assert(linkPtr);
|
||||
return (const struct FDRSTableHeader *)linkPtr;
|
||||
}
|
||||
|
||||
// Reads from stream state and unpacks values into stream state table.
|
||||
static really_inline
|
||||
void getStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
const u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 ssb_mask = (1U << ssb) - 1;
|
||||
u32 streamVal = partial_load_u32(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> ssb);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a ssb_mask = (1ULL << ssb) - 1;
|
||||
u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Defensive checking (used in assert) that these table values don't overflow
|
||||
// outside the range available.
|
||||
static really_inline UNUSED
|
||||
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
|
||||
u32 ssb_mask = (1ULL << (ssb)) - 1;
|
||||
if (table[CASEFUL] & ~ssb_mask) {
|
||||
return 1;
|
||||
}
|
||||
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
|
||||
if (table[CASELESS] & ~ssb_nc_mask) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads from stream state table and packs values into stream state.
|
||||
static really_inline
|
||||
void setStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
assert(!streamingTableOverflow(table, ssb, ssb_nc));
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 stagingStreamState = table[CASEFUL];
|
||||
stagingStreamState |= (table[CASELESS] << ssb);
|
||||
|
||||
partial_store_u32(stream_state, stagingStreamState, ss_bytes);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a stagingStreamState = (u64a)table[CASEFUL];
|
||||
stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
|
||||
partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
|
||||
}
|
||||
|
||||
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
|
||||
if (!stream_state) {
|
||||
return 0;
|
||||
}
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
|
||||
// We just care if there are any bits set, and the test below is faster
|
||||
// than a partial_load_u64a (especially on 32-bit hosts).
|
||||
for (u32 i = 0; i < ss_bytes; i++) {
|
||||
if (*stream_state) {
|
||||
return 1;
|
||||
}
|
||||
++stream_state;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// binary search for the literal index that contains the current state
|
||||
static really_inline
|
||||
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
|
||||
u32 stateValue, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 lo = get_start_lit_idx(streamingTable, m);
|
||||
u32 hi = get_end_lit_idx(streamingTable, m);
|
||||
|
||||
// Now move stateValue back by one so that we're looking for the
|
||||
// litTab entry that includes it the string, not the one 'one past' it
|
||||
stateValue -= 1;
|
||||
assert(lo != hi);
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
|
||||
// binary search to find the entry e such that:
|
||||
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
|
||||
while (lo + 1 < hi) {
|
||||
u32 mid = (lo + hi) / 2;
|
||||
if (litTab[mid].offset <= stateValue) {
|
||||
lo = mid;
|
||||
} else { //(litTab[mid].offset > stateValue) {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
return lo;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSLiteral * litTab,
|
||||
const u32 *state_table,
|
||||
const MODES m) {
|
||||
if (!state_table[m]) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
|
||||
u32 idx = findLitTabEntry(streamingTable, stateValue, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * found_buf = found_offset + (const u8 *)streamingTable;
|
||||
size_t found_sz = stateValue - found_offset;
|
||||
if (m == CASEFUL) {
|
||||
a->buf_history = found_buf;
|
||||
a->len_history = found_sz;
|
||||
} else {
|
||||
a->buf_history_nocase = found_buf;
|
||||
a->len_history_nocase = found_sz;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
|
||||
const u8 * stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
|
||||
u32 state_table[MAX_MODES];
|
||||
getStreamStates(streamingTable, stream_state, state_table);
|
||||
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
|
||||
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * s1 = found_offset + (const u8 *)streamingTable;
|
||||
assert(hashState > found_offset);
|
||||
size_t l1 = hashState - found_offset;
|
||||
const u8 * buf = a->buf;
|
||||
size_t len = a->len;
|
||||
const char nocase = m != CASEFUL;
|
||||
|
||||
if (l1 > len) {
|
||||
const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
|
||||
size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
|
||||
|
||||
if (l1 > len+hist_len) {
|
||||
return 0; // Break out - not enough total history
|
||||
}
|
||||
|
||||
size_t overhang = l1 - len;
|
||||
assert(overhang <= hist_len);
|
||||
|
||||
if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
s1 += overhang;
|
||||
l1 -= overhang;
|
||||
}
|
||||
// if we got here, we don't need history or we compared ok out of history
|
||||
assert(l1 <= len);
|
||||
|
||||
if (cmpForward(buf + len - l1, s1, l1, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
return hashState; // our new state
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
u8 hash_len, u32 *hashes) {
|
||||
u8 tempbuf[128];
|
||||
const u8 *base;
|
||||
if (hash_len > a->len) {
|
||||
assert(hash_len <= 128);
|
||||
size_t overhang = hash_len - a->len;
|
||||
assert(overhang <= a->len_history);
|
||||
memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
|
||||
memcpy(tempbuf + overhang, a->buf, a->len);
|
||||
base = tempbuf;
|
||||
} else {
|
||||
assert(hash_len <= a->len);
|
||||
base = a->buf + a->len - hash_len;
|
||||
}
|
||||
|
||||
if (streamingTable->hashNBits[CASEFUL]) {
|
||||
hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
|
||||
}
|
||||
if (streamingTable->hashNBits[CASELESS]) {
|
||||
hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||
u32 h, const MODES m) {
|
||||
u32 nbits = streamingTable->hashNBits[m];
|
||||
if (!nbits) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
u32 h_ent = h & ((1 << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
const struct FDRSHashEntry *ent = tab + h_ent;
|
||||
|
||||
if (!has_bit(ent, h_low)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ent;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSHashEntry *ent, const MODES m) {
|
||||
assert(ent);
|
||||
assert(streamingTable->hashNBits[m]);
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
|
||||
while (1) {
|
||||
u32 tmp = 0;
|
||||
if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
|
||||
state_table[m] = packStateVal(streamingTable, m, tmp);
|
||||
break;
|
||||
}
|
||||
if (ent->link == LINK_INVALID) {
|
||||
break;
|
||||
}
|
||||
ent = tab + ent->link;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
|
||||
u8 *stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
// get pointers to the streamer FDR and the tertiary structure
|
||||
const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
|
||||
|
||||
assert(streamingTable->N);
|
||||
|
||||
u32 state_table[MAX_MODES] = {0, 0};
|
||||
|
||||
// if we don't have enough history, we don't need to do anything
|
||||
if (streamingTable->N <= a->len + a->len_history) {
|
||||
u32 hashes[MAX_MODES] = {0, 0};
|
||||
|
||||
fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
|
||||
|
||||
const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
|
||||
hashes[CASEFUL], CASEFUL);
|
||||
const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
|
||||
hashes[CASELESS], CASELESS);
|
||||
|
||||
if (ent_ful) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_ful,
|
||||
CASEFUL);
|
||||
}
|
||||
|
||||
if (ent_less) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_less,
|
||||
CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
setStreamStates(streamingTable, stream_state, state_table);
|
||||
}
|
||||
|
||||
#endif
|
222
src/fdr/flood_compile.cpp
Normal file
222
src/fdr/flood_compile.cpp
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct FloodComparator {
|
||||
bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
|
||||
return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool isDifferent(u8 oldC, u8 c, bool caseless) {
|
||||
if (caseless) {
|
||||
return mytolower(oldC) != mytolower(c);
|
||||
} else {
|
||||
return oldC != c;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
|
||||
}
|
||||
|
||||
static
|
||||
void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
||||
u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
if (fl.idCount < FDR_FLOOD_MAX_IDS) {
|
||||
fl.ids[fl.idCount] = lit.id;
|
||||
fl.allGroups |= lit.groups;
|
||||
fl.groups[fl.idCount] = lit.groups;
|
||||
fl.len[fl.idCount] = suffix;
|
||||
// when idCount gets to max_ids this flood no longer happens
|
||||
// only incremented one more time to avoid arithmetic overflow
|
||||
DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
|
||||
c, fl.suffix, fl.idCount, suffix);
|
||||
fl.idCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the flood-control structure for the given literals.
//
// For every byte value an FDRFlood record is accumulated from the literals
// that end in that byte. Identical records are then shared: the returned
// buffer holds N_CHARS u32 indices (one per byte value) followed by the
// distinct FDRFlood records those indices refer to.
//
// Returns the allocated buffer and its size in bytes.
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
                                        const EngineDescription &eng) {
    vector<FDRFlood> tmpFlood(N_CHARS);
    const u32 default_suffix = eng.getDefaultFloodSuffixLength();

    // Zero everything (padding included) so that the byte-wise compares
    // below see no spurious differences, then apply the default suffix.
    memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));
    for (auto &fl : tmpFlood) {
        fl.suffix = default_suffix;
    }

    for (const auto &lit : lits) {
        DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");
        const u32 litSize = verify_u32(lit.s.size());
        const u32 maskSize = (u32)lit.msk.size();

        // The flood character is the literal's final character.
        u8 c = lit.s[litSize - 1];
        bool nocase = ourisalpha(c) ? lit.nocase : false;

        // If the mask pins the case of the final character, treat the
        // literal as case-sensitive on that specific case.
        if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
            c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
            nocase = false;
        }

        const u32 iEnd = MAX(litSize, maskSize);
        // upSuffix: upper-case suffix length for case-less literals, or the
        // suffix length for case-sensitive ones.
        u32 upSuffix = iEnd;
        // loSuffix: lower-case suffix length; meaningful only for case-less.
        u32 loSuffix = iEnd;

        // Walk backwards from the end of the literal/mask until something
        // other than the flood character is required.
        for (u32 i = 0; i < iEnd; i++) {
            if (i < litSize &&
                isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
                DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
                             i, c, lit.s[litSize - i - 1]);
                upSuffix = MIN(upSuffix, i);
                loSuffix = MIN(loSuffix, i); // makes sense only for case-less
                break;
            }
            if (i >= maskSize) {
                continue;
            }

            const u8 m = lit.msk[maskSize - i - 1];
            const u8 cm = lit.cmp[maskSize - i - 1] & m;
            if (nocase) {
                if ((mytoupper(c) & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                 i, mytoupper(c), cm);
                    upSuffix = MIN(upSuffix, i);
                }
                if ((mytolower(c) & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                 i, mytolower(c), cm);
                    loSuffix = MIN(loSuffix, i);
                }
                // Stop once both cases have been cut short.
                if (loSuffix != iEnd && upSuffix != iEnd) {
                    break;
                }
            } else if ((c & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
                upSuffix = MIN(upSuffix, i);
                break;
            }
        }

        // A suffix equal to iEnd means the whole literal is a flood of c.
        const u8 upChar = nocase ? mytoupper(c) : c;
        if (upSuffix == iEnd) {
            addFlood(tmpFlood, upChar, lit, upSuffix);
        } else {
            updateFloodSuffix(tmpFlood, upChar, upSuffix);
        }
        if (nocase) {
            if (loSuffix == iEnd) {
                addFlood(tmpFlood, mytolower(c), lit, loSuffix);
            } else {
                updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
            }
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < N_CHARS; i++) {
        FDRFlood &fl = tmpFlood[i];
        if (!fl.idCount) {
            continue;
        }

        printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
               "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
        for (u32 j = 0; j < fl.idCount; j++) {
            printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
                   fl.groups[j], fl.len[j]);
        }
    }
#endif

    // Group byte values that ended up with byte-identical flood records.
    map<FDRFlood, CharReach, FloodComparator> flood2chars;
    for (u32 i = 0; i < N_CHARS; i++) {
        flood2chars[tmpFlood[i]].set(i);
    }

    u32 nDistinctFloods = flood2chars.size();
    const size_t floodHeaderSize = sizeof(u32) * N_CHARS;
    const size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
    const size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *floodHeader = (u32 *)buf;
    FDRFlood *layoutFlood = (FDRFlood *)(buf + floodHeaderSize);

    // Lay out each distinct record once and point every byte value that
    // shares it at its index.
    u32 currentFloodIndex = 0;
    for (const auto &m : flood2chars) {
        const CharReach &cr = m.second;
        layoutFlood[currentFloodIndex] = m.first;
        for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
            floodHeader[c] = currentFloodIndex;
        }
        currentFloodIndex++;
    }

    DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
                 floodHeaderSize, floodStructSize, totalSize);

    return make_pair(buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
347
src/fdr/flood_runtime.h
Normal file
347
src/fdr/flood_runtime.h
Normal file
@ -0,0 +1,347 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FLOOD_RUNTIME
|
||||
#define FLOOD_RUNTIME
|
||||
|
||||
// Select the word width used by the flood routines in this header: FLOOD_64
// when ARCH_64_BIT is defined, FLOOD_32 otherwise. The code below only tests
// FLOOD_32 (via #ifndef) to choose between u64a and u32 reads.
#if defined(ARCH_64_BIT)
|
||||
#define FLOOD_64
|
||||
#else
|
||||
#define FLOOD_32
|
||||
#endif
|
||||
// nextFloodDetect() skips flood probing for buffers shorter than this many
// bytes.
#define FLOOD_MINIMUM_SIZE 256
|
||||
// NOTE(review): not referenced in this header; presumably the initial
// floodBackoff value supplied by callers -- confirm.
#define FLOOD_BACKOFF_START 32
|
||||
|
||||
static really_inline
|
||||
const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
|
||||
// if we don't have a flood at either the start or end,
|
||||
// or have a very small buffer, don't bother with flood detection
|
||||
if (len < FLOOD_MINIMUM_SIZE) {
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
/* entry points in runtime.c prefetch relevant data */
|
||||
#ifndef FLOOD_32
|
||||
u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
|
||||
u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
|
||||
u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
|
||||
u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#else
|
||||
u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
|
||||
u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
|
||||
u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
|
||||
u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#endif
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 * floodDetect(const struct FDR * fdr,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
const u8 ** ptrPtr,
|
||||
const u8 * tryFloodDetect,
|
||||
u32 * floodBackoffPtr,
|
||||
hwlmcb_rv_t * control,
|
||||
u32 iterBytes) {
|
||||
DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
HWLMCallback cb = a->cb;
|
||||
void * ctxt = a->ctxt;
|
||||
|
||||
const u8 * ptr = *ptrPtr;
|
||||
// tryFloodDetect is never put in places where unconditional
|
||||
// reads a short distance forward or backward here
|
||||
// TODO: rationale for this line needs to be rediscovered!!
|
||||
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
|
||||
const u32 i = ptr - buf;
|
||||
u32 j = i;
|
||||
|
||||
// go from c to our FDRFlood structure
|
||||
u8 c = buf[i];
|
||||
const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
|
||||
u32 fIdx = ((const u32 *)fBase)[c];
|
||||
const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
|
||||
const struct FDRFlood * fl = &fsb[fIdx];
|
||||
|
||||
#ifndef FLOOD_32
|
||||
u64a cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
cmpVal |= cmpVal << 32;
|
||||
u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
|
||||
#else
|
||||
u32 cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
|
||||
#endif
|
||||
|
||||
if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
if (i < fl->suffix + 7) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
j = i - fl->suffix;
|
||||
|
||||
#ifndef FLOOD_32
|
||||
j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
|
||||
for (; j + 32 < mainLoopLen; j += 32) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
u64a v2 = *(const u64a *)(buf + j + 8);
|
||||
u64a v3 = *(const u64a *)(buf + j + 16);
|
||||
u64a v4 = *(const u64a *)(buf + j + 24);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 8 < mainLoopLen; j += 8) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
|
||||
for (; j + 16 < mainLoopLen; j += 16) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
u32 v2 = *(const u32 *)(buf + j + 4);
|
||||
u32 v3 = *(const u32 *)(buf + j + 8);
|
||||
u32 v4 = *(const u32 *)(buf + j + 12);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 4 < mainLoopLen; j += 4) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; j < mainLoopLen; j++) {
|
||||
u8 v = *(const u8 *)(buf + j);
|
||||
if (v != c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j > i ) {
|
||||
j--; // needed for some reaches
|
||||
u32 itersAhead = (j-i)/iterBytes;
|
||||
u32 floodSize = itersAhead*iterBytes;
|
||||
|
||||
DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
|
||||
"*control %016llx fl->allGroups %016llx\n",
|
||||
floodSize, j, i, fl->idCount, *control, fl->allGroups);
|
||||
DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
|
||||
mainLoopLen, len);
|
||||
|
||||
if (fl->idCount && (*control & fl->allGroups)) {
|
||||
switch (fl->idCount) {
|
||||
#if !defined(FLOOD_DEBUG)
|
||||
// Carefully unrolled code
|
||||
case 1:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
|
||||
t += 4) {
|
||||
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control =
|
||||
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// slow generalized loop
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
u32 len3 = fl->len[3] - 1;
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#else
|
||||
// Fallback for debugging
|
||||
default:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
|
||||
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ptr += floodSize;
|
||||
} else {
|
||||
*floodBackoffPtr *= 2;
|
||||
}
|
||||
|
||||
floodout:
|
||||
if (j + *floodBackoffPtr < mainLoopLen - 128) {
|
||||
tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
|
||||
} else {
|
||||
tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
|
||||
}
|
||||
*ptrPtr = ptr;
|
||||
DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
|
||||
ptr, tryFloodDetect);
|
||||
return tryFloodDetect;
|
||||
}
|
||||
|
||||
#endif
|
244
src/fdr/teddy.c
Normal file
244
src/fdr/teddy.c
Normal file
@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
// Mask source table for vectoredLoad128(). Row n (0..16) is 32 bytes: 16 zero
// bytes, then n bytes of 0xff, then zero padding. Reading 16 bytes at offset
// 16 of row n gives a mask whose first n bytes are 0xff; reading at offset
// (16 - start) of row n gives `start` zero bytes followed by n bytes of 0xff.
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
UNUSED static really_inline
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
u.val128 = zeroes128();
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 16) {
|
||||
*p_mask = load128((const void*)(p_mask_arr[16] + 16));
|
||||
return loadu128(ptr);
|
||||
}
|
||||
*p_mask = load128((const void*)(p_mask_arr[avail] + 16));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start - need; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(16, (u32)(hi - ptr));
|
||||
*p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
m128 p_mask128;
|
||||
m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks));
|
||||
*p_mask = set2x128(p_mask128);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
||||
};
|
||||
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history) {
|
||||
union {
|
||||
u8 val8[32];
|
||||
m256 val256;
|
||||
} u;
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 32) {
|
||||
*p_mask = load256((const void*)(p_mask_arr256[32] + 32));
|
||||
return loadu256(ptr);
|
||||
}
|
||||
*p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
// need contains "how many chars to pull from history"
|
||||
// calculate based on what we need, what we have in the buffer
|
||||
// and only what we need to make primary confirm work
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(32, (u32)(hi - ptr));
|
||||
*p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val256;
|
||||
}
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
|
||||
#include "fdr_loadval.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "teddy_autogen.c"
|
545
src/fdr/teddy_autogen.py
Executable file
545
src/fdr/teddy_autogen.py
Executable file
@ -0,0 +1,545 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MT(MatcherBase):
    """Emitter for the m128-based Teddy matcher.

    Every produce_* method writes fragments of C source to stdout;
    produce_code() chains them into one complete matcher function.
    """

    def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
        """Print the loop that drains the set bits of C variable var_name.

        The byte position reported for a bit is bit / num_buckets plus
        iter*16 + offset. bits == 64 selects findAndClearLSB_64, anything
        else findAndClearLSB_32. cautious picks VECTORING over NOT_CAUTIOUS.
        """
        base_byte = iter*16 + offset
        if self.packed:
            # Packed engines use produce_confirm_base (not defined in this
            # class; presumably supplied by MatcherBase).
            print(self.produce_confirm_base(var_name, bits, base_byte, cautious, enable_confirmless = False, do_bailout = False))
            return

        conf_func = "confWithBit1" if self.num_masks == 1 else "confWithBitMany"
        caution_string = "VECTORING" if cautious else "NOT_CAUTIOUS"
        lsb_func = "findAndClearLSB_64" if bits == 64 else "findAndClearLSB_32"

        emitted = [
            " if (P0(!!%s)) {" % var_name,
            " do {",
            " bit = %s(&%s);" % (lsb_func, var_name),
            " byte = bit / %d + %d;" % (self.num_buckets, base_byte),
            " idx = bit %% %d;" % self.num_buckets,
            " cf = confBase[idx];",
            " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);",
            " if (!(fdrc->groups & *control))",
            " continue;",
            " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string),
            " } while(P0(!!%s));" % var_name,
            " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {",
            " *a->groups = controlVal;",
            " return HWLM_TERMINATED;",
            " }",
            " }",
        ]
        for line in emitted:
            print(line)

    def produce_needed_temporaries(self, max_iterations):
        """Print the C declarations of the per-iteration working variables."""
        print(" m128 p_mask;")
        for iter in range(max_iterations):
            for suffix in ("", "_lo", "_hi"):
                print(" m128 val_%d%s;" % (iter, suffix))
            for x in range(self.num_masks):
                print(" m128 res_%d_%d;" % (iter, x))
                if x:
                    # mask 0 is never shifted, so it has no shifted temporary
                    print(" m128 res_shifted_%d_%d;" % (iter, x))
            print(" m128 r_%d;" % iter)
            print("#ifdef ARCH_64_BIT")
            for half in ("lopart", "hipart"):
                print(" u64a r_%d_%s;" % (iter, half))
            print("#else")
            for part in range(1, 5):
                print(" u32 r_%d_part%d;" % (iter, part))
            print("#endif")

    def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
                                         cautious, save_old):
        """Print the load, nibble lookups and combination for one 16-byte step.

        cautious selects vectoredLoad128 (and masks the result with p_mask)
        instead of a plain load128. When save_old is set and this is the last
        of effective_num_iterations steps, the per-mask results are stored in
        res_old_<x>.
        """
        if cautious:
            print(" val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks))
        else:
            print(" val_%d = load128(ptr + %d);" % (iter, iter*16))
        print(" val_%d_lo = and128(val_%d, lomask);" % (iter, iter))
        print(" val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter))
        print(" val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter))
        print("")

        lookup = Template("\nres_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),\npshufb(maskBase[${X}*2+1], val_${ITER}_hi));")
        is_last = (iter == effective_num_iterations - 1)
        for x in range(self.num_masks):
            print(lookup.substitute(ITER = iter, X = x))
            if x == 0:
                continue
            # The first step aligns against the saved res_old_<x>; later steps
            # align against the previous step's result.
            if iter == 0:
                previous = "res_old_%d" % x
            else:
                previous = "res_%d_%d" % (iter-1, x)
            print(" res_shifted_%d_%d = palignr(res_%d_%d, %s, 16-%d);" % (iter, x, iter, x, previous, x))
            if is_last and save_old:
                print(" res_old_%d = res_%d_%d;" % (x, iter, x))
        print("")

        if cautious:
            print(" r_%d = and128(res_%d_0, p_mask);" % (iter, iter))
        else:
            print(" r_%d = res_%d_0;" % (iter, iter))
        for x in range(1, self.num_masks):
            print(" r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x))
        print("")

    def produce_one_iteration_confirm(self, iter, confirmCautious):
        """Print the code that splits r_<iter> into words and confirms each."""
        setup64 = [(0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
                   (8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter)]

        setup32 = [(0, "r_%d_part1" % iter, "movd(r_%d)" % iter)]
        for step in (1, 2, 3):
            setup32.append((4 * step, "r_%d_part%d" % (iter, step + 1),
                            "movd(byteShiftRight128(r_%d, %d))" % (iter, 4 * step)))

        print(" if (P0(isnonzero128(r_%d))) {" % (iter))
        variants = (("#ifdef ARCH_64_BIT", 64, setup64),
                    ("#else", 32, setup32))
        for guard, width, parts in variants:
            print(guard)
            # all extractions are emitted before any confirm loop
            for (off, val, init) in parts:
                print(" %s = %s;" % (val, init))
            for (off, val, init) in parts:
                self.produce_confirm(iter, val, off, width, cautious = confirmCautious)
        print("#endif")
        print(" }")

    def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
                              confirmCautious = True, save_old = True):
        """Print one full step: state calculation followed by the confirm."""
        self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old)
        self.produce_one_iteration_confirm(iter, confirmCautious)

    def produce_code(self):
        """Print the complete C matcher function for this configuration."""
        print(self.produce_header(visible = True, header_only = False))
        print(self.produce_common_declarations())
        print("")

        self.produce_needed_temporaries(self.num_iterations)
        print("")

        print(" const struct Teddy * teddy = (const struct Teddy *)fdr;")
        print(" const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));")
        print(" const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks)
        print(" const u8 * mainStart = ROUNDUP_PTR(ptr, 16);")
        print(" const size_t iterBytes = %d;" % (self.num_iterations * 16))

        print(' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n", buf, len, a->start_offset);')
        print(' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr, mainStart);')

        for x in range(1, self.num_masks):
            print(" m128 res_old_%d = ones128();" % x)
        print(" m128 lomask = set16x8(0xf);")

        # unaligned head, handled with the cautious load
        print(" if (ptr < mainStart) {")
        print(" ptr = mainStart - 16;")
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print(" ptr += 16;")
        print(" }")

        # one plain step before entering the main loop
        print(" if (ptr + 16 < buf + len) {")
        self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
        print(" ptr += 16;")
        print(" }")

        # main loop: num_iterations steps per pass
        print(" for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {")
        print(" __builtin_prefetch(ptr + (iterBytes*4));")
        print(self.produce_flood_check())

        for iter in range(self.num_iterations):
            self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)

        print(" }")

        # tail, handled with the cautious load
        print(" for (; ptr < buf + len; ptr += 16) {")
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print(" }")

        print(self.produce_footer())

    def produce_compile_call(self):
        """Print this engine's row for the compile-side description table."""
        packed_str = {True : "true", False : "false"}[self.packed]
        fields = (self.id, self.arch.target, self.num_masks, self.num_buckets,
                  packed_str, self.conf_pull_back, self.conf_top_level_split)
        print(" { %d, %s, %d, %d, %s, %d, %d }," % fields)

    def get_name(self):
        """Return the C function name generated for this configuration."""
        pck_string = "_pck" if self.packed else ""
        type_string = "_fat" if self.num_buckets == 16 else ""
        return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks, pck_string, type_string)

    def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
        self.arch = arch
        self.packed = packed
        self.num_masks = num_masks
        self.num_buckets = num_buckets
        self.num_iterations = 2

        # packed engines use a 32-way top-level confirm split
        self.conf_top_level_split = 32 if packed else 1
        self.conf_pull_back = 0
|
||||
|
||||
class MTFat(MT):
    """m256 variant of MT.

    Overrides the emitters that mention vector types or intrinsics so the
    generated C uses m256 values and the 256-bit helper names.
    """

    def produce_needed_temporaries(self, max_iterations):
        """Print the C declarations of the per-iteration working variables."""
        print(" m256 p_mask;")
        for iter in range(max_iterations):
            for suffix in ("", "_lo", "_hi"):
                print(" m256 val_%d%s;" % (iter, suffix))
            for x in range(self.num_masks):
                print(" m256 res_%d_%d;" % (iter, x))
                if x:
                    print(" m256 res_shifted_%d_%d;" % (iter, x))
            print(" m256 r_%d;" % iter)
            print("#ifdef ARCH_64_BIT")
            for part in range(1, 5):
                print(" u64a r_%d_part%d;" % (iter, part))
            print("#else")
            for part in range(1, 9):
                print(" u32 r_%d_part%d;" % (iter, part))
            print("#endif")

    def produce_code(self):
        """Print the complete C matcher function for this configuration."""
        print(self.produce_header(visible = True, header_only = False))
        print(self.produce_common_declarations())
        print("")

        self.produce_needed_temporaries(self.num_iterations)
        print("")

        print(" const struct Teddy * teddy = (const struct Teddy *)fdr;")
        print(" const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));")
        print(" const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks)
        print(" const u8 * mainStart = ROUNDUP_PTR(ptr, 16);")
        print(" const size_t iterBytes = %d;" % (self.num_iterations * 16))

        print(' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n", buf, len, a->start_offset);')
        print(' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr, mainStart);')

        for x in range(1, self.num_masks):
            print(" m256 res_old_%d = ones256();" % x)
        print(" m256 lomask = set32x8(0xf);")

        # unaligned head, handled with the cautious load
        print(" if (ptr < mainStart) {")
        print(" ptr = mainStart - 16;")
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print(" ptr += 16;")
        print(" }")

        # one plain step before entering the main loop
        print(" if (ptr + 16 < buf + len) {")
        self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
        print(" ptr += 16;")
        print(" }")

        # main loop: num_iterations steps per pass
        print(" for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {")
        print(" __builtin_prefetch(ptr + (iterBytes*4));")
        print(self.produce_flood_check())

        for iter in range(self.num_iterations):
            self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)

        print(" }")

        # tail, handled with the cautious load
        print(" for (; ptr < buf + len; ptr += 16) {")
        self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
        print(" }")

        print(self.produce_footer())

    def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
                                         cautious, save_old):
        """Print the load, nibble lookups and combination for one step.

        cautious selects vectoredLoad2x128 (and masks the result with p_mask)
        instead of a plain load2x128. When save_old is set and this is the
        last of effective_num_iterations steps, the per-mask results are
        stored in res_old_<x>.
        """
        if cautious:
            print(" val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks))
        else:
            print(" val_%d = load2x128(ptr + %d);" % (iter, iter*16))
        print(" val_%d_lo = and256(val_%d, lomask);" % (iter, iter))
        print(" val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter))
        print(" val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter))
        print("")

        lookup = Template("\nres_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),\nvpshufb(maskBase[${X}*2+1], val_${ITER}_hi));")
        is_last = (iter == effective_num_iterations - 1)
        for x in range(self.num_masks):
            print(lookup.substitute(ITER = iter, X = x))
            if x == 0:
                continue
            # The first step aligns against the saved res_old_<x>; later steps
            # align against the previous step's result.
            if iter == 0:
                previous = "res_old_%d" % x
            else:
                previous = "res_%d_%d" % (iter-1, x)
            print(" res_shifted_%d_%d = vpalignr(res_%d_%d, %s, 16-%d);" % (iter, x, iter, x, previous, x))
            if is_last and save_old:
                print(" res_old_%d = res_%d_%d;" % (x, iter, x))
        print("")

        if cautious:
            print(" r_%d = and256(res_%d_0, p_mask);" % (iter, iter))
        else:
            print(" r_%d = res_%d_0;" % (iter, iter))
        for x in range(1, self.num_masks):
            print(" r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x))
        print("")

    def produce_one_iteration_confirm(self, iter, confirmCautious):
        """Print the code that splits r_<iter> into words and confirms each."""
        # Text appended to the extraction that ends the first half: it
        # re-points r at the interleave256hi result for the second half.
        rebind = ";\n r = interleave256hi(r_%d, r_swap)" % (iter)

        setup64 = [(0, "r_%d_part1" % iter, "extractlow64from256(r)"),
                   (4, "r_%d_part2" % iter, "extract64from256(r, 1)" + rebind),
                   (8, "r_%d_part3" % iter, "extractlow64from256(r)"),
                   (12, "r_%d_part4" % iter, "extract64from256(r, 1)")]

        lane_reads = ["extractlow32from256(r)",
                      "extract32from256(r, 1)",
                      "extract32from256(r, 2)",
                      "extract32from256(r, 3)"]
        setup32 = []
        for n in range(8):
            init = lane_reads[n % 4]
            if n == 3:
                init += rebind
            setup32.append((2 * n, "r_%d_part%d" % (iter, n + 1), init))

        print(" if (P0(isnonzero256(r_%d))) {" % (iter))
        print(" m256 r_swap = swap128in256(r_%d);" % (iter))
        print(" m256 r = interleave256lo(r_%d, r_swap);" % (iter))
        variants = (("#ifdef ARCH_64_BIT", 64, setup64),
                    ("#else", 32, setup32))
        for guard, width, parts in variants:
            print(guard)
            # all extractions are emitted before any confirm loop
            for (off, val, init) in parts:
                print(" %s = %s;" % (val, init))
            for (off, val, init) in parts:
                self.produce_confirm(iter, val, off, width, cautious = confirmCautious)
        print("#endif")
        print(" }")
|
||||
|
||||
class MTFast(MatcherBase):
    """Emitter for the "_fast" Teddy matcher (single mask, 8 buckets, m256).

    Every produce_* method writes fragments of C source to stdout. Hits are
    first collected as bit indices in the C array bitArr and confirmed
    afterwards in one loop.
    """

    def produce_confirm(self, cautious):
        """Print the loop that confirms every bit index stored in bitArr.

        cautious picks VECTORING over NOT_CAUTIOUS for the confirm call.
        """
        cautious_str = "VECTORING" if cautious else "NOT_CAUTIOUS"

        print(" for (u32 i = 0; i < arrCnt; i++) {")
        print(" byte = bitArr[i] / 8;")
        if self.packed:
            # Derive the split mask from the configured split instead of
            # hard-coding it; with the split of 32 set in __init__ this is
            # still 0x1f.
            split_mask = self.conf_top_level_split - 1
            print(" bitRem = bitArr[i] % 8;")
            print(" confSplit = *(ptr+byte) & 0x%x;" % split_mask)
            print(" idx = confSplit * %d + bitRem;" % self.num_buckets)
            print(" cf = confBase[idx];")
            print(" if (!cf)")
            print(" continue;")
            print(" fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);")
            print(" if (!(fdrc->groups & *control))")
            print(" continue;")
            print(" confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str)
        else:
            print(" cf = confBase[bitArr[i] % 8];")
            print(" fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);")
            print(" confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str)
        print(" if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {")
        print(" *a->groups = controlVal;")
        print(" return HWLM_TERMINATED;")
        print(" }")
        print(" }")

    def produce_needed_temporaries(self, max_iterations):
        """Print the C declarations of the working variables.

        max_iterations is accepted for interface parity with the other
        matchers; the declarations printed here do not depend on it.
        """
        print(" u32 arrCnt;")
        print(" u16 bitArr[512];")
        print(" m256 p_mask;")
        print(" m256 val_0;")
        print(" m256 val_0_lo;")
        print(" m256 val_0_hi;")
        print(" m256 res_0;")
        print(" m256 res_1;")
        print(" m128 lo_part;")
        print(" m128 hi_part;")
        print("#ifdef ARCH_64_BIT")
        print(" u64a r_0_part;")
        print("#else")
        print(" u32 r_0_part;")
        print("#endif")

    def produce_bit_scan(self, offset, bits):
        """Print the loop that moves the set bits of r_0_part into bitArr.

        Each bit index is biased by bits * offset, with 64-bit words when
        bits == 64 and 32-bit words otherwise.
        """
        print(" while (P0(!!r_0_part)) {")
        if bits == 64:
            print(" bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset))
        else:
            print(" bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset))
        print(" }")

    def produce_bit_check_128(self, var_name, offset):
        """Print the scan of the m128 variable var_name, word by word.

        offset counts 64-bit words; the 32-bit variant uses offset * 2 as
        the index of its first word.
        """
        print(" if (P0(isnonzero128(%s))) {" % (var_name))
        print("#ifdef ARCH_64_BIT")
        print(" r_0_part = movq(%s);" % (var_name))
        self.produce_bit_scan(offset, 64)
        print(" r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name))
        self.produce_bit_scan(offset + 1, 64)
        print("#else")
        print(" r_0_part = movd(%s);" % (var_name))
        self.produce_bit_scan(offset * 2, 32)
        for step in range(1, 4):
            print(" r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4))
            self.produce_bit_scan(offset * 2 + step, 32)
        print("#endif")
        print(" }")

    def produce_bit_check_256(self, iter, single_iter, cautious):
        """Print the scan of res_<iter>, split into two 128-bit halves.

        With single_iter set, the emitted block also resets arrCnt first and
        runs the confirm loop itself; otherwise the caller is expected to
        emit both around a group of scans.
        """
        print(" if (P0(isnonzero256(res_%d))) {" % (iter))
        if single_iter:
            print(" arrCnt = 0;")
        print(" lo_part = cast256to128(res_%d);" % (iter))
        print(" hi_part = cast256to128(swap128in256(res_%d));" % (iter))
        self.produce_bit_check_128("lo_part", iter * 4)
        self.produce_bit_check_128("hi_part", iter * 4 + 2)
        if single_iter:
            self.produce_confirm(cautious)
        print(" }")

    def produce_one_iteration_state_calc(self, iter, cautious):
        """Print the load and nibble lookup for the 32-byte step number iter.

        The loaded value always goes through val_0; only the result variable
        res_<iter> is per-iteration. cautious selects vectoredLoad256 and
        masks the result with p_mask.
        """
        if cautious:
            print(" val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32))
        else:
            print(" val_0 = load256(ptr + %d);" % (iter * 32))
        print(" val_0_lo = and256(val_0, lomask);")
        print(" val_0_hi = rshift4x64(val_0, 4);")
        print(" val_0_hi = and256(val_0_hi, lomask);")
        print(" res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter))
        if cautious:
            print(" res_%d = and256(res_%d, p_mask);" % (iter, iter))

    def produce_code(self):
        """Print the complete C matcher function for this configuration."""
        print(self.produce_header(visible = True, header_only = False))
        print(self.produce_common_declarations())
        print("")

        self.produce_needed_temporaries(self.num_iterations)

        print(" const struct Teddy * teddy = (const struct Teddy *)fdr;")
        print(" const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));")
        print(" const m256 maskLo = set2x128(maskBase[0]);")
        print(" const m256 maskHi = set2x128(maskBase[1]);")
        print(" const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);")
        print(" const u8 * mainStart = ROUNDUP_PTR(ptr, 32);")
        print(" const size_t iterBytes = %d;" % (self.num_iterations * 32))

        print(' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",'
              ' buf, len, a->start_offset);')
        print(' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,'
              ' mainStart);')
        print(" const m256 lomask = set32x8(0xf);")

        # unaligned head, handled with the cautious load
        print(" if (ptr < mainStart) {")
        print(" ptr = mainStart - 32;")
        self.produce_one_iteration_state_calc(iter = 0, cautious = True)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print(" ptr += 32;")
        print(" }")

        # one plain load before the main loop, still with a cautious confirm
        print(" if (ptr + 32 < buf + len) {")
        self.produce_one_iteration_state_calc(iter = 0, cautious = False)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print(" ptr += 32;")
        print(" }")

        # main loop: compute every res_<iter>, gather all hits, confirm once
        print(" for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {")
        print(" __builtin_prefetch(ptr + (iterBytes*4));")
        print(self.produce_flood_check())
        for iter in range (0, self.num_iterations):
            self.produce_one_iteration_state_calc(iter = iter, cautious = False)
        print(" arrCnt = 0;")
        for iter in range (0, self.num_iterations):
            self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
        self.produce_confirm(cautious = False)
        print(" }")

        # tail, handled with the cautious load
        print(" for (; ptr < buf + len; ptr += 32) {")
        self.produce_one_iteration_state_calc(iter = 0, cautious = True)
        self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
        print(" }")

        print(self.produce_footer())

    def get_name(self):
        """Return the C function name generated for this configuration."""
        if self.packed:
            pck_string = "_pck"
        else:
            pck_string = ""
        return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)

    def produce_compile_call(self):
        """Print this engine's row for the compile-side description table."""
        packed_str = { False : "false", True : "true"}[self.packed]
        print(" { %d, %s, %d, %d, %s, %d, %d }," % (
            self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
            self.conf_pull_back, self.conf_top_level_split))

    def __init__(self, arch, packed = False):
        self.arch = arch
        self.packed = packed
        self.num_masks = 1
        self.num_buckets = 8
        self.num_iterations = 2

        # packed engines use a 32-way top-level confirm split
        if packed:
            self.conf_top_level_split = 32
        else:
            self.conf_top_level_split = 1
        self.conf_pull_back = 0
|
459
src/fdr/teddy_compile.cpp
Normal file
459
src/fdr/teddy_compile.cpp
Normal file
@ -0,0 +1,459 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
//#define TEDDY_DEBUG
|
||||
|
||||
class TeddyCompiler : boost::noncopyable {
|
||||
const TeddyEngineDescription ŋ
|
||||
const vector<hwlmLiteral> &lits;
|
||||
bool make_small;
|
||||
|
||||
public:
|
||||
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const TeddyEngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||
};
|
||||
|
||||
class TeddySet {
|
||||
const vector<hwlmLiteral> &lits;
|
||||
u32 len;
|
||||
// nibbleSets is a series of bitfields over 16 predicates
|
||||
// that represent the whether shufti nibble set
|
||||
// so for num_masks = 4 we will represent our strings by
|
||||
// 8 u16s in the vector that indicate what a shufti bucket
|
||||
// would have to look like
|
||||
vector<u16> nibbleSets;
|
||||
set<u32> litIds;
|
||||
public:
|
||||
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
|
||||
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
|
||||
const set<u32> & getLits() const { return litIds; }
|
||||
size_t litCount() const { return litIds.size(); }
|
||||
|
||||
bool operator<(const TeddySet & s) const {
|
||||
return litIds < s.litIds;
|
||||
}
|
||||
|
||||
#ifdef TEDDY_DEBUG
|
||||
void dump() const {
|
||||
printf("TS: ");
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
printf("%04x ", (u32)nibbleSets[i]);
|
||||
}
|
||||
printf("\nnlits: %zu\nLit ids: ", litCount());
|
||||
printf("Prob: %llu\n", probability());
|
||||
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
|
||||
printf("%u ", *i);
|
||||
}
|
||||
printf("\n");
|
||||
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
||||
}
|
||||
#endif
|
||||
|
||||
bool identicalTail(const TeddySet & ts) const {
|
||||
return nibbleSets == ts.nibbleSets;
|
||||
}
|
||||
|
||||
void addLiteral(u32 lit_id) {
|
||||
const string &s = lits[lit_id].s;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
if (i < s.size()) {
|
||||
u8 c = s[s.size() - i - 1];
|
||||
u8 c_hi = (c >> 4) & 0xf;
|
||||
u8 c_lo = c & 0xf;
|
||||
nibbleSets[i*2] = 1 << c_lo;
|
||||
if (lits[lit_id].nocase && ourisalpha(c)) {
|
||||
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
||||
} else {
|
||||
nibbleSets[i*2+1] = 1 << c_hi;
|
||||
}
|
||||
} else {
|
||||
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
|
||||
}
|
||||
}
|
||||
litIds.insert(lit_id);
|
||||
}
|
||||
|
||||
void merge(const TeddySet &ts) {
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
nibbleSets[i] |= ts.nibbleSets[i];
|
||||
}
|
||||
litIds.insert(ts.litIds.begin(), ts.litIds.end());
|
||||
}
|
||||
|
||||
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
|
||||
// likelihood of this TeddySet firing a first-stage accept
|
||||
// if it was given a bucket of its own and random data were
|
||||
// to be passed in
|
||||
u64a probability() const {
|
||||
u64a val = 1;
|
||||
for (size_t i = 0; i < nibbleSets.size(); i++) {
|
||||
val *= popcount32((u32)nibbleSets[i]);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
// return a score based around the chance of this hitting times
|
||||
// a small fixed cost + the cost of traversing some sort of followup
|
||||
// (assumption is that the followup is linear)
|
||||
u64a heuristic() const {
|
||||
return probability() * (2+litCount());
|
||||
}
|
||||
|
||||
bool isRunProne() const {
|
||||
u16 lo_and = 0xffff;
|
||||
u16 hi_and = 0xffff;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
lo_and &= nibbleSets[i*2];
|
||||
hi_and &= nibbleSets[i*2+1];
|
||||
}
|
||||
// we're not flood-prone if there's no way to get
|
||||
// through with a flood
|
||||
if (!lo_and || !hi_and) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Distribute the literals over the engine's buckets.
//
// Starts with one TeddySet per literal and repeatedly merges a pair of sets
// chosen by the scan below, until no candidate pair is found. On success,
// bucketToLits[n] receives the literal ids of the n-th remaining set (in the
// set's iteration order) and true is returned. Returns false, leaving
// bucketToLits untouched, if more sets remain than the engine has buckets.
bool TeddyCompiler::pack(map<BucketIndex,
                             std::vector<LiteralIndex> > &bucketToLits) {
    set<TeddySet> sts;

    // Seed: one singleton set per literal.
    for (u32 lit_idx = 0; lit_idx < lits.size(); lit_idx++) {
        TeddySet single(lits, eng.numMasks);
        single.addLiteral(lit_idx);
        sts.insert(single);
    }

    for (;;) {
#ifdef TEDDY_DEBUG
        printf("Size %zu\n", sts.size());
        for (const auto &dbg_set : sts) {
            printf("\n");
            dbg_set.dump();
        }
        printf("\n===============================================\n");
#endif

        auto m1 = sts.end();
        auto m2 = sts.end();
        u64a best = 0xffffffffffffffffULL;

        for (auto i1 = sts.begin(); i1 != sts.end(); ++i1) {
            const TeddySet &s1 = *i1;
            auto i2 = i1;
            for (++i2; i2 != sts.end(); ++i2) {
                const TeddySet &s2 = *i2;

                // Once everything already fits into the available buckets,
                // be conservative: only consider pairs with identical tails.
                const bool must_keep_packing =
                    sts.size() > eng.getNumBuckets();
                if (!must_keep_packing && !s1.identicalTail(s2)) {
                    continue;
                }

                TeddySet merged(lits, eng.numMasks);
                merged.merge(s1);
                merged.merge(s2);
                const u64a newScore = merged.heuristic();
                const u64a oldScore = s1.heuristic() + s2.heuristic();

                if (newScore < oldScore) {
                    // Merging is a net win: take this pair and stop looking
                    // at further partners for s1. The outer scan carries on.
                    m1 = i1;
                    m2 = i2;
                    break;
                }

                // Merging costs something. Refuse merges that would create a
                // run-prone set out of inputs that were not both run-prone.
                const bool oldRunProne = s1.isRunProne() && s2.isRunProne();
                const bool newRunProne = merged.isRunProne();
                if (newRunProne && !oldRunProne) {
                    continue;
                }

                // Otherwise remember the cheapest merge seen so far.
                const u64a score = newScore - oldScore;
                if (score < best) {
                    best = score;
                    m1 = i1;
                    m2 = i2;
                }
            }
        }

        // No merge candidate found: packing is finished.
        if (m1 == sts.end() || m2 == sts.end()) {
            break;
        }

        // Replace the chosen pair with their union.
        TeddySet nts(lits, eng.numMasks);
        nts.merge(*m1);
        nts.merge(*m2);
#ifdef TEDDY_DEBUG
        printf("Merging\n");
        printf("m1 = \n");
        m1->dump();
        printf("m2 = \n");
        m2->dump();
        printf("nts = \n");
        nts.dump();
        printf("\n===============================================\n");
#endif
        sts.erase(m1);
        sts.erase(m2);
        sts.insert(nts);
    }

    if (sts.size() > eng.getNumBuckets()) {
        return false;
    }

    // Emit the surviving sets as consecutive buckets.
    u32 cnt = 0;
    for (const auto &bucket_set : sts) {
        for (const auto &lit_id : bucket_set.getLits()) {
            bucketToLits[cnt].push_back(lit_id);
        }
        cnt++;
    }
    return true;
}
|
||||
|
||||
// Build the Teddy bytecode for this compiler's literal set.
//
// Resulting layout, in order: Teddy header, nibble masks, confirm
// structures, flood control data and, if link.first is non-null, a copy of
// the link data. Returns nullptr without touching 'link' if there are too
// many literals for the engine or the literals cannot be packed into its
// buckets. On success the temporary confirm/flood buffers and link.first
// are released with aligned_free().
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
        return nullptr;
    }

#ifdef TEDDY_DEBUG
    for (size_t i = 0; i < lits.size(); i++) {
        printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
               lits[i].nocase ? "caseless" : "caseful");
        for (size_t j = 0; j < lits[i].s.size(); j++) {
            printf("%02x", ((u32)lits[i].s[j])&0xff);
        }
        printf("\n");
    }
#endif

    // Assign literals to buckets: packed when a confirm step is needed,
    // otherwise one literal per bucket.
    map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
    if (!eng.needConfirm(lits)) {
        for (u32 i = 0; i < lits.size(); i++) {
            bucketToLits[i].push_back(i);
        }
    } else if (!pack(bucketToLits)) {
        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
                     lits.size(), eng.getNumBuckets());
        return nullptr;
    }

    // Each mask entry is maskWidth bytes wide (one bit per bucket); every
    // mask position has a low-nibble and a high-nibble table of 16 entries.
    u32 maskWidth = eng.getNumBuckets() / 8;
    size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;

    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
    pair<u8 *, size_t> confirmTmp
        = setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    size_t size = ROUNDUP_N(sizeof(Teddy) +
                            maskLen +
                            confirmTmp.second +
                            floodControlTmp.second +
                            link.second, 16 * maskWidth);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc
    Teddy *teddy = (Teddy *)fdr.get(); // ugly
    u8 *teddy_base = (u8 *)teddy;

    // Header.
    teddy->size = size;
    teddy->engineID = eng.getID();
    teddy->maxStringLen = verify_u32(maxLen(lits));

    // Confirm structures go directly after the masks.
    u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
    memcpy(ptr, confirmTmp.first, confirmTmp.second);
    ptr += confirmTmp.second;
    aligned_free(confirmTmp.first);

    // Flood control follows the confirm structures.
    teddy->floodOffset = verify_u32(ptr - teddy_base);
    memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
    ptr += floodControlTmp.second;
    aligned_free(floodControlTmp.first);

    // Optional link data comes last; an offset of 0 means "no link".
    if (!link.first) {
        teddy->link = 0;
    } else {
        teddy->link = verify_u32(ptr - teddy_base);
        memcpy(ptr, link.first, link.second);
        aligned_free(link.first);
    }

    u8 *baseMsk = teddy_base + sizeof(Teddy);

    for (const auto &bucket : bucketToLits) {
        const u32 bucket_id = bucket.first;
        const vector<LiteralIndex> &ids = bucket.second;
        const u8 bmsk = 1U << (bucket_id % 8);

        for (const LiteralIndex lit_id : ids) {
            const hwlmLiteral & l = lits[lit_id];
            DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
            const u32 sz = verify_u32(l.s.size());

            // fill in masks; mask j describes the j-th byte from the end of
            // the literal
            for (u32 j = 0; j < eng.numMasks; j++) {
                u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
                u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
                u8 *lo_tab = baseMsk + msk_id_lo * 16;
                u8 *hi_tab = baseMsk + msk_id_hi * 16;

                // The literal has no char at this position: accept every
                // nibble value in both tables for this bucket.
                if (j >= sz) {
                    for (u32 n = 0; n < 16; n++) {
                        lo_tab[n] |= bmsk;
                        hi_tab[n] |= bmsk;
                    }
                    continue;
                }

                // The literal does have a char at this position.
                u8 c = l.s[sz - 1 - j];
                const u32 hiShift = 4;
                u32 n_hi = (c >> hiShift) & 0xf;
                u32 n_lo = c & 0xf;

                if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
                    // A non-zero msk/cmp pair overrides the literal char:
                    // accept every nibble that agrees with cmp under msk.
                    u8 m = l.msk[l.msk.size() - 1 - j];
                    u8 m_hi = (m >> hiShift) & 0xf;
                    u8 m_lo = m & 0xf;
                    u8 cmp = l.cmp[l.msk.size() - 1 - j];
                    u8 cmp_lo = cmp & 0xf;
                    u8 cmp_hi = (cmp >> hiShift) & 0xf;

                    for (u8 cm = 0; cm < 0x10; cm++) {
                        if ((cm & m_lo) == (cmp_lo & m_lo)) {
                            lo_tab[cm] |= bmsk;
                        }
                        if ((cm & m_hi) == (cmp_hi & m_hi)) {
                            hi_tab[cm] |= bmsk;
                        }
                    }
                    continue;
                }

                if (l.nocase && ourisalpha(c)) {
                    // Caseless alpha: accept the high nibble with the 0x20
                    // case bit both cleared and set.
                    u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
                    u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
                    hi_tab[n_hi & cmHalfClear] |= bmsk;
                    hi_tab[n_hi | cmHalfSet] |= bmsk;
                } else {
                    hi_tab[n_hi] |= bmsk;
                }
                lo_tab[n_lo] |= bmsk;
            }
        }
    }

#ifdef TEDDY_DEBUG
    for (u32 i = 0; i < eng.numMasks * 2; i++) {
        for (u32 j = 0; j < 16; j++) {
            u8 val = baseMsk[i * 16 + j];
            for (u32 k = 0; k < 8; k++) {
                printf("%s", ((val >> k) & 0x1) ? "1" : "0");
            }
            printf(" ");
        }
        printf("\n");
    }
#endif

    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Entry point for building a Teddy matcher. With hint == HINT_INVALID the
// engine is selected by chooseTeddyEngine(); otherwise the description whose
// id equals 'hint' is looked up. Returns nullptr if no engine description is
// available, else whatever TeddyCompiler::build() returns for 'link'.
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
                                              bool make_small, u32 hint,
                                              const target_t &target,
                                              pair<u8 *, size_t> link) {
    unique_ptr<TeddyEngineDescription> des =
        (hint == HINT_INVALID) ? chooseTeddyEngine(target, lits)
                               : getTeddyDescription(hint);
    if (!des) {
        return nullptr;
    }

    TeddyCompiler tc(lits, *des, make_small);
    return tc.build(link);
}
|
||||
|
||||
} // namespace ue2
|
56
src/fdr/teddy_compile.h
Normal file
56
src/fdr/teddy_compile.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: Teddy build API.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_COMPILE_H
|
||||
#define TEDDY_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
#include <utility> // std::pair
|
||||
|
||||
struct FDR;
|
||||
struct target_t;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
|
||||
/**
 * \brief Build a Teddy literal matcher for \p lits.
 *
 * If \p hint is HINT_INVALID an engine is chosen automatically for
 * \p target; otherwise the engine whose ID equals \p hint is used.
 *
 * If \p link.first is non-null, \p link.second bytes from it are copied to
 * the end of the built structure and the buffer is then released with
 * aligned_free(). This only happens on success.
 *
 * \return the built matcher, or a null pointer if no suitable engine exists
 * or the literals cannot be accommodated by the engine.
 */
ue2::aligned_unique_ptr<FDR>
|
||||
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
u32 hint, const target_t &target,
|
||||
std::pair<u8 *, size_t> link);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // TEDDY_COMPILE_H
|
207
src/fdr/teddy_engine_description.cpp
Normal file
207
src/fdr/teddy_engine_description.cpp
Normal file
@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Build a description from a TeddyEngineDef: the id, the target derived from
// the CPU feature mask, the bucket count and the two confirm parameters are
// forwarded to the EngineDescription base; numMasks and packed are kept here.
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
    : EngineDescription(def.id,
                        targetByArchFeatures(def.cpu_features),
                        def.numBuckets,
                        def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      numMasks(def.numMasks),
      packed(def.packed) {
}
|
||||
|
||||
// For Teddy the default flood suffix length is the number of masks.
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
    const u32 suffix_len = numMasks;
    return suffix_len;
}
|
||||
|
||||
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
|
||||
if (packed || lits.size() > getNumBuckets()) {
|
||||
return true;
|
||||
}
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() > numMasks || !lit.msk.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "teddy_autogen_compiler.cpp"
|
||||
|
||||
static
|
||||
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
|
||||
size_t max_flood_tail = 0;
|
||||
for (const auto &lit : vl) {
|
||||
const string &s = lit.s;
|
||||
assert(!s.empty());
|
||||
size_t j;
|
||||
for (j = 1; j < s.length(); j++) {
|
||||
if (s[s.length() - j - 1] != s[s.length() - 1]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
max_flood_tail = max(max_flood_tail, j);
|
||||
}
|
||||
return max_flood_tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if this Teddy engine is qualified to handle this set of literals
|
||||
* on this target.
|
||||
*/
|
||||
static
|
||||
bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
|
||||
const size_t max_lit_len, const target_t &target) {
|
||||
if (!eng.isValidOnTarget(target)) {
|
||||
DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() < vl.size() && !eng.packed) {
|
||||
DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() * TEDDY_BUCKET_LOAD < vl.size()) {
|
||||
DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.numMasks > max_lit_len) {
|
||||
DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
|
||||
eng.getID(), max_lit_len);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vl.size() > 40) {
|
||||
u32 n_small_lits = 0;
|
||||
for (const auto &lit : vl) {
|
||||
if (lit.s.length() < eng.numMasks) {
|
||||
n_small_lits++;
|
||||
}
|
||||
}
|
||||
if (n_small_lits * 5 > vl.size()) {
|
||||
DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Pick the best-scoring Teddy engine that isAllowed() accepts for this
 * literal set on this target.
 *
 * Every candidate from getTeddyDescriptions() is scored; only a strictly
 * higher score replaces the current best, so on a tie the earlier engine in
 * that list wins.
 *
 * \return a heap-allocated copy of the chosen description, or nullptr if no
 * engine is allowed.
 */
unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
    vector<TeddyEngineDescription> descs;
    getTeddyDescriptions(&descs);
    const TeddyEngineDescription *best = nullptr;

    const size_t max_lit_len = maxLen(vl);
    const size_t max_flood_tail = maxFloodTailLen(vl);
    DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
                 max_lit_len, max_flood_tail);

    u32 best_score = 0;
    for (const auto &eng : descs) {
        if (!isAllowed(vl, eng, max_lit_len, target)) {
            continue;
        }

        u32 score = 0;

        // We prefer unpacked Teddy models.
        if (!eng.packed) {
            score += 100;
        }

        // If we're heavily loaded, we prefer to have more masks.
        if (vl.size() > 4 * eng.getNumBuckets()) {
            score += eng.numMasks * 4;
        } else {
            // Lightly loaded cases are great.
            score += 100;
        }

        // We want enough masks to avoid becoming flood-prone.
        if (eng.numMasks > max_flood_tail) {
            score += 50;
        }

        // We prefer having 3 masks. 3 is just right.
        score += 6 / (abs(3 - (int)eng.numMasks) + 1);

        // We prefer cheaper, smaller Teddy models.
        score += 16 / eng.getNumBuckets();

        DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.numMasks, eng.getNumBuckets(),
                     eng.packed ? 1U : 0U, score);

        if (!best || score > best_score) {
            // Points into descs, which is not modified before the copy
            // made at the end of this function.
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<TeddyEngineDescription>(*best);
}
|
||||
|
||||
// Look up a Teddy engine by id. Returns a heap-allocated copy of the first
// description from getTeddyDescriptions() whose id equals engineID, or
// nullptr if there is none.
unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
    vector<TeddyEngineDescription> candidates;
    getTeddyDescriptions(&candidates);

    for (size_t i = 0; i != candidates.size(); ++i) {
        const TeddyEngineDescription &candidate = candidates[i];
        if (candidate.getID() != engineID) {
            continue;
        }
        return ue2::make_unique<TeddyEngineDescription>(candidate);
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
70
src/fdr/teddy_engine_description.h
Normal file
70
src/fdr/teddy_engine_description.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_ENGINE_DESCRIPTION_H
|
||||
#define TEDDY_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define TEDDY_BUCKET_LOAD 6
|
||||
|
||||
// Plain-data definition of one Teddy engine variant. A TeddyEngineDescription
// is constructed from one of these; field order matters to any aggregate
// initialisers, so do not reorder.
struct TeddyEngineDef {
|
||||
u32 id; // engine id, forwarded to the EngineDescription base
|
||||
u64a cpu_features; // feature mask, converted with targetByArchFeatures()
|
||||
u32 numMasks; // number of masks, i.e. trailing literal bytes examined
|
||||
u32 numBuckets; // number of buckets, forwarded to EngineDescription
|
||||
bool packed; // true if several literals may share a bucket
|
||||
u32 confirmPullBackDistance; // forwarded to the EngineDescription base
|
||||
u32 confirmTopLevelSplit; // forwarded to the EngineDescription base
|
||||
};
|
||||
|
||||
// Description of a Teddy engine: the common EngineDescription data plus the
// Teddy-specific mask count and packing flag.
class TeddyEngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 numMasks; // copied from TeddyEngineDef::numMasks
|
||||
bool packed; // copied from TeddyEngineDef::packed
|
||||
|
||||
// Builds the description from its plain-data definition.
explicit TeddyEngineDescription(const TeddyEngineDef &def);
|
||||
|
||||
// Returns numMasks.
u32 getDefaultFloodSuffixLength() const override;
|
||||
// True if this engine requires a confirm step for the given literals.
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
|
||||
};
|
||||
|
||||
// Returns a copy of the best-scoring Teddy engine allowed for these literals
// on this target, or a null pointer if none is allowed.
std::unique_ptr<TeddyEngineDescription>
|
||||
chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
|
||||
// Returns a copy of the Teddy engine description with the given id, or a
// null pointer if no such engine exists.
std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
|
||||
// Fills *out with the available Teddy engine descriptions.
// NOTE(review): the definition is not in this file's hand-written sources;
// presumably it comes from the generated teddy_autogen_compiler.cpp - confirm.
void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
46
src/fdr/teddy_internal.h
Normal file
46
src/fdr/teddy_internal.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_INTERNAL_H
|
||||
#define TEDDY_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
// Header at the start of a compiled Teddy matcher. The first part is
// compatible with an FDR header, so the layout of these fields must not be
// changed independently of struct FDR.
|
||||
struct Teddy {
|
||||
u32 engineID; // id of the Teddy engine this table was built for
|
||||
u32 size; // total size in bytes of the allocated structure
|
||||
u32 maxStringLen; // length of the longest literal
|
||||
u32 floodOffset; // offset from the start of this struct to flood control
|
||||
u32 link; // offset from the start of this struct to link data, 0 if none
|
||||
u32 pad1; // padding; never written by the Teddy compiler
|
||||
u32 pad2; // padding; never written by the Teddy compiler
|
||||
u32 pad3; // padding; never written by the Teddy compiler
|
||||
};
|
||||
|
||||
#endif
|
374
src/grey.cpp
Normal file
374
src/grey.cpp
Normal file
@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib> // exit
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define DEFAULT_MAX_HISTORY 60
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Default-construct the grey box: every tunable gets its default value.
// The initialisers are kept in their original order.
Grey::Grey(void)
    : optimiseComponentTree(true),
      performGraphSimplification(true),
      prefilterReductions(true),
      removeEdgeRedundancy(true),
      allowGough(true),
      allowHaigLit(true),
      allowLitHaig(true),
      allowLbr(true),
      allowMcClellan(true),
      allowPuff(true),
      allowRose(true),
      allowExtendedNFA(true),            // bounded repeats of course
      allowLimExNFA(true),
      allowSidecar(true),
      allowAnchoredAcyclic(true),
      allowSmallLiteralSet(true),
      allowCastle(true),
      allowDecoratedLiteral(true),
      allowNoodle(true),
      fdrAllowTeddy(true),
      puffImproveHead(true),
      castleExclusive(true),
      mergeSEP(true),                    // short exhaustible passthroughs
      mergeRose(true),                   // roses inside rose
      mergeSuffixes(true),               // suffix nfas inside rose
      mergeOutfixes(true),
      onlyOneOutfix(false),
      allowShermanStates(true),
      allowMcClellan8(true),
      highlanderPruneDFA(true),
      minimizeDFA(true),
      accelerateDFA(true),
      accelerateNFA(true),
      reverseAccelerate(true),
      squashNFA(true),
      compressNFAState(true),
      numberNFAStatesWrong(false),       // debugging only
      highlanderSquash(true),
      allowZombies(true),
      floodAsPuffette(false),
      nfaForceSize(0),
      nfaForceShifts(0),
      maxHistoryAvailable(DEFAULT_MAX_HISTORY),
      minHistoryAvailable(0),            // debugging only
      maxAnchoredRegion(63),             // for rose's atable to run over
      minRoseLiteralLength(3),
      minRoseNetflowLiteralLength(2),
      maxRoseNetflowEdges(50000),        // otherwise no netflow pass.
      minExtBoundedRepeatSize(32),
      goughCopyPropagate(true),
      goughRegisterAllocate(true),
      shortcutLiterals(true),
      roseGraphReduction(true),
      roseRoleAliasing(true),
      roseMasks(true),
      roseMaxBadLeafLength(5),
      roseConvertInfBadLeaves(true),
      roseConvertFloodProneSuffixes(true),
      roseMergeRosesDuringAliasing(true),
      roseMultiTopRoses(true),
      roseHamsterMasks(true),
      roseLookaroundMasks(true),
      roseMcClellanPrefix(1),
      roseMcClellanSuffix(1),
      roseMcClellanOutfix(2),
      roseTransformDelay(true),
      roseDesiredSplit(4),
      earlyMcClellanPrefix(true),
      earlyMcClellanInfix(true),
      earlyMcClellanSuffix(true),
      allowCountingMiracles(true),
      allowSomChain(true),
      somMaxRevNfaLength(126),
      hamsterAccelForward(true),
      hamsterAccelReverse(false),
      miracleHistoryBonus(16),
      equivalenceEnable(true),

      allowSmallWrite(true),             // McClellan dfas for small patterns

      // Largest buffer that can be considered a small write; all blocks
      // larger than this are given to rose &co.
      smallWriteLargestBuffer(70),
      smallWriteLargestBufferBad(35),
      limitSmallWriteOutfixSize(1048576),   // 1 MB
      dumpFlags(0),
      limitPatternCount(8000000),           // 8M patterns
      limitPatternLength(16000),            // 16K bytes
      limitGraphVertices(500000),           // 500K vertices
      limitGraphEdges(1000000),             // 1M edges
      limitReportCount(4*8000000),
      limitLiteralCount(8000000),           // 8M literals
      limitLiteralLength(16000),
      limitLiteralMatcherChars(1073741824), // 1 GB
      limitLiteralMatcherSize(1073741824),  // 1 GB
      limitRoseRoleCount(4*8000000),
      limitRoseEngineCount(8000000),        // 8M engines
      limitRoseAnchoredSize(1073741824),    // 1 GB
      limitEngineSize(1073741824),          // 1 GB
      limitDFASize(1073741824),             // 1 GB
      limitNFASize(1048576),                // 1 MB
      limitLBRSize(1048576)                 // 1 MB
{
    // a[lm]_log_sum have limited capacity
    assert(maxAnchoredRegion < 64);
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
using boost::lexical_cast;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Apply a string of textual overrides to a Grey structure.
 *
 * The override string \p s is a ';'-separated list of "key:value" pairs. Each
 * value is parsed as an unsigned integer and assigned to the Grey member whose
 * name equals the key. A few preset keys ("simple_som", "forceOutfixesNFA",
 * "forceOutfixesDFA", "forceOutfixes") set several members at once; they
 * still require a ":value" part, although the value itself is ignored.
 *
 * Passing "help" (or "help:") prints every overridable member along with its
 * default value.
 *
 * If an unknown key or a value that is not a valid unsigned integer is seen,
 * a diagnostic is printed, followed by the help listing, and the process
 * exits with status 1.
 *
 * \param g  Grey structure to modify.
 * \param s  Override string.
 */
void applyGreyOverrides(Grey *g, const string &s) {
    string::const_iterator p = s.begin();
    string::const_iterator pe = s.end();
    string help = "help:0";
    bool invalid_override_seen = false;
    Grey defaultg; // source of the default values shown by "help"

    if (s == "help" || s == "help:") {
        printf("Valid grey overrides:\n");
        // Re-point the parser at a synthetic "help:0" pair so that the
        // G_UPDATE macro below prints one line per member.
        p = help.begin();
        pe = help.end();
    }

    while (p != pe) {
        string::const_iterator ke = find(p, pe, ':');

        if (ke == pe) {
            // No ':' left: nothing more to parse.
            break;
        }

        string key(p, ke);

        string::const_iterator ve = find(ke, pe, ';');
        const string value_text(ke + 1, ve);

        // Step past this pair (and its trailing ';', if present) now, so
        // that every path through the loop body advances the cursor.
        p = ve;
        if (p != pe) {
            ++p;
        }

        unsigned int value = 0;
        try {
            value = lexical_cast<unsigned int>(value_text);
        } catch (const boost::bad_lexical_cast &) {
            // Previously this exception escaped and terminated the process
            // without any explanation; report it like an invalid key.
            printf("Invalid grey override value %s:%s\n", key.c_str(),
                   value_text.c_str());
            invalid_override_seen = true;
            continue;
        }

        bool done = false;

        /* surely there exists a nice template to go with this macro to make
         * all the boring code disappear */
#define G_UPDATE(k) do {                                                \
        if (key == #k) { g->k = value; done = true; }                   \
        if (key == "help") {                                            \
            printf("\t%-30s\tdefault: %s\n", #k,                        \
                   lexical_cast<string>(defaultg.k).c_str());           \
        }                                                               \
    } while (0)

        G_UPDATE(optimiseComponentTree);
        G_UPDATE(performGraphSimplification);
        G_UPDATE(prefilterReductions);
        G_UPDATE(removeEdgeRedundancy);
        G_UPDATE(allowGough);
        G_UPDATE(allowHaigLit);
        G_UPDATE(allowLitHaig);
        G_UPDATE(allowLbr);
        G_UPDATE(allowMcClellan);
        G_UPDATE(allowPuff);
        G_UPDATE(allowRose);
        G_UPDATE(allowExtendedNFA);
        G_UPDATE(allowLimExNFA);
        G_UPDATE(allowSidecar);
        G_UPDATE(allowAnchoredAcyclic);
        G_UPDATE(allowSmallLiteralSet);
        G_UPDATE(allowCastle);
        G_UPDATE(allowDecoratedLiteral);
        G_UPDATE(allowNoodle);
        G_UPDATE(fdrAllowTeddy);
        G_UPDATE(puffImproveHead);
        G_UPDATE(castleExclusive);
        G_UPDATE(mergeSEP);
        G_UPDATE(mergeRose);
        G_UPDATE(mergeSuffixes);
        G_UPDATE(mergeOutfixes);
        G_UPDATE(onlyOneOutfix);
        G_UPDATE(allowShermanStates);
        G_UPDATE(allowMcClellan8);
        G_UPDATE(highlanderPruneDFA);
        G_UPDATE(minimizeDFA);
        G_UPDATE(accelerateDFA);
        G_UPDATE(accelerateNFA);
        G_UPDATE(reverseAccelerate);
        G_UPDATE(squashNFA);
        G_UPDATE(compressNFAState);
        G_UPDATE(numberNFAStatesWrong);
        G_UPDATE(allowZombies);
        G_UPDATE(floodAsPuffette);
        G_UPDATE(nfaForceSize);
        G_UPDATE(nfaForceShifts);
        G_UPDATE(highlanderSquash);
        G_UPDATE(maxHistoryAvailable);
        G_UPDATE(minHistoryAvailable);
        G_UPDATE(maxAnchoredRegion);
        G_UPDATE(minRoseLiteralLength);
        G_UPDATE(minRoseNetflowLiteralLength);
        G_UPDATE(maxRoseNetflowEdges);
        G_UPDATE(minExtBoundedRepeatSize);
        G_UPDATE(goughCopyPropagate);
        G_UPDATE(goughRegisterAllocate);
        G_UPDATE(shortcutLiterals);
        G_UPDATE(roseGraphReduction);
        G_UPDATE(roseRoleAliasing);
        G_UPDATE(roseMasks);
        G_UPDATE(roseMaxBadLeafLength);
        G_UPDATE(roseConvertInfBadLeaves);
        G_UPDATE(roseConvertFloodProneSuffixes);
        G_UPDATE(roseMergeRosesDuringAliasing);
        G_UPDATE(roseMultiTopRoses);
        G_UPDATE(roseHamsterMasks);
        G_UPDATE(roseLookaroundMasks);
        G_UPDATE(roseMcClellanPrefix);
        G_UPDATE(roseMcClellanSuffix);
        G_UPDATE(roseMcClellanOutfix);
        G_UPDATE(roseTransformDelay);
        G_UPDATE(roseDesiredSplit);
        G_UPDATE(earlyMcClellanPrefix);
        G_UPDATE(earlyMcClellanInfix);
        G_UPDATE(earlyMcClellanSuffix);
        G_UPDATE(allowSomChain);
        G_UPDATE(allowCountingMiracles);
        G_UPDATE(somMaxRevNfaLength);
        G_UPDATE(hamsterAccelForward);
        G_UPDATE(hamsterAccelReverse);
        G_UPDATE(miracleHistoryBonus);
        G_UPDATE(equivalenceEnable);
        G_UPDATE(allowSmallWrite);
        G_UPDATE(smallWriteLargestBuffer);
        G_UPDATE(smallWriteLargestBufferBad);
        G_UPDATE(limitSmallWriteOutfixSize);
        G_UPDATE(limitPatternCount);
        G_UPDATE(limitPatternLength);
        G_UPDATE(limitGraphVertices);
        G_UPDATE(limitGraphEdges);
        G_UPDATE(limitReportCount);
        G_UPDATE(limitLiteralCount);
        G_UPDATE(limitLiteralLength);
        G_UPDATE(limitLiteralMatcherChars);
        G_UPDATE(limitLiteralMatcherSize);
        G_UPDATE(limitRoseRoleCount);
        G_UPDATE(limitRoseEngineCount);
        G_UPDATE(limitRoseAnchoredSize);
        G_UPDATE(limitEngineSize);
        G_UPDATE(limitDFASize);
        G_UPDATE(limitNFASize);
        G_UPDATE(limitLBRSize);

#undef G_UPDATE

        // Preset keys: each one sets a fixed group of members; the parsed
        // value is not used.
        if (key == "simple_som") {
            g->allowHaigLit = false;
            g->allowLitHaig = false;
            g->allowSomChain = false;
            g->somMaxRevNfaLength = 0;
            done = true;
        }
        if (key == "forceOutfixesNFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = false;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixesDFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = false;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixes") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = true;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }

        if (!done && key != "help") {
            printf("Invalid grey override key %s:%u\n", key.c_str(), value);
            invalid_override_seen = true;
        }
    }

    if (invalid_override_seen) {
        // Show the caller what would have been accepted, then bail out.
        applyGreyOverrides(g, "help");
        exit(1);
    }

    assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
197
src/grey.h
Normal file
197
src/grey.h
Normal file
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GREY_H
|
||||
#define GREY_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey {
|
||||
Grey(void);
|
||||
|
||||
bool optimiseComponentTree;
|
||||
|
||||
bool performGraphSimplification;
|
||||
bool prefilterReductions;
|
||||
bool removeEdgeRedundancy;
|
||||
|
||||
bool allowGough;
|
||||
bool allowHaigLit;
|
||||
bool allowLitHaig;
|
||||
bool allowLbr;
|
||||
bool allowMcClellan;
|
||||
bool allowPuff;
|
||||
bool allowRose;
|
||||
bool allowExtendedNFA;
|
||||
bool allowLimExNFA;
|
||||
bool allowSidecar;
|
||||
bool allowAnchoredAcyclic;
|
||||
bool allowSmallLiteralSet;
|
||||
bool allowCastle;
|
||||
bool allowDecoratedLiteral;
|
||||
|
||||
bool allowNoodle;
|
||||
bool fdrAllowTeddy;
|
||||
|
||||
bool puffImproveHead;
|
||||
bool castleExclusive; // enable castle mutual exclusion analysis
|
||||
|
||||
bool mergeSEP;
|
||||
bool mergeRose;
|
||||
bool mergeSuffixes;
|
||||
bool mergeOutfixes;
|
||||
bool onlyOneOutfix; // if > 1 outfix, fail compile
|
||||
|
||||
bool allowShermanStates;
|
||||
bool allowMcClellan8;
|
||||
bool highlanderPruneDFA;
|
||||
bool minimizeDFA;
|
||||
|
||||
bool accelerateDFA;
|
||||
bool accelerateNFA;
|
||||
bool reverseAccelerate;
|
||||
|
||||
bool squashNFA;
|
||||
bool compressNFAState;
|
||||
bool numberNFAStatesWrong;
|
||||
bool highlanderSquash;
|
||||
bool allowZombies;
|
||||
bool floodAsPuffette;
|
||||
|
||||
u32 nfaForceSize;
|
||||
u32 nfaForceShifts;
|
||||
|
||||
u32 maxHistoryAvailable;
|
||||
u32 minHistoryAvailable;
|
||||
u32 maxAnchoredRegion;
|
||||
u32 minRoseLiteralLength;
|
||||
u32 minRoseNetflowLiteralLength;
|
||||
u32 maxRoseNetflowEdges;
|
||||
|
||||
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
|
||||
|
||||
bool goughCopyPropagate;
|
||||
bool goughRegisterAllocate;
|
||||
|
||||
bool shortcutLiterals;
|
||||
|
||||
bool roseGraphReduction;
|
||||
bool roseRoleAliasing;
|
||||
bool roseMasks;
|
||||
u32 roseMaxBadLeafLength;
|
||||
bool roseConvertInfBadLeaves;
|
||||
bool roseConvertFloodProneSuffixes;
|
||||
bool roseMergeRosesDuringAliasing;
|
||||
bool roseMultiTopRoses;
|
||||
bool roseHamsterMasks;
|
||||
bool roseLookaroundMasks;
|
||||
u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
|
||||
u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
|
||||
* always */
|
||||
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
|
||||
bool roseTransformDelay;
|
||||
u32 roseDesiredSplit;
|
||||
|
||||
bool earlyMcClellanPrefix;
|
||||
bool earlyMcClellanInfix;
|
||||
bool earlyMcClellanSuffix;
|
||||
|
||||
bool allowCountingMiracles;
|
||||
|
||||
bool allowSomChain;
|
||||
u32 somMaxRevNfaLength;
|
||||
|
||||
bool hamsterAccelForward;
|
||||
bool hamsterAccelReverse; // currently not implemented
|
||||
|
||||
u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
|
||||
* something dignified */
|
||||
|
||||
bool equivalenceEnable;
|
||||
|
||||
// SmallWrite engine
|
||||
bool allowSmallWrite;
|
||||
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
||||
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||
|
||||
enum DumpFlags {
|
||||
DUMP_NONE = 0,
|
||||
DUMP_BASICS = 1 << 0, // Dump basic textual data
|
||||
DUMP_PARSE = 1 << 1, // Dump component tree to .txt
|
||||
DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
|
||||
DUMP_IMPL = 1 << 3 // Dump implementation graphs
|
||||
};
|
||||
|
||||
u32 dumpFlags;
|
||||
std::string dumpPath;
|
||||
|
||||
/* Resource limits. These are somewhat arbitrary, but are intended to bound
|
||||
* the input to many of our internal structures. Exceeding one of these
|
||||
* limits will cause an error to be returned to the user.
|
||||
*
|
||||
* NOTE: Raising these limitations make cause smoke to come out of parts of
|
||||
* the runtime. */
|
||||
|
||||
u32 limitPatternCount; //!< max number of patterns
|
||||
u32 limitPatternLength; //!< max number of characters in a regex
|
||||
u32 limitGraphVertices; //!< max number of states in built NFA graph
|
||||
u32 limitGraphEdges; //!< max number of edges in build NFA graph
|
||||
u32 limitReportCount; //!< max number of ReportIDs allocated internally
|
||||
|
||||
// HWLM literal matcher limits.
|
||||
u32 limitLiteralCount; //!< max number of literals in an HWLM table
|
||||
u32 limitLiteralLength; //!< max number of characters in a literal
|
||||
u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
|
||||
u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
|
||||
|
||||
// Rose limits.
|
||||
u32 limitRoseRoleCount; //!< max number of Rose roles
|
||||
u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
|
||||
u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
|
||||
|
||||
// Engine (DFA/NFA/etc) limits.
|
||||
u32 limitEngineSize; //!< max size of an engine (in bytes)
|
||||
u32 limitDFASize; //!< max size of a DFA (in bytes)
|
||||
u32 limitNFASize; //!< max size of an NFA (in bytes)
|
||||
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
|
||||
};
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
#include <string>
|
||||
void applyGreyOverrides(Grey *g, const std::string &overrides);
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
419
src/hs.cpp
Normal file
419
src/hs.cpp
Normal file
@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end, including public API calls for compilation.
|
||||
*/
|
||||
#include "allocator.h"
|
||||
#include "ue2common.h"
|
||||
#include "grey.h"
|
||||
#include "hs_compile.h"
|
||||
#include "hs_internal.h"
|
||||
#include "database.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "compiler/error.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_expr_info.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/prefilter.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/cpuid_flags.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <limits.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
/** \brief Cheap check that no unexpected mode flags are on. */
|
||||
static
|
||||
bool validModeFlags(unsigned int mode) {
|
||||
static const unsigned allModeFlags = HS_MODE_BLOCK
|
||||
| HS_MODE_STREAM
|
||||
| HS_MODE_VECTORED
|
||||
| HS_MODE_SOM_HORIZON_LARGE
|
||||
| HS_MODE_SOM_HORIZON_MEDIUM
|
||||
| HS_MODE_SOM_HORIZON_SMALL;
|
||||
|
||||
return !(mode & ~allModeFlags);
|
||||
}
|
||||
|
||||
/** \brief Validate mode flags. */
|
||||
static
|
||||
bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
|
||||
// First, check that only bits with meaning are on.
|
||||
if (!validModeFlags(mode)) {
|
||||
*comp_error = generateCompileError("Invalid parameter: "
|
||||
"unrecognised mode flags.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Our mode must be ONE of (block, streaming, vectored).
|
||||
unsigned checkmode
|
||||
= mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
|
||||
if (popcount32(checkmode) != 1) {
|
||||
*comp_error = generateCompileError(
|
||||
"Invalid parameter: mode must have one "
|
||||
"(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
|
||||
"HS_MODE_VECTORED set.",
|
||||
-1);
|
||||
return false;
|
||||
}
|
||||
|
||||
// If you specify SOM precision, you must be in streaming mode and you only
|
||||
// get to have one.
|
||||
unsigned somMode = mode & (HS_MODE_SOM_HORIZON_LARGE |
|
||||
HS_MODE_SOM_HORIZON_MEDIUM |
|
||||
HS_MODE_SOM_HORIZON_SMALL);
|
||||
if (somMode) {
|
||||
if (!(mode & HS_MODE_STREAM)) {
|
||||
*comp_error = generateCompileError("Invalid parameter: the "
|
||||
"HS_MODE_SOM_HORIZON_ mode flags may only be set in "
|
||||
"streaming mode.", -1);
|
||||
return false;
|
||||
|
||||
}
|
||||
if ((somMode & (somMode - 1)) != 0) {
|
||||
*comp_error = generateCompileError("Invalid parameter: only one "
|
||||
"HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
||||
#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW
|
||||
#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2)
|
||||
|
||||
if (!p) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (p->cpu_features & ~HS_CPU_FEATURES_ALL) {
|
||||
*comp_error = generateCompileError("Invalid cpu features specified in "
|
||||
"the platform information.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (p->tune > HS_TUNE_LAST) {
|
||||
*comp_error = generateCompileError("Invalid tuning value specified in "
|
||||
"the platform information.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Convert from SOM mode to bytes of precision. */
|
||||
static
|
||||
unsigned getSomPrecision(unsigned mode) {
|
||||
if (mode & HS_MODE_VECTORED) {
|
||||
/* always assume full precision for vectoring */
|
||||
return 8;
|
||||
}
|
||||
|
||||
if (mode & HS_MODE_SOM_HORIZON_LARGE) {
|
||||
return 8;
|
||||
} else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
|
||||
return 4;
|
||||
} else if (mode & HS_MODE_SOM_HORIZON_SMALL) {
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Shared implementation behind the public compile entry points.
 *
 * Validates the arguments, adds each expression to an NG instance and builds
 * the database.
 *
 * \param expressions  Array of \p elements pattern strings; must not be null.
 * \param flags        Per-expression flags, or null for all-zero flags.
 * \param ids          Per-expression IDs, or null for all-zero IDs.
 * \param ext          Per-expression extended parameters, or null.
 * \param elements     Number of expressions; must be non-zero and must not
 *                     exceed g.limitPatternCount.
 * \param mode         Mode flags, validated by checkMode().
 * \param platform     Target platform, or null for the current target.
 * \param db           Receives the built database; set to null on failure.
 * \param comp_error   Receives the compile error on failure, null on success.
 * \param g            Grey settings used for this compile.
 *
 * \return HS_SUCCESS on success, HS_COMPILER_ERROR otherwise.
 */
hs_error_t
hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
                     const unsigned *ids, const hs_expr_ext *const *ext,
                     unsigned elements, unsigned mode,
                     const hs_platform_info_t *platform, hs_database_t **db,
                     hs_compile_error_t **comp_error, const Grey &g) {
    // Check the args: note that it's OK for flags, ids or ext to be null.
    if (!comp_error) {
        if (db) {
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return HS_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error
            = generateCompileError("Invalid parameter: expressions is NULL",
                                   -1);
        return HS_COMPILER_ERROR;
    }
    if (elements == 0) {
        *db = nullptr;
        *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
        return HS_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode.
        return HS_COMPILER_ERROR;
    }

    if (!checkPlatform(platform, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkPlatform.
        return HS_COMPILER_ERROR;
    }

    if (elements > g.limitPatternCount) {
        *db = nullptr;
        *comp_error = generateCompileError("Number of patterns too large", -1);
        return HS_COMPILER_ERROR;
    }

    // This function is simply a wrapper around both the parser and compiler
    bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
    bool isVectored = mode & HS_MODE_VECTORED;
    unsigned somPrecision = getSomPrecision(mode);

    target_t target_info = platform ? target_t(*platform)
                                    : get_current_target();

    CompileContext cc(isStreaming, isVectored, target_info, g);
    NG ng(cc, somPrecision);

    try {
        for (unsigned int i = 0; i < elements; i++) {
            // Add this expression to the compiler
            try {
                addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
                              ext ? ext[i] : nullptr, ids ? ids[i] : 0);
            } catch (CompileError &e) {
                /* Caught a parse error:
                 * throw it upstream as a CompileError with a specific index */
                e.setExpressionIndex(i);
                throw; /* do not slice */
            }
        }

        unsigned length = 0;
        struct hs_database *out = build(ng, &length);

        assert(out);    // should have thrown exception on error
        assert(length);

        *db = out;
        *comp_error = nullptr;

        return HS_SUCCESS;
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateCompileError(e.reason,
                                           e.hasIndex ? (int)e.index : -1);
        return HS_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        // Caught by const reference: no exception copy is made on the
        // out-of-memory path. The error returned is the shared hs_enomem
        // object.
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
/** \brief Public API: compile a single expression into a database.
 *
 * The expression is given ID zero and no extended parameters. All remaining
 * validation (mode, platform, null \p db / \p error) is performed by
 * hs_compile_multi_int().
 *
 * \return HS_SUCCESS on success, HS_COMPILER_ERROR otherwise.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
                      const hs_platform_info_t *platform, hs_database_t **db,
                      hs_compile_error_t **error) {
    if (expression == nullptr) {
        // db and error are caller-supplied and may themselves be null; do
        // not dereference them blindly on this early-exit path.
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
                                platform, db, error, Grey());
}
|
||||
|
||||
/** \brief Public API: compile an array of expressions into one database.
 *
 * Forwards to hs_compile_multi_int() with no extended parameters and a
 * default-constructed Grey.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_multi(const char * const *expressions,
                            const unsigned *flags, const unsigned *ids,
                            unsigned elements, unsigned mode,
                            const hs_platform_info_t *platform,
                            hs_database_t **db, hs_compile_error_t **error) {
    // This entry point carries no per-expression extended parameters.
    const hs_expr_ext * const *noExt = nullptr;
    const Grey defaultGrey;

    return hs_compile_multi_int(expressions, flags, ids, noExt, elements,
                                mode, platform, db, error, defaultGrey);
}
|
||||
|
||||
/** \brief Public API: compile an array of expressions, each optionally
 * accompanied by extended parameters, into one database.
 *
 * Forwards to hs_compile_multi_int() with a default-constructed Grey.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_ext_multi(const char * const *expressions,
                                const unsigned *flags, const unsigned *ids,
                                const hs_expr_ext * const *ext,
                                unsigned elements, unsigned mode,
                                const hs_platform_info_t *platform,
                                hs_database_t **db,
                                hs_compile_error_t **error) {
    const Grey defaultGrey;
    const hs_error_t rv =
        hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
                             platform, db, error, defaultGrey);
    return rv;
}
|
||||
|
||||
static
|
||||
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
unsigned int mode, hs_expr_info_t **info,
|
||||
hs_compile_error_t **error) {
|
||||
if (!error) {
|
||||
// nowhere to write an error, but we can still return an error code.
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!info) {
|
||||
*error = generateCompileError("Invalid parameter: info is NULL", -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!expression) {
|
||||
*error = generateCompileError("Invalid parameter: expression is NULL",
|
||||
-1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
*info = nullptr;
|
||||
*error = nullptr;
|
||||
|
||||
hs_expr_info local_info;
|
||||
memset(&local_info, 0, sizeof(local_info));
|
||||
|
||||
try {
|
||||
bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
|
||||
bool isVectored = mode & HS_MODE_VECTORED;
|
||||
|
||||
CompileContext cc(isStreaming, isVectored, get_current_target(),
|
||||
Grey());
|
||||
|
||||
// Ensure that our pattern isn't too long (in characters).
|
||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
||||
throw ParseError("Pattern length exceeds limit.");
|
||||
}
|
||||
|
||||
ReportManager rm(cc.grey);
|
||||
ParsedExpression pe(0, expression, flags, 0);
|
||||
assert(pe.component);
|
||||
|
||||
// Apply prefiltering transformations if desired.
|
||||
if (pe.prefilter) {
|
||||
prefilterTree(pe.component, ParseMode(flags));
|
||||
}
|
||||
|
||||
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
|
||||
|
||||
if (!g) {
|
||||
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
|
||||
throw ParseError("Internal error.");
|
||||
}
|
||||
|
||||
fillExpressionInfo(rm, *g, &local_info);
|
||||
}
|
||||
catch (const CompileError &e) {
|
||||
// Compiler error occurred
|
||||
*error = generateCompileError(e);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (std::bad_alloc) {
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (...) {
|
||||
assert(!"Internal error, unexpected exception");
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_einternal);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
|
||||
if (!rv) {
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
*rv = local_info;
|
||||
*info = rv;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Public API: report information about a single expression.
 *
 * Forwards to hs_expression_info_int() with HS_MODE_BLOCK as the mode.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error) {
    const unsigned int mode = HS_MODE_BLOCK;
    return hs_expression_info_int(expression, flags, mode, info, error);
}
|
||||
|
||||
/** \brief Public API: fill \p platform with information about the host.
 *
 * The structure is zeroed and then its cpu_features and tune members are set
 * from cpuid_flags() and cpuid_tune().
 *
 * \return HS_INVALID if \p platform is null, HS_SUCCESS otherwise.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
    if (platform == nullptr) {
        return HS_INVALID;
    }

    // Zero everything first so members not assigned below are cleared.
    memset(platform, 0, sizeof(*platform));

    platform->cpu_features = cpuid_flags();
    platform->tune = cpuid_tune();

    return HS_SUCCESS;
}
|
||||
|
||||
/** \brief Public API: release a compile error returned by one of the compile
 * calls.
 *
 * Delegates to freeCompileError() and always returns HS_SUCCESS.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
    freeCompileError(error);

    return HS_SUCCESS;
}
|
45
src/hs.h
Normal file
45
src/hs.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_H_
|
||||
#define HS_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The complete Hyperscan API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header includes both the Hyperscan compiler and runtime components. See
|
||||
* the individual component headers for documentation.
|
||||
*/
|
||||
|
||||
#include "hs_compile.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#endif /* HS_H_ */
|
509
src/hs_common.h
Normal file
509
src/hs_common.h
Normal file
@ -0,0 +1,509 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_COMMON_H_
|
||||
#define HS_COMMON_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan common API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions available to both the Hyperscan compiler and
|
||||
* runtime.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct hs_database;
|
||||
|
||||
/**
|
||||
* A Hyperscan pattern database.
|
||||
*
|
||||
* Generated by one of the Hyperscan compiler functions:
|
||||
* - @ref hs_compile()
|
||||
* - @ref hs_compile_multi()
|
||||
* - @ref hs_compile_ext_multi()
|
||||
*/
|
||||
typedef struct hs_database hs_database_t;
|
||||
|
||||
/**
|
||||
* A type for errors returned by Hyperscan functions.
|
||||
*/
|
||||
typedef int hs_error_t;
|
||||
|
||||
/**
|
||||
* Free a compiled pattern database.
|
||||
*
|
||||
* The free callback set by @ref hs_set_database_allocator() (or @ref
|
||||
* hs_set_allocator()) will be used by this function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database. NULL may also be safely provided, in which
|
||||
* case the function does nothing.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_database(hs_database_t *db);
|
||||
|
||||
/**
|
||||
* Serialize a pattern database to a stream of bytes.
|
||||
*
|
||||
* The allocator callback set by @ref hs_set_misc_allocator() (or @ref
|
||||
* hs_set_allocator()) will be used by this function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param bytes
|
||||
* On success, a pointer to an array of bytes will be returned here.
|
||||
* These bytes can be subsequently relocated or written to disk. The
|
||||
* caller is responsible for freeing this block.
|
||||
*
|
||||
* @param length
|
||||
* On success, the number of bytes in the generated byte array will be
|
||||
* returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
|
||||
* allocated, other values may be returned if errors are detected.
|
||||
*/
|
||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||
size_t *length);
|
||||
|
||||
/**
|
||||
* Reconstruct a pattern database from a stream of bytes previously generated
|
||||
* by @ref hs_serialize_database().
|
||||
*
|
||||
* This function will allocate sufficient space for the database using the
|
||||
* allocator set with @ref hs_set_database_allocator() (or @ref
|
||||
* hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
|
||||
* hs_deserialize_database_at() function.
|
||||
*
|
||||
* @param bytes
|
||||
* A byte array generated by @ref hs_serialize_database() representing a
|
||||
* compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to a newly allocated @ref hs_database_t will be
|
||||
* returned here. This database can then be used for scanning, and
|
||||
* eventually freed by the caller using @ref hs_free_database().
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
||||
hs_database_t **db);
|
||||
|
||||
/**
|
||||
* Reconstruct a pattern database from a stream of bytes previously generated
|
||||
* by @ref hs_serialize_database() at a given memory location.
|
||||
*
|
||||
* This function (unlike @ref hs_deserialize_database()) will write the
|
||||
* reconstructed database to the memory location given in the @a db parameter.
|
||||
* The amount of space required at this location can be determined with the
|
||||
* @ref hs_serialized_database_size() function.
|
||||
*
|
||||
* @param bytes
|
||||
* A byte array generated by @ref hs_serialize_database() representing a
|
||||
* compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param db
|
||||
* Pointer to an 8-byte aligned block of memory of sufficient size to hold
|
||||
* the deserialized database. On success, the reconstructed database will
|
||||
* be written to this location. This database can then be used for pattern
|
||||
* matching. The user is responsible for freeing this memory; the @ref
|
||||
* hs_free_database() call should not be used.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
||||
hs_database_t *db);
|
||||
|
||||
/**
|
||||
* Provides the size of the stream state allocated by a single stream opened
|
||||
* against the given database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled (streaming mode) pattern database.
|
||||
*
|
||||
* @param stream_size
|
||||
* On success, the size in bytes of an individual stream opened against the
|
||||
* given database is placed in this parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
|
||||
|
||||
/**
|
||||
* Provides the size of the given database in bytes.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to compiled pattern database.
|
||||
*
|
||||
* @param database_size
|
||||
* On success, the size of the compiled database in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_database_size(const hs_database_t *database,
|
||||
size_t *database_size);
|
||||
|
||||
/**
|
||||
* Utility function for reporting the size that would be required by a
|
||||
* database if it were deserialized.
|
||||
*
|
||||
* This can be used to allocate a shared memory region or other "special"
|
||||
* allocation prior to deserializing with the @ref hs_deserialize_database_at()
|
||||
* function.
|
||||
*
|
||||
* @param bytes
|
||||
* Pointer to a byte array generated by @ref hs_serialize_database()
|
||||
* representing a compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param deserialized_size
|
||||
* On success, the size of the compiled database that would be generated
|
||||
* by @ref hs_deserialize_database_at() is returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
||||
size_t *deserialized_size);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information for
|
||||
* the supplied database is placed in this parameter. The string is
|
||||
* allocated using the allocator supplied in @ref hs_set_misc_allocator()
|
||||
* (or malloc() if no allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_database_info(const hs_database_t *database, char **info);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a serialized database.
|
||||
*
|
||||
* @param bytes
|
||||
* Pointer to a serialized database.
|
||||
*
|
||||
* @param length
|
||||
* Length in bytes of the serialized database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information
|
||||
* for the supplied serialized database is placed in this parameter. The
|
||||
* string is allocated using the allocator supplied in @ref
|
||||
* hs_set_misc_allocator() (or malloc() if no allocator was set) and
|
||||
* should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
||||
char **info);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Hyperscan to allocate
|
||||
* more memory at runtime as required, for example in @ref hs_open_stream() to
|
||||
* allocate stream state.
|
||||
*
|
||||
* If Hyperscan is to be used in a multi-threaded or similarly concurrent
|
||||
* environment, the allocation function will need to be re-entrant, or
|
||||
* similarly safe for concurrent use.
|
||||
*
|
||||
* @param size
|
||||
* The number of bytes to allocate.
|
||||
* @return
|
||||
* A pointer to the region of memory allocated, or NULL on error.
|
||||
*/
|
||||
typedef void *(*hs_alloc_t)(size_t size);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Hyperscan to free
|
||||
* memory regions previously allocated using the @ref hs_alloc_t function.
|
||||
*
|
||||
* @param ptr
|
||||
* The region of memory to be freed.
|
||||
*/
|
||||
typedef void (*hs_free_t)(void *ptr);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating
|
||||
* memory at runtime for stream state, scratch space, database bytecode,
|
||||
* and various other data structures returned by the Hyperscan API.
|
||||
*
|
||||
* The function is equivalent to calling @ref hs_set_stream_allocator(),
|
||||
* @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
|
||||
* @ref hs_set_misc_allocator() with the provided parameters.
|
||||
*
|
||||
* This call will override any previous allocators that have been set.
|
||||
*
|
||||
* Note: there is no way to change the allocator used for temporary objects
|
||||
* created during the various compile calls (@ref hs_compile(), @ref
|
||||
* hs_compile_multi(), @ref hs_compile_ext_multi()).
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for database bytecode produced by the compile calls (@ref hs_compile(), @ref
|
||||
* hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
|
||||
* deserialization (@ref hs_deserialize_database()).
|
||||
*
|
||||
* If no database allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous database allocators that have been set.
|
||||
*
|
||||
* Note: the database allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* Note: there is no way to change how temporary objects created during the
|
||||
* various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
|
||||
* hs_compile_ext_multi()) are allocated.
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
|
||||
hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
|
||||
* hs_expr_info_t and serialized databases.
|
||||
*
|
||||
* If no misc allocation functions are set, or if NULL is used in place of both
|
||||
* parameters, then memory allocation will default to standard methods (such as
|
||||
* the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous misc allocators that have been set.
|
||||
*
|
||||
* Note: the misc allocator may also be set by calling @ref hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
|
||||
*
|
||||
* If no scratch allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous scratch allocators that have been set.
|
||||
*
|
||||
* Note: the scratch allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for stream state by @ref hs_open_stream().
|
||||
*
|
||||
* If no stream allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous stream allocators that have been set.
|
||||
*
|
||||
* Note: the stream allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Utility function for identifying this release version.
|
||||
*
|
||||
* @return
|
||||
* A string containing the version number of this release build and the
|
||||
* date of the build. It is allocated statically, so it does not need to
|
||||
* be freed by the caller.
|
||||
*/
|
||||
const char *hs_version(void);
|
||||
|
||||
/**
|
||||
* @defgroup HS_ERROR hs_error_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The engine completed normally.
|
||||
*/
|
||||
#define HS_SUCCESS 0
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was invalid.
|
||||
*/
|
||||
#define HS_INVALID (-1)
|
||||
|
||||
/**
|
||||
* A memory allocation failed.
|
||||
*/
|
||||
#define HS_NOMEM (-2)
|
||||
|
||||
/**
|
||||
* The engine was terminated by callback.
|
||||
*
|
||||
* This return value indicates that the target buffer was partially scanned,
|
||||
* but that the callback function requested that scanning cease after a match
|
||||
* was located.
|
||||
*/
|
||||
#define HS_SCAN_TERMINATED (-3)
|
||||
|
||||
/**
|
||||
* The pattern compiler failed, and the @ref hs_compile_error_t should be
|
||||
* inspected for more detail.
|
||||
*/
|
||||
#define HS_COMPILER_ERROR (-4)
|
||||
|
||||
/**
|
||||
* The given database was built for a different version of Hyperscan.
|
||||
*/
|
||||
#define HS_DB_VERSION_ERROR (-5)
|
||||
|
||||
/**
|
||||
* The given database was built for a different platform (i.e., CPU type).
|
||||
*/
|
||||
#define HS_DB_PLATFORM_ERROR (-6)
|
||||
|
||||
/**
|
||||
* The given database was built for a different mode of operation. This error
|
||||
* is returned when streaming calls are used with a block or vectored database
|
||||
* and vice versa.
|
||||
*/
|
||||
#define HS_DB_MODE_ERROR (-7)
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was not correctly aligned.
|
||||
*/
|
||||
#define HS_BAD_ALIGN (-8)
|
||||
|
||||
/**
|
||||
* The memory allocator (either malloc() or the allocator set with @ref
|
||||
* hs_set_allocator()) did not correctly return memory suitably aligned for the
|
||||
* largest representable data type on this platform.
|
||||
*/
|
||||
#define HS_BAD_ALLOC (-9)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_COMMON_H_ */
|
848
src/hs_compile.h
Normal file
848
src/hs_compile.h
Normal file
@ -0,0 +1,848 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_COMPILE_H_
|
||||
#define HS_COMPILE_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan compiler API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions for compiling regular expressions into
|
||||
* Hyperscan databases that can be used by the Hyperscan runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A type containing error details that is returned by the compile calls (@ref
|
||||
* hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
|
||||
* failure. The caller may inspect the values returned in this type to
|
||||
* determine the cause of failure.
|
||||
*
|
||||
* Common errors generated during the compile process include:
|
||||
*
|
||||
* - *Invalid parameter*
|
||||
*
|
||||
* An invalid argument was specified in the compile call.
|
||||
*
|
||||
* - *Unrecognised flag*
|
||||
*
|
||||
* An unrecognised value was passed in the flags argument.
|
||||
*
|
||||
* - *Pattern matches empty buffer*
|
||||
*
|
||||
* By default, Hyperscan only supports patterns that will *always*
|
||||
* consume at least one byte of input. Patterns that do not have this
|
||||
* property (such as `/(abc)?/`) will produce this error unless
|
||||
* the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
|
||||
* patterns will produce a match for *every* byte when scanned.
|
||||
*
|
||||
* - *Embedded anchors not supported*
|
||||
*
|
||||
* Hyperscan only supports the use of anchor meta-characters (such as
|
||||
* `^` and `$`) in patterns where they could *only* match
|
||||
* at the start or end of a buffer. A pattern containing an embedded
|
||||
* anchor, such as `/abc^def/`, can never match, as there is no
|
||||
* way for `abc` to precede the start of the data stream.
|
||||
*
|
||||
* - *Bounded repeat is too large*
|
||||
*
|
||||
* The pattern contains a repeated construct with very large finite
|
||||
* bounds.
|
||||
*
|
||||
* - *Unsupported component type*
|
||||
*
|
||||
* An unsupported PCRE construct was used in the pattern.
|
||||
*
|
||||
* - *Unable to generate bytecode*
|
||||
*
|
||||
* This error indicates that Hyperscan was unable to compile a pattern
|
||||
* that is syntactically valid. The most common cause is a pattern that is
|
||||
* very long and complex or contains a large repeated subpattern.
|
||||
*
|
||||
* - *Unable to allocate memory*
|
||||
*
|
||||
* The library was unable to allocate temporary storage used during
|
||||
* compilation.
|
||||
*
|
||||
* - *Internal error*
|
||||
*
|
||||
* An unexpected error occurred: if this error is reported, please contact
|
||||
* the Hyperscan team with a description of the situation.
|
||||
*/
|
||||
typedef struct hs_compile_error {
|
||||
/**
|
||||
* A human-readable error message describing the error.
|
||||
*/
|
||||
char *message;
|
||||
|
||||
/**
|
||||
* The zero-based number of the expression that caused the error (if this
|
||||
* can be determined). If the error is not specific to an expression, then
|
||||
* this value will be less than zero.
|
||||
*/
|
||||
int expression;
|
||||
} hs_compile_error_t;
|
||||
|
||||
/**
|
||||
* A type containing information on the target platform which may optionally be
|
||||
* provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
|
||||
* @ref hs_compile_ext_multi()).
|
||||
*
|
||||
* A hs_platform_info structure may be populated for the current platform by
|
||||
* using the @ref hs_populate_platform() call.
|
||||
*/
|
||||
typedef struct hs_platform_info {
|
||||
/**
|
||||
* Information about the target platform which may be used to guide the
|
||||
* optimisation process of the compile.
|
||||
*
|
||||
* Use of this field does not limit the processors that the resulting
|
||||
* database can run on, but may impact the performance of the resulting
|
||||
* database.
|
||||
*/
|
||||
unsigned int tune;
|
||||
|
||||
/**
|
||||
* Relevant CPU features available on the target platform.
|
||||
*
|
||||
* This value may be produced by combining HS_CPU_FEATURES_* flags (such as
|
||||
* @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together
|
||||
* to produce the value.
|
||||
*/
|
||||
unsigned long long cpu_features;
|
||||
|
||||
/**
|
||||
* Reserved for future use.
|
||||
*/
|
||||
unsigned long long reserved1;
|
||||
|
||||
/**
|
||||
* Reserved for future use.
|
||||
*/
|
||||
unsigned long long reserved2;
|
||||
} hs_platform_info_t;
|
||||
|
||||
/**
|
||||
* A type containing information related to an expression that is returned by
|
||||
* @ref hs_expression_info().
|
||||
*/
|
||||
typedef struct hs_expr_info {
|
||||
/**
|
||||
* The minimum length in bytes of a match for the pattern.
|
||||
*/
|
||||
unsigned int min_width;
|
||||
|
||||
/**
|
||||
* The maximum length in bytes of a match for the pattern. If the pattern
|
||||
* has an unbounded maximum width, this will be set to the maximum value of
|
||||
* an unsigned int (UINT_MAX).
|
||||
*/
|
||||
unsigned int max_width;
|
||||
|
||||
/**
|
||||
* Whether this expression can produce matches that are not returned in
|
||||
* order, such as those produced by assertions. Zero if false, non-zero if
|
||||
* true.
|
||||
*/
|
||||
char unordered_matches;
|
||||
|
||||
/**
|
||||
* Whether this expression can produce matches at end of data (EOD). In
|
||||
* streaming mode, EOD matches are raised during @ref hs_close_stream(),
|
||||
* since it is only when @ref hs_close_stream() is called that the EOD
|
||||
* location is known. Zero if false, non-zero if true.
|
||||
*
|
||||
* Note: trailing `\b` word boundary assertions may also result in EOD
|
||||
* matches as end-of-data can act as a word boundary.
|
||||
*/
|
||||
char matches_at_eod;
|
||||
|
||||
/**
|
||||
* Whether this expression can *only* produce matches at end of data (EOD).
|
||||
* In streaming mode, all matches for this expression are raised during
|
||||
* @ref hs_close_stream(). Zero if false, non-zero if true.
|
||||
*/
|
||||
char matches_only_at_eod;
|
||||
} hs_expr_info_t;
|
||||
|
||||
/**
|
||||
* A structure containing additional parameters related to an expression,
|
||||
* passed in at build time to @ref hs_compile_ext_multi().
|
||||
*
|
||||
* These parameters allow the set of matches produced by a pattern to be
|
||||
* constrained at compile time, rather than relying on the application to
|
||||
* process unwanted matches at runtime.
|
||||
*/
|
||||
typedef struct hs_expr_ext {
|
||||
/**
|
||||
* Flags governing which parts of this structure are to be used by the
|
||||
* compiler. See @ref HS_EXT_FLAG.
|
||||
*/
|
||||
unsigned long long flags;
|
||||
|
||||
/**
|
||||
* The minimum end offset in the data stream at which this expression
|
||||
* should match successfully. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long min_offset;
|
||||
|
||||
/**
|
||||
* The maximum end offset in the data stream at which this expression
|
||||
* should match successfully. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long max_offset;
|
||||
|
||||
/**
|
||||
* The minimum match length (from start to end) required to successfully
|
||||
* match this expression. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long min_length;
|
||||
} hs_expr_ext_t;
|
||||
|
||||
/**
|
||||
* @defgroup HS_EXT_FLAG hs_expr_ext_t flags
|
||||
*
|
||||
* These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
|
||||
* are used.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::min_offset field is used. */
|
||||
#define HS_EXT_FLAG_MIN_OFFSET 1ULL
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::max_offset field is used. */
|
||||
#define HS_EXT_FLAG_MAX_OFFSET 2ULL
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::min_length field is used. */
|
||||
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* The basic regular expression compiler.
|
||||
*
|
||||
* This is the function call with which an expression is compiled into a
|
||||
* Hyperscan database which can be passed to the runtime functions (such as
|
||||
* @ref hs_scan(), @ref hs_open_stream(), etc.).
|
||||
*
|
||||
* @param expression
|
||||
* The null-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t hs_compile(const char *expression, unsigned int flags,
|
||||
unsigned int mode, const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database which can be passed to the runtime functions (such as @ref
|
||||
* hs_scan(), @ref hs_open_stream(), etc.) Each expression can be labelled with
|
||||
* a unique integer which is passed into the match callback to identify the
|
||||
* pattern that has matched.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NUL-terminated expression strings to compile. Note that (as for @ref
|
||||
* hs_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
hs_error_t hs_compile_multi(const char *const *expressions,
|
||||
const unsigned int *flags, const unsigned int *ids,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler with extended pattern support.
|
||||
*
|
||||
* This function call compiles a group of expressions into a database in the
|
||||
* same way as @ref hs_compile_multi(), but allows additional parameters to be
|
||||
* specified via an @ref hs_expr_ext_t structure per expression.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NUL-terminated expression strings to compile. Note that (as for @ref
|
||||
* hs_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param ext
|
||||
* An array of pointers to filled @ref hs_expr_ext_t structures that
|
||||
* define extended behaviour for each pattern. NULL may be specified if no
|
||||
* extended behaviour is needed for an individual pattern, or in place of
|
||||
* the whole array if it is not needed for any expressions. Memory used by
|
||||
* these structures must be both allocated and freed by the caller.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
hs_error_t hs_compile_ext_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
const hs_expr_ext_t *const *ext,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Free an error structure generated by @ref hs_compile(), @ref
|
||||
* hs_compile_multi() or @ref hs_compile_ext_multi().
|
||||
*
|
||||
* @param error
|
||||
* The @ref hs_compile_error_t to be freed. NULL may also be safely
|
||||
* provided.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_compile_error(hs_compile_error_t *error);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a regular expression. The
|
||||
* information provided in @ref hs_expr_info_t includes the minimum and maximum
|
||||
* width of a pattern match.
|
||||
*
|
||||
* @param expression
|
||||
* The NUL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param info
|
||||
* On success, a pointer to the pattern information will be returned in
|
||||
* this parameter, or NULL on failure. This structure is allocated using
|
||||
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
|
||||
* allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @param error
|
||||
* If the call fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Populates the platform information based on the current host.
|
||||
*
|
||||
* @param platform
|
||||
* On success, the pointed-to structure is populated based on the current
|
||||
* host.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_populate_platform(hs_platform_info_t *platform);
|
||||
|
||||
/**
|
||||
* @defgroup HS_PATTERN_FLAG Pattern flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compile flag: Set case-insensitive matching.
|
||||
*
|
||||
* This flag sets the expression to be matched case-insensitively by default.
|
||||
* The expression may still use PCRE tokens (notably `(?i)` and
|
||||
* `(?-i)`) to switch case-insensitive matching on and off.
|
||||
*/
|
||||
#define HS_FLAG_CASELESS 1
|
||||
|
||||
/**
|
||||
* Compile flag: Matching a `.` will not exclude newlines.
|
||||
*
|
||||
* This flag sets any instances of the `.` token to match newline characters as
|
||||
* well as all other characters. The PCRE specification states that the `.`
|
||||
* token does not match newline characters by default, so without this flag the
|
||||
* `.` token will not cross line boundaries.
|
||||
*/
|
||||
#define HS_FLAG_DOTALL 2
|
||||
|
||||
/**
|
||||
* Compile flag: Set multi-line anchoring.
|
||||
*
|
||||
* This flag instructs the expression to make the `^` and `$` tokens match
|
||||
* newline characters as well as the start and end of the stream. If this flag
|
||||
* is not specified, the `^` token will only ever match at the start of a
|
||||
* stream, and the `$` token will only ever match at the end of a stream within
|
||||
* the guidelines of the PCRE specification.
|
||||
*/
|
||||
#define HS_FLAG_MULTILINE 4
|
||||
|
||||
/**
|
||||
* Compile flag: Set single-match only mode.
|
||||
*
|
||||
* This flag sets the expression's match ID to match at most once. In streaming
|
||||
* mode, this means that the expression will return only a single match over
|
||||
* the lifetime of the stream, rather than reporting every match as per
|
||||
* standard Hyperscan semantics. In block mode or vectored mode, only the first
|
||||
* match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
|
||||
* returned.
|
||||
*
|
||||
* If multiple expressions in the database share the same match ID, then they
|
||||
* must either all specify @ref HS_FLAG_SINGLEMATCH or all omit
|
||||
* @ref HS_FLAG_SINGLEMATCH. If a group of expressions sharing a match ID
|
||||
* specify the flag, then at most one match with the match ID will be generated
|
||||
* per stream.
|
||||
*
|
||||
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
|
||||
* is not currently supported.
|
||||
*/
|
||||
#define HS_FLAG_SINGLEMATCH 8
|
||||
|
||||
/**
|
||||
* Compile flag: Allow expressions that can match against empty buffers.
|
||||
*
|
||||
* This flag instructs the compiler to allow expressions that can match against
|
||||
* empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
|
||||
* possible match for an expression, such expressions generally execute very
|
||||
* slowly; the default behaviour is to return an error when an attempt to
|
||||
* compile one is made. Using this flag will force the compiler to allow such
|
||||
* an expression.
|
||||
*/
|
||||
#define HS_FLAG_ALLOWEMPTY 16
|
||||
|
||||
/**
|
||||
* Compile flag: Enable UTF-8 mode for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
|
||||
* characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
|
||||
* library that has been compiled with one or more patterns using this flag are
|
||||
* undefined.
|
||||
*/
|
||||
#define HS_FLAG_UTF8 32
|
||||
|
||||
/**
|
||||
* Compile flag: Enable Unicode property support for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to use Unicode properties, rather than the
|
||||
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
|
||||
* well as the POSIX character classes. It is only meaningful in conjunction
|
||||
* with @ref HS_FLAG_UTF8.
|
||||
*/
|
||||
#define HS_FLAG_UCP 64
|
||||
|
||||
/**
|
||||
* Compile flag: Enable prefiltering mode for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to compile an "approximate" version of this
|
||||
* pattern for use in a prefiltering application, even if Hyperscan does not
|
||||
* support the pattern in normal operation.
|
||||
*
|
||||
* The set of matches returned when this flag is used is guaranteed to be a
|
||||
* superset of the matches specified by the non-prefiltering expression.
|
||||
*
|
||||
* If the pattern contains pattern constructs not supported by Hyperscan (such
|
||||
* as zero-width assertions, back-references or conditional references) these
|
||||
* constructs will be replaced internally with broader constructs that may
|
||||
* match more often.
|
||||
*
|
||||
* Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
|
||||
* would otherwise return a "Pattern too large" error at compile time, or for
|
||||
* performance reasons (subject to the matching guarantee above).
|
||||
*
|
||||
* It is generally expected that the application will subsequently confirm
|
||||
* prefilter matches with another regular expression matcher that can provide
|
||||
* exact matches for the pattern.
|
||||
*
|
||||
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
|
||||
* is not currently supported.
|
||||
*/
|
||||
#define HS_FLAG_PREFILTER 128
|
||||
|
||||
/**
|
||||
* Compile flag: Enable leftmost start of match reporting.
|
||||
*
|
||||
* This flag instructs Hyperscan to report the leftmost possible start of match
|
||||
* offset when a match is reported for this expression. (By default, no start
|
||||
* of match is returned.)
|
||||
*
|
||||
* Enabling this behaviour may reduce performance and increase stream state
|
||||
* requirements in streaming mode.
|
||||
*/
|
||||
#define HS_FLAG_SOM_LEFTMOST 256
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
|
||||
*
|
||||
* Setting this flag indicates that the target platform supports AVX2
|
||||
* instructions.
|
||||
*/
|
||||
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_TUNE_FLAG Tuning flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Generic
|
||||
*
|
||||
* This indicates that the compiled database should not be tuned for any
|
||||
* particular target platform.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_GENERIC 0
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Sandy Bridge microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_SNB 1
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Ivy Bridge microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_IVB 2
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Haswell
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Haswell microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_HSW 3
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Silvermont
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Silvermont microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_SLM 4
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Broadwell
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Broadwell microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_BDW 5
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_MODE_FLAG Compile mode flags
|
||||
*
|
||||
* The mode flags are used as values for the mode parameter of the various
|
||||
* compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
|
||||
* hs_compile_ext_multi()).
|
||||
*
|
||||
* A mode value can be built by ORing these flag values together; the only
|
||||
* required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
|
||||
* HS_MODE_VECTORED. Other flags may be added to enable support for additional
|
||||
* features.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Block scan (non-streaming) database.
|
||||
*/
|
||||
#define HS_MODE_BLOCK 1
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Alias for @ref HS_MODE_BLOCK.
|
||||
*/
|
||||
#define HS_MODE_NOSTREAM 1
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Streaming database.
|
||||
*/
|
||||
#define HS_MODE_STREAM 2
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Vectored scanning database.
|
||||
*/
|
||||
#define HS_MODE_VECTORED 4
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use full precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use the most stream state per pattern, but will always return
|
||||
* an accurate start of match offset regardless of how far back in the past it
|
||||
* was found.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use medium precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
|
||||
* will limit start of match accuracy to offsets within 2^32 bytes of the
|
||||
* end of match offset reported.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use limited precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
|
||||
* will limit start of match accuracy to offsets within 2^16 bytes of the
|
||||
* end of match offset reported.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_COMPILE_H_ */
|
78
src/hs_internal.h
Normal file
78
src/hs_internal.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Internal-use only definitions. Available to internal tools.
|
||||
*/
|
||||
|
||||
#ifndef HS_INTERNAL_H
|
||||
#define HS_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
|
||||
/** \brief Internal use only: takes a Grey argument so that we can use it in
|
||||
* tools. */
|
||||
hs_error_t hs_compile_multi_int(const char *const *expressions,
|
||||
const unsigned *flags, const unsigned *ids,
|
||||
const hs_expr_ext *const *ext,
|
||||
unsigned elements, unsigned mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **comp_error, const Grey &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define HS_MATCH_FLAG_ADJUSTED 1U
|
||||
|
||||
/** \brief Bitmask of all valid Hyperscan flags. */
|
||||
#define HS_FLAG_ALL ( HS_FLAG_CASELESS \
|
||||
| HS_FLAG_DOTALL \
|
||||
| HS_FLAG_MULTILINE \
|
||||
| HS_FLAG_UTF8 \
|
||||
| HS_FLAG_UCP \
|
||||
| HS_FLAG_PREFILTER \
|
||||
| HS_FLAG_SINGLEMATCH \
|
||||
| HS_FLAG_ALLOWEMPTY \
|
||||
| HS_FLAG_SOM_LEFTMOST)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
493
src/hs_runtime.h
Normal file
493
src/hs_runtime.h
Normal file
@ -0,0 +1,493 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_RUNTIME_H_
|
||||
#define HS_RUNTIME_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan runtime API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions for using compiled Hyperscan databases for
|
||||
* scanning data at runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Definition of the stream identifier type.
|
||||
*/
|
||||
struct hs_stream;
|
||||
|
||||
/**
|
||||
* The stream identifier returned by @ref hs_open_stream().
|
||||
*/
|
||||
typedef struct hs_stream hs_stream_t;
|
||||
|
||||
/** Forward declaration of the scratch space structure; see @ref hs_scratch_t. */
struct hs_scratch;
|
||||
|
||||
/**
|
||||
* A Hyperscan scratch space.
|
||||
*/
|
||||
typedef struct hs_scratch hs_scratch_t;
|
||||
|
||||
/**
|
||||
* Definition of the match event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type must be provided by the
|
||||
* application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
|
||||
* hs_scan_stream() functions (or other streaming calls which can produce
|
||||
* matches).
|
||||
*
|
||||
* This callback function will be invoked whenever a match is located in the
|
||||
* target data during the execution of a scan. The details of the match are
|
||||
* passed in as parameters to the callback function, and the callback function
|
||||
* should return a value indicating whether or not matching should continue on
|
||||
* the target data. If no callbacks are desired from a scan call, NULL may be
|
||||
* provided in order to suppress match production.
|
||||
*
|
||||
* This callback function should not attempt to call Hyperscan API functions on
|
||||
* the same stream nor should it attempt to reuse the scratch space allocated
|
||||
* for the API calls that caused it to be triggered. Making another call to the
|
||||
* Hyperscan library with completely independent parameters should work (for
|
||||
* example, scanning a different database in a new stream and with new scratch
|
||||
* space), but reusing data structures like stream state and/or scratch space
|
||||
* will produce undefined behavior.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression that matched. If the expression was a
|
||||
* single expression compiled with @ref hs_compile(), this value will be
|
||||
* zero.
|
||||
*
|
||||
* @param from
|
||||
* - If a start of match flag is enabled for the current pattern, this
|
||||
* argument will be set to the start of match for the pattern assuming
|
||||
* that the start of match value lies within the current 'start of match
|
||||
* horizon' chosen by one of the SOM_HORIZON mode flags.
|
||||
|
||||
* - If the start of match value lies outside this horizon (possible only
|
||||
* when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
|
||||
* the @a from value will be set to @ref HS_OFFSET_PAST_HORIZON.
|
||||
|
||||
* - This argument will be set to zero if the Start of Match flag is not
|
||||
* enabled for the given pattern.
|
||||
*
|
||||
* @param to
|
||||
* The offset after the last byte that matches the expression.
|
||||
*
|
||||
* @param flags
|
||||
* This is provided for future use and is unused at present.
|
||||
*
|
||||
* @param context
|
||||
* The pointer supplied by the user to the @ref hs_scan(), @ref
|
||||
* hs_scan_vector() or @ref hs_scan_stream() function.
|
||||
*
|
||||
* @return
|
||||
* Non-zero if the matching should cease, else zero. If scanning is
|
||||
* performed in streaming mode and a non-zero value is returned, any
|
||||
* subsequent calls to @ref hs_scan_stream() for that stream will
|
||||
* immediately return with @ref HS_SCAN_TERMINATED.
|
||||
*/
|
||||
typedef int (*match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Open and initialise a stream.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param stream
|
||||
* On success, a pointer to the generated @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
|
||||
hs_stream_t **stream);
|
||||
|
||||
/**
|
||||
* Write data to be scanned to the opened stream.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* as data is written to the stream. Matches will be returned via the @ref
|
||||
* match_event_handler callback supplied.
|
||||
*
|
||||
* @param id
|
||||
* The stream ID (returned by @ref hs_open_stream()) to which the data
|
||||
* will be written.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param ctxt
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *ctxt);
|
||||
|
||||
/**
|
||||
* Close a stream.
|
||||
*
|
||||
* This function must be called for any stream created with @ref
|
||||
* hs_open_stream(), even if scanning has been terminated by a non-zero return
|
||||
* from the match callback function.
|
||||
*
|
||||
* Note: This operation may result in matches being returned (via calls to the
|
||||
* match event callback) for expressions anchored to the end of the data stream
|
||||
* (for example, via the use of the `$` meta-character). If these matches are
|
||||
* not desired, NULL may be provided as the @ref match_event_handler callback.
|
||||
*
|
||||
* If NULL is provided as the @ref match_event_handler callback, it is
|
||||
* permissible to provide a NULL scratch.
|
||||
*
|
||||
* @param id
|
||||
* The stream ID returned by @ref hs_open_stream().
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
|
||||
* allowed to be NULL only if the @a onEvent callback is also NULL.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param ctxt
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *ctxt);
|
||||
|
||||
/**
|
||||
* Reset a stream to an initial state.
|
||||
*
|
||||
* Conceptually, this is equivalent to performing @ref hs_close_stream() on the
|
||||
* given stream, followed by a @ref hs_open_stream(). This new stream replaces
|
||||
* the original stream in memory, avoiding the overhead of freeing the old
|
||||
* stream and allocating the new one.
|
||||
*
|
||||
* Note: This operation may result in matches being returned (via calls to the
|
||||
* match event callback) for expressions anchored to the end of the original
|
||||
* data stream (for example, via the use of the `$` meta-character). If these
|
||||
* matches are not desired, NULL may be provided as the @ref match_event_handler
|
||||
* callback.
|
||||
*
|
||||
* Note: the stream will also be tied to the same database.
|
||||
*
|
||||
* @param id
|
||||
* The stream (as created by @ref hs_open_stream()) to be replaced.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Duplicate the given stream. The new stream will have the same state as the
|
||||
* original including the current stream offset.
|
||||
*
|
||||
* @param to_id
|
||||
* On success, a pointer to the new, copied @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @param from_id
|
||||
* The stream (as created by @ref hs_open_stream()) to be copied.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
|
||||
|
||||
/**
|
||||
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
|
||||
* will first be reset (reporting any EOD matches if a non-NULL @a onEvent
|
||||
* callback handler is provided).
|
||||
*
|
||||
* Note: the 'to' stream and the 'from' stream must be open against the same
|
||||
* database.
|
||||
*
|
||||
* @param to_id
|
||||
* The existing open stream that will be reset and then overwritten with
|
||||
* the state copied from the @a from_id stream.
|
||||
*
|
||||
* @param from_id
|
||||
* The stream (as created by @ref hs_open_stream()) to be copied.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
|
||||
const hs_stream_t *from_id,
|
||||
hs_scratch_t *scratch,
|
||||
match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* The block (non-streaming) regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for block-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
|
||||
* database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
hs_error_t hs_scan(const hs_database_t *db, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* The vectored regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for vectoring-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* An array of pointers to the data blocks to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* An array of lengths (in bytes) of each data block to scan.
|
||||
*
|
||||
* @param count
|
||||
* Number of data blocks to scan. This should correspond to the size of
|
||||
* the @a data and @a length arrays.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for
|
||||
* this database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
|
||||
* callback indicated that scanning should stop; other values on error.
|
||||
*/
|
||||
hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
|
||||
const unsigned int *length, unsigned int count,
|
||||
unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
|
||||
/**
|
||||
* Allocate a "scratch" space for use by Hyperscan.
|
||||
*
|
||||
* This is required for runtime use, and one scratch space per thread, or
|
||||
* concurrent caller, is required. Any allocator callback set by @ref
|
||||
* hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* The database, as produced by @ref hs_compile().
|
||||
*
|
||||
* @param scratch
|
||||
* On first allocation, a pointer to NULL should be provided so a new
|
||||
* scratch can be allocated. If a scratch block has been previously
|
||||
* allocated, then a pointer to it should be passed back in to see if it
|
||||
* is valid for this database block. If a new scratch block is required,
|
||||
* the original will be freed and the new one returned, otherwise the
|
||||
* previous scratch block will be returned. On success, the scratch block
|
||||
* will be suitable for use with the provided database in addition to any
|
||||
* databases that the original scratch space was suitable for.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
|
||||
* allocation fails. Other errors may be returned if invalid parameters
|
||||
* are specified.
|
||||
*/
|
||||
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
|
||||
|
||||
/**
|
||||
* Allocate a scratch space that is a clone of an existing scratch space.
|
||||
*
|
||||
* This is useful when multiple concurrent threads will be using the same set
|
||||
* of compiled databases, and another scratch space is required. Any allocator
|
||||
* callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
|
||||
* will be used by this function.
|
||||
*
|
||||
* @param src
|
||||
* The existing @ref hs_scratch_t to be cloned.
|
||||
*
|
||||
* @param dest
|
||||
* A pointer to the new scratch space will be returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
|
||||
* Other errors may be returned if invalid parameters are specified.
|
||||
*/
|
||||
hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
|
||||
|
||||
/**
|
||||
* Provides the size of the given scratch space.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
|
||||
* hs_clone_scratch().
|
||||
*
|
||||
* @param scratch_size
|
||||
* On success, the size of the scratch space in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
|
||||
|
||||
/**
|
||||
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
|
||||
* hs_clone_scratch().
|
||||
*
|
||||
* The free callback set by @ref hs_set_scratch_allocator() or @ref
|
||||
* hs_set_allocator() will be used by this function.
|
||||
*
|
||||
* @param scratch
|
||||
* The scratch block to be freed. NULL may also be safely provided.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_scratch(hs_scratch_t *scratch);
|
||||
|
||||
/**
|
||||
* Callback 'from' return value, indicating that the start of this match was
|
||||
* too early to be tracked with the requested SOM_HORIZON precision.
|
||||
*/
|
||||
#define HS_OFFSET_PAST_HORIZON (~0ULL)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_RUNTIME_H_ */
|
36
src/hs_version.c
Normal file
36
src/hs_version.c
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_version.h"
|
||||
|
||||
HS_PUBLIC_API
|
||||
const char *hs_version(void) {
|
||||
return HS_VERSION_STRING;
|
||||
}
|
40
src/hs_version.h.in
Normal file
40
src/hs_version.h.in
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_VERSION_H_C6428FAF8E3713
|
||||
#define HS_VERSION_H_C6428FAF8E3713
|
||||
|
||||
/**
|
||||
* A version string to identify this release of Hyperscan.
|
||||
*/
|
||||
#define HS_VERSION_STRING "@HS_VERSION@ @BUILD_DATE@"
|
||||
|
||||
#define HS_VERSION_32BIT ((@HS_MAJOR_VERSION@ << 24) | (@HS_MINOR_VERSION@ << 16) | (@HS_PATCH_VERSION@ << 8) | 0)
|
||||
|
||||
#endif /* HS_VERSION_H_C6428FAF8E3713 */
|
||||
|
240
src/hwlm/hwlm.c
Normal file
240
src/hwlm/hwlm.c
Normal file
@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: runtime.
|
||||
*/
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/shufti.h"
|
||||
#include "nfa/vermicelli.h"
|
||||
#include <string.h>
|
||||
|
||||
#define MIN_ACCEL_LEN_BLOCK 16
|
||||
#define MIN_ACCEL_LEN_STREAM 16
|
||||
|
||||
static really_inline
|
||||
const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
|
||||
const u8 *end) {
|
||||
switch (aux->accel_type) {
|
||||
case ACCEL_VERM:
|
||||
DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
|
||||
return vermicelliExec(aux->verm.c, 0, ptr, end);
|
||||
case ACCEL_VERM_NOCASE:
|
||||
DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
|
||||
return vermicelliExec(aux->verm.c, 1, ptr, end);
|
||||
case ACCEL_DVERM:
|
||||
DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
|
||||
aux->dverm.c2);
|
||||
return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
|
||||
case ACCEL_DVERM_NOCASE:
|
||||
DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
|
||||
case ACCEL_SHUFTI:
|
||||
DEBUG_PRINTF("single shufti\n");
|
||||
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
|
||||
default:
|
||||
/* no acceleration, fall through and return current ptr */
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
|
||||
size_t *start) {
|
||||
if (len - *start < MIN_ACCEL_LEN_BLOCK) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u8 *ptr = buf + *start;
|
||||
const u8 *end = buf + len;
|
||||
const u8 offset = aux->generic.offset;
|
||||
ptr = run_hwlm_accel(aux, ptr, end);
|
||||
|
||||
if (offset) {
|
||||
ptr -= offset;
|
||||
if (ptr < buf) {
|
||||
ptr = buf;
|
||||
}
|
||||
}
|
||||
assert(ptr >= buf);
|
||||
*start = ptr - buf;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int inaccurate_accel(u8 type) {
|
||||
/* accels which don't always catch up to the boundary
|
||||
* DSHUFTI is also inaccurate but it is not used by the hamsters */
|
||||
return type == ACCEL_DVERM_NOCASE || type == ACCEL_DVERM;
|
||||
}
|
||||
|
||||
static never_inline
|
||||
void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
|
||||
const u8 *buf, size_t len, size_t *start) {
|
||||
if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u8 offset = aux->generic.offset;
|
||||
|
||||
DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
|
||||
|
||||
// Scan history buffer, but only if the start offset (which always refers to
|
||||
// buf) is zero.
|
||||
|
||||
if (!*start && hlen) {
|
||||
const u8 *ptr1 = hbuf;
|
||||
const u8 *end1 = hbuf + hlen;
|
||||
if (hlen >= 16) {
|
||||
ptr1 = run_hwlm_accel(aux, ptr1, end1);
|
||||
}
|
||||
|
||||
if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
|
||||
&& end1 != ptr1 && end1 - ptr1 <= 16) {
|
||||
DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
|
||||
/* see if we can finish off the history buffer completely */
|
||||
u8 ALIGN_DIRECTIVE temp[17];
|
||||
ptrdiff_t tlen = end1 - ptr1;
|
||||
memcpy(temp, ptr1, tlen);
|
||||
memset(temp + tlen, 0, 17 - tlen);
|
||||
if (len) { /* for dverm */
|
||||
temp[end1 - ptr1] = *buf;
|
||||
}
|
||||
|
||||
const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
|
||||
|
||||
if (tempp - temp >= tlen) {
|
||||
ptr1 = end1;
|
||||
}
|
||||
DEBUG_PRINTF("got %zu\n", tempp - temp);
|
||||
}
|
||||
|
||||
if (ptr1 != end1) {
|
||||
DEBUG_PRINTF("bailing in history\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
|
||||
|
||||
const u8 *ptr2 = buf + *start;
|
||||
const u8 *end2 = buf + len;
|
||||
|
||||
const u8 *found = run_hwlm_accel(aux, ptr2, end2);
|
||||
|
||||
if (found >= ptr2 + offset) {
|
||||
size_t delta = found - offset - ptr2;
|
||||
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
|
||||
*start += delta;
|
||||
} else if (hlen) {
|
||||
UNUSED size_t remaining = offset + ptr2 - found;
|
||||
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups) {
|
||||
DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
|
||||
if (!groups) {
|
||||
DEBUG_PRINTF("groups all off\n");
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
assert(start < len);
|
||||
|
||||
if (t->type == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("calling noodExec\n");
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
|
||||
ctxt);
|
||||
} else {
|
||||
assert(t->type == HWLM_ENGINE_FDR);
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_block(aa, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
||||
size_t len, size_t start, HWLMCallback cb,
|
||||
void *ctxt, hwlm_group_t groups,
|
||||
u8 *stream_state) {
|
||||
const u8 *hbuf = scratch->core_info.hbuf;
|
||||
const size_t hlen = scratch->core_info.hlen;
|
||||
const u8 *buf = scratch->core_info.buf;
|
||||
|
||||
DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
|
||||
len, start, groups);
|
||||
|
||||
if (!groups) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
assert(start < len);
|
||||
|
||||
if (t->type == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("calling noodExec\n");
|
||||
// If we've been handed a start offset, we can use a block mode scan at
|
||||
// that offset.
|
||||
if (start) {
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
|
||||
cb, ctxt);
|
||||
} else {
|
||||
return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
|
||||
ctxt, scratch->fdr_temp_buf,
|
||||
FDR_TEMP_BUF_SIZE);
|
||||
}
|
||||
} else {
|
||||
// t->type == HWLM_ENGINE_FDR
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
// if no active stream state, use acceleration
|
||||
if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
|
||||
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
|
||||
}
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
|
||||
start, cb, ctxt, groups, stream_state);
|
||||
}
|
||||
}
|
142
src/hwlm/hwlm.h
Normal file
142
src/hwlm/hwlm.h
Normal file
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_H
|
||||
#define HWLM_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/** \brief Error return type for exec functions. */
|
||||
typedef int hwlm_error_t;
|
||||
|
||||
/** \brief Type representing a set of groups as a bitmap. */
|
||||
typedef u64a hwlm_group_t;
|
||||
|
||||
/** \brief HWLM callback return type. */
|
||||
typedef hwlm_group_t hwlmcb_rv_t;
|
||||
|
||||
/** \brief Value representing all possible literal groups. */
|
||||
#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
|
||||
|
||||
/** \brief Callback return value indicating that we should continue matching. */
|
||||
#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
|
||||
|
||||
/** \brief Callback return value indicating that we should halt matching. */
|
||||
#define HWLM_TERMINATE_MATCHING 0
|
||||
|
||||
/** \brief Matching finished without being terminated by the user. */
|
||||
#define HWLM_SUCCESS 0
|
||||
|
||||
/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
|
||||
* from the match callback. */
|
||||
#define HWLM_TERMINATED 1
|
||||
|
||||
/** \brief An error occurred during matching.
|
||||
*
|
||||
* This should only be used if an unsupported engine was called (like one
|
||||
* designed for a different architecture). */
|
||||
#define HWLM_ERROR_UNKNOWN 2
|
||||
|
||||
struct hs_scratch;
|
||||
struct HWLM;
|
||||
|
||||
/** \brief The type for an HWLM callback.
|
||||
*
|
||||
* This callback receives a start-of-match offset, an end-of-match offset, the
|
||||
* ID of the match and the context pointer that was passed into \ref
|
||||
* hwlmExec or \ref hwlmExecStreaming.
|
||||
*
|
||||
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
|
||||
*
|
||||
* A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
|
||||
*
|
||||
* An arbitrary group mask may be given as the return value. This will be taken
|
||||
* as a hint by the underlying engine that only literals with groups
|
||||
* overlapping the provided mask need to be reported.
|
||||
*
|
||||
* The underlying engine may choose not to report a match if there is no group
|
||||
* belonging to the literal which was active when the end match location
|
||||
* was first reached.
|
||||
*/
|
||||
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
|
||||
void *context);
|
||||
|
||||
/** \brief Match strings in table.
|
||||
*
|
||||
* If a match occurs, the callback function given will be called with the index
|
||||
* of the last character in the string and the \p context (passed through
|
||||
* without interpretation).
|
||||
*
|
||||
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
|
||||
* returning \ref HWLM_TERMINATE_MATCHING.
|
||||
*
|
||||
* \p start is the first offset at which a match may start.
|
||||
*
|
||||
* The underlying engine may choose not to report any match which starts before
|
||||
* the first possible match of a literal which is in the initial group mask.
|
||||
*/
|
||||
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback callback, void *context,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
|
||||
*
|
||||
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
|
||||
* history length and the main buffer.
|
||||
*
|
||||
* \p len is the length of the main buffer to be scanned.
|
||||
*
|
||||
* \p start is an advisory hint representing the first offset at which a match
|
||||
* may start. Some underlying literal matchers may not respect it.
|
||||
*
|
||||
* Two buffers/lengths are provided. Matches that occur entirely within
|
||||
* the history buffer will not be reported by this function. The offsets
|
||||
* reported for the main buffer are relative to the start of that buffer (a
|
||||
* match at byte 10 of the main buffer is reported as 10). Matches that start
|
||||
* in the history buffer will have starts reported with 'negative' values.
|
||||
*/
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
|
||||
struct hs_scratch *scratch, size_t len,
|
||||
size_t start, HWLMCallback callback,
|
||||
void *context, hwlm_group_t groups,
|
||||
u8 *stream_state);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
635
src/hwlm/hwlm_build.cpp
Normal file
635
src/hwlm/hwlm_build.cpp
Normal file
@ -0,0 +1,635 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: build code.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_build.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_build.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_compile.h"
|
||||
#include "fdr/fdr.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
||||
|
||||
static
|
||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
const hwlmLiteral &first = *lits.front();
|
||||
|
||||
struct candidate {
|
||||
candidate(void)
|
||||
: c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
|
||||
candidate(const hwlmLiteral &base, u32 offset)
|
||||
: c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
|
||||
b5insens(false), valid(true) {}
|
||||
char c1;
|
||||
char c2;
|
||||
u32 max_offset;
|
||||
bool b5insens;
|
||||
bool valid;
|
||||
|
||||
bool operator>(const candidate &other) const {
|
||||
if (!valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.valid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other.cdiffers() && !cdiffers()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.cdiffers() && cdiffers()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!other.b5insens && b5insens) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.b5insens && !b5insens) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (max_offset > other.max_offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cdiffers(void) const {
|
||||
if (!b5insens) {
|
||||
return c1 != c2;
|
||||
}
|
||||
return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
|
||||
}
|
||||
};
|
||||
|
||||
candidate best;
|
||||
|
||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
|
||||
candidate curr(first, i);
|
||||
|
||||
/* check to see if this pair appears in each string */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
|
||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
bool found_nc = false;
|
||||
for (u32 j = 0;
|
||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
|
||||
found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
||||
found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
||||
|
||||
if (curr.b5insens) {
|
||||
found = found_nc;
|
||||
}
|
||||
}
|
||||
|
||||
if (!curr.b5insens && !found && found_nc) {
|
||||
curr.b5insens = true;
|
||||
found = true;
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
goto next_candidate;
|
||||
}
|
||||
}
|
||||
|
||||
/* check to find the max offset where this appears */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
|
||||
j++) {
|
||||
bool found = false;
|
||||
if (curr.b5insens) {
|
||||
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
||||
} else {
|
||||
found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (curr > best) {
|
||||
best = curr;
|
||||
}
|
||||
|
||||
next_candidate:;
|
||||
}
|
||||
|
||||
if (!best.valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
aux->dverm.offset = verify_u8(best.max_offset);
|
||||
|
||||
if (!best.b5insens) {
|
||||
aux->dverm.accel_type = ACCEL_DVERM;
|
||||
aux->dverm.c1 = best.c1;
|
||||
aux->dverm.c2 = best.c2;
|
||||
DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
} else {
|
||||
aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
|
||||
aux->dverm.c1 = best.c1 & CASE_CLEAR;
|
||||
aux->dverm.c2 = best.c2 & CASE_CLEAR;
|
||||
DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
const hwlmLiteral &first = *lits.front();
|
||||
|
||||
struct candidate {
|
||||
candidate(void)
|
||||
: c(0), max_offset(0), b5insens(false), valid(false) {}
|
||||
candidate(const hwlmLiteral &base, u32 offset)
|
||||
: c(base.s[offset]), max_offset(0),
|
||||
b5insens(false), valid(true) {}
|
||||
char c;
|
||||
u32 max_offset;
|
||||
bool b5insens;
|
||||
bool valid;
|
||||
|
||||
bool operator>(const candidate &other) const {
|
||||
if (!valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.valid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!other.b5insens && b5insens) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.b5insens && !b5insens) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (max_offset > other.max_offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
candidate best;
|
||||
|
||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
|
||||
candidate curr(first, i);
|
||||
|
||||
/* check to see if this pair appears in each string */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
if (lit.nocase && ourisalpha(curr.c)) {
|
||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
bool found_nc = false;
|
||||
for (u32 j = 0;
|
||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
||||
found |= curr.c == lit.s[j];
|
||||
found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
||||
|
||||
if (curr.b5insens) {
|
||||
found = found_nc;
|
||||
}
|
||||
}
|
||||
|
||||
if (!curr.b5insens && !found && found_nc) {
|
||||
curr.b5insens = true;
|
||||
found = true;
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
goto next_candidate;
|
||||
}
|
||||
}
|
||||
|
||||
/* check to find the max offset where this appears */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
||||
bool found = false;
|
||||
if (curr.b5insens) {
|
||||
found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
||||
} else {
|
||||
found = curr.c == lit.s[j];
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (curr > best) {
|
||||
best = curr;
|
||||
}
|
||||
|
||||
next_candidate:;
|
||||
}
|
||||
|
||||
if (!best.valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!best.b5insens) {
|
||||
aux->verm.accel_type = ACCEL_VERM;
|
||||
aux->verm.c = best.c;
|
||||
DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
|
||||
} else {
|
||||
aux->verm.accel_type = ACCEL_VERM_NOCASE;
|
||||
aux->verm.c = best.c & CASE_CLEAR;
|
||||
DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
|
||||
}
|
||||
aux->verm.offset = verify_u8(best.max_offset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
|
||||
vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
|
||||
*min_len = MAX_ACCEL_OFFSET;
|
||||
|
||||
for (const auto &lit : lits) {
|
||||
if (!(lit.groups & expected_groups)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t lit_len = lit.s.length();
|
||||
if (lit_len < *min_len) {
|
||||
*min_len = verify_u32(lit_len);
|
||||
}
|
||||
|
||||
filtered_lits->push_back(&lit);
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("lit:");
|
||||
for (u32 i = 0; i < lit.s.length(); i++) {
|
||||
printf("%02hhx", lit.s[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
hwlm_group_t expected_groups, AccelAux *aux) {
|
||||
DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
|
||||
u32 min_len = MAX_ACCEL_OFFSET;
|
||||
vector<const hwlmLiteral *> filtered_lits;
|
||||
|
||||
filterLits(lits, expected_groups, &filtered_lits, &min_len);
|
||||
if (filtered_lits.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (findDVerm(filtered_lits, aux)
|
||||
|| findSVerm(filtered_lits, aux)) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
||||
for (const auto &lit : lits) {
|
||||
if (!(lit.groups & expected_groups)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
|
||||
unsigned char c = lit.s[i];
|
||||
if (lit.nocase) {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
|
||||
reach[i].set(mytoupper(c));
|
||||
reach[i].set(mytolower(c));
|
||||
} else {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
|
||||
reach[i].set(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 min_count = ~0U;
|
||||
u32 min_offset = ~0U;
|
||||
for (u32 i = 0; i < min_len; i++) {
|
||||
size_t count = reach[i].count();
|
||||
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
||||
describeClass(reach[i]).c_str(), count);
|
||||
if (count < min_count) {
|
||||
min_count = (u32)count;
|
||||
min_offset = i;
|
||||
}
|
||||
}
|
||||
assert(min_offset <= min_len);
|
||||
|
||||
if (min_count > MAX_SHUFTI_WIDTH) {
|
||||
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
|
||||
return;
|
||||
}
|
||||
|
||||
const CharReach &cr = reach[min_offset];
|
||||
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
|
||||
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
|
||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||
aux->shufti.accel_type = ACCEL_SHUFTI;
|
||||
aux->shufti.offset = verify_u8(min_offset);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("fail\n");
|
||||
}
|
||||
|
||||
static
|
||||
void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
|
||||
hwlm_group_t expected_groups) {
|
||||
findForwardAccelScheme(lits, expected_groups, &h->accel1);
|
||||
findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
|
||||
|
||||
h->accel1_groups = expected_groups;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("building lit table for:\n");
|
||||
for (const auto &lit : lits) {
|
||||
printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
|
||||
escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : "");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Called by an assertion.
|
||||
static
|
||||
bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
|
||||
for (const auto &lit : lits) {
|
||||
if (!lit.groups) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool isNoodleable(const vector<hwlmLiteral> &lits,
|
||||
const hwlmStreamingControl *stream_control,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowNoodle) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lits.size() != 1) {
|
||||
DEBUG_PRINTF("too many literals for noodle\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stream_control) { // nullptr if in block mode
|
||||
if (lits.front().s.length() + 1 > stream_control->history_max) {
|
||||
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
|
||||
lits.front().s.length(),
|
||||
stream_control->history_max);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!lits.front().msk.empty()) {
|
||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Build an HWLM literal matcher for \p lits.
 *
 * Steps: enforce the grey-box resource limits, build either a Noodle table
 * (see isNoodleable) or an FDR table, copy the engine after a
 * cache-line-rounded HWLM header, and for FDR optionally attach forward
 * acceleration schemes.
 *
 * Throws ResourceLimitError if a literal count, literal length, total
 * character or engine size limit is exceeded, and CompileError if a literal
 * uses the reserved ID 0xffffffff. Returns nullptr if the underlying engine
 * could not be built.
 */
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
                                   hwlmStreamingControl *stream_control,
                                   bool make_small, const CompileContext &cc,
                                   hwlm_group_t expected_groups) {
    assert(!lits.empty());
    dumpLits(lits);

    assert(!stream_control
           || stream_control->history_min <= stream_control->history_max);

    const auto &grey = cc.grey;

    // Check that we haven't exceeded the maximum number of literals.
    if (lits.size() > grey.limitLiteralCount) {
        throw ResourceLimitError();
    }

    // Safety and resource limit checks.
    u64a chars_seen = 0;
    for (auto it = lits.begin(); it != lits.end(); ++it) {
        const size_t lit_len = it->s.length();
        assert(lit_len != 0);

        if (lit_len > grey.limitLiteralLength) {
            throw ResourceLimitError();
        }
        chars_seen += lit_len;
        if (chars_seen > grey.limitLiteralMatcherChars) {
            throw ResourceLimitError();
        }

        // We do not allow the all-ones ID, as we reserve that for internal use
        // within literal matchers.
        if (it->id == 0xffffffffu) {
            assert(!"reserved id 0xffffffff used");
            throw CompileError("Internal error.");
        }
    }

    u8 engine_kind = 0;
    size_t engine_bytes = 0;
    shared_ptr<void> engine;

    DEBUG_PRINTF("building table with %zu strings\n", lits.size());

    assert(everyoneHasGroups(lits));

    if (!isNoodleable(lits, stream_control, cc)) {
        DEBUG_PRINTF("building a new deal\n");
        engine_kind = HWLM_ENGINE_FDR;
        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
                                 stream_control);
        if (fdr) {
            engine_bytes = fdrSize(fdr.get());
        }
        engine = move(fdr);
    } else {
        DEBUG_PRINTF("build noodle table\n");
        engine_kind = HWLM_ENGINE_NOOD;
        const hwlmLiteral &lit = lits.front();
        auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
                                     lit.nocase, lit.id);
        if (noodle) {
            engine_bytes = noodSize(noodle.get());
        }
        if (stream_control) {
            // For now, a single literal still goes to noodle and asks
            // for a great big history
            stream_control->literal_history_required = lit.s.length() - 1;
            assert(stream_control->literal_history_required
                   <= stream_control->history_max);
            stream_control->literal_stream_state_required = 0;
        }
        engine = move(noodle);
    }

    if (!engine) {
        return nullptr;
    }

    assert(engine_bytes);
    if (engine_bytes > grey.limitLiteralMatcherSize) {
        throw ResourceLimitError();
    }

    // Header (rounded up to a cache line) followed by the engine itself.
    const size_t total_bytes = ROUNDUP_CL(sizeof(HWLM)) + engine_bytes;
    auto h = aligned_zmalloc_unique<HWLM>(total_bytes);

    h->type = engine_kind;
    memcpy(HWLM_DATA(h.get()), engine.get(), engine_bytes);

    const bool want_accel =
        engine_kind == HWLM_ENGINE_FDR && grey.hamsterAccelForward;
    if (want_accel) {
        buildForwardAccel(h.get(), lits, expected_groups);
    }

    if (stream_control) {
        DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
                     stream_control->literal_history_required,
                     stream_control->history_max);
        assert(stream_control->literal_history_required
               <= stream_control->history_max);
    }

    return h;
}
|
||||
|
||||
size_t hwlmSize(const HWLM *h) {
|
||||
size_t engSize = 0;
|
||||
|
||||
switch (h->type) {
|
||||
case HWLM_ENGINE_NOOD:
|
||||
engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
|
||||
break;
|
||||
case HWLM_ENGINE_FDR:
|
||||
engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
|
||||
break;
|
||||
}
|
||||
|
||||
if (!engSize) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return engSize + ROUNDUP_CL(sizeof(*h));
|
||||
}
|
||||
|
||||
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
|
||||
const size_t NO_LIMIT = ~(size_t)0;
|
||||
|
||||
// NOTE: this function contains a number of magic numbers which are
|
||||
// conservative estimates of flood-proneness based on internal details of
|
||||
// the various literal engines that fall under the HWLM aegis. If you
|
||||
// change those engines, you might need to change this function too.
|
||||
|
||||
DEBUG_PRINTF("%zu literals\n", numLiterals);
|
||||
|
||||
if (cc.grey.allowNoodle && numLiterals <= 1) {
|
||||
DEBUG_PRINTF("noodle\n");
|
||||
return NO_LIMIT;
|
||||
}
|
||||
|
||||
if (cc.grey.fdrAllowTeddy) {
|
||||
if (numLiterals <= 48) {
|
||||
DEBUG_PRINTF("teddy\n");
|
||||
return 3;
|
||||
}
|
||||
if (cc.target_info.has_avx2() && numLiterals <= 96) {
|
||||
DEBUG_PRINTF("avx2 teddy\n");
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we had thought we could push this value up to 9, but it seems that
|
||||
// hurts performance on floods in some FDR models. Super-conservative for
|
||||
// now.
|
||||
DEBUG_PRINTF("fdr\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
104
src/hwlm/hwlm_build.h
Normal file
104
src/hwlm/hwlm_build.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: build API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_BUILD_H
|
||||
#define HWLM_BUILD_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_literal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
/** \brief Structure gathering together the input/output parameters related to
|
||||
* streaming mode operation. */
|
||||
struct hwlmStreamingControl {
|
||||
/** \brief IN parameter: Upper limit on the amount of history that can be
|
||||
* requested. */
|
||||
size_t history_max;
|
||||
|
||||
/** \brief IN parameter: History already known to be used before literal
|
||||
* analysis. */
|
||||
size_t history_min;
|
||||
|
||||
/** \brief OUT parameter: History required by the literal matcher to
|
||||
* correctly match all literals. */
|
||||
size_t literal_history_required;
|
||||
|
||||
/** \brief OUT parameter: Stream state required by literal matcher in bytes. Can
|
||||
* be zero, and generally will be small (0-8 bytes). */
|
||||
size_t literal_stream_state_required;
|
||||
};
|
||||
|
||||
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
|
||||
* literals.
|
||||
*
|
||||
* \param lits The group of literals.
|
||||
* \param stream_control Streaming control parameters. If the matcher will
|
||||
* operate in non-streaming (block) mode, this pointer should be NULL.
|
||||
* \param make_small Optimise matcher for small size.
|
||||
* \param cc Compile context.
|
||||
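* \param expected_groups Group mask used when building forward acceleration
* for the FDR engine: the primary scheme (accel1) is built only from literals
* that share a group with this mask, and the mask is stored in the matcher
* as accel1_groups. Defaults to all groups.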
|
||||
*
|
||||
* Build failures are generally a result of memory allocation failure. These
|
||||
* may result in a nullptr return value, or a std::bad_alloc exception being
|
||||
* thrown.
|
||||
*/
|
||||
aligned_unique_ptr<HWLM>
|
||||
hwlmBuild(const std::vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control, bool make_small,
|
||||
const CompileContext &cc,
|
||||
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the number of repeated characters on the end of a
|
||||
* literal that will make a literal set of size \a numLiterals suffer
|
||||
* performance degradation.
|
||||
*/
|
||||
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
|
||||
|
||||
/** \brief Return the size in bytes of an HWLM structure. */
|
||||
size_t hwlmSize(const HWLM *h);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // HWLM_BUILD_H
|
70
src/hwlm/hwlm_dump.cpp
Normal file
70
src/hwlm/hwlm_dump.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: dump code.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "hwlm_dump.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_build.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_dump.h"
|
||||
#include "nfa/accel_dump.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f) {
|
||||
switch (h->type) {
|
||||
case HWLM_ENGINE_NOOD:
|
||||
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
|
||||
break;
|
||||
case HWLM_ENGINE_FDR:
|
||||
fdrPrintStats((const FDR *)HWLM_C_DATA(h), f);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "<unknown hwlm subengine>\n");
|
||||
}
|
||||
|
||||
fprintf(f, "accel1_groups: %016llx\n", h->accel1_groups);
|
||||
|
||||
fprintf(f, "accel1:");
|
||||
dumpAccelInfo(f, h->accel1);
|
||||
fprintf(f, "accel0:");
|
||||
dumpAccelInfo(f, h->accel0);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
50
src/hwlm/hwlm_dump.h
Normal file
50
src/hwlm/hwlm_dump.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: dump API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_DUMP_H
|
||||
#define HWLM_DUMP_H
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Dump some information about the given HWLM structure. */
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
#endif
|
62
src/hwlm/hwlm_internal.h
Normal file
62
src/hwlm/hwlm_internal.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: data structures.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_INTERNAL_H
|
||||
#define HWLM_INTERNAL_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/accel.h"
|
||||
|
||||
/** \brief Underlying engine is FDR. */
|
||||
#define HWLM_ENGINE_FDR 12
|
||||
|
||||
/** \brief Underlying engine is Noodle. */
|
||||
#define HWLM_ENGINE_NOOD 16
|
||||
|
||||
/** \brief Main Hamster Wheel Literal Matcher header.
 *
 * This header is followed by an engine-specific structure. The HWLM_DATA and
 * HWLM_C_DATA macros in this file locate that structure at
 * ROUNDUP_CL(sizeof(struct HWLM)) bytes past the start of this header.
 */
|
||||
struct HWLM {
|
||||
u8 type; /**< engine selector: HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
|
||||
hwlm_group_t accel1_groups; /**< accelerable groups (those served by accel1). */
|
||||
union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
|
||||
union AccelAux accel0; /**< fallback accel scheme, used otherwise */
|
||||
};
|
||||
|
||||
/** \brief Read-only pointer to the engine-specific data, located
 * ROUNDUP_CL(sizeof(struct HWLM)) bytes past \a p. */
#define HWLM_C_DATA(p)                                                         \
    ((const void *)(ROUNDUP_CL(sizeof(struct HWLM)) + (const char *)(p)))

/** \brief Writable pointer to the engine-specific data, located
 * ROUNDUP_CL(sizeof(struct HWLM)) bytes past \a p. */
#define HWLM_DATA(p)                                                           \
    ((void *)(ROUNDUP_CL(sizeof(struct HWLM)) + (char *)(p)))
|
||||
|
||||
#endif
|
111
src/hwlm/hwlm_literal.cpp
Normal file
111
src/hwlm/hwlm_literal.cpp
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
|
||||
*/
|
||||
#include "hwlm_literal.h"
|
||||
#include "util/bitutils.h" // for CASE_BIT
|
||||
#include "util/compare.h" // for ourisalpha
|
||||
#include "util/ue2string.h" // for escapeString
|
||||
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/algorithm/cxx11/all_of.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#ifdef DEBUG
/** \brief Debug-only helper: renders every byte of \a v as two hex digits
 * (zero padded) and concatenates them without separators. */
static UNUSED
std::string dumpMask(const vector<u8> &v) {
    ostringstream out;
    // hex and the fill character are sticky; only the width needs resetting
    // for each byte.
    out << hex << setfill('0');
    for (const u8 byte : v) {
        out << setw(2) << static_cast<unsigned int>(byte);
    }
    return out.str();
}
#endif
|
||||
|
||||
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
|
||||
const vector<u8> &cmp) {
|
||||
string::const_reverse_iterator si = s.rbegin();
|
||||
vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();
|
||||
|
||||
for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
|
||||
u8 c = *si, m = *mi, v = *ci;
|
||||
if (nocase && ourisalpha(c)) {
|
||||
m &= ~CASE_BIT;
|
||||
v &= ~CASE_BIT;
|
||||
}
|
||||
|
||||
assert(ci != cmp.rend());
|
||||
if ((c & m) != v) {
|
||||
DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
|
||||
DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
|
||||
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in) {
|
||||
assert(msk.size() <= HWLM_MASKLEN);
|
||||
assert(msk.size() == cmp.size());
|
||||
|
||||
DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n",
|
||||
escapeString(s).c_str(), dumpMask(msk).c_str(),
|
||||
dumpMask(cmp).c_str());
|
||||
|
||||
// Mask and compare vectors MUST be the same size.
|
||||
assert(msk.size() == cmp.size());
|
||||
|
||||
// We must have been passed a msk/cmp that can be applied to s.
|
||||
assert(maskIsConsistent(s, nocase, msk, cmp));
|
||||
|
||||
// In the name of good hygiene, zap msk/cmp if msk is all zeroes.
|
||||
if (all_of_equal(msk.begin(), msk.end(), 0)) {
|
||||
msk.clear();
|
||||
cmp.clear();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
121
src/hwlm/hwlm_literal.h
Normal file
121
src/hwlm/hwlm_literal.h
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_LITERAL_H
|
||||
#define HWLM_LITERAL_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
|
||||
#define HWLM_MASKLEN 8
|
||||
|
||||
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
|
||||
struct hwlmLiteral {
|
||||
std::string s; //!< \brief The literal itself.
|
||||
|
||||
/** \brief The ID to pass to the callback if this literal matches.
|
||||
*
|
||||
* Note that the special value 0xFFFFFFFF is reserved for internal use and
|
||||
* should not be used. */
|
||||
u32 id;
|
||||
|
||||
bool nocase; //!< \brief True if literal is case-insensitive.
|
||||
|
||||
/** \brief Matches for runs of this literal can be quashed.
|
||||
*
|
||||
* Advisory flag meaning that there is no value in returning runs of
|
||||
* additional matches for a literal after the first one, so such matches
|
||||
* can be quashed by the literal matcher. */
|
||||
bool noruns;
|
||||
|
||||
/** \brief Set of groups that literal belongs to.
|
||||
*
|
||||
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
|
||||
* the groups that are switched on. */
|
||||
hwlm_group_t groups;
|
||||
|
||||
/** \brief Supplementary comparison mask.
|
||||
*
|
||||
* These two values add a supplementary comparison that is done over the
|
||||
* final 8 bytes of the string -- if v is those bytes, then the string must
|
||||
* match as well as (v & msk) == cmp.
|
||||
*
|
||||
* An empty msk is the safe way of not adding any comparison to the string
|
||||
* unnecessarily filling in msk may turn off optimizations.
|
||||
*
|
||||
* The msk/cmp mechanism must NOT place a value into the literal that
|
||||
* conflicts with the contents of the string, but can be allowed to add
|
||||
* additional power within the string -- for example, to allow some case
|
||||
* sensitivity within a case-insensitive string.
|
||||
|
||||
* Values are stored in memory order -- i.e. the last byte of the mask
|
||||
* corresponds to the last byte of the string. Both vectors must be the
|
||||
* same size, and must not exceed \ref HWLM_MASKLEN in length.
|
||||
*/
|
||||
std::vector<u8> msk;
|
||||
|
||||
/** \brief Supplementary comparison value.
|
||||
*
|
||||
* See documentation for \ref msk.
|
||||
*/
|
||||
std::vector<u8> cmp;
|
||||
|
||||
/** \brief Simple constructor: no group information, no msk/cmp. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(false),
|
||||
groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {}
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
|
||||
u32 id_in, hwlm_group_t groups_in,
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
|
||||
};
|
||||
|
||||
/**
|
||||
* Consistency test; returns false if the given msk/cmp test can never match
|
||||
* the literal string s.
|
||||
*/
|
||||
bool maskIsConsistent(const std::string &s, bool nocase,
|
||||
const std::vector<u8> &msk, const std::vector<u8> &cmp);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // HWLM_LITERAL_H
|
110
src/hwlm/noodle_build.cpp
Normal file
110
src/hwlm/noodle_build.cpp
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: build code.
|
||||
*/
|
||||
#include <cstring> // for memcpy
|
||||
|
||||
#include "noodle_build.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i + 1 < len; i++) {
|
||||
int diff = 0;
|
||||
const char c = lit[i];
|
||||
const char d = lit[i + 1];
|
||||
if (nocase && ourisalpha(c)) {
|
||||
diff = (mytoupper(c) != mytoupper(d));
|
||||
} else {
|
||||
diff = (c != d);
|
||||
}
|
||||
offset = i;
|
||||
if (diff) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/** \brief Construct a Noodle matcher for the given literal. */
|
||||
aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
|
||||
bool nocase, u32 id) {
|
||||
size_t noodle_len = sizeof(noodTable) + len;
|
||||
aligned_unique_ptr<noodTable> n =
|
||||
aligned_zmalloc_unique<noodTable>(noodle_len);
|
||||
assert(n);
|
||||
|
||||
size_t key_offset = findNoodFragOffset(lit, len, nocase);
|
||||
|
||||
n->id = id;
|
||||
n->len = verify_u32(len);
|
||||
n->key_offset = verify_u32(key_offset);
|
||||
n->nocase = nocase ? 1 : 0;
|
||||
memcpy(n->str, lit, len);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
size_t noodSize(const noodTable *n) {
|
||||
assert(n); // shouldn't call with null
|
||||
return sizeof(*n) + n->len;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
#include <cctype>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void noodPrintStats(const noodTable *n, FILE *f) {
|
||||
fprintf(f, "Noodle table\n");
|
||||
fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);
|
||||
fprintf(f, "String: ");
|
||||
for (u32 i = 0; i < n->len; i++) {
|
||||
if (isgraph(n->str[i]) && n->str[i] != '\\') {
|
||||
fprintf(f, "%c", n->str[i]);
|
||||
} else {
|
||||
fprintf(f, "\\x%02hhx", n->str[i]);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
64
src/hwlm/noodle_build.h
Normal file
64
src/hwlm/noodle_build.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: build code.
|
||||
*/
|
||||
|
||||
#ifndef NOODLE_BUILD_H_048A1A6D585A9A
|
||||
#define NOODLE_BUILD_H_048A1A6D585A9A
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
struct noodTable;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Construct a Noodle matcher for the given literal. */
|
||||
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
|
||||
bool nocase, u32 id);
|
||||
|
||||
size_t noodSize(const noodTable *n);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void noodPrintStats(const noodTable *n, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
#endif /* NOODLE_BUILD_H_048A1A6D585A9A */
|
||||
|
364
src/hwlm/noodle_engine.c
Normal file
364
src/hwlm/noodle_engine.c
Normal file
@ -0,0 +1,364 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: runtime.
|
||||
*/
|
||||
#include "hwlm.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/masked_move.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
/** \brief Noodle runtime context.
 *
 * Bundles what final() needs in order to report a match: the callback, the
 * literal id and caller context forwarded to it, and an adjustment that
 * final() adds to the match position before invoking the callback. */
|
||||
struct cb_info {
|
||||
HWLMCallback cb; //!< callback function called on match
|
||||
u32 id; //!< ID to pass to callback on match
|
||||
void *ctx; //!< caller-supplied context to pass to callback
|
||||
size_t offsetAdj; //!< added to match positions by final(); used in streaming mode
|
||||
};
|
||||
|
||||
/* Returns HWLM_TERMINATED from the enclosing function when x compares equal
 * to HWLM_TERMINATED; otherwise does nothing. Note that x is evaluated once
 * and that the expansion is a bare brace block, not a do/while(0). */
#define RETURN_IF_TERMINATED(x) \
|
||||
{ \
|
||||
if ((x) == HWLM_TERMINATED) { \
|
||||
return HWLM_TERMINATED; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Reports every candidate recorded in the 32-bit match mask z for the
 * single-character scanner.
 *
 * Expects z, d, buf, len, key, noCase and cbi to be in scope at the expansion
 * site. Each set bit of z is cleared in turn (lowest first) and turned into
 * the buffer offset (d - buf + bit), which is passed to final() with a key
 * length of 1. The enclosing function returns HWLM_TERMINATED as soon as
 * final() reports termination.
 *
 * final() returns hwlm_error_t, so the result is held in that type rather
 * than in the callback return type hwlmcb_rv_t. */
#define SINGLE_ZSCAN()                                                         \
    do {                                                                       \
        while (unlikely(z)) {                                                  \
            u32 pos = findAndClearLSB_32(&z);                                  \
            size_t matchPos = d - buf + pos;                                   \
            hwlm_error_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi,       \
                                    matchPos);                                 \
            RETURN_IF_TERMINATED(rv);                                          \
        }                                                                      \
    } while (0)
|
||||
|
||||
/* Reports every candidate recorded in the 32-bit match mask z for the
 * two-character scanner.
 *
 * Expects z, d, buf, len, key, keyLen, keyOffset, noCase and cbi to be in
 * scope at the expansion site. Each set bit of z is cleared in turn (lowest
 * first) and turned into the buffer offset (d - buf + bit - 1), which is
 * passed to final() with is_double set so that the whole key is confirmed
 * there. The enclosing function returns HWLM_TERMINATED as soon as final()
 * reports termination.
 *
 * final() returns hwlm_error_t, so the result is held in that type rather
 * than in the callback return type hwlmcb_rv_t. */
#define DOUBLE_ZSCAN()                                                         \
    do {                                                                       \
        while (unlikely(z)) {                                                  \
            u32 pos = findAndClearLSB_32(&z);                                  \
            size_t matchPos = d - buf + pos - 1;                               \
            hwlm_error_t rv = final(buf, len, key, keyLen, keyOffset, 1,       \
                                    noCase, cbi, matchPos);                    \
            RETURN_IF_TERMINATED(rv);                                          \
        }                                                                      \
    } while (0)
|
||||
|
||||
static really_inline
|
||||
u8 caseClear8(u8 x, bool noCase) {
|
||||
return (u8)(noCase ? (x & (u8)0xdf) : x);
|
||||
}
|
||||
|
||||
// Make sure the rest of the string is there. The single character scanner
|
||||
// is used only for single chars with case insensitivity used correctly,
|
||||
// so it can go straight to the callback if we get this far.
|
||||
static really_inline
|
||||
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool is_double, bool noCase,
|
||||
const struct cb_info *cbi, size_t pos) {
|
||||
pos -= keyOffset;
|
||||
if (is_double) {
|
||||
if (pos + keyLen > len) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
}
|
||||
pos += cbi->offsetAdj;
|
||||
DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1));
|
||||
hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define CHUNKSIZE 32
|
||||
#define MASK_TYPE m256
|
||||
#include "noodle_engine_avx2.c"
|
||||
#else
|
||||
#define CHUNKSIZE 16
|
||||
#define MASK_TYPE m128
|
||||
#include "noodle_engine_sse.c"
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, const struct cb_info *cbi) {
|
||||
hwlm_error_t rv;
|
||||
size_t end = len;
|
||||
|
||||
const MASK_TYPE mask1 = getMask(key[0], noCase);
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
|
||||
if (len < CHUNKSIZE) {
|
||||
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (len == CHUNKSIZE) {
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, len);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = len - CHUNKSIZE;
|
||||
|
||||
if (s2Start) {
|
||||
// first scan out to the fast scan starting point
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, s2Start);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
if (likely(s2Start != s2End)) {
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
|
||||
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
|
||||
s2Start, s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
// if we are done bail out
|
||||
if (s2End == end) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
|
||||
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
|
||||
cbi, s2End, end);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
hwlm_error_t rv;
|
||||
// we stop scanning for the key-fragment when the rest of the key can't
|
||||
// possibly fit in the remaining buffer
|
||||
size_t end = len - keyLen + keyOffset + 2;
|
||||
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
|
||||
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
|
||||
|
||||
if (end - keyOffset < CHUNKSIZE) {
|
||||
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, keyOffset, end);
|
||||
return rv;
|
||||
}
|
||||
if (end - keyOffset == CHUNKSIZE) {
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, keyOffset,
|
||||
end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
|
||||
uintptr_t s1End = s2Start + 1;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = end - CHUNKSIZE;
|
||||
uintptr_t off = keyOffset;
|
||||
|
||||
if (s2Start != keyOffset) {
|
||||
// first scan out to the fast scan starting point plus one char past to
|
||||
// catch the key on the overlap
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, off,
|
||||
s1End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
off = s1End;
|
||||
|
||||
if (s2Start >= end) {
|
||||
DEBUG_PRINTF("s2 == mL %zu\n", end);
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (likely(s2Start != s2End)) {
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
|
||||
rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, s2Start, s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
off = s2End;
|
||||
}
|
||||
|
||||
// if there isn't enough data left to match the key, bail out
|
||||
if (s2End == end) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
|
||||
rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
|
||||
caseMask, mask1, mask2, cbi, off, end);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 0, cbi);
|
||||
}
|
||||
|
||||
// Single-character specialisation, used when keyLen = 1
|
||||
static really_inline
|
||||
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
if (!ourisalpha(key[0])) {
|
||||
noCase = 0; // force noCase off if we don't have an alphabetic char
|
||||
}
|
||||
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanSingleNoCase(buf, len, key, cbi);
|
||||
} else {
|
||||
return scanSingleCase(buf, len, key, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi);
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
} else {
|
||||
return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
// main entry point for the scan code
|
||||
static really_inline
|
||||
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase, const struct cb_info *cbi) {
|
||||
if (len < keyLen) {
|
||||
// can't find string of length keyLen in a shorter buffer
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (keyLen == 1) {
|
||||
assert(keyOffset == 0);
|
||||
return scanSingle(buf, len, key, noCase, cbi);
|
||||
} else {
|
||||
return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Block-mode scanner. */
|
||||
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t offset_adj, HWLMCallback cb, void *ctxt) {
|
||||
assert(n && buf);
|
||||
|
||||
struct cb_info cbi = { cb, n->id, ctxt, offset_adj };
|
||||
DEBUG_PRINTF("nood scan of %zu bytes for %*s\n", len, n->len, n->str);
|
||||
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
|
||||
}
|
||||
|
||||
/** \brief Streaming-mode scanner. */
|
||||
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
HWLMCallback cb, void *ctxt, u8 *temp_buf,
|
||||
UNUSED size_t temp_buffer_size) {
|
||||
assert(n);
|
||||
|
||||
struct cb_info cbi = {cb, n->id, ctxt, 0};
|
||||
hwlm_error_t rv;
|
||||
|
||||
if (hlen) {
|
||||
assert(hbuf);
|
||||
|
||||
size_t tl1 = MIN(n->len - 1, hlen);
|
||||
size_t tl2 = MIN(n->len - 1, len);
|
||||
size_t temp_len = tl1 + tl2;
|
||||
assert(temp_len < temp_buffer_size);
|
||||
memcpy(temp_buf, hbuf + hlen - tl1, tl1);
|
||||
memcpy(temp_buf + tl1, buf, tl2);
|
||||
|
||||
cbi.offsetAdj = -tl1;
|
||||
rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase,
|
||||
&cbi);
|
||||
if (rv == HWLM_TERMINATED) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
}
|
||||
|
||||
assert(buf);
|
||||
|
||||
cbi.offsetAdj = 0;
|
||||
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user