Merge branch develop into master

Matthew Barr 2016-08-24 14:29:28 +10:00
commit bf99ad00eb
227 changed files with 15736 additions and 5764 deletions


@ -2,6 +2,36 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.3.0] 2016-08-24
- Introduce a new analysis pass ("Violet") used for decomposition of patterns
into literals and smaller engines.
- Introduce a new container engine ("Tamarama") for infix and suffix engines
that can be proven to run exclusively of one another. This reduces stream
state for pattern sets with many such engines.
- Introduce a new shuffle-based DFA engine ("Sheng"). This improves scanning
performance for pattern sets where small engines are generated.
- Improve the analysis used to extract extra mask information from short
literals.
- Reduce compile time spent in equivalence class analysis.
- Build: frame pointers are now only omitted for 32-bit release builds.
- Build: Workaround for C++ issues reported on FreeBSD/libc++ platforms.
(github issue #27)
- Simplify the LimEx NFA with a unified "variable shift" model, which reduces
the number of different NFA code paths to one per model size.
- Allow some anchored prefixes that may squash the literal to which they are
attached to run eagerly. This improves scanning performance for some
patterns.
- Simplify and improve EOD ("end of data") matching, using the interpreter for
all operations.
- Elide unnecessary instructions in the Rose interpreter at compile time.
- Reduce the number of inlined instantiations of the Rose interpreter in order
to reduce instruction cache pressure.
- Small improvements to literal matcher acceleration.
- Parser: ignore `\E` metacharacters that are not preceded by `\Q`. This
conforms to PCRE's behaviour, rather than returning a compile error (see the
sketch after this list).
- Check for misaligned memory when allocating an error structure in Hyperscan's
compile path and return an appropriate error if detected.
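
As a quick illustration of the `\E` change above, here is a minimal sketch against the public C API (not part of the changelog): a pattern containing a stray `\E` with no preceding `\Q` now compiles cleanly, as in PCRE, where earlier releases returned a compile error.

```c
#include <stdio.h>
#include <hs.h>

int main(void) {
    /* "foo\Ebar": the \E has no preceding \Q. As of 4.3.0 it is ignored,
     * matching PCRE, instead of triggering a compile error. */
    const char *pattern = "foo\\Ebar";
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;

    if (hs_compile(pattern, 0, HS_MODE_BLOCK, NULL, &db, &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }

    printf("compiled OK\n");
    hs_free_database(db);
    return 0;
}
```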
## [4.2.0] 2016-05-31
- Introduce an interpreter for many complex actions to replace the use of
internal reports within the core of Hyperscan (the "Rose" engine). This


@ -1,12 +1,18 @@
cmake_minimum_required (VERSION 2.8.11) cmake_minimum_required (VERSION 2.8.11)
# don't use the built-in default configs
set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE)
project (Hyperscan C CXX) project (Hyperscan C CXX)
set (HS_MAJOR_VERSION 4) set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 2) set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 0) set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
string (TIMESTAMP BUILD_DATE "%Y-%m-%d") # since we are doing this manually, we only have three types
set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo"
CACHE STRING "" FORCE)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag) include(CheckCCompilerFlag)
@ -24,7 +30,7 @@ find_package(PkgConfig QUIET)
if (NOT CMAKE_BUILD_TYPE) if (NOT CMAKE_BUILD_TYPE)
message(STATUS "Default build type 'Release with debug info'") message(STATUS "Default build type 'Release with debug info'")
set(CMAKE_BUILD_TYPE "RELWITHDEBINFO") set(CMAKE_BUILD_TYPE RELWITHDEBINFO CACHE STRING "" FORCE )
else() else()
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
message(STATUS "Build type ${CMAKE_BUILD_TYPE}") message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
@ -90,6 +96,18 @@ else()
message(FATAL_ERROR "No python interpreter found") message(FATAL_ERROR "No python interpreter found")
endif() endif()
# allow for reproducible builds - python for portability
if (DEFINED ENV{SOURCE_DATE_EPOCH})
execute_process(
COMMAND "${PYTHON}" "${CMAKE_MODULE_PATH}/formatdate.py" "$ENV{SOURCE_DATE_EPOCH}"
OUTPUT_VARIABLE BUILD_DATE
OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
endif ()
message(STATUS "Build date: ${BUILD_DATE}")
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND") if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
message(FATAL_ERROR "Ragel state machine compiler not found") message(FATAL_ERROR "Ragel state machine compiler not found")
endif() endif()
@ -121,13 +139,7 @@ endif()
CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF) CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)
option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE) CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
if (DISABLE_ASSERTS)
if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
add_definitions(-DNDEBUG)
endif()
endif()
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
@ -139,18 +151,26 @@ if(MSVC OR MSVC_IDE)
if (MSVC_VERSION LESS 1700) if (MSVC_VERSION LESS 1700)
message(FATAL_ERROR "The project requires C++11 features.") message(FATAL_ERROR "The project requires C++11 features.")
else() else()
# set base flags
set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3")
set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc")
set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
if (WINDOWS_ICC) if (WINDOWS_ICC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
else() else()
#TODO: don't hardcode arch #TODO: don't hardcode arch
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
endif() endif()
string(REGEX REPLACE "/RTC1" ""
CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" )
string(REGEX REPLACE "/RTC1" ""
CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" )
endif() endif()
@ -172,16 +192,34 @@ else()
unset(_GXX_OUTPUT) unset(_GXX_OUTPUT)
endif() endif()
# set compiler flags - more are tested and added later if(OPTIMISE)
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual") set(OPT_C_FLAG "-O3")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor") set(OPT_CXX_FLAG "-O2")
if (NOT RELEASE_BUILD) else()
# -Werror is most useful during development, don't potentially break set(OPT_C_FLAG "-O0")
# release builds set(OPT_CXX_FLAG "-O0")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") endif(OPTIMISE)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
# set up base flags for build types
set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}")
set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}")
set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}")
set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}")
if (DISABLE_ASSERTS)
# usually true for release builds, false for debug
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
endif() endif()
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
if (NOT CMAKE_C_FLAGS MATCHES .*march.*) if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
message(STATUS "Building for current host CPU") message(STATUS "Building for current host CPU")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
@ -199,15 +237,7 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
endif() endif()
if(OPTIMISE) if (NOT(ARCH_IA32 AND RELEASE_BUILD))
set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
else()
set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
endif(OPTIMISE)
if(NOT RELEASE_BUILD)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif() endif()
@ -297,6 +327,11 @@ if (CXX_UNUSED_CONST_VAR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
endif() endif()
# gcc 6 complains about type attributes that get ignored, like alignment
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
if (CXX_IGNORED_ATTR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()
# note this for later # note this for later
# g++ doesn't have this flag but clang does # g++ doesn't have this flag but clang does
@ -438,15 +473,14 @@ set (hs_exec_SRCS
src/nfa/limex_simd128.c src/nfa/limex_simd128.c
src/nfa/limex_simd256.c src/nfa/limex_simd256.c
src/nfa/limex_simd384.c src/nfa/limex_simd384.c
src/nfa/limex_simd512a.c src/nfa/limex_simd512.c
src/nfa/limex_simd512b.c
src/nfa/limex_simd512c.c
src/nfa/limex.h src/nfa/limex.h
src/nfa/limex_common_impl.h src/nfa/limex_common_impl.h
src/nfa/limex_context.h src/nfa/limex_context.h
src/nfa/limex_internal.h src/nfa/limex_internal.h
src/nfa/limex_runtime.h src/nfa/limex_runtime.h
src/nfa/limex_runtime_impl.h src/nfa/limex_runtime_impl.h
src/nfa/limex_shuffle.h
src/nfa/limex_state_impl.h src/nfa/limex_state_impl.h
src/nfa/mpv.h src/nfa/mpv.h
src/nfa/mpv.c src/nfa/mpv.c
@ -477,9 +511,18 @@ set (hs_exec_SRCS
src/nfa/repeat.c src/nfa/repeat.c
src/nfa/repeat.h src/nfa/repeat.h
src/nfa/repeat_internal.h src/nfa/repeat_internal.h
src/nfa/sheng.c
src/nfa/sheng.h
src/nfa/sheng_defs.h
src/nfa/sheng_impl.h
src/nfa/sheng_impl4.h
src/nfa/sheng_internal.h
src/nfa/shufti_common.h src/nfa/shufti_common.h
src/nfa/shufti.c src/nfa/shufti.c
src/nfa/shufti.h src/nfa/shufti.h
src/nfa/tamarama.c
src/nfa/tamarama.h
src/nfa/tamarama_internal.h
src/nfa/truffle_common.h src/nfa/truffle_common.h
src/nfa/truffle.c src/nfa/truffle.c
src/nfa/truffle.h src/nfa/truffle.h
@ -495,7 +538,6 @@ set (hs_exec_SRCS
src/rose/block.c src/rose/block.c
src/rose/catchup.h src/rose/catchup.h
src/rose/catchup.c src/rose/catchup.c
src/rose/eod.c
src/rose/infix.h src/rose/infix.h
src/rose/init.h src/rose/init.h
src/rose/init.c src/rose/init.c
@ -503,6 +545,7 @@ set (hs_exec_SRCS
src/rose/match.h src/rose/match.h
src/rose/match.c src/rose/match.c
src/rose/miracle.h src/rose/miracle.h
src/rose/program_runtime.c
src/rose/program_runtime.h src/rose/program_runtime.h
src/rose/runtime.h src/rose/runtime.h
src/rose/rose.h src/rose/rose.h
@ -510,6 +553,7 @@ set (hs_exec_SRCS
src/rose/rose_program.h src/rose/rose_program.h
src/rose/rose_types.h src/rose/rose_types.h
src/rose/rose_common.h src/rose/rose_common.h
src/rose/validate_mask.h
src/util/bitutils.h src/util/bitutils.h
src/util/exhaust.h src/util/exhaust.h
src/util/fatbit.h src/util/fatbit.h
@ -524,11 +568,8 @@ set (hs_exec_SRCS
src/util/pqueue.h src/util/pqueue.h
src/util/scatter.h src/util/scatter.h
src/util/scatter_runtime.h src/util/scatter_runtime.h
src/util/shuffle.h
src/util/shuffle_ssse3.h
src/util/simd_utils.h src/util/simd_utils.h
src/util/simd_utils_ssse3.h src/util/simd_utils.c
src/util/simd_utils_ssse3.c
src/util/state_compress.h src/util/state_compress.h
src/util/state_compress.c src/util/state_compress.c
src/util/unaligned.h src/util/unaligned.h
@ -597,11 +638,15 @@ SET (hs_SRCS
src/hwlm/noodle_build.h src/hwlm/noodle_build.h
src/hwlm/noodle_internal.h src/hwlm/noodle_internal.h
src/nfa/accel.h src/nfa/accel.h
src/nfa/accel_dfa_build_strat.cpp
src/nfa/accel_dfa_build_strat.h
src/nfa/accelcompile.cpp src/nfa/accelcompile.cpp
src/nfa/accelcompile.h src/nfa/accelcompile.h
src/nfa/callback.h src/nfa/callback.h
src/nfa/castlecompile.cpp src/nfa/castlecompile.cpp
src/nfa/castlecompile.h src/nfa/castlecompile.h
src/nfa/dfa_build_strat.cpp
src/nfa/dfa_build_strat.h
src/nfa/dfa_min.cpp src/nfa/dfa_min.cpp
src/nfa/dfa_min.h src/nfa/dfa_min.h
src/nfa/goughcompile.cpp src/nfa/goughcompile.cpp
@ -613,8 +658,6 @@ SET (hs_SRCS
src/nfa/mcclellan_internal.h src/nfa/mcclellan_internal.h
src/nfa/mcclellancompile.cpp src/nfa/mcclellancompile.cpp
src/nfa/mcclellancompile.h src/nfa/mcclellancompile.h
src/nfa/mcclellancompile_accel.cpp
src/nfa/mcclellancompile_accel.h
src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.cpp
src/nfa/mcclellancompile_util.h src/nfa/mcclellancompile_util.h
src/nfa/limex_compile.cpp src/nfa/limex_compile.cpp
@ -639,8 +682,13 @@ SET (hs_SRCS
src/nfa/repeat_internal.h src/nfa/repeat_internal.h
src/nfa/repeatcompile.cpp src/nfa/repeatcompile.cpp
src/nfa/repeatcompile.h src/nfa/repeatcompile.h
src/nfa/sheng_internal.h
src/nfa/shengcompile.cpp
src/nfa/shengcompile.h
src/nfa/shufticompile.cpp src/nfa/shufticompile.cpp
src/nfa/shufticompile.h src/nfa/shufticompile.h
src/nfa/tamaramacompile.cpp
src/nfa/tamaramacompile.h
src/nfa/trufflecompile.cpp src/nfa/trufflecompile.cpp
src/nfa/trufflecompile.h src/nfa/trufflecompile.h
src/nfagraph/ng.cpp src/nfagraph/ng.cpp
@ -746,6 +794,8 @@ SET (hs_SRCS
src/nfagraph/ng_util.h src/nfagraph/ng_util.h
src/nfagraph/ng_vacuous.cpp src/nfagraph/ng_vacuous.cpp
src/nfagraph/ng_vacuous.h src/nfagraph/ng_vacuous.h
src/nfagraph/ng_violet.cpp
src/nfagraph/ng_violet.h
src/nfagraph/ng_width.cpp src/nfagraph/ng_width.cpp
src/nfagraph/ng_width.h src/nfagraph/ng_width.h
src/parser/AsciiComponentClass.cpp src/parser/AsciiComponentClass.cpp
@ -825,6 +875,10 @@ SET (hs_SRCS
src/rose/rose_build_compile.cpp src/rose/rose_build_compile.cpp
src/rose/rose_build_convert.cpp src/rose/rose_build_convert.cpp
src/rose/rose_build_convert.h src/rose/rose_build_convert.h
src/rose/rose_build_exclusive.cpp
src/rose/rose_build_exclusive.h
src/rose/rose_build_groups.cpp
src/rose/rose_build_groups.h
src/rose/rose_build_impl.h src/rose/rose_build_impl.h
src/rose/rose_build_infix.cpp src/rose/rose_build_infix.cpp
src/rose/rose_build_infix.h src/rose/rose_build_infix.h
@ -853,6 +907,8 @@ SET (hs_SRCS
src/util/charreach.cpp src/util/charreach.cpp
src/util/charreach.h src/util/charreach.h
src/util/charreach_util.h src/util/charreach_util.h
src/util/clique.cpp
src/util/clique.h
src/util/compare.h src/util/compare.h
src/util/compile_context.cpp src/util/compile_context.cpp
src/util/compile_context.h src/util/compile_context.h
@ -878,7 +934,6 @@ SET (hs_SRCS
src/util/report_manager.cpp src/util/report_manager.cpp
src/util/report_manager.h src/util/report_manager.h
src/util/simd_utils.h src/util/simd_utils.h
src/util/simd_utils_ssse3.h
src/util/target_info.cpp src/util/target_info.cpp
src/util/target_info.h src/util/target_info.h
src/util/ue2_containers.h src/util/ue2_containers.h
@ -916,6 +971,10 @@ set(hs_dump_SRCS
src/nfa/nfa_dump_dispatch.cpp src/nfa/nfa_dump_dispatch.cpp
src/nfa/nfa_dump_internal.cpp src/nfa/nfa_dump_internal.cpp
src/nfa/nfa_dump_internal.h src/nfa/nfa_dump_internal.h
src/nfa/shengdump.cpp
src/nfa/shengdump.h
src/nfa/tamarama_dump.cpp
src/nfa/tamarama_dump.h
src/parser/dump.cpp src/parser/dump.cpp
src/parser/dump.h src/parser/dump.h
src/parser/position_dump.h src/parser/position_dump.h
@ -941,7 +1000,7 @@ endif()
# choose which ones to build # choose which ones to build
set (LIB_VERSION ${HS_VERSION}) set (LIB_VERSION ${HS_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}) set (LIB_SOVERSION ${HS_MAJOR_VERSION})
add_library(hs_exec OBJECT ${hs_exec_SRCS}) add_library(hs_exec OBJECT ${hs_exec_SRCS})

cmake/formatdate.py Executable file

@ -0,0 +1,18 @@
#!/usr/bin/env python

from __future__ import print_function
import os
import sys
import datetime


def usage():
    print("Usage:", os.path.basename(sys.argv[0]), "<seconds from epoch>")

if len(sys.argv) != 2:
    usage()
    sys.exit(1)

ts = sys.argv[1]
build_date = datetime.datetime.utcfromtimestamp(int(ts))
print(build_date.strftime("%Y-%m-%d"))


@ -77,7 +77,7 @@ static int eventHandler(unsigned int id, unsigned long long from,
* length with its length. Returns NULL on failure. * length with its length. Returns NULL on failure.
*/ */
static char *readInputData(const char *inputFN, unsigned int *length) { static char *readInputData(const char *inputFN, unsigned int *length) {
FILE *f = fopen(inputFN, "r"); FILE *f = fopen(inputFN, "rb");
if (!f) { if (!f) {
fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN, fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
strerror(errno)); strerror(errno));
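
The switch to "rb" matters on platforms that distinguish text and binary streams (notably Windows), where newline translation would otherwise shrink the buffer and corrupt the byte offsets reported to the match callback. A minimal sketch of the same read-the-whole-file pattern, with a hypothetical helper name (not the simplegrep source):

```c
#include <stdio.h>
#include <stdlib.h>

/* Read an entire file into memory in binary mode; returns NULL on failure.
 * Sketch only - simplegrep's readInputData adds size checks and messages. */
static char *slurp(const char *path, unsigned int *length) {
    FILE *f = fopen(path, "rb"); /* "rb": no CR/LF translation on Windows */
    if (!f) {
        return NULL;
    }
    fseek(f, 0, SEEK_END);
    long size = ftell(f);
    rewind(f);
    if (size < 0) {
        fclose(f);
        return NULL;
    }
    char *buf = malloc(size);
    if (!buf || fread(buf, 1, size, f) != (size_t)size) {
        free(buf);
        fclose(f);
        return NULL;
    }
    fclose(f);
    *length = (unsigned int)size;
    return buf;
}
```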


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -52,7 +52,6 @@
#include "parser/shortcut_literal.h" #include "parser/shortcut_literal.h"
#include "parser/unsupported.h" #include "parser/unsupported.h"
#include "parser/utf8_validate.h" #include "parser/utf8_validate.h"
#include "smallwrite/smallwrite_build.h"
#include "rose/rose_build.h" #include "rose/rose_build.h"
#include "rose/rose_build_dump.h" #include "rose/rose_build_dump.h"
#include "som/slot_manager_dump.h" #include "som/slot_manager_dump.h"
@ -304,15 +303,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
return nullptr; return nullptr;
} }
/* avoid building a smwr if just a pure floating case. */
if (!roseIsPureLiteral(rose.get())) {
u32 qual = roseQuality(rose.get());
auto smwr = ng.smwr->build(qual);
if (smwr) {
rose = roseAddSmallWrite(rose.get(), smwr.get());
}
}
dumpRose(*ng.rose, rose.get(), ng.cc.grey); dumpRose(*ng.rose, rose.get(), ng.cc.grey);
dumpReportManager(ng.rm, ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey);
dumpSomSlotManager(ng.ssm, ng.cc.grey); dumpSomSlotManager(ng.ssm, ng.cc.grey);


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -42,6 +42,7 @@ using std::string;
static const char failureNoMemory[] = "Unable to allocate memory."; static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error."; static const char failureInternal[] = "Internal error.";
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
extern const hs_compile_error_t hs_enomem = { extern const hs_compile_error_t hs_enomem = {
const_cast<char *>(failureNoMemory), 0 const_cast<char *>(failureNoMemory), 0
@ -49,6 +50,9 @@ extern const hs_compile_error_t hs_enomem = {
extern const hs_compile_error_t hs_einternal = { extern const hs_compile_error_t hs_einternal = {
const_cast<char *>(failureInternal), 0 const_cast<char *>(failureInternal), 0
}; };
extern const hs_compile_error_t hs_badalloc = {
const_cast<char *>(failureBadAlloc), 0
};
namespace ue2 { namespace ue2 {
@ -56,8 +60,18 @@ hs_compile_error_t *generateCompileError(const string &err, int expression) {
hs_compile_error_t *ret = hs_compile_error_t *ret =
(struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t)); (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
if (ret) { if (ret) {
hs_error_t e = hs_check_alloc(ret);
if (e != HS_SUCCESS) {
hs_misc_free(ret);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
char *msg = (char *)hs_misc_alloc(err.size() + 1); char *msg = (char *)hs_misc_alloc(err.size() + 1);
if (msg) { if (msg) {
e = hs_check_alloc(msg);
if (e != HS_SUCCESS) {
hs_misc_free(msg);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
memcpy(msg, err.c_str(), err.size() + 1); memcpy(msg, err.c_str(), err.size() + 1);
ret->message = msg; ret->message = msg;
} else { } else {
@ -83,7 +97,8 @@ void freeCompileError(hs_compile_error_t *error) {
if (!error) { if (!error) {
return; return;
} }
if (error == &hs_enomem || error == &hs_einternal) { if (error == &hs_enomem || error == &hs_einternal ||
error == &hs_badalloc) {
// These are not allocated. // These are not allocated.
return; return;
} }
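
For context, a hedged sketch of how the new check surfaces to an API user: if the installed allocator hands back misaligned memory, the compile path now reports the static "Allocator returned misaligned memory." error rather than writing through an unusable pointer. Exact behaviour depends on where the misalignment is first detected; the deliberately broken allocator below is illustrative only.

```c
#include <stdio.h>
#include <stdlib.h>
#include <hs.h>

/* Deliberately misaligned allocator: offsets every allocation by one byte,
 * breaking the alignment contract on purpose. */
static void *bad_alloc(size_t size) {
    char *p = malloc(size + 1);
    return p ? p + 1 : NULL;
}

static void bad_free(void *p) {
    if (p) {
        free((char *)p - 1);
    }
}

int main(void) {
    hs_set_allocator(bad_alloc, bad_free);

    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile("foo.*bar", 0, HS_MODE_BLOCK, NULL, &db, &err)
            != HS_SUCCESS) {
        /* Expect a message along the lines of the new hs_badalloc error. */
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }
    hs_free_database(db);
    return 0;
}
```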


@ -458,33 +458,16 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
} }
*info = NULL; *info = NULL;
if (!bytes || length < sizeof(struct hs_database)) { // Decode and check the header
return HS_INVALID; hs_database_t header;
hs_error_t ret = db_decode_header(&bytes, length, &header);
if (ret != HS_SUCCESS) {
return ret;
} }
const u32 *buf = (const u32 *)bytes; u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode));
u32 magic = unaligned_load_u32(buf++); return print_database_string(info, header.version, header.platform, mode);
if (magic != HS_DB_MAGIC) {
return HS_INVALID;
}
u32 version = unaligned_load_u32(buf++);
buf++; /* length */
platform_t plat;
plat = unaligned_load_u64a(buf);
buf += 2;
buf++; /* crc */
buf++; /* reserved 0 */
buf++; /* reserved 1 */
const char *t_raw = (const char *)buf;
u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));
return print_database_string(info, version, plat, mode);
} }
HS_PUBLIC_API HS_PUBLIC_API
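
For orientation, a minimal sketch of the public entry point this hunk simplifies: hs_serialized_database_info() describes a serialized database (version, platform features, mode) without deserializing it. Freeing with free() assumes the default allocator is in use, and the format of the returned string is indicative only.

```c
#include <stdio.h>
#include <stdlib.h>
#include <hs.h>

/* Print a human-readable description of a serialized database. */
static void print_serialized_info(const hs_database_t *db) {
    char *bytes = NULL;
    size_t length = 0;
    if (hs_serialize_database(db, &bytes, &length) != HS_SUCCESS) {
        return;
    }

    char *info = NULL;
    if (hs_serialized_database_info(bytes, length, &info) == HS_SUCCESS) {
        printf("%s\n", info); /* e.g. version, platform features and mode */
        free(info);           /* assumes the default (malloc/free) allocator */
    }
    free(bytes);
}
```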


@ -36,7 +36,6 @@
#include "teddy.h" #include "teddy.h"
#include "teddy_internal.h" #include "teddy_internal.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/** \brief number of bytes processed in each iteration */ /** \brief number of bytes processed in each iteration */
#define ITER_BYTES 16 #define ITER_BYTES 16
@ -132,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1); u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
tmp &= fdr->domainMask; tmp &= fdr->domainMask;
s = *((const m128 *)ft + tmp); s = *((const m128 *)ft + tmp);
s = shiftRight8Bits(s); s = rshiftbyte_m128(s, 1);
} else { } else {
s = fdr->start; s = fdr->start;
} }
@ -186,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st14 = *(const m128 *)(ft + v14*8); m128 st14 = *(const m128 *)(ft + v14*8);
m128 st15 = *(const m128 *)(ft + v15*8); m128 st15 = *(const m128 *)(ft + v15*8);
st1 = byteShiftLeft128(st1, 1); st1 = lshiftbyte_m128(st1, 1);
st2 = byteShiftLeft128(st2, 2); st2 = lshiftbyte_m128(st2, 2);
st3 = byteShiftLeft128(st3, 3); st3 = lshiftbyte_m128(st3, 3);
st4 = byteShiftLeft128(st4, 4); st4 = lshiftbyte_m128(st4, 4);
st5 = byteShiftLeft128(st5, 5); st5 = lshiftbyte_m128(st5, 5);
st6 = byteShiftLeft128(st6, 6); st6 = lshiftbyte_m128(st6, 6);
st7 = byteShiftLeft128(st7, 7); st7 = lshiftbyte_m128(st7, 7);
st9 = byteShiftLeft128(st9, 1); st9 = lshiftbyte_m128(st9, 1);
st10 = byteShiftLeft128(st10, 2); st10 = lshiftbyte_m128(st10, 2);
st11 = byteShiftLeft128(st11, 3); st11 = lshiftbyte_m128(st11, 3);
st12 = byteShiftLeft128(st12, 4); st12 = lshiftbyte_m128(st12, 4);
st13 = byteShiftLeft128(st13, 5); st13 = lshiftbyte_m128(st13, 5);
st14 = byteShiftLeft128(st14, 6); st14 = lshiftbyte_m128(st14, 6);
st15 = byteShiftLeft128(st15, 7); st15 = lshiftbyte_m128(st15, 7);
*s = or128(*s, st0); *s = or128(*s, st0);
*s = or128(*s, st1); *s = or128(*s, st1);
@ -210,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st6); *s = or128(*s, st6);
*s = or128(*s, st7); *s = or128(*s, st7);
*conf0 = movq(*s); *conf0 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL; *conf0 ^= ~0ULL;
*s = or128(*s, st8); *s = or128(*s, st8);
@ -222,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st14); *s = or128(*s, st14);
*s = or128(*s, st15); *s = or128(*s, st15);
*conf8 = movq(*s); *conf8 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL; *conf8 ^= ~0ULL;
} }
@ -253,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st12 = *(const m128 *)(ft + v12*8); m128 st12 = *(const m128 *)(ft + v12*8);
m128 st14 = *(const m128 *)(ft + v14*8); m128 st14 = *(const m128 *)(ft + v14*8);
st2 = byteShiftLeft128(st2, 2); st2 = lshiftbyte_m128(st2, 2);
st4 = byteShiftLeft128(st4, 4); st4 = lshiftbyte_m128(st4, 4);
st6 = byteShiftLeft128(st6, 6); st6 = lshiftbyte_m128(st6, 6);
st10 = byteShiftLeft128(st10, 2); st10 = lshiftbyte_m128(st10, 2);
st12 = byteShiftLeft128(st12, 4); st12 = lshiftbyte_m128(st12, 4);
st14 = byteShiftLeft128(st14, 6); st14 = lshiftbyte_m128(st14, 6);
*s = or128(*s, st0); *s = or128(*s, st0);
*s = or128(*s, st2); *s = or128(*s, st2);
*s = or128(*s, st4); *s = or128(*s, st4);
*s = or128(*s, st6); *s = or128(*s, st6);
*conf0 = movq(*s); *conf0 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL; *conf0 ^= ~0ULL;
*s = or128(*s, st8); *s = or128(*s, st8);
@ -273,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st12); *s = or128(*s, st12);
*s = or128(*s, st14); *s = or128(*s, st14);
*conf8 = movq(*s); *conf8 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL; *conf8 ^= ~0ULL;
} }
@ -296,27 +295,26 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st8 = *(const m128 *)(ft + v8*8); m128 st8 = *(const m128 *)(ft + v8*8);
m128 st12 = *(const m128 *)(ft + v12*8); m128 st12 = *(const m128 *)(ft + v12*8);
st4 = byteShiftLeft128(st4, 4); st4 = lshiftbyte_m128(st4, 4);
st12 = byteShiftLeft128(st12, 4); st12 = lshiftbyte_m128(st12, 4);
*s = or128(*s, st0); *s = or128(*s, st0);
*s = or128(*s, st4); *s = or128(*s, st4);
*conf0 = movq(*s); *conf0 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL; *conf0 ^= ~0ULL;
*s = or128(*s, st8); *s = or128(*s, st8);
*s = or128(*s, st12); *s = or128(*s, st12);
*conf8 = movq(*s); *conf8 = movq(*s);
*s = byteShiftRight128(*s, 8); *s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL; *conf8 ^= ~0ULL;
} }
static really_inline static really_inline
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal, void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
const u32 *confBase, const struct FDR_Runtime_Args *a, const u32 *confBase, const struct FDR_Runtime_Args *a,
const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id, const u8 *ptr, u32 *last_match_id, struct zone *z) {
struct zone *z) {
const u8 bucket = 8; const u8 bucket = 8;
const u8 pullback = 1; const u8 pullback = 1;
@ -352,13 +350,13 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
continue; continue;
} }
*last_match_id = id; *last_match_id = id;
*controlVal = a->cb(ptr_main + byte - a->buf, *control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
ptr_main + byte - a->buf, id, a->ctxt); id, a->ctxt);
continue; continue;
} }
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
control, last_match_id, confVal); last_match_id, confVal);
} while (unlikely(!!*conf)); } while (unlikely(!!*conf));
} }
@ -681,9 +679,9 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
itPtr += ITER_BYTES) { \ itPtr += ITER_BYTES) { \
if (unlikely(itPtr > tryFloodDetect)) { \ if (unlikely(itPtr > tryFloodDetect)) { \
tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\ tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
&floodBackoff, &controlVal, \ &floodBackoff, &control, \
ITER_BYTES); \ ITER_BYTES); \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
return HWLM_TERMINATED; \ return HWLM_TERMINATED; \
} \ } \
} \ } \
@ -692,11 +690,11 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
u64a conf8; \ u64a conf8; \
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \ get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \
ft, &conf0, &conf8, &s); \ ft, &conf0, &conf8, &s); \
do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr, \ do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \
control, &last_match_id, zz); \ &last_match_id, zz); \
do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr, \ do_confirm_fdr(&conf8, 8, &control, confBase, a, itPtr, \
control, &last_match_id, zz); \ &last_match_id, zz); \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
return HWLM_TERMINATED; \ return HWLM_TERMINATED; \
} \ } \
} /* end for loop */ \ } /* end for loop */ \
@ -704,9 +702,8 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
static never_inline static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr, hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlmcb_rv_t controlVal = *a->groups; hwlm_group_t control) {
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
u32 last_match_id = INVALID_MATCH_ID; u32 last_match_id = INVALID_MATCH_ID;
u64a domain_mask_adjusted = fdr->domainMask << 1; u64a domain_mask_adjusted = fdr->domainMask << 1;
@ -771,7 +768,10 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
#define ONLY_AVX2(func) NULL #define ONLY_AVX2(func) NULL
#endif #endif
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a); typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
static const FDRFUNCTYPE funcs[] = { static const FDRFUNCTYPE funcs[] = {
fdr_engine_exec, fdr_engine_exec,
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast), ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
@ -814,7 +814,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
start, start,
cb, cb,
ctxt, ctxt,
&groups,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START), nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
0 0
}; };
@ -822,7 +821,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
return HWLM_SUCCESS; return HWLM_SUCCESS;
} else { } else {
assert(funcs[fdr->engineID]); assert(funcs[fdr->engineID]);
return funcs[fdr->engineID](fdr, &a); return funcs[fdr->engineID](fdr, &a, groups);
} }
} }
@ -840,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
start, start,
cb, cb,
ctxt, ctxt,
&groups,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START), nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
/* we are guaranteed to always have 16 initialised bytes at the end of /* we are guaranteed to always have 16 initialised bytes at the end of
* the history buffer (they may be garbage). */ * the history buffer (they may be garbage). */
@ -853,7 +851,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
ret = HWLM_SUCCESS; ret = HWLM_SUCCESS;
} else { } else {
assert(funcs[fdr->engineID]); assert(funcs[fdr->engineID]);
ret = funcs[fdr->engineID](fdr, &a); ret = funcs[fdr->engineID](fdr, &a, groups);
} }
fdrPackState(fdr, &a, stream_state); fdrPackState(fdr, &a, stream_state);


@ -81,7 +81,7 @@ private:
void dumpMasks(const u8 *defaultMask); void dumpMasks(const u8 *defaultMask);
#endif #endif
void setupTab(); void setupTab();
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link); aligned_unique_ptr<FDR> setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link);
void createInitialState(FDR *fdr); void createInitialState(FDR *fdr);
public: public:
@ -90,7 +90,7 @@ public:
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in), : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
make_small(make_small_in) {} make_small(make_small_in) {}
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link); aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
}; };
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) { u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
@ -124,10 +124,8 @@ void FDRCompiler::createInitialState(FDR *fdr) {
// Find the minimum length for the literals in this bucket. // Find the minimum length for the literals in this bucket.
const vector<LiteralIndex> &bucket_lits = bucketToLits[b]; const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
u32 min_len = ~0U; u32 min_len = ~0U;
for (vector<LiteralIndex>::const_iterator it = bucket_lits.begin(), for (const LiteralIndex &lit_idx : bucket_lits) {
ite = bucket_lits.end(); min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
it != ite; ++it) {
min_len = min(min_len, verify_u32(lits[*it].s.length()));
} }
DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len); DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
@ -141,13 +139,12 @@ void FDRCompiler::createInitialState(FDR *fdr) {
} }
} }
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) { aligned_unique_ptr<FDR>
FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
size_t tabSize = eng.getTabSizeBytes(); size_t tabSize = eng.getTabSizeBytes();
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng); auto floodControlTmp = setupFDRFloodControl(lits, eng);
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
pair<u8 *, size_t> confirmTmp =
setupFullMultiConfs(lits, eng, bucketToLits, make_small);
assert(ISALIGNED_16(tabSize)); assert(ISALIGNED_16(tabSize));
assert(ISALIGNED_16(confirmTmp.second)); assert(ISALIGNED_16(confirmTmp.second));
@ -175,14 +172,12 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
copy(tab.begin(), tab.end(), ptr); copy(tab.begin(), tab.end(), ptr);
ptr += tabSize; ptr += tabSize;
memcpy(ptr, confirmTmp.first, confirmTmp.second); memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
ptr += confirmTmp.second; ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
fdr->floodOffset = verify_u32(ptr - fdr_base); fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second); memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
ptr += floodControlTmp.second; ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
/* we are allowing domains 9 to 15 only */ /* we are allowing domains 9 to 15 only */
assert(eng.bits > 8 && eng.bits < 16); assert(eng.bits > 8 && eng.bits < 16);
@ -193,8 +188,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
if (link.first) { if (link.first) {
fdr->link = verify_u32(ptr - fdr_base); fdr->link = verify_u32(ptr - fdr_base);
memcpy(ptr, link.first, link.second); memcpy(ptr, link.first.get(), link.second);
aligned_free(link.first);
} else { } else {
fdr->link = 0; fdr->link = 0;
} }
@ -217,13 +211,11 @@ struct LitOrder {
if (len1 != len2) { if (len1 != len2) {
return len1 < len2; return len1 < len2;
} else { } else {
string::const_reverse_iterator it1, it2; auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
tie(it1, it2) = if (p.first == i1s.rend()) {
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
if (it1 == i1s.rend()) {
return false; return false;
} }
return *it1 < *it2; return *p.first < *p.second;
} }
} }
@ -266,9 +258,8 @@ void FDRCompiler::assignStringsToBuckets() {
stable_sort(vli.begin(), vli.end(), LitOrder(lits)); stable_sort(vli.begin(), vli.end(), LitOrder(lits));
#ifdef DEBUG_ASSIGNMENT #ifdef DEBUG_ASSIGNMENT
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end(); for (const auto &m : lenCounts) {
i != e; ++i) { printf("l<%u>:%u ", m.first, m.second);
printf("l<%d>:%d ", i->first, i->second);
} }
printf("\n"); printf("\n");
#endif #endif
@ -324,12 +315,12 @@ void FDRCompiler::assignStringsToBuckets() {
for (u32 k = j; k < nChunks; ++k) { for (u32 k = j; k < nChunks; ++k) {
cnt += count[k]; cnt += count[k];
} }
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0); t[j][0] = {getScoreUtil(length[j], cnt), 0};
} }
for (u32 i = 1; i < nb; i++) { for (u32 i = 1; i < nb; i++) {
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0); SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
u32 cnt = count[j]; u32 cnt = count[j];
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) { for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
SCORE score = getScoreUtil(length[j], cnt); SCORE score = getScoreUtil(length[j], cnt);
@ -338,12 +329,12 @@ void FDRCompiler::assignStringsToBuckets() {
} }
score += t[k][i-1].first; score += t[k][i-1].first;
if (score < best.first) { if (score < best.first) {
best = make_pair(score, k); best = {score, k};
} }
} }
t[j][i] = best; t[j][i] = best;
} }
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
} }
#ifdef DEBUG_ASSIGNMENT #ifdef DEBUG_ASSIGNMENT
@ -405,8 +396,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
distance = 4; distance = 4;
} }
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end(); for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
i != e; ++i) {
if (e - i > 5) { if (e - i > 5) {
__builtin_prefetch(&lits[*(i + 5)]); __builtin_prefetch(&lits[*(i + 5)]);
} }
@ -460,31 +450,25 @@ void FDRCompiler::setupTab() {
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size); memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
} }
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
const vector<LiteralIndex> &vl = bucketToLits[b]; const vector<LiteralIndex> &vl = bucketToLits[b];
SuffixPositionInString pLimit = eng.getBucketWidth(b); SuffixPositionInString pLimit = eng.getBucketWidth(b);
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) { for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
u32 bit = eng.getSchemeBit(b, pos); u32 bit = eng.getSchemeBit(b, pos);
M2SET m2; map<u32, ue2::unordered_set<u32>> m2;
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2); bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
if (done) { if (done) {
clearbit(&defaultMask[0], bit); clearbit(&defaultMask[0], bit);
continue; continue;
} }
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e; for (const auto &elem : m2) {
++i) { u32 dc = elem.first;
u32 dc = i->first; const ue2::unordered_set<u32> &mskSet = elem.second;
const ue2::unordered_set<u32> &mskSet = i->second;
u32 v = ~dc; u32 v = ~dc;
do { do {
u32 b2 = v & dc; u32 b2 = v & dc;
for (ue2::unordered_set<u32>::const_iterator for (const u32 &mskVal : mskSet) {
i2 = mskSet.begin(), u32 val = (mskVal & ~dc) | b2;
e2 = mskSet.end();
i2 != e2; ++i2) {
u32 val = (*i2 & ~dc) | b2;
clearbit(tabIndexToMask(val), bit); clearbit(tabIndexToMask(val), bit);
} }
v = (v + (dc & -dc)) | ~dc; v = (v + (dc & -dc)) | ~dc;
@ -502,7 +486,8 @@ void FDRCompiler::setupTab() {
#endif #endif
} }
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) { aligned_unique_ptr<FDR>
FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
assignStringsToBuckets(); assignStringsToBuckets();
setupTab(); setupTab();
return setupFDR(link); return setupFDR(link);
@ -515,16 +500,15 @@ aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small, fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint, const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) { hwlmStreamingControl *stream_control) {
pair<u8 *, size_t> link(nullptr, 0); pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
if (stream_control) { if (stream_control) {
link = fdrBuildTableStreaming(lits, stream_control); link = fdrBuildTableStreaming(lits, *stream_control);
} }
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2"); DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
if (grey.fdrAllowTeddy) { if (grey.fdrAllowTeddy) {
aligned_unique_ptr<FDR> fdr auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link);
= teddyBuildTableHinted(lits, make_small, hint, target, link);
if (fdr) { if (fdr) {
DEBUG_PRINTF("build with teddy succeeded\n"); DEBUG_PRINTF("build with teddy succeeded\n");
return fdr; return fdr;


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -31,6 +31,7 @@
#include "ue2common.h" #include "ue2common.h"
#include "hwlm/hwlm_literal.h" #include "hwlm/hwlm_literal.h"
#include "util/alloc.h"
#include <map> #include <map>
#include <utility> #include <utility>
@ -44,7 +45,6 @@ namespace ue2 {
// a pile of decorative typedefs // a pile of decorative typedefs
// good for documentation purposes more than anything else // good for documentation purposes more than anything else
typedef u32 LiteralIndex; typedef u32 LiteralIndex;
typedef u32 ConfirmIndex;
typedef u32 SuffixPositionInString; // zero is last byte, counting back typedef u32 SuffixPositionInString; // zero is last byte, counting back
// into the string // into the string
typedef u32 BucketIndex; typedef u32 BucketIndex;
@ -56,25 +56,22 @@ class EngineDescription;
class FDREngineDescription; class FDREngineDescription;
struct hwlmStreamingControl; struct hwlmStreamingControl;
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p, std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
bool make_small);
std::pair<u8 *, size_t> setupFullMultiConfs(
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng, const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits, std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small); bool make_small);
// all suffixes include an implicit max_bucket_width suffix to ensure that // all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our // we always read a full-scale flood "behind" us in terms of what's in our
// state; if we don't have a flood that's long enough we won't be in the // state; if we don't have a flood that's long enough we won't be in the
// right state yet to allow blindly advancing // right state yet to allow blindly advancing
std::pair<u8 *, size_t> std::pair<aligned_unique_ptr<u8>, size_t>
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits, setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng); const EngineDescription &eng);
std::pair<u8 *, size_t> std::pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits, fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control); hwlmStreamingControl &stream_control);
static constexpr u32 HINT_INVALID = 0xffffffff; static constexpr u32 HINT_INVALID = 0xffffffff;


@ -45,9 +45,10 @@ using namespace std;
namespace ue2 { namespace ue2 {
typedef u8 ConfSplitType; using ConfSplitType = u8;
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair; using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF; using BC2CONF = map<BucketSplitPair,
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
// return the number of bytes beyond a length threshold in all strings in lits // return the number of bytes beyond a length threshold in all strings in lits
static static
@ -149,9 +150,9 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
//#define FDR_CONFIRM_DUMP 1 //#define FDR_CONFIRM_DUMP 1
static static pair<aligned_unique_ptr<FDRConfirm>, size_t>
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p, getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
bool applyOneCharOpt, bool make_small, bool make_confirm) { bool make_small, bool make_confirm) {
vector<LitInfo> tmpLitInfo(lits.size()); vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk; CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk); fillLitInfo(lits, tmpLitInfo, andmsk);
@ -220,55 +221,61 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
#ifdef FDR_CONFIRM_DUMP #ifdef FDR_CONFIRM_DUMP
// print out the literals reversed - makes it easier to line up analyses // print out the literals reversed - makes it easier to line up analyses
// that are end-offset based // that are end-offset based
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(), for (const auto &m : res2lits) {
e = res2lits.end(); i != e; ++i) { const u32 &hash = m.first;
u32 hash = i->first; const vector<LiteralIndex> &vlidx = m.second;
vector<LiteralIndex> & vlidx = i->second; if (vlidx.size() <= 1) {
if (vlidx.size() > 1) { continue;
printf("%x -> %zu literals\n", hash, vlidx.size()); }
u32 min_len = lits[vlidx.front()].s.size(); printf("%x -> %zu literals\n", hash, vlidx.size());
vector<set<u8> > vsl; // contains the set of chars at each location size_t min_len = lits[vlidx.front()].s.size();
// reversed from the end
vsl.resize(1024); vector<set<u8>> vsl; // contains the set of chars at each location
u32 total_string_size = 0; // reversed from the end
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) { for (const auto &litIdx : vlidx) {
LiteralIndex litIdx = *i2; const auto &lit = lits[litIdx];
total_string_size += lits[litIdx].s.size(); if (lit.s.size() > vsl.size()) {
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) { vsl.resize(lit.s.size());
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
}
min_len = MIN(min_len, lits[litIdx].s.size());
} }
printf("common "); for (size_t j = lit.s.size(); j != 0; j--) {
for (u32 j = 0; j < min_len; j++) { vsl[lit.s.size() - j].insert(lit.s[j - 1]);
if (vsl[j].size() == 1) { }
printf("%02x", (u32)*vsl[j].begin()); min_len = min(min_len, lit.s.size());
} else { }
printf("common ");
for (size_t j = 0; j < min_len; j++) {
if (vsl[j].size() == 1) {
printf("%02x", *vsl[j].begin());
} else {
printf("__");
}
}
printf("\n");
for (const auto &litIdx : vlidx) {
const auto &lit = lits[litIdx];
printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
for (size_t j = lit.s.size(); j != 0; j--) {
size_t dist_from_end = lit.s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__"); printf("__");
} else {
printf("%02x", lit.s[j - 1]);
} }
} }
printf("\n"); printf("\n");
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
u32 dist_from_end = lits[litIdx].s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
} else {
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
}
}
printf("\n");
}
u32 total_compares = 0;
for (u32 j = 0; j < 1024; j++) { // naughty
total_compares += vsl[j].size();
}
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
} }
size_t total_compares = 0;
for (const auto &v : vsl) {
total_compares += v.size();
}
size_t total_string_size = 0;
for (const auto &litIdx : vlidx) {
const auto &lit = lits[litIdx];
total_string_size += lit.s.size();
}
printf("Total compare load: %zu Total string size: %zu\n\n",
total_compares, total_string_size);
} }
#endif #endif
@ -281,7 +288,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
sizeof(LitInfo) * lits.size() + totalLitSize; sizeof(LitInfo) * lits.size() + totalLitSize;
size = ROUNDUP_N(size, alignof(FDRConfirm)); size = ROUNDUP_N(size, alignof(FDRConfirm));
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size); auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
assert(fdrc); // otherwise would have thrown std::bad_alloc assert(fdrc); // otherwise would have thrown std::bad_alloc
fdrc->andmsk = andmsk; fdrc->andmsk = andmsk;
@ -295,7 +302,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
fdrc->groups = gm; fdrc->groups = gm;
// After the FDRConfirm, we have the lit index array. // After the FDRConfirm, we have the lit index array.
u8 *fdrc_base = (u8 *)fdrc; u8 *fdrc_base = (u8 *)fdrc.get();
u8 *ptr = fdrc_base + sizeof(*fdrc); u8 *ptr = fdrc_base + sizeof(*fdrc);
ptr = ROUNDUP_PTR(ptr, alignof(u32)); ptr = ROUNDUP_PTR(ptr, alignof(u32));
u32 *bitsToLitIndex = (u32 *)ptr; u32 *bitsToLitIndex = (u32 *)ptr;
@ -307,14 +314,12 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
// Walk the map by hash value assigning indexes and laying out the // Walk the map by hash value assigning indexes and laying out the
// elements (and their associated string confirm material) in memory. // elements (and their associated string confirm material) in memory.
for (std::map<u32, vector<LiteralIndex> >::const_iterator for (const auto &m : res2lits) {
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) { const u32 hash = m.first;
const u32 hash = i->first; const vector<LiteralIndex> &vlidx = m.second;
const vector<LiteralIndex> &vlidx = i->second; bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc); for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(), LiteralIndex litIdx = *i;
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
// Write LitInfo header. // Write LitInfo header.
u8 *oldPtr = ptr; u8 *oldPtr = ptr;
@ -333,7 +338,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
} }
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (i2 + 1 == e2) { if (next(i) == e) {
finalLI.next = 0x0; finalLI.next = 0x0;
} else { } else {
// our next field represents an adjustment on top of // our next field represents an adjustment on top of
@ -348,14 +353,13 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
assert((size_t)(ptr - fdrc_base) <= size); assert((size_t)(ptr - fdrc_base) <= size);
} }
*fdrc_p = fdrc;
// Return actual used size, not worst-case size. Must be rounded up to // Return actual used size, not worst-case size. Must be rounded up to
// FDRConfirm alignment so that the caller can lay out a sequence of these. // FDRConfirm alignment so that the caller can lay out a sequence of these.
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base), size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
alignof(FDRConfirm)); alignof(FDRConfirm));
assert(actual_size <= size); assert(actual_size <= size);
return actual_size;
return {move(fdrc), actual_size};
} }
static static
@ -377,12 +381,9 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
u32 totalConfirmSize = 0; u32 totalConfirmSize = 0;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
if (!bucketToLits[b].empty()) { if (!bucketToLits[b].empty()) {
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit()); vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
for (vector<LiteralIndex>::const_iterator for (const LiteralIndex &lit_idx : bucketToLits[b]) {
i = bucketToLits[b].begin(), hwlmLiteral lit = lits[lit_idx]; // copy
e = bucketToLits[b].end();
i != e; ++i) {
hwlmLiteral lit = lits[*i]; // copy
// c is last char of this literal // c is last char of this literal
u8 c = *(lit.s.rbegin()); u8 c = *(lit.s.rbegin());
@ -424,26 +425,27 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
} }
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) { for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
if (!vl[c].empty()) { if (vl[c].empty()) {
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); continue;
FDRConfirm *fdrc;
size_t size = getFDRConfirm(vl[c], &fdrc,
eng.typicallyHoldsOneCharLits(),
make_small, makeConfirm);
BucketSplitPair p = make_pair(b, c);
bc2Conf[p] = make_pair(fdrc, size);
totalConfirmSize += size;
} }
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
auto key = make_pair(b, c);
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
make_small, makeConfirm);
totalConfirmSize += fc.second;
assert(bc2Conf.find(key) == end(bc2Conf));
bc2Conf.emplace(key, move(fc));
} }
} }
} }
return totalConfirmSize; return totalConfirmSize;
} }
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits, pair<aligned_unique_ptr<u8>, size_t>
const EngineDescription &eng, setupFullMultiConfs(const vector<hwlmLiteral> &lits,
map<BucketIndex, vector<LiteralIndex> > &bucketToLits, const EngineDescription &eng,
bool make_small) { map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
BC2CONF bc2Conf; BC2CONF bc2Conf;
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
make_small); make_small);
@ -453,26 +455,24 @@ pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32); u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize); auto buf = aligned_zmalloc_unique<u8>(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc assert(buf); // otherwise would have thrown std::bad_alloc
u32 *confBase = (u32 *)buf; u32 *confBase = (u32 *)buf.get();
u8 *ptr = buf + totalConfSwitchSize; u8 *ptr = buf.get() + totalConfSwitchSize;
for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e; for (const auto &m : bc2Conf) {
++i) { const BucketIndex &b = m.first.first;
const pair<FDRConfirm *, size_t> &p = i->second; const u8 &c = m.first.second;
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
// confirm offset is relative to the base of this structure, now // confirm offset is relative to the base of this structure, now
u32 confirm_offset = verify_u32(ptr - (u8 *)buf); u32 confirm_offset = verify_u32(ptr - buf.get());
memcpy(ptr, p.first, p.second); memcpy(ptr, p.first.get(), p.second);
ptr += p.second; ptr += p.second;
aligned_free(p.first);
BucketIndex b = i->first.first;
u8 c = i->first.second;
u32 idx = c * nBuckets + b; u32 idx = c * nBuckets + b;
confBase[idx] = confirm_offset; confBase[idx] = confirm_offset;
} }
return make_pair(buf, totalSize); return {move(buf), totalSize};
} }
} // namespace ue2 } // namespace ue2
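The confirm builder above now returns {move(fdrc), actual_size} and stores aligned_unique_ptr values in bc2Conf, so the explicit aligned_free() in setupFullMultiConfs disappears. A minimal sketch of that ownership idiom in portable C++17, using a hypothetical make_aligned_buffer() built on std::aligned_alloc as a stand-in for Hyperscan's aligned_zmalloc_unique:

    #include <cstdlib>
    #include <cstring>
    #include <memory>
    #include <utility>

    // Deleter so std::unique_ptr can own memory obtained from std::aligned_alloc.
    struct AlignedFree {
        void operator()(void *p) const noexcept { std::free(p); }
    };
    using aligned_bytes = std::unique_ptr<unsigned char[], AlignedFree>;

    // Hypothetical stand-in for aligned_zmalloc_unique<u8>(): zeroed, 64-byte aligned.
    static std::pair<aligned_bytes, size_t> make_aligned_buffer(size_t len) {
        size_t rounded = (len + 63) & ~size_t{63};  // aligned_alloc wants len % align == 0
        auto *raw = static_cast<unsigned char *>(std::aligned_alloc(64, rounded));
        if (raw) {
            std::memset(raw, 0, rounded);
        }
        return {aligned_bytes(raw), rounded};
    }

    int main() {
        // Ownership travels with the pair: no matching free call on every
        // early-return path, unlike the raw-pointer version being replaced.
        auto built = make_aligned_buffer(100);
        if (!built.first) {
            return 1;
        }
        built.first[0] = 0x2a;   // use the buffer
        return 0;                // released automatically
    }

Returning the size alongside the buffer matters because the serialization step above still needs the byte count when it memcpy()s each confirm block into the final table.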
View File
@ -105,7 +105,6 @@ struct FDR_Runtime_Args {
size_t start_offset; size_t start_offset;
HWLMCallback cb; HWLMCallback cb;
void *ctxt; void *ctxt;
hwlm_group_t *groups;
const u8 *firstFloodDetect; const u8 *firstFloodDetect;
const u64a histBytes; const u64a histBytes;
}; };
View File
@ -94,14 +94,13 @@ static
bool setupLongLits(const vector<hwlmLiteral> &lits, bool setupLongLits(const vector<hwlmLiteral> &lits,
vector<hwlmLiteral> &long_lits, size_t max_len) { vector<hwlmLiteral> &long_lits, size_t max_len) {
long_lits.reserve(lits.size()); long_lits.reserve(lits.size());
for (vector<hwlmLiteral>::const_iterator it = lits.begin(); for (const auto &lit : lits) {
it != lits.end(); ++it) { if (lit.s.length() > max_len) {
if (it->s.length() > max_len) { hwlmLiteral tmp = lit; // copy
hwlmLiteral tmp = *it; // copy tmp.s.pop_back();
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
tmp.id = 0; // recalc later tmp.id = 0; // recalc later
tmp.groups = 0; // filled in later by hash bucket(s) tmp.groups = 0; // filled in later by hash bucket(s)
long_lits.push_back(tmp); long_lits.push_back(move(tmp));
} }
} }
@ -112,15 +111,12 @@ bool setupLongLits(const vector<hwlmLiteral> &lits,
// sort long_literals by caseful/caseless and in lexicographical order, // sort long_literals by caseful/caseless and in lexicographical order,
// remove duplicates // remove duplicates
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder()); stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
vector<hwlmLiteral>::iterator new_end = auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
long_lits.erase(new_end, long_lits.end()); long_lits.erase(new_end, long_lits.end());
// fill in ids; not currently used // fill in ids; not currently used
for (vector<hwlmLiteral>::iterator i = long_lits.begin(), for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
e = long_lits.end(); i->id = distance(long_lits.begin(), i);
i != e; ++i) {
i->id = i - long_lits.begin();
} }
return true; return true;
} }
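setupLongLits above reduces to a trim/sort/unique shape: copy any literal longer than max_len, drop its last byte with pop_back(), then collapse duplicates with stable_sort, unique and erase. The same idiom on plain std::string values, with the custom LongLitOrder comparator and the other hwlmLiteral fields elided:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        const size_t max_len = 4;
        std::vector<std::string> lits = {"streams", "streamy", "cat", "confirm"};

        // Keep only literals longer than max_len, each trimmed by one byte,
        // as setupLongLits does above.
        std::vector<std::string> long_lits;
        for (const auto &lit : lits) {
            if (lit.size() > max_len) {
                std::string tmp = lit;   // copy
                tmp.pop_back();
                long_lits.push_back(std::move(tmp));
            }
        }

        // "streams" and "streamy" both trim to "stream"; the sort/unique/erase
        // idiom collapses them into a single entry.
        std::stable_sort(long_lits.begin(), long_lits.end());
        long_lits.erase(std::unique(long_lits.begin(), long_lits.end()),
                        long_lits.end());

        for (size_t i = 0; i < long_lits.size(); i++) {
            std::cout << i << ": " << long_lits[i] << "\n";  // ids assigned by position
        }
        return 0;
    }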
@ -143,23 +139,19 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
hashedPositions[m] = 0; hashedPositions[m] = 0;
} }
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(), for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
e = long_lits.end();
i != e; ++i) {
if (i->nocase) { if (i->nocase) {
boundaries[CASEFUL] = verify_u32(i - long_lits.begin()); boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
break; break;
} }
} }
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(), for (const auto &lit : long_lits) {
e = long_lits.end(); Modes m = lit.nocase ? CASELESS : CASEFUL;
i != e; ++i) { for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
MODES m = i->nocase ? CASELESS : CASEFUL;
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
hashedPositions[m]++; hashedPositions[m]++;
} }
positions[m] += i->s.size(); positions[m] += lit.s.size();
} }
for (u32 m = CASEFUL; m < MAX_MODES; m++) { for (u32 m = CASEFUL; m < MAX_MODES; m++) {
@ -170,7 +162,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
#ifdef DEBUG_COMPILE #ifdef DEBUG_COMPILE
printf("analyzeLits:\n"); printf("analyzeLits:\n");
for (MODES m = CASEFUL; m < MAX_MODES; m++) { for (Modes m = CASEFUL; m < MAX_MODES; m++) {
printf("mode %s boundary %d positions %d hashedPositions %d " printf("mode %s boundary %d positions %d hashedPositions %d "
"hashEntries %d\n", "hashEntries %d\n",
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m], (m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
@ -181,7 +173,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
} }
static static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) { u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m); return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
} }
@ -203,24 +195,21 @@ struct OffsetIDFromEndOrder {
static static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len, void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
FDRSHashEntry *tab, size_t numEntries, MODES m, FDRSHashEntry *tab, size_t numEntries, Modes mode,
map<u32, u32> &litToOffsetVal) { map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries); const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs; map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield; map<u32, u64a> bucketToBitfield;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(), for (const auto &lit : long_lits) {
e = long_lits.end(); if ((mode == CASELESS) != lit.nocase) {
i != e; ++i) {
const hwlmLiteral &l = *i;
if ((m == CASELESS) != i->nocase) {
continue; continue;
} }
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) { for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
u32 h = hashLit(l, j, max_len, m); u32 h = hashLit(lit, j, max_len, mode);
u32 h_ent = h & ((1U << nbits) - 1); u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63; u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j)); bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
bucketToBitfield[h_ent] |= (1ULL << h_low); bucketToBitfield[h_ent] |= (1ULL << h_low);
} }
} }
@ -231,11 +220,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
// sweep out bitfield entries and save the results swapped accordingly // sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets // also, anything with bitfield entries is put in filledBuckets
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(), for (const auto &m : bucketToBitfield) {
e = bucketToBitfield.end(); const u32 &bucket = m.first;
i != e; ++i) { const u64a &contents = m.second;
u32 bucket = i->first;
u64a contents = i->second;
tab[bucket].bitfield = contents; tab[bucket].bitfield = contents;
filledBuckets.set(bucket); filledBuckets.set(bucket);
} }
@ -243,12 +230,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
// store out all our chains based on free values in our hash table. // store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more // find nearest free locations that are empty (there will always be more
// entries than strings, at present) // entries than strings, at present)
for (map<u32, deque<pair<u32, u32> > >::iterator for (auto &m : bucketToLitOffPairs) {
i = bucketToLitOffPairs.begin(), u32 bucket = m.first;
e = bucketToLitOffPairs.end(); deque<pair<u32, u32>> &d = m.second;
i != e; ++i) {
u32 bucket = i->first;
deque<pair<u32, u32> > &d = i->second;
// sort d by distance of the residual string (len minus our depth into // sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first... // the string). We need to put the 'furthest back' string first...
@ -299,31 +283,30 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
static static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) { size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0; size_t rv = 0;
vector<hwlmLiteral>::const_iterator it, ite; for (const auto &lit : lits) {
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) { rv = max(rv, lit.msk.size());
rv = max(rv, it->msk.size());
} }
return rv; return rv;
} }
pair<u8 *, size_t> pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits, fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control) { hwlmStreamingControl &stream_control) {
// refuse to compile if we are forced to have smaller than minimum // refuse to compile if we are forced to have smaller than minimum
// history required for long-literal support, full stop // history required for long-literal support, full stop
// otherwise, choose the maximum of the preferred history quantity // otherwise, choose the maximum of the preferred history quantity
// (currently a fairly extravagant 32) or the already used history // (currently a fairly extravagant 32) or the already used history
// quantity - subject to the limitation of stream_control->history_max // quantity - subject to the limitation of stream_control.history_max
const size_t MIN_HISTORY_REQUIRED = 32; const size_t MIN_HISTORY_REQUIRED = 32;
if (MIN_HISTORY_REQUIRED > stream_control->history_max) { if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
throw std::logic_error("Cannot set history to minimum history required"); throw std::logic_error("Cannot set history to minimum history required");
} }
size_t max_len = size_t max_len =
MIN(stream_control->history_max, MIN(stream_control.history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min)); MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
assert(max_len >= MIN_HISTORY_REQUIRED); assert(max_len >= MIN_HISTORY_REQUIRED);
size_t max_mask_len = maxMaskLen(lits); size_t max_mask_len = maxMaskLen(lits);
@ -334,10 +317,10 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
// we want enough history to manage the longest literal and the longest // we want enough history to manage the longest literal and the longest
// mask. // mask.
stream_control->literal_history_required = stream_control.literal_history_required =
max(maxLen(lits), max_mask_len) - 1; max(maxLen(lits), max_mask_len) - 1;
stream_control->literal_stream_state_required = 0; stream_control.literal_stream_state_required = 0;
return make_pair(nullptr, size_t{0}); return {nullptr, size_t{0}};
} }
// Ensure that we have enough room for the longest mask. // Ensure that we have enough room for the longest mask.
@ -381,11 +364,11 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2)); streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8; u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize); auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
assert(secondaryTable); // otherwise would have thrown std::bad_alloc assert(secondaryTable); // otherwise would have thrown std::bad_alloc
// then fill it in // then fill it in
u8 * ptr = secondaryTable; u8 * ptr = secondaryTable.get();
FDRSTableHeader * header = (FDRSTableHeader *)ptr; FDRSTableHeader * header = (FDRSTableHeader *)ptr;
// fill in header // fill in header
header->pseudoEngineID = (u32)0xffffffff; header->pseudoEngineID = (u32)0xffffffff;
@ -407,11 +390,9 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
ptr += litTabSize; ptr += litTabSize;
map<u32, u32> litToOffsetVal; map<u32, u32> litToOffsetVal;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(), for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
e = long_lits.end();
i != e; ++i) {
u32 entry = verify_u32(i - long_lits.begin()); u32 entry = verify_u32(i - long_lits.begin());
u32 offset = verify_u32(ptr - secondaryTable); u32 offset = verify_u32(ptr - secondaryTable.get());
// point the table entry to the string location // point the table entry to the string location
litTabPtr[entry].offset = offset; litTabPtr[entry].offset = offset;
@ -425,20 +406,20 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
} }
// fill in final lit table entry with current ptr (serves as end value) // fill in final lit table entry with current ptr (serves as end value)
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable); litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
// fill hash tables // fill hash tables
ptr = secondaryTable + htOffset[CASEFUL]; ptr = secondaryTable.get() + htOffset[CASEFUL];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) { for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m], fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
(MODES)m, litToOffsetVal); (Modes)m, litToOffsetVal);
ptr += htSize[m]; ptr += htSize[m];
} }
// tell the world what we did // tell the world what we did
stream_control->literal_history_required = max_len; stream_control.literal_history_required = max_len;
stream_control->literal_stream_state_required = tot_state_bytes; stream_control.literal_stream_state_required = tot_state_bytes;
return make_pair(secondaryTable, tabSize); return {move(secondaryTable), tabSize};
} }
} // namespace ue2 } // namespace ue2
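fillHashes above splits every computed hash into two pieces: the low nbits select a hash table entry, and the next six bits pick one position in that entry's 64-bit bitfield, so the runtime's has_bit() check can reject most probes before any chain is walked. A self-contained sketch of just that splitting step, with a toy multiplicative hash standing in for streaming_hash:

    #include <cstdint>
    #include <cstdio>
    #include <map>

    int main() {
        const uint32_t nbits = 10;   // lg2(number of hash table entries)
        std::map<uint32_t, uint64_t> bucketToBitfield;

        // Toy stand-in for the real streaming hash.
        auto hash = [](uint32_t x) { return x * 0x9e3779b9u; };

        for (uint32_t h_in = 0; h_in < 8; h_in++) {
            uint32_t h = hash(h_in);
            uint32_t h_ent = h & ((1u << nbits) - 1);  // low bits: table entry
            uint32_t h_low = (h >> nbits) & 63;        // next 6 bits: bitfield position
            bucketToBitfield[h_ent] |= (1ULL << h_low);
            std::printf("hash %08x -> entry %u, bit %u\n",
                        (unsigned)h, (unsigned)h_ent, (unsigned)h_low);
        }

        // At scan time a lookup can test the single bit first; only a set bit
        // requires following the chain of literal/offset pairs in that entry.
        return 0;
    }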
View File
@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -41,11 +41,11 @@
// hash table (caseful) (FDRSHashEntry) // hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry) // hash table (caseless) (FDRSHashEntry)
typedef enum { enum Modes {
CASEFUL = 0, CASEFUL = 0,
CASELESS = 1, CASELESS = 1,
MAX_MODES = 2 MAX_MODES = 2
} MODES; };
// We have one of these structures hanging off the 'link' of our secondary // We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings // FDR table that handles streaming strings
@ -91,12 +91,12 @@ struct FDRSHashEntry {
}; };
static really_inline static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) { u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return m == CASEFUL ? 0 : h->boundary[m-1]; return m == CASEFUL ? 0 : h->boundary[m-1];
} }
static really_inline static really_inline
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) { u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return h->boundary[m]; return h->boundary[m];
} }
@ -107,17 +107,17 @@ const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
} }
static really_inline static really_inline
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) { u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
return getLitTab(h)[get_start_lit_idx(h, m)].offset; return getLitTab(h)[get_start_lit_idx(h, m)].offset;
} }
static really_inline static really_inline
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) { u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v - getBaseOffsetOfLits(h, m) + 1; return v - getBaseOffsetOfLits(h, m) + 1;
} }
static really_inline static really_inline
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) { u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v + getBaseOffsetOfLits(h, m) - 1; return v + getBaseOffsetOfLits(h, m) - 1;
} }
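packStateVal and unpackStateVal above store a literal-table offset relative to the mode's base offset, biased by one so that a packed value of zero stays free to mean "no stored state" (the runtime's unpack path bails out on a zero entry). A tiny round-trip check of that encoding, with a hypothetical base standing in for getBaseOffsetOfLits():

    #include <cassert>
    #include <cstdint>

    // Hypothetical base offset for one mode; in the real table this comes
    // from getBaseOffsetOfLits(h, m).
    static const uint32_t base = 4096;

    static uint32_t pack_state(uint32_t v)   { return v - base + 1; }
    static uint32_t unpack_state(uint32_t v) { return v + base - 1; }

    int main() {
        // Zero is reserved for "no state", so the smallest real offset (the
        // base itself) packs to 1, and every value round-trips.
        assert(pack_state(base) == 1);
        for (uint32_t off = base; off < base + 10; off++) {
            assert(unpack_state(pack_state(off)) == off);
        }
        return 0;
    }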
@ -127,7 +127,7 @@ u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
} }
static really_inline static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) { u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
assert(len >= 32); assert(len >= 32);
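The CASEMASK constant in streaming_hash is the usual ASCII case-folding trick: clearing bit 0x20 in every byte maps 'a'..'z' onto 'A'..'Z', so the caseless hash of a block equals the hash of its folded bytes. A small sketch of only that folding step; the multiplicative mixing with MULTIPLIER over 32-byte blocks is cut off in this hunk and is not reproduced here:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Fold case by clearing bit 0x20 in every byte, as the 0xdf... mask does
    // eight bytes at a time; one 64-bit word is enough for the illustration.
    static uint64_t fold_word(const char *s) {
        uint64_t w = 0;
        std::memcpy(&w, s, sizeof(w));
        return w & 0xdfdfdfdfdfdfdfdfULL;
    }

    int main() {
        // "badgers!" and "BADGERS!" fold to the same word, so any hash of the
        // folded value is automatically case-insensitive.
        std::printf("%016llx\n", (unsigned long long)fold_word("badgers!"));
        std::printf("%016llx\n", (unsigned long long)fold_word("BADGERS!"));
        return 0;
    }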
View File
@ -143,7 +143,7 @@ u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
// binary search for the literal index that contains the current state // binary search for the literal index that contains the current state
static really_inline static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable, u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
u32 stateValue, MODES m) { u32 stateValue, enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable); const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 lo = get_start_lit_idx(streamingTable, m); u32 lo = get_start_lit_idx(streamingTable, m);
u32 hi = get_end_lit_idx(streamingTable, m); u32 hi = get_end_lit_idx(streamingTable, m);
@ -175,7 +175,7 @@ void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable, const struct FDRSTableHeader *streamingTable,
const struct FDRSLiteral * litTab, const struct FDRSLiteral * litTab,
const u32 *state_table, const u32 *state_table,
const MODES m) { const enum Modes m) {
if (!state_table[m]) { if (!state_table[m]) {
return; return;
} }
@ -213,8 +213,9 @@ void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
} }
static really_inline static really_inline
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable, u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) { const struct FDR_Runtime_Args *a, u32 hashState,
enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable); const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 idx = findLitTabEntry(streamingTable, hashState, m); u32 idx = findLitTabEntry(streamingTable, hashState, m);
size_t found_offset = litTab[idx].offset; size_t found_offset = litTab[idx].offset;
@ -279,7 +280,7 @@ void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
static really_inline static really_inline
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
u32 h, const MODES m) { u32 h, const enum Modes m) {
u32 nbits = streamingTable->hashNBits[m]; u32 nbits = streamingTable->hashNBits[m];
if (!nbits) { if (!nbits) {
return NULL; return NULL;
@ -303,7 +304,7 @@ const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
static really_inline static really_inline
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a, void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable, const struct FDRSTableHeader *streamingTable,
const struct FDRSHashEntry *ent, const MODES m) { const struct FDRSHashEntry *ent, const enum Modes m) {
assert(ent); assert(ent);
assert(streamingTable->hashNBits[m]); assert(streamingTable->hashNBits[m]);
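findLitTabEntry above is described as a binary search for the literal whose stored range contains the current packed state, and do_single_confirm then reads that literal's offset out of the table. Its body is not shown in this hunk, so the following is only a generic sketch of such a lookup over a sorted offset table, not the actual implementation:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Offsets of each stored literal, ascending; a final sentinel marks the
    // end of the last literal's bytes (mirroring the extra lit table entry
    // filled in at compile time as an end value).
    static uint32_t find_lit_entry(const std::vector<uint32_t> &offsets,
                                   uint32_t state_offset) {
        // The first entry whose offset exceeds state_offset, minus one, is the
        // literal whose [offset, next offset) range contains it.
        auto it = std::upper_bound(offsets.begin(), offsets.end(), state_offset);
        assert(it != offsets.begin());
        return (uint32_t)(it - offsets.begin()) - 1;
    }

    int main() {
        std::vector<uint32_t> offsets = {0, 12, 20, 35, 48};  // 48 is the sentinel
        assert(find_lit_entry(offsets, 0) == 0);
        assert(find_lit_entry(offsets, 19) == 1);
        assert(find_lit_entry(offsets, 35) == 3);
        return 0;
    }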
View File
@ -69,7 +69,7 @@ static
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) { void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
FDRFlood &fl = tmpFlood[c]; FDRFlood &fl = tmpFlood[c];
fl.suffix = MAX(fl.suffix, suffix + 1); fl.suffix = MAX(fl.suffix, suffix + 1);
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix); DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix);
} }
static static
@ -90,8 +90,9 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
} }
} }
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits, pair<aligned_unique_ptr<u8>, size_t>
const EngineDescription &eng) { setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng) {
vector<FDRFlood> tmpFlood(N_CHARS); vector<FDRFlood> tmpFlood(N_CHARS);
u32 default_suffix = eng.getDefaultFloodSuffixLength(); u32 default_suffix = eng.getDefaultFloodSuffixLength();
@ -124,8 +125,9 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
for (u32 i = 0; i < iEnd; i++) { for (u32 i = 0; i < iEnd; i++) {
if (i < litSize) { if (i < litSize) {
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) { if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n", DEBUG_PRINTF("non-flood char in literal[%u]: "
i, c, lit.s[litSize - i - 1]); "0x%02x != 0x%02x\n",
i, c, lit.s[litSize - i - 1]);
upSuffix = MIN(upSuffix, i); upSuffix = MIN(upSuffix, i);
loSuffix = MIN(loSuffix, i); // makes sense only for case-less loSuffix = MIN(loSuffix, i); // makes sense only for case-less
break; break;
@ -195,11 +197,12 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
size_t floodHeaderSize = sizeof(u32) * N_CHARS; size_t floodHeaderSize = sizeof(u32) * N_CHARS;
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
auto buf = aligned_zmalloc_unique<u8>(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc assert(buf); // otherwise would have thrown std::bad_alloc
u32 *floodHeader = (u32 *)buf; u32 *floodHeader = (u32 *)buf.get();
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize); FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
u32 currentFloodIndex = 0; u32 currentFloodIndex = 0;
for (const auto &m : flood2chars) { for (const auto &m : flood2chars) {
@ -215,7 +218,7 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n", DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
floodHeaderSize, floodStructSize, totalSize); floodHeaderSize, floodStructSize, totalSize);
return make_pair((u8 *)buf, totalSize); return {move(buf), totalSize};
} }
} // namespace ue2 } // namespace ue2
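setupFDRFloodControl above lays its output out as a 256-entry u32 header, one slot per byte value, followed by one FDRFlood record per distinct flood, so characters with identical flood behaviour share a record. A simplified sketch of that header-plus-records arrangement, with a one-field struct in place of FDRFlood and the header holding indices rather than the real table's offsets:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    struct FloodRec {        // simplified stand-in for FDRFlood
        uint32_t suffix;
    };

    int main() {
        const int N_CHARS = 256;

        // Pretend analysis produced these per-character suffix requirements;
        // most characters end up sharing the default record.
        std::map<uint32_t, std::set<uint8_t>> flood2chars;
        for (int c = 0; c < N_CHARS; c++) {
            uint32_t suffix = (c == 'a' || c == 'z') ? 8 : 1;
            flood2chars[suffix].insert((uint8_t)c);
        }

        // Layout: header of N_CHARS slots, then one record per distinct flood.
        std::vector<uint32_t> header(N_CHARS);
        std::vector<FloodRec> records;
        for (const auto &m : flood2chars) {
            uint32_t idx = (uint32_t)records.size();
            records.push_back(FloodRec{m.first});
            for (uint8_t c : m.second) {
                header[c] = idx;   // each char points at its shared record
            }
        }

        std::printf("%zu distinct flood records for %d characters\n",
                    records.size(), N_CHARS);
        return 0;
    }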
View File
@ -36,7 +36,6 @@
#include "teddy_internal.h" #include "teddy_internal.h"
#include "teddy_runtime_common.h" #include "teddy_runtime_common.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -80,15 +79,15 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
do { \ do { \
if (unlikely(isnonzero128(var))) { \ if (unlikely(isnonzero128(var))) { \
u64a lo = movq(var); \ u64a lo = movq(var); \
u64a hi = movq(byteShiftRight128(var, 8)); \ u64a hi = movq(rshiftbyte_m128(var, 8)); \
if (unlikely(lo)) { \ if (unlikely(lo)) { \
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \ conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(hi)) { \ if (unlikely(hi)) { \
conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \ conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
} \ } \
@ -98,27 +97,27 @@ do { \
do { \ do { \
if (unlikely(isnonzero128(var))) { \ if (unlikely(isnonzero128(var))) { \
u32 part1 = movd(var); \ u32 part1 = movd(var); \
u32 part2 = movd(byteShiftRight128(var, 4)); \ u32 part2 = movd(rshiftbyte_m128(var, 4)); \
u32 part3 = movd(byteShiftRight128(var, 8)); \ u32 part3 = movd(rshiftbyte_m128(var, 8)); \
u32 part4 = movd(byteShiftRight128(var, 12)); \ u32 part4 = movd(rshiftbyte_m128(var, 12)); \
if (unlikely(part1)) { \ if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part2)) { \ if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part3)) { \ if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part4)) { \ if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
} \ } \
@ -126,36 +125,34 @@ do { \
#endif #endif
static really_inline static really_inline
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) { m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
m128 mask = set16x8(0xf); m128 mask = set16x8(0xf);
m128 lo = and128(val, mask); m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask); m128 hi = and128(rshift64_m128(val, 4), mask);
return and128(and128(pshufb(maskBase[0*2], lo), return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
pshufb(maskBase[0*2+1], hi)), p_mask);
} }
static really_inline static really_inline
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask, m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
m128 val) {
m128 mask = set16x8(0xf); m128 mask = set16x8(0xf);
m128 lo = and128(val, mask); m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask); m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val); m128 r = prep_conf_teddy_m1(maskBase, val);
m128 res_1 = and128(pshufb(maskBase[1*2], lo), m128 res_1 = and128(pshufb(maskBase[1*2], lo),
pshufb(maskBase[1*2+1], hi)); pshufb(maskBase[1*2+1], hi));
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
*old_1 = res_1; *old_1 = res_1;
return and128(and128(r, p_mask), res_shifted_1); return and128(r, res_shifted_1);
} }
static really_inline static really_inline
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 p_mask, m128 val) { m128 val) {
m128 mask = set16x8(0xf); m128 mask = set16x8(0xf);
m128 lo = and128(val, mask); m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask); m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val); m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
m128 res_2 = and128(pshufb(maskBase[2*2], lo), m128 res_2 = and128(pshufb(maskBase[2*2], lo),
pshufb(maskBase[2*2+1], hi)); pshufb(maskBase[2*2+1], hi));
@ -166,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
static really_inline static really_inline
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 *old_3, m128 p_mask, m128 val) { m128 *old_3, m128 val) {
m128 mask = set16x8(0xf); m128 mask = set16x8(0xf);
m128 lo = and128(val, mask); m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask); m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val); m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
m128 res_3 = and128(pshufb(maskBase[3*2], lo), m128 res_3 = and128(pshufb(maskBase[3*2], lo),
pshufb(maskBase[3*2+1], hi)); pshufb(maskBase[3*2+1], hi));
@ -180,11 +177,10 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
} }
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -203,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16; ptr += 16;
} }
@ -217,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
} }
@ -227,19 +224,19 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -258,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -272,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -282,19 +280,19 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -314,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -329,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -341,19 +337,19 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -373,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -388,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -400,19 +394,19 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -434,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr)); load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -450,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr)); load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr + 16)); load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -461,20 +456,19 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
p_mask, val_0); r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -496,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr)); load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -512,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr)); load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr + 16)); load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -523,20 +518,19 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
m128 p_mask; m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
p_mask, val_0); r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -559,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr)); &res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -575,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr)); &res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr + 16)); &res_old_3, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -587,19 +582,19 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -622,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr)); &res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -638,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr)); &res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr + 16)); &res_old_3, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -650,9 +646,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
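Across the Teddy kernels above, the p_mask argument has been dropped from prep_conf_teddy_m1..m4 and the AND with p_mask now happens only in the cautious partial-block paths, so the main loop no longer feeds ones128() through every prep call. The same hoist sketched with scalar stand-ins (uint64_t for m128, a trivial prep function), purely to show the control-flow change:

    #include <cstdint>
    #include <cstdio>

    // Scalar stand-ins: ~0ULL plays the role of ones128(), and "prep" is a
    // placeholder for the shuffle-based mask lookups in prep_conf_teddy_m1().
    static uint64_t prep(uint64_t val) { return val ^ 0x5555555555555555ULL; }

    static uint64_t scan_block_old(uint64_t val, uint64_t p_mask) {
        // Old shape: every call pays for the AND, even when p_mask is all-ones.
        return prep(val) & p_mask;
    }

    static uint64_t scan_block_new(uint64_t val) {
        // New shape: the common full-block path skips the mask entirely.
        return prep(val);
    }

    int main() {
        uint64_t partial_mask = 0x00000000ffffffffULL;  // only low bytes valid

        // Cautious path (block overlaps buffer edges): mask applied by caller.
        uint64_t r_cautious = scan_block_new(0x1234) & partial_mask;

        // Hot path (whole block in bounds): no mask at all.
        uint64_t r_hot = scan_block_new(0x1234);

        std::printf("%016llx %016llx %016llx\n",
                    (unsigned long long)scan_block_old(0x1234, ~0ULL),
                    (unsigned long long)r_cautious,
                    (unsigned long long)r_hot);
        return 0;
    }

The cautious paths produce identical results either way; the win is that the two prep calls per iteration in the hot loop drop a redundant AND.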
View File
@ -33,64 +33,85 @@
#ifndef TEDDY_H_ #ifndef TEDDY_H_
#define TEDDY_H_ #define TEDDY_H_
#include "hwlm/hwlm.h" // for hwlm_group_t
struct FDR; // forward declaration from fdr_internal.h struct FDR; // forward declaration from fdr_internal.h
struct FDR_Runtime_Args; struct FDR_Runtime_Args;
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
#if defined(__AVX2__) #if defined(__AVX2__)
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a); const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, hwlm_error_t
const struct FDR_Runtime_Args *a); fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
#endif /* __AVX2__ */ #endif /* __AVX2__ */
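Note (not part of the diff): the declarations above capture the runtime API change in this release — every Teddy exec entry point now receives the live HWLM group mask as a by-value hwlm_group_t third argument instead of loading and storing it through a->groups. A minimal caller-side sketch; the wrapper name is purely illustrative and the real dispatch lives in the FDR runtime:

    /* Illustrative wrapper only, not taken from the tree. */
    #include "teddy.h"

    static hwlm_error_t run_teddy_m1(const struct FDR *fdr,
                                     const struct FDR_Runtime_Args *a,
                                     hwlm_group_t groups) {
        /* 'groups' is passed by value; previously each exec function read
         * *a->groups itself and wrote the updated mask back before return. */
        return fdr_exec_teddy_msks1(fdr, a, groups);
    }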


@ -36,7 +36,6 @@
#include "teddy_internal.h" #include "teddy_internal.h"
#include "teddy_runtime_common.h" #include "teddy_runtime_common.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#if defined(__AVX2__) #if defined(__AVX2__)
@ -122,22 +121,22 @@ do { \
u64a part4 = extract64from256(r, 1); \ u64a part4 = extract64from256(r, 1); \
if (unlikely(part1)) { \ if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part2)) { \ if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part3)) { \ if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part4)) { \ if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
} \ } \
@ -159,41 +158,41 @@ do { \
u32 part8 = extract32from256(r, 3); \ u32 part8 = extract32from256(r, 3); \
if (unlikely(part1)) { \ if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part2)) { \ if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \ conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
} \ } \
if (unlikely(part3)) { \ if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \ conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part4)) { \ if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \ conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part5)) { \ if (unlikely(part5)) { \
conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \ conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part6)) { \ if (unlikely(part6)) { \
conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \ conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part7)) { \ if (unlikely(part7)) { \
conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \ conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
if (unlikely(part8)) { \ if (unlikely(part8)) { \
conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \ conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \
control, &last_match); \ &control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
} \ } \
@ -205,11 +204,11 @@ do { \
if (unlikely(isnonzero256(var))) { \ if (unlikely(isnonzero256(var))) { \
u32 arrCnt = 0; \ u32 arrCnt = 0; \
m128 lo = cast256to128(var); \ m128 lo = cast256to128(var); \
m128 hi = cast256to128(swap128in256(var)); \ m128 hi = movdq_hi(var); \
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \ bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \ bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
for (u32 i = 0; i < arrCnt; i++) { \ for (u32 i = 0; i < arrCnt; i++) { \
conf_fn(bitArr[i], confBase, reason, a, ptr, control, \ conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \
&last_match); \ &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
@ -372,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
64 * (offset); 64 * (offset);
*arrCnt += 1; *arrCnt += 1;
} }
u64a part_1 = movq(byteShiftRight128(var, 8)); u64a part_1 = movq(rshiftbyte_m128(var, 8));
while (unlikely(part_1)) { while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
64 * (offset + 1); 64 * (offset + 1);
@ -385,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
32 * (offset * 2); 32 * (offset * 2);
*arrCnt += 1; *arrCnt += 1;
} }
u32 part_1 = movd(byteShiftRight128(var, 4)); u32 part_1 = movd(rshiftbyte_m128(var, 4));
while (unlikely(part_1)) { while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
32 * (offset * 2 + 1); 32 * (offset * 2 + 1);
*arrCnt += 1; *arrCnt += 1;
} }
u32 part_2 = movd(byteShiftRight128(var, 8)); u32 part_2 = movd(rshiftbyte_m128(var, 8));
while (unlikely(part_2)) { while (unlikely(part_2)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
32 * (offset * 2 + 2); 32 * (offset * 2 + 2);
*arrCnt += 1; *arrCnt += 1;
} }
u32 part_3 = movd(byteShiftRight128(var, 12)); u32 part_3 = movd(rshiftbyte_m128(var, 12));
while (unlikely(part_3)) { while (unlikely(part_3)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) + bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
32 * (offset * 2 + 3); 32 * (offset * 2 + 3);
@ -408,36 +407,35 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
} }
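Aside (not part of the diff): the hunks above also pick up renamed SIMD helpers from util/simd_utils.h. The semantics below are inferred purely from how the helpers are used in bit_array_fast_teddy() and the CONFIRM macros, so treat the comments as assumptions rather than documentation:

    /* given: m256 r (a 256-bit result), m128 v (a 128-bit lane) */
    m128 lo = cast256to128(r);             /* low 128 bits of r                 */
    m128 hi = movdq_hi(r);                 /* high 128 bits of r; replaces      */
                                           /* cast256to128(swap128in256(r))     */
    m256 s  = rshift64_m256(r, 4);         /* shift within each 64-bit lane;    */
                                           /* replaces rshift4x64(r, 4)         */
    u64a q1 = movq(rshiftbyte_m128(v, 8)); /* bytes 8..15 of v as a u64a;       */
                                           /* replaces byteShiftRight128(v, 8)  */
    u32  d1 = movd(rshiftbyte_m128(v, 4)); /* bytes 4..7 of v as a u32          */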
static really_inline static really_inline
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) { m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
m256 mask = set32x8(0xf); m256 mask = set32x8(0xf);
m256 lo = and256(val, mask); m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask); m256 hi = and256(rshift64_m256(val, 4), mask);
return and256(and256(vpshufb(maskBase[0*2], lo), return and256(vpshufb(maskBase[0*2], lo),
vpshufb(maskBase[0*2+1], hi)), p_mask); vpshufb(maskBase[0*2+1], hi));
} }
static really_inline static really_inline
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask, m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) {
m256 val) {
m256 mask = set32x8(0xf); m256 mask = set32x8(0xf);
m256 lo = and256(val, mask); m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask); m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val); m256 r = prep_conf_fat_teddy_m1(maskBase, val);
m256 res_1 = and256(vpshufb(maskBase[1*2], lo), m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
vpshufb(maskBase[1*2+1], hi)); vpshufb(maskBase[1*2+1], hi));
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
*old_1 = res_1; *old_1 = res_1;
return and256(and256(r, p_mask), res_shifted_1); return and256(r, res_shifted_1);
} }
static really_inline static really_inline
m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 p_mask, m256 val) { m256 val) {
m256 mask = set32x8(0xf); m256 mask = set32x8(0xf);
m256 lo = and256(val, mask); m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask); m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val); m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
m256 res_2 = and256(vpshufb(maskBase[2*2], lo), m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
vpshufb(maskBase[2*2+1], hi)); vpshufb(maskBase[2*2+1], hi));
@ -448,11 +446,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
static really_inline static really_inline
m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 *old_3, m256 p_mask, m256 val) { m256 *old_3, m256 val) {
m256 mask = set32x8(0xf); m256 mask = set32x8(0xf);
m256 lo = and256(val, mask); m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask); m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val); m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
m256 res_3 = and256(vpshufb(maskBase[3*2], lo), m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
vpshufb(maskBase[3*2+1], hi)); vpshufb(maskBase[3*2+1], hi));
@ -462,12 +460,10 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
} }
static really_inline static really_inline
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi, m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) {
m256 p_mask) {
m256 lo = and256(val, mask); m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask); m256 hi = and256(rshift64_m256(val, 4), mask);
m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
return and256(res, p_mask);
} }
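Note (not part of the diff): the prep_conf_* helpers above no longer take a p_mask argument. The partial-load mask is now applied by the caller, and only on the cautious (vectored load) paths; the hot loop drops the ones256() placeholder entirely. A condensed sketch of the new calling pattern, taken from the hunks that follow:

    /* cautious head/tail blocks: mask the result explicitly */
    m256 p_mask;
    m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
                                   a->buf_history, a->len_history, 1);
    m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
    r_0 = and256(r_0, p_mask);

    /* hot loop: plain loads, no ones256() placeholder mask needed any more */
    m256 r_hot = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));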
static really_inline static really_inline
@ -482,11 +478,10 @@ const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
} }
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -505,13 +500,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16; ptr += 16;
} }
@ -519,10 +515,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
} }
@ -530,19 +525,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -561,13 +556,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -575,10 +571,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -586,19 +581,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1); a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -618,14 +613,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -633,10 +628,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
load2x128(ptr + 16)); load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -645,19 +639,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -677,25 +671,24 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
load2x128(ptr + 16)); load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -704,19 +697,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2); a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -738,14 +731,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr)); load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -754,10 +748,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr)); load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr + 16)); load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -766,19 +760,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -800,14 +794,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr)); load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -816,10 +811,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr)); load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr + 16)); load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -828,19 +823,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3); a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0); val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -863,15 +858,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16; ptr += 16;
} }
@ -880,12 +875,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
} }
@ -894,19 +887,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -929,15 +922,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
if (ptr + 16 < buf_end) { if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16; ptr += 16;
} }
@ -946,12 +939,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4)); __builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD; CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(), &res_old_3, load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
} }
@ -960,19 +951,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4); a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0); &res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -996,16 +987,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history); buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
p_mask); res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
ptr += 32; ptr += 32;
} }
if (ptr + 32 < buf_end) { if (ptr + 32 < buf_end) {
m256 val_0 = load256(ptr + 0); m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
ptr += 32; ptr += 32;
} }
@ -1015,13 +1005,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
CHECK_FLOOD; CHECK_FLOOD;
m256 val_0 = load256(ptr + 0); m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
m256 val_1 = load256(ptr + 32); m256 val_1 = load256(ptr + 32);
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
} }
@ -1029,20 +1017,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history); buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
p_mask); res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) { const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len; const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset; const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START; u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect; const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1; u32 last_match = (u32)-1;
@ -1066,16 +1053,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history); buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
p_mask); res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
ptr += 32; ptr += 32;
} }
if (ptr + 32 < buf_end) { if (ptr + 32 < buf_end) {
m256 val_0 = load256(ptr + 0); m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
ptr += 32; ptr += 32;
} }
@ -1085,13 +1071,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
CHECK_FLOOD; CHECK_FLOOD;
m256 val_0 = load256(ptr + 0); m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
m256 val_1 = load256(ptr + 32); m256 val_1 = load256(ptr + 32);
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
ones256());
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy); CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
} }
@ -1099,11 +1083,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
m256 p_mask; m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history); buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
p_mask); res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
} }
*a->groups = controlVal;
return HWLM_SUCCESS; return HWLM_SUCCESS;
} }


@ -74,12 +74,11 @@ public:
const TeddyEngineDescription &eng_in, bool make_small_in) const TeddyEngineDescription &eng_in, bool make_small_in)
: eng(eng_in), lits(lits_in), make_small(make_small_in) {} : eng(eng_in), lits(lits_in), make_small(make_small_in) {}
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link); aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits); bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
}; };
class TeddySet { class TeddySet {
const vector<hwlmLiteral> &lits;
u32 len; u32 len;
// nibbleSets is a series of bitfields over 16 predicates // nibbleSets is a series of bitfields over 16 predicates
// that represent the whether shufti nibble set // that represent the whether shufti nibble set
@ -89,8 +88,7 @@ class TeddySet {
vector<u16> nibbleSets; vector<u16> nibbleSets;
set<u32> litIds; set<u32> litIds;
public: public:
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in) explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
const set<u32> & getLits() const { return litIds; } const set<u32> & getLits() const { return litIds; }
size_t litCount() const { return litIds.size(); } size_t litCount() const { return litIds.size(); }
@ -106,8 +104,8 @@ public:
} }
printf("\nnlits: %zu\nLit ids: ", litCount()); printf("\nnlits: %zu\nLit ids: ", litCount());
printf("Prob: %llu\n", probability()); printf("Prob: %llu\n", probability());
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) { for (const auto &id : litIds) {
printf("%u ", *i); printf("%u ", id);
} }
printf("\n"); printf("\n");
printf("Flood prone : %s\n", isRunProne()?"yes":"no"); printf("Flood prone : %s\n", isRunProne()?"yes":"no");
@ -118,15 +116,15 @@ public:
return nibbleSets == ts.nibbleSets; return nibbleSets == ts.nibbleSets;
} }
void addLiteral(u32 lit_id) { void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
const string &s = lits[lit_id].s; const string &s = lit.s;
for (u32 i = 0; i < len; i++) { for (u32 i = 0; i < len; i++) {
if (i < s.size()) { if (i < s.size()) {
u8 c = s[s.size() - i - 1]; u8 c = s[s.size() - i - 1];
u8 c_hi = (c >> 4) & 0xf; u8 c_hi = (c >> 4) & 0xf;
u8 c_lo = c & 0xf; u8 c_lo = c & 0xf;
nibbleSets[i*2] = 1 << c_lo; nibbleSets[i*2] = 1 << c_lo;
if (lits[lit_id].nocase && ourisalpha(c)) { if (lit.nocase && ourisalpha(c)) {
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2)); nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
} else { } else {
nibbleSets[i*2+1] = 1 << c_hi; nibbleSets[i*2+1] = 1 << c_hi;
@ -185,28 +183,26 @@ bool TeddyCompiler::pack(map<BucketIndex,
set<TeddySet> sts; set<TeddySet> sts;
for (u32 i = 0; i < lits.size(); i++) { for (u32 i = 0; i < lits.size(); i++) {
TeddySet ts(lits, eng.numMasks); TeddySet ts(eng.numMasks);
ts.addLiteral(i); ts.addLiteral(i, lits[i]);
sts.insert(ts); sts.insert(ts);
} }
while (1) { while (1) {
#ifdef TEDDY_DEBUG #ifdef TEDDY_DEBUG
printf("Size %zu\n", sts.size()); printf("Size %zu\n", sts.size());
for (set<TeddySet>::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) { for (const TeddySet &ts : sts) {
printf("\n"); i1->dump(); printf("\n"); ts.dump();
} }
printf("\n===============================================\n"); printf("\n===============================================\n");
#endif #endif
set<TeddySet>::iterator m1 = sts.end(), m2 = sts.end(); auto m1 = sts.end(), m2 = sts.end();
u64a best = 0xffffffffffffffffULL; u64a best = 0xffffffffffffffffULL;
for (set<TeddySet>::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) { for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
set<TeddySet>::iterator i2 = i1;
++i2;
const TeddySet &s1 = *i1; const TeddySet &s1 = *i1;
for (set<TeddySet>::iterator e2 = sts.end(); i2 != e2; ++i2) { for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
const TeddySet &s2 = *i2; const TeddySet &s2 = *i2;
// be more conservative if we don't absolutely need to // be more conservative if we don't absolutely need to
@ -216,7 +212,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
continue; continue;
} }
TeddySet tmpSet(lits, eng.numMasks); TeddySet tmpSet(eng.numMasks);
tmpSet.merge(s1); tmpSet.merge(s1);
tmpSet.merge(s2); tmpSet.merge(s2);
u64a newScore = tmpSet.heuristic(); u64a newScore = tmpSet.heuristic();
@ -246,7 +242,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
} }
// do the merge // do the merge
TeddySet nts(lits, eng.numMasks); TeddySet nts(eng.numMasks);
nts.merge(*m1); nts.merge(*m1);
nts.merge(*m2); nts.merge(*m2);
#ifdef TEDDY_DEBUG #ifdef TEDDY_DEBUG
@ -263,25 +259,23 @@ bool TeddyCompiler::pack(map<BucketIndex,
sts.erase(m2); sts.erase(m2);
sts.insert(nts); sts.insert(nts);
} }
u32 cnt = 0;
if (sts.size() > eng.getNumBuckets()) { if (sts.size() > eng.getNumBuckets()) {
return false; return false;
} }
for (set<TeddySet>::const_iterator i = sts.begin(), e = sts.end(); i != e; u32 bucket_id = 0;
++i) { for (const TeddySet &ts : sts) {
for (set<u32>::const_iterator i2 = i->getLits().begin(), const auto &ts_lits = ts.getLits();
e2 = i->getLits().end(); auto &bucket_lits = bucketToLits[bucket_id];
i2 != e2; ++i2) { bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
bucketToLits[cnt].push_back(*i2); bucket_id++;
}
cnt++;
} }
return true; return true;
} }
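Note (not part of the diff): TeddySet no longer stores a reference to the whole literal vector; the constructor is now explicit TeddySet(u32 len_in) and the literal is passed into addLiteral() directly. Construction in pack() therefore reduces to (reconstructed from the hunks above):

    TeddySet ts(eng.numMasks);   // was: TeddySet ts(lits, eng.numMasks)
    ts.addLiteral(i, lits[i]);   // the literal itself is now passed in
    sts.insert(ts);

The bucket assignment at the end of pack() likewise appends each set's literal ids into bucketToLits[bucket_id] with a single insert() call rather than an element-by-element loop.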
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) { aligned_unique_ptr<FDR>
TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size()); DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return nullptr; return nullptr;
@ -314,9 +308,8 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng); auto floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) + size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen + maskLen +
@ -334,38 +327,29 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
teddy->maxStringLen = verify_u32(maxLen(lits)); teddy->maxStringLen = verify_u32(maxLen(lits));
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.first, confirmTmp.second); memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
ptr += confirmTmp.second; ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
teddy->floodOffset = verify_u32(ptr - teddy_base); teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second); memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
ptr += floodControlTmp.second; ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
if (link.first) { if (link.first) {
teddy->link = verify_u32(ptr - teddy_base); teddy->link = verify_u32(ptr - teddy_base);
memcpy(ptr, link.first, link.second); memcpy(ptr, link.first.get(), link.second);
aligned_free(link.first);
} else { } else {
teddy->link = 0; teddy->link = 0;
} }
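Note (not part of the diff): the temporary confirm, flood-control and link blobs are now carried in aligned_unique_ptr<u8> rather than raw pointers, which is why the explicit aligned_free() calls above disappear — the builder simply copies out of .get(). A hedged sketch of the resulting pattern, assuming aligned_unique_ptr releases its buffer on destruction like an ordinary unique_ptr:

    auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
    memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
    ptr += confirmTmp.second;
    /* confirmTmp.first frees its aligned buffer when it goes out of scope;
     * no aligned_free() call is required. */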
u8 *baseMsk = teddy_base + sizeof(Teddy); u8 *baseMsk = teddy_base + sizeof(Teddy);
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator for (const auto &b2l : bucketToLits) {
i = bucketToLits.begin(), const u32 &bucket_id = b2l.first;
e = bucketToLits.end(); const vector<LiteralIndex> &ids = b2l.second;
i != e; ++i) {
const u32 bucket_id = i->first;
const vector<LiteralIndex> &ids = i->second;
const u8 bmsk = 1U << (bucket_id % 8); const u8 bmsk = 1U << (bucket_id % 8);
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(), for (const LiteralIndex &lit_id : ids) {
e2 = ids.end(); const hwlmLiteral &l = lits[lit_id];
i2 != e2; ++i2) {
LiteralIndex lit_id = *i2;
const hwlmLiteral & l = lits[lit_id];
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id); DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
const u32 sz = verify_u32(l.s.size()); const u32 sz = verify_u32(l.s.size());
@ -439,10 +423,10 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
} // namespace } // namespace
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits, aligned_unique_ptr<FDR>
bool make_small, u32 hint, teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, u32 hint, const target_t &target,
pair<u8 *, size_t> link) { pair<aligned_unique_ptr<u8>, size_t> &link) {
unique_ptr<TeddyEngineDescription> des; unique_ptr<TeddyEngineDescription> des;
if (hint == HINT_INVALID) { if (hint == HINT_INVALID) {
des = chooseTeddyEngine(target, lits); des = chooseTeddyEngine(target, lits);


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -49,7 +49,7 @@ struct hwlmLiteral;
ue2::aligned_unique_ptr<FDR> ue2::aligned_unique_ptr<FDR>
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small, teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target, u32 hint, const target_t &target,
std::pair<u8 *, size_t> link); std::pair<aligned_unique_ptr<u8>, size_t> &link);
} // namespace ue2 } // namespace ue2


@ -51,8 +51,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
#define CHECK_HWLM_TERMINATE_MATCHING \ #define CHECK_HWLM_TERMINATE_MATCHING \
do { \ do { \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
*a->groups = controlVal; \
return HWLM_TERMINATED; \ return HWLM_TERMINATED; \
} \ } \
} while (0); } while (0);
@ -61,8 +60,7 @@ do { \
do { \ do { \
if (unlikely(ptr > tryFloodDetect)) { \ if (unlikely(ptr > tryFloodDetect)) { \
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \ tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
&floodBackoff, &controlVal, \ &floodBackoff, &control, iterBytes); \
iterBytes); \
CHECK_HWLM_TERMINATE_MATCHING; \ CHECK_HWLM_TERMINATE_MATCHING; \
} \ } \
} while (0); } while (0);
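Note (not part of the diff): with control now a by-value local in each exec function, the two macros above reduce to the following, reconstructed from the new side of the hunks (whitespace is illustrative):

    #define CHECK_HWLM_TERMINATE_MATCHING                                   \
    do {                                                                    \
        if (unlikely(control == HWLM_TERMINATE_MATCHING)) {                 \
            return HWLM_TERMINATED; /* no *a->groups write-back needed */   \
        }                                                                   \
    } while (0);

    #define CHECK_FLOOD                                                     \
    do {                                                                    \
        if (unlikely(ptr > tryFloodDetect)) {                               \
            tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect,      \
                                         &floodBackoff, &control,           \
                                         iterBytes);                        \
            CHECK_HWLM_TERMINATE_MATCHING;                                  \
        }                                                                   \
    } while (0);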


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,7 +34,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#define DEFAULT_MAX_HISTORY 60 #define DEFAULT_MAX_HISTORY 110
using namespace std; using namespace std;
@ -50,8 +50,11 @@ Grey::Grey(void) :
allowLitHaig(true), allowLitHaig(true),
allowLbr(true), allowLbr(true),
allowMcClellan(true), allowMcClellan(true),
allowSheng(true),
allowPuff(true), allowPuff(true),
allowLiteral(true),
allowRose(true), allowRose(true),
allowViolet(true),
allowExtendedNFA(true), /* bounded repeats of course */ allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true), allowLimExNFA(true),
allowAnchoredAcyclic(true), allowAnchoredAcyclic(true),
@ -60,6 +63,13 @@ Grey::Grey(void) :
allowDecoratedLiteral(true), allowDecoratedLiteral(true),
allowNoodle(true), allowNoodle(true),
fdrAllowTeddy(true), fdrAllowTeddy(true),
violetAvoidSuffixes(true),
violetAvoidWeakInfixes(true),
violetDoubleCut(true),
violetExtractStrongLiterals(true),
violetLiteralChains(true),
violetDoubleCutLiteralLen(3),
violetEarlyCleanLiteralLen(6),
puffImproveHead(true), puffImproveHead(true),
castleExclusive(true), castleExclusive(true),
mergeSEP(true), /* short exhaustible passthroughs */ mergeSEP(true), /* short exhaustible passthroughs */
@ -81,7 +91,6 @@ Grey::Grey(void) :
allowZombies(true), allowZombies(true),
floodAsPuffette(false), floodAsPuffette(false),
nfaForceSize(0), nfaForceSize(0),
nfaForceShifts(0),
maxHistoryAvailable(DEFAULT_MAX_HISTORY), maxHistoryAvailable(DEFAULT_MAX_HISTORY),
minHistoryAvailable(0), /* debugging only */ minHistoryAvailable(0), /* debugging only */
maxAnchoredRegion(63), /* for rose's atable to run over */ maxAnchoredRegion(63), /* for rose's atable to run over */
@ -119,6 +128,7 @@ Grey::Grey(void) :
equivalenceEnable(true), equivalenceEnable(true),
allowSmallWrite(true), // McClellan dfas for small patterns allowSmallWrite(true), // McClellan dfas for small patterns
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
smallWriteLargestBuffer(70), // largest buffer that can be smallWriteLargestBuffer(70), // largest buffer that can be
// considered a small write // considered a small write
@ -126,6 +136,10 @@ Grey::Grey(void) :
// are given to rose &co // are given to rose &co
smallWriteLargestBufferBad(35), smallWriteLargestBufferBad(35),
limitSmallWriteOutfixSize(1048576), // 1 MB limitSmallWriteOutfixSize(1048576), // 1 MB
smallWriteMaxPatterns(10000),
smallWriteMaxLiterals(10000),
allowTamarama(true), // Tamarama engine
tamaChunkSize(100),
dumpFlags(0), dumpFlags(0),
limitPatternCount(8000000), // 8M patterns limitPatternCount(8000000), // 8M patterns
limitPatternLength(16000), // 16K bytes limitPatternLength(16000), // 16K bytes
@ -202,8 +216,11 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowLitHaig); G_UPDATE(allowLitHaig);
G_UPDATE(allowLbr); G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan); G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
G_UPDATE(allowPuff); G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowRose); G_UPDATE(allowRose);
G_UPDATE(allowViolet);
G_UPDATE(allowExtendedNFA); G_UPDATE(allowExtendedNFA);
G_UPDATE(allowLimExNFA); G_UPDATE(allowLimExNFA);
G_UPDATE(allowAnchoredAcyclic); G_UPDATE(allowAnchoredAcyclic);
@ -212,6 +229,13 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowDecoratedLiteral); G_UPDATE(allowDecoratedLiteral);
G_UPDATE(allowNoodle); G_UPDATE(allowNoodle);
G_UPDATE(fdrAllowTeddy); G_UPDATE(fdrAllowTeddy);
G_UPDATE(violetAvoidSuffixes);
G_UPDATE(violetAvoidWeakInfixes);
G_UPDATE(violetDoubleCut);
G_UPDATE(violetExtractStrongLiterals);
G_UPDATE(violetLiteralChains);
G_UPDATE(violetDoubleCutLiteralLen);
G_UPDATE(violetEarlyCleanLiteralLen);
G_UPDATE(puffImproveHead); G_UPDATE(puffImproveHead);
G_UPDATE(castleExclusive); G_UPDATE(castleExclusive);
G_UPDATE(mergeSEP); G_UPDATE(mergeSEP);
@ -232,7 +256,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowZombies); G_UPDATE(allowZombies);
G_UPDATE(floodAsPuffette); G_UPDATE(floodAsPuffette);
G_UPDATE(nfaForceSize); G_UPDATE(nfaForceSize);
G_UPDATE(nfaForceShifts);
G_UPDATE(highlanderSquash); G_UPDATE(highlanderSquash);
G_UPDATE(maxHistoryAvailable); G_UPDATE(maxHistoryAvailable);
G_UPDATE(minHistoryAvailable); G_UPDATE(minHistoryAvailable);
@ -270,9 +293,14 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(miracleHistoryBonus); G_UPDATE(miracleHistoryBonus);
G_UPDATE(equivalenceEnable); G_UPDATE(equivalenceEnable);
G_UPDATE(allowSmallWrite); G_UPDATE(allowSmallWrite);
G_UPDATE(allowSmallWriteSheng);
G_UPDATE(smallWriteLargestBuffer); G_UPDATE(smallWriteLargestBuffer);
G_UPDATE(smallWriteLargestBufferBad); G_UPDATE(smallWriteLargestBufferBad);
G_UPDATE(limitSmallWriteOutfixSize); G_UPDATE(limitSmallWriteOutfixSize);
G_UPDATE(smallWriteMaxPatterns);
G_UPDATE(smallWriteMaxLiterals);
G_UPDATE(allowTamarama);
G_UPDATE(tamaChunkSize);
G_UPDATE(limitPatternCount); G_UPDATE(limitPatternCount);
G_UPDATE(limitPatternLength); G_UPDATE(limitPatternLength);
G_UPDATE(limitGraphVertices); G_UPDATE(limitGraphVertices);
@ -309,7 +337,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false; g->allowLitHaig = false;
g->allowMcClellan = false; g->allowMcClellan = false;
g->allowPuff = false; g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false; g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false; g->allowSmallLiteralSet = false;
g->roseMasks = false; g->roseMasks = false;
done = true; done = true;
@ -325,7 +355,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false; g->allowLitHaig = false;
g->allowMcClellan = true; g->allowMcClellan = true;
g->allowPuff = false; g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false; g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false; g->allowSmallLiteralSet = false;
g->roseMasks = false; g->roseMasks = false;
done = true; done = true;
@ -341,7 +373,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false; g->allowLitHaig = false;
g->allowMcClellan = true; g->allowMcClellan = true;
g->allowPuff = false; g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false; g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false; g->allowSmallLiteralSet = false;
g->roseMasks = false; g->roseMasks = false;
done = true; done = true;
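The compile-time knobs introduced above (Sheng, Violet, Tamarama and the small-write limits) are internal tuning switches on ue2::Grey rather than public API. A minimal sketch of flipping them directly, assuming internal access to grey.h; the field values below are illustrative only, not recommendations:

#include "grey.h"

// Sketch only: ue2::Grey is internal to the compiler and not exposed via hs.h.
static ue2::Grey makeDebugGrey() {
    ue2::Grey g;                    // defaults come from the constructor above
    g.allowViolet = false;          // fall back to the pre-Violet decomposition
    g.allowSheng = false;           // disable the shuffle-based DFA engine
    g.allowTamarama = false;        // disable the exclusive-engine container
    g.smallWriteMaxPatterns = 500;  // illustrative tighter small-write budget
    return g;
}

The same switches can also be driven through the override string handled by applyGreyOverrides() above.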

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -50,8 +50,11 @@ struct Grey {
bool allowLitHaig; bool allowLitHaig;
bool allowLbr; bool allowLbr;
bool allowMcClellan; bool allowMcClellan;
bool allowSheng;
bool allowPuff; bool allowPuff;
bool allowLiteral;
bool allowRose; bool allowRose;
bool allowViolet;
bool allowExtendedNFA; bool allowExtendedNFA;
bool allowLimExNFA; bool allowLimExNFA;
bool allowAnchoredAcyclic; bool allowAnchoredAcyclic;
@ -62,6 +65,14 @@ struct Grey {
bool allowNoodle; bool allowNoodle;
bool fdrAllowTeddy; bool fdrAllowTeddy;
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
bool violetAvoidWeakInfixes;
bool violetDoubleCut;
bool violetExtractStrongLiterals;
bool violetLiteralChains;
u32 violetDoubleCutLiteralLen;
u32 violetEarlyCleanLiteralLen;
bool puffImproveHead; bool puffImproveHead;
bool castleExclusive; // enable castle mutual exclusion analysis bool castleExclusive; // enable castle mutual exclusion analysis
@ -88,7 +99,6 @@ struct Grey {
bool floodAsPuffette; bool floodAsPuffette;
u32 nfaForceSize; u32 nfaForceSize;
u32 nfaForceShifts;
u32 maxHistoryAvailable; u32 maxHistoryAvailable;
u32 minHistoryAvailable; u32 minHistoryAvailable;
@ -140,9 +150,16 @@ struct Grey {
// SmallWrite engine // SmallWrite engine
bool allowSmallWrite; bool allowSmallWrite;
bool allowSmallWriteSheng;
u32 smallWriteLargestBuffer; // largest buffer that can be small write u32 smallWriteLargestBuffer; // largest buffer that can be small write
u32 smallWriteLargestBufferBad;// largest buffer that can be small write u32 smallWriteLargestBufferBad;// largest buffer that can be small write
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
// Tamarama engine
bool allowTamarama;
u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama
enum DumpFlags { enum DumpFlags {
DUMP_NONE = 0, DUMP_NONE = 0,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -219,7 +219,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
: get_current_target(); : get_current_target();
CompileContext cc(isStreaming, isVectored, target_info, g); CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, somPrecision); NG ng(cc, elements, somPrecision);
try { try {
for (unsigned int i = 0; i < elements; i++) { for (unsigned int i = 0; i < elements; i++) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -98,6 +98,12 @@ extern "C"
* The library was unable to allocate temporary storage used during * The library was unable to allocate temporary storage used during
* compilation time. * compilation time.
* *
* - *Allocator returned misaligned memory*
*
* The memory allocator (either malloc() or the allocator set with @ref
* hs_set_allocator()) did not correctly return memory suitably aligned
* for the largest representable data type on this platform.
*
* - *Internal error* * - *Internal error*
* *
* An unexpected error occurred: if this error is reported, please contact * An unexpected error occurred: if this error is reported, please contact
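As context for the new error text, a minimal sketch of a conforming custom allocator registered through hs_set_allocator(); plain malloc() already returns memory aligned for the largest representable data type, so the wrapper only illustrates the contract:

#include <stdlib.h>
#include <hs.h>

static void *my_alloc(size_t size) {
    return malloc(size); /* must be suitably aligned; malloc guarantees this */
}

static void my_free(void *ptr) {
    free(ptr);
}

static hs_error_t install_allocator(void) {
    return hs_set_allocator(my_alloc, my_free);
}

An allocator that hands back, say, an arbitrary offset into a byte buffer would trip the new misaligned-memory check on the compile path.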

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -37,6 +37,7 @@
#include "fdr/fdr.h" #include "fdr/fdr.h"
#include "nfa/accel.h" #include "nfa/accel.h"
#include "nfa/shufti.h" #include "nfa/shufti.h"
#include "nfa/truffle.h"
#include "nfa/vermicelli.h" #include "nfa/vermicelli.h"
#include <string.h> #include <string.h>
@ -64,8 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
case ACCEL_SHUFTI: case ACCEL_SHUFTI:
DEBUG_PRINTF("single shufti\n"); DEBUG_PRINTF("single shufti\n");
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
case ACCEL_TRUFFLE:
DEBUG_PRINTF("truffle\n");
return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
default: default:
/* no acceleration, fall through and return current ptr */ /* no acceleration, fall through and return current ptr */
DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
assert(aux->accel_type == ACCEL_NONE);
return ptr; return ptr;
} }
} }

View File

@ -35,9 +35,11 @@
#include "hwlm_internal.h" #include "hwlm_internal.h"
#include "noodle_engine.h" #include "noodle_engine.h"
#include "noodle_build.h" #include "noodle_build.h"
#include "scratch.h"
#include "ue2common.h" #include "ue2common.h"
#include "fdr/fdr_compile.h" #include "fdr/fdr_compile.h"
#include "nfa/shufticompile.h" #include "nfa/shufticompile.h"
#include "nfa/trufflecompile.h"
#include "util/alloc.h" #include "util/alloc.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/charreach.h" #include "util/charreach.h"
@ -62,6 +64,28 @@ namespace ue2 {
static const unsigned int MAX_ACCEL_OFFSET = 16; static const unsigned int MAX_ACCEL_OFFSET = 16;
static const unsigned int MAX_SHUFTI_WIDTH = 240; static const unsigned int MAX_SHUFTI_WIDTH = 240;
static
size_t mask_overhang(const hwlmLiteral &lit) {
size_t msk_true_size = lit.msk.size();
assert(msk_true_size <= HWLM_MASKLEN);
assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
for (u8 c : lit.msk) {
if (!c) {
msk_true_size--;
} else {
break;
}
}
if (lit.s.length() >= msk_true_size) {
return 0;
}
/* only short literals should be able to have a mask which overhangs */
assert(lit.s.length() < MAX_ACCEL_OFFSET);
return msk_true_size - lit.s.length();
}
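A worked example of the overhang arithmetic above, as a sketch; the (s, nocase, id) constructor form and brace-initialised msk/cmp are assumed for illustration:

// A 2-byte literal carrying a mask whose effective length is 3 bytes: leading
// zero mask bytes are skipped, so msk_true_size == 3 and the mask overhangs
// the start of the literal by 3 - 2 == 1 byte. That byte can only be checked
// through msk/cmp, never through the literal text itself.
hwlmLiteral lit("ab", false, 0);     // assumed (s, nocase, id) constructor
lit.msk = {0x00, 0xff, 0xff, 0xff};  // first byte ignored (zero)
lit.cmp = {0x00, 0x20, 'a', 'b'};    // byte before 'a' must equal 0x20
assert(mask_overhang(lit) == 1);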
static static
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) { bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front(); const hwlmLiteral &first = *lits.front();
@ -167,7 +191,8 @@ bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
} }
if (found) { if (found) {
curr.max_offset = MAX(curr.max_offset, j); assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
break; break;
} }
} }
@ -288,8 +313,8 @@ bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
} }
if (found) { if (found) {
curr.max_offset = MAX(curr.max_offset, j); assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
break; ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
} }
} }
} }
@ -346,6 +371,25 @@ void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
} }
} }
static
bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
u32 max_offset) {
for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
unsigned char c = lit.s[i];
if (lit.nocase) {
if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
return true;
}
} else {
if (cr.test(c)) {
return true;
}
}
}
return false;
}
static static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits, void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
hwlm_group_t expected_groups, AccelAux *aux) { hwlm_group_t expected_groups, AccelAux *aux) {
@ -363,29 +407,45 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
return; return;
} }
/* look for shufti/truffle */
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach()); vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
for (const auto &lit : lits) { for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) { if (!(lit.groups & expected_groups)) {
continue; continue;
} }
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) { u32 overhang = mask_overhang(lit);
unsigned char c = lit.s[i]; for (u32 i = 0; i < overhang; i++) {
/* this offset overhangs the start of the real literal; look at the
* msk/cmp */
for (u32 j = 0; j < N_CHARS; j++) {
if ((j & lit.msk[i]) == lit.cmp[i]) {
reach[i].set(j);
}
}
}
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
CharReach &reach_i = reach[i];
u32 i_effective = i - overhang;
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
continue;
}
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
: lit.s.back();
if (lit.nocase) { if (lit.nocase) {
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i); reach_i.set(mytoupper(c));
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i); reach_i.set(mytolower(c));
reach[i].set(mytoupper(c));
reach[i].set(mytolower(c));
} else { } else {
DEBUG_PRINTF("adding %02hhx to %u\n", c, i); reach_i.set(c);
reach[i].set(c);
} }
} }
} }
u32 min_count = ~0U; u32 min_count = ~0U;
u32 min_offset = ~0U; u32 min_offset = ~0U;
for (u32 i = 0; i < min_len; i++) { for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
size_t count = reach[i].count(); size_t count = reach[i].count();
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
describeClass(reach[i]).c_str(), count); describeClass(reach[i]).c_str(), count);
@ -394,10 +454,9 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
min_offset = i; min_offset = i;
} }
} }
assert(min_offset <= min_len);
if (min_count > MAX_SHUFTI_WIDTH) { if (min_count > MAX_SHUFTI_WIDTH) {
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count); DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
return; return;
} }
@ -410,7 +469,11 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
return; return;
} }
DEBUG_PRINTF("fail\n"); truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->truffle.accel_type = ACCEL_TRUFFLE;
aux->truffle.offset = verify_u8(min_offset);
} }
static static
@ -466,6 +529,10 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
stream_control->history_max); stream_control->history_max);
return false; return false;
} }
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
assert(0);
return false;
}
} }
if (!lits.front().msk.empty()) { if (!lits.front().msk.empty()) {

View File

@ -37,7 +37,6 @@
#include "util/compare.h" #include "util/compare.h"
#include "util/masked_move.h" #include "util/masked_move.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include <ctype.h> #include <ctype.h>
#include <stdbool.h> #include <stdbool.h>

View File

@ -115,7 +115,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
v = and128(v, caseMask); v = and128(v, caseMask);
} }
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v))); u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
// mask out where we can't match // mask out where we can't match
u32 mask = (0xFFFF >> (16 - l)); u32 mask = (0xFFFF >> (16 - l));
@ -142,7 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
v = and128(v, caseMask); v = and128(v, caseMask);
} }
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v))); u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
// mask out where we can't match // mask out where we can't match
u32 buf_off = start - offset; u32 buf_off = start - offset;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -26,18 +26,20 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "mcclellancompile_accel.h" #include "accel_dfa_build_strat.h"
#include "mcclellancompile_util.h"
#include "accel.h"
#include "grey.h" #include "grey.h"
#include "nfagraph/ng_limex_accel.h" #include "nfagraph/ng_limex_accel.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/charreach.h" #include "util/charreach.h"
#include "util/container.h" #include "util/container.h"
#include "util/dump_charclass.h" #include "util/dump_charclass.h"
#include "util/verify_types.h"
#include <vector>
#include <sstream> #include <sstream>
#include <vector>
#define PATHS_LIMIT 500 #define PATHS_LIMIT 500
@ -46,14 +48,13 @@ using namespace std;
namespace ue2 { namespace ue2 {
namespace { namespace {
struct path { struct path {
vector<CharReach> reach; vector<CharReach> reach;
dstate_id_t dest = DEAD_STATE; dstate_id_t dest = DEAD_STATE;
explicit path(dstate_id_t base) : dest(base) {} explicit path(dstate_id_t base) : dest(base) {
}
};
}; };
}
static UNUSED static UNUSED
string describeClasses(const vector<CharReach> &v) { string describeClasses(const vector<CharReach> &v) {
@ -85,8 +86,8 @@ bool is_useful_path(const vector<path> &good, const path &p) {
goto next; goto next;
} }
} }
DEBUG_PRINTF("better: [%s] -> %u\n", DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
describeClasses(g.reach).c_str(), g.dest); g.dest);
return false; return false;
next:; next:;
@ -106,8 +107,7 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
static static
void extend(const raw_dfa &rdfa, const path &p, void extend(const raw_dfa &rdfa, const path &p,
map<u32, vector<path> > &all, map<u32, vector<path>> &all, vector<path> &out) {
vector<path> &out) {
dstate s = rdfa.states[p.dest]; dstate s = rdfa.states[p.dest];
if (!p.reach.empty() && p.reach.back().none()) { if (!p.reach.empty() && p.reach.back().none()) {
@ -147,17 +147,17 @@ void extend(const raw_dfa &rdfa, const path &p,
} }
DEBUG_PRINTF("----good: [%s] -> %u\n", DEBUG_PRINTF("----good: [%s] -> %u\n",
describeClasses(pp.reach).c_str(), pp.dest); describeClasses(pp.reach).c_str(), pp.dest);
all[e.first].push_back(pp); all[e.first].push_back(pp);
out.push_back(pp); out.push_back(pp);
} }
} }
static static
vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base, vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
u32 len) { dstate_id_t base, u32 len) {
vector<path> paths{ path(base) }; vector<path> paths{path(base)};
map<u32, vector<path> > all; map<u32, vector<path>> all;
all[base].push_back(path(base)); all[base].push_back(path(base));
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
vector<path> next_gen; vector<path> next_gen;
@ -170,7 +170,7 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
dump_paths(paths); dump_paths(paths);
vector<vector<CharReach> > rv; vector<vector<CharReach>> rv;
for (auto &p : paths) { for (auto &p : paths) {
rv.push_back(move(p.reach)); rv.push_back(move(p.reach));
} }
@ -181,16 +181,58 @@ static
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
u32 max_allowed_accel_offset) { u32 max_allowed_accel_offset) {
DEBUG_PRINTF("looking for accel for %hu\n", base); DEBUG_PRINTF("looking for accel for %hu\n", base);
vector<vector<CharReach> > paths = generate_paths(rdfa, base, vector<vector<CharReach>> paths =
max_allowed_accel_offset + 1); generate_paths(rdfa, base, max_allowed_accel_offset + 1);
AccelScheme as = findBestAccelScheme(paths, CharReach(), true); AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
return as; return as;
} }
static UNUSED
bool better(const AccelScheme &a, const AccelScheme &b) {
if (!a.double_byte.empty() && b.double_byte.empty()) {
return true;
}
if (!b.double_byte.empty()) {
return false;
}
return a.cr.count() < b.cr.count();
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
for (u32 i = 0; i < N_CHARS; i++) {
rv.at(rdfa.alpha_remap[i]).set(i);
}
return rv;
}
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty() &&
info.double_cr.count() < info.double_byte.size() &&
info.double_cr.count() <= 2 && !info.double_byte.empty();
}
static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
if (i != top_remap && raw.states[s].next[i] == s) {
return true;
}
}
return false;
}
static static
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa, vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
const CharReach &escape) { const CharReach &escape) {
set<u16> rv; set<u16> rv;
CharReach nonexit = ~escape; CharReach nonexit = ~escape;
for (auto i = nonexit.find_first(); i != CharReach::npos; for (auto i = nonexit.find_first(); i != CharReach::npos;
@ -201,9 +243,58 @@ vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
return vector<u16>(rv.begin(), rv.end()); return vector<u16>(rv.begin(), rv.end());
} }
static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
if (raw.start_floating != DEAD_STATE) {
DEBUG_PRINTF("has floating start\n");
return raw.start_floating;
}
DEBUG_PRINTF("looking for SDS proxy\n");
dstate_id_t s = raw.start_anchored;
if (has_self_loop(s, raw)) {
return s;
}
u16 top_remap = raw.alpha_remap[TOP];
ue2::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);
/* check if we are connected to a state with a self loop */
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t t = raw.states[s].next[i];
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
return t;
}
}
/* find a neighbour to use as a basis for looking for the sds proxy */
dstate_id_t t = DEAD_STATE;
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t tt = raw.states[s].next[i];
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
t = tt;
break;
}
}
if (t == DEAD_STATE) {
/* we were unable to find a state to use as a SDS proxy */
return DEAD_STATE;
}
s = t;
}
}
static static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base, set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
const AccelScheme &ei) { const AccelScheme &ei) {
DEBUG_PRINTF("looking for region around %hu\n", base); DEBUG_PRINTF("looking for region around %hu\n", base);
set<dstate_id_t> region = {base}; set<dstate_id_t> region = {base};
@ -236,98 +327,10 @@ set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
return region; return region;
} }
static AccelScheme
bool better(const AccelScheme &a, const AccelScheme &b) { accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
if (!a.double_byte.empty() && b.double_byte.empty()) {
return true;
}
if (!b.double_byte.empty()) {
return false;
}
return a.cr.count() < b.cr.count();
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
for (u32 i = 0; i < N_CHARS; i++) {
rv.at(rdfa.alpha_remap[i]).set(i);
}
return rv;
}
map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
const dfa_build_strat &strat,
const Grey &grey) {
map<dstate_id_t, AccelScheme> rv;
if (!grey.accelerateDFA) {
return rv;
}
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
DEBUG_PRINTF("sds %hu\n", sds_proxy);
for (size_t i = 0; i < rdfa.states.size(); i++) {
if (i == DEAD_STATE) {
continue;
}
/* Note on report acceleration states: While we can't accelerate while we
* are spamming out callbacks, the QR code paths don't raise reports
* during scanning so they can accelerate report states. */
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
continue;
}
size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
: ACCEL_DFA_MAX_STOP_CHAR;
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
AccelScheme ei = strat.find_escape_strings(i);
if (ei.cr.count() > single_limit) {
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
ei.cr.count());
continue;
}
DEBUG_PRINTF("state %zu should be accelerable %zu\n",
i, ei.cr.count());
rv[i] = ei;
}
/* provide acceleration states to states in the region of sds */
if (contains(rv, sds_proxy)) {
AccelScheme sds_ei = rv[sds_proxy];
sds_ei.double_byte.clear(); /* region based on single byte scheme
* may differ from double byte */
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
sds_ei.cr.count());
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
for (auto s : sds_region) {
if (!contains(rv, s) || better(sds_ei, rv[s])) {
rv[s] = sds_ei;
}
}
}
return rv;
}
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}
AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
u32 max_allowed_accel_offset) {
AccelScheme rv; AccelScheme rv;
const raw_dfa &rdfa = get_raw();
rv.cr.clear(); rv.cr.clear();
rv.offset = 0; rv.offset = 0;
const dstate &raw = rdfa.states[this_idx]; const dstate &raw = rdfa.states[this_idx];
@ -354,7 +357,7 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
DEBUG_PRINTF("leads to report\n"); DEBUG_PRINTF("leads to report\n");
outs2_broken = true; /* cannot accelerate over reports */ outs2_broken = true; /* cannot accelerate over reports */
continue; continue;
} }
succs[next_id] |= cr_i; succs[next_id] |= cr_i;
@ -402,14 +405,12 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
DEBUG_PRINTF("broken %d\n", outs2_broken); DEBUG_PRINTF("broken %d\n", outs2_broken);
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
&& this_idx == rdfa.start_floating this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
&& this_idx != DEAD_STATE) {
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
auto offset = look_for_offset_accel(rdfa, this_idx, auto offset =
max_allowed_accel_offset); look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
rv.cr.count());
if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
DEBUG_PRINTF("using offset accel\n"); DEBUG_PRINTF("using offset accel\n");
rv = offset; rv = offset;
@ -419,4 +420,172 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
return rv; return rv;
} }
void
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
const AccelScheme &info,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.double_offset);
accel->generic.offset = verify_u8(info.offset);
if (double_byte_ok(info) && info.double_cr.none() &&
info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}
if (double_byte_ok(info) && info.double_cr.none() &&
(info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true;
assert(!info.double_byte.empty());
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC ||
(p.second & CASE_CLEAR) != secondC) {
ok = false;
break;
}
}
if (ok) {
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}
u8 m1;
u8 m2;
if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1;
accel->dverm.m2 = m2;
DEBUG_PRINTF(
"building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
accel->dverm.c1, accel->dverm.c2);
return;
}
}
if (double_byte_ok(info) &&
shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}
if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n",
this_idx);
return;
}
if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM;
accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return;
}
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return;
}
if (info.cr.count() > max_floating_stop_char()) {
accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return;
}
accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}
assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
} }
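The shared buildAccel() above lowers an AccelScheme into the runtime AccelAux union, preferring double-vermicelli, then double-shufti, red tape, vermicelli, shufti and finally truffle. A minimal usage sketch, with the strategy object, state id and scheme assumed to be in scope:

AccelAux aux;
memset(&aux, 0, sizeof(aux));              // <cstring>
strat.buildAccel(state_id, scheme, &aux);  // strat: an accel_dfa_build_strat
// aux.accel_type now names the chosen scheme (ACCEL_DVERM, ACCEL_SHUFTI,
// ACCEL_TRUFFLE, ... or ACCEL_NONE if the reach was too broad).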
map<dstate_id_t, AccelScheme>
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
map<dstate_id_t, AccelScheme> rv;
raw_dfa &rdfa = get_raw();
if (!grey.accelerateDFA) {
return rv;
}
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
DEBUG_PRINTF("sds %hu\n", sds_proxy);
for (size_t i = 0; i < rdfa.states.size(); i++) {
if (i == DEAD_STATE) {
continue;
}
/* Note on report acceleration states: While we can't accelerate while we
 * are spamming out callbacks, the QR code paths don't raise reports
 * during scanning so they can accelerate report states. */
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
continue;
}
size_t single_limit =
i == sds_proxy ? max_floating_stop_char() : max_stop_char();
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
AccelScheme ei = find_escape_strings(i);
if (ei.cr.count() > single_limit) {
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
ei.cr.count());
continue;
}
DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
rv[i] = ei;
}
/* provide acceleration states to states in the region of sds */
if (contains(rv, sds_proxy)) {
AccelScheme sds_ei = rv[sds_proxy];
sds_ei.double_byte.clear(); /* region based on single byte scheme
* may differ from double byte */
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
sds_ei.cr.count());
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
for (auto s : sds_region) {
if (!contains(rv, s) || better(sds_ei, rv[s])) {
rv[s] = sds_ei;
}
}
}
return rv;
}
};

src/nfa/accel_dfa_build_strat.h Executable file
View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ACCEL_DFA_BUILD_STRAT_H
#define ACCEL_DFA_BUILD_STRAT_H
#include "rdfa.h"
#include "dfa_build_strat.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include <map>
namespace ue2 {
class ReportManager;
struct Grey;
class accel_dfa_build_strat : public dfa_build_strat {
public:
explicit accel_dfa_build_strat(const ReportManager &rm_in)
: dfa_build_strat(rm_in) {}
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
virtual size_t accelSize(void) const = 0;
virtual u32 max_allowed_offset_accel() const = 0;
virtual u32 max_stop_char() const = 0;
virtual u32 max_floating_stop_char() const = 0;
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out);
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
};
} // namespace ue2
#endif // ACCEL_DFA_BUILD_STRAT_H
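A sketch of how a concrete DFA strategy slots into this hierarchy; the class name and limit values are hypothetical, and the real strategies in this change (e.g. for McClellan and Sheng) supply their own overrides:

// Hypothetical subclass, for illustration only.
class toy_dfa_build_strat : public accel_dfa_build_strat {
public:
    toy_dfa_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
        : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
    raw_dfa &get_raw() const override { return rdfa; }
    size_t accelSize(void) const override {
        return sizeof(AccelAux); // assumes nfa/accel.h is included
    }
    u32 max_allowed_offset_accel() const override { return 16; }  // illustrative
    u32 max_stop_char() const override { return 160; }            // illustrative
    u32 max_floating_stop_char() const override { return 192; }   // illustrative
    // gatherReports(), inherited as pure virtual from dfa_build_strat, is
    // omitted here; a real strategy must implement it as well.
private:
    raw_dfa &rdfa;
};

find_escape_strings(), buildAccel() and getAccelInfo() come with shared default implementations, so a subclass mainly describes its limits and exposes its raw_dfa.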

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -37,30 +37,26 @@
/** \brief The type for an NFA callback. /** \brief The type for an NFA callback.
* *
* This is a function that takes as arguments the current offset where the * This is a function that takes as arguments the current start and end offsets
* match occurs, the id of the match and the context pointer that was passed * where the match occurs, the id of the match and the context pointer that was
* into the NFA API function that executed the NFA. * passed into the NFA API function that executed the NFA.
* *
* The offset where the match occurs will be the offset after the character * The start offset is the "start of match" (SOM) offset for the match. It is
* that caused the match. Thus, if we have a buffer containing 'abc', then a * only provided by engines that natively support SOM tracking (e.g. Gough).
* pattern that matches an empty string will have an offset of 0, a pattern *
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc' * The end offset will be the offset after the character that caused the match.
* will have an offset of 3, which will be a value that is 'beyond' the size of * Thus, if we have a buffer containing 'abc', then a pattern that matches an
* the buffer. That is, if we have n characters in the buffer, there are n+1 * empty string will have an offset of 0, a pattern that matches 'a' will have
* different potential offsets for matches. * an offset of 1, and a pattern that matches 'abc' will have an offset of 3,
* which will be a value that is 'beyond' the size of the buffer. That is, if
* we have n characters in the buffer, there are n+1 different potential
* offsets for matches.
* *
* This function should return an int - currently the possible return values * This function should return an int - currently the possible return values
* are 0, which means 'stop running the engine' or non-zero, which means * are 0, which means 'stop running the engine' or non-zero, which means
* 'continue matching'. * 'continue matching'.
*/ */
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context); typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);
/** \brief The type for an NFA callback which also tracks start of match.
*
* see \ref NfaCallback
*/
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
void *context);
/** /**
* standard \ref NfaCallback return value indicating that engine execution * standard \ref NfaCallback return value indicating that engine execution
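A sketch of a match callback written against the unified signature; the function name is illustrative, and engines without native SOM tracking (such as Castle below) pass 0 as the start offset:

#include "callback.h"
#include "ue2common.h"

/* Illustrative only. */
static int toyCallback(u64a start, u64a end, ReportID id, void *context) {
    (void)context;
    DEBUG_PRINTF("report %u at [%llu, %llu)\n", id, start, end);
    return MO_CONTINUE_MATCHING; /* non-zero means 'continue matching' */
}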

View File

@ -98,7 +98,7 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
if (match == REPEAT_MATCH) { if (match == REPEAT_MATCH) {
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
subIdx, sub->report); subIdx, sub->report);
if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) { if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
} }
@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state,
i = mmbit_iterate(matching, c->numRepeats, i)) { i = mmbit_iterate(matching, c->numRepeats, i)) {
const struct SubCastle *sub = getSubCastle(c, i); const struct SubCastle *sub = getSubCastle(c, i);
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i); DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) { if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
DEBUG_PRINTF("caller told us to halt\n"); DEBUG_PRINTF("caller told us to halt\n");
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
return castleInAccept(c, q, report, q_cur_offset(q)); return castleInAccept(c, q, report, q_cur_offset(q));
} }
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
const u64a offset = q_cur_offset(q);
DEBUG_PRINTF("offset=%llu\n", offset);
if (c->exclusive) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
DEBUG_PRINTF("subcastle %u\n", activeIdx);
const struct SubCastle *sub = getSubCastle(c, activeIdx);
if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
return 1;
}
}
}
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
DEBUG_PRINTF("subcastle %u\n", i);
const struct SubCastle *sub = getSubCastle(c, i);
if (subCastleInAccept(c, q, sub->report, offset, i)) {
return 1;
}
}
}
return 0;
}
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
assert(n && q); assert(n && q);
assert(n->type == CASTLE_NFA_0); assert(n->type == CASTLE_NFA_0);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q); char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset, char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -48,7 +48,8 @@
namespace ue2 { namespace ue2 {
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) { void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
UNUSED const std::string &base) {
// No GraphViz output for Castles. // No GraphViz output for Castles.
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -32,12 +32,14 @@
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
#include <cstdio> #include <cstdio>
#include <string>
struct NFA; struct NFA;
namespace ue2 { namespace ue2 {
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file); void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file); void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
} // namespace ue2 } // namespace ue2

src/nfa/dfa_build_strat.cpp Executable file
View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dfa_build_strat.h"
namespace ue2 {
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
raw_report_info::~raw_report_info() {}
dfa_build_strat::~dfa_build_strat() {}
raw_dfa::~raw_dfa() {}
} // namespace ue2

src/nfa/dfa_build_strat.h Normal file
View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DFA_BUILD_STRAT_H
#define DFA_BUILD_STRAT_H
#include "rdfa.h"
#include "ue2common.h"
#include <memory>
#include <vector>
struct NFA;
namespace ue2 {
class ReportManager;
struct raw_report_info {
virtual ~raw_report_info();
virtual u32 getReportListSize() const = 0; /* in bytes */
virtual size_t size() const = 0; /* number of lists */
virtual void fillReportLists(NFA *n, size_t base_offset,
std::vector<u32> &ro /* out */) const = 0;
};
class dfa_build_strat {
public:
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
virtual ~dfa_build_strat();
virtual raw_dfa &get_raw() const = 0;
virtual std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0;
protected:
const ReportManager &rm;
};
} // namespace ue2
#endif // DFA_BUILD_STRAT_H

View File

@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
} }
static really_inline static really_inline
char doReports(SomNfaCallback cb, void *ctxt, const struct mcclellan *m, char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m,
const struct gough_som_info *som, u16 s, u64a loc, const struct gough_som_info *som, u16 s, u64a loc,
char eod, u16 * const cached_accept_state, char eod, u16 * const cached_accept_state,
u32 * const cached_accept_id, u32 * const cached_accept_som) { u32 * const cached_accept_id, u32 * const cached_accept_som) {
@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
static really_inline static really_inline
char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som, char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
u16 *state, const u8 *buf, size_t len, u64a offAdj, u16 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **c_final, NfaCallback cb, void *ctxt, const u8 **c_final,
enum MatchMode mode) { enum MatchMode mode) {
assert(ISALIGNED_N(state, 2)); assert(ISALIGNED_N(state, 2));
@ -461,7 +461,7 @@ with_accel:
static really_inline static really_inline
char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som, char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
u8 *state, const u8 *buf, size_t len, u64a offAdj, u8 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **c_final, NfaCallback cb, void *ctxt, const u8 **c_final,
enum MatchMode mode) { enum MatchMode mode) {
u8 s = *state; u8 s = *state;
const u8 *c = buf, *c_end = buf + len; const u8 *c = buf, *c_end = buf + len;
@ -595,7 +595,7 @@ with_accel:
static never_inline static never_inline
char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
u8 *state, const u8 *buf, size_t len, u64a offAdj, u8 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **final_point, NfaCallback cb, void *ctxt, const u8 **final_point,
enum MatchMode mode) { enum MatchMode mode) {
return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
mode); mode);
@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
static never_inline static never_inline
char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som, char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
u16 *state, const u8 *buf, size_t len, u64a offAdj, u16 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **final_point, NfaCallback cb, void *ctxt, const u8 **final_point,
enum MatchMode mode) { enum MatchMode mode) {
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
mode); mode);
@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) {
static really_inline static really_inline
char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, SomNfaCallback cb, void *context, const u8 *hend, NfaCallback cb, void *context,
struct mq *q, s64a end, enum MatchMode mode) { struct mq *q, s64a end, enum MatchMode mode) {
DEBUG_PRINTF("enter\n"); DEBUG_PRINTF("enter\n");
struct gough_som_info *som = getSomInfo(q->state); struct gough_som_info *som = getSomInfo(q->state);
@ -755,7 +755,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
static really_inline static really_inline
char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, SomNfaCallback cb, void *context, const u8 *hend, NfaCallback cb, void *context,
struct mq *q, s64a end, enum MatchMode mode) { struct mq *q, s64a end, enum MatchMode mode) {
struct gough_som_info *som = getSomInfo(q->state); struct gough_som_info *som = getSomInfo(q->state);
assert(n->type == GOUGH_NFA_16); assert(n->type == GOUGH_NFA_16);
@ -887,7 +887,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_8); assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -899,7 +899,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_16); assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -911,7 +911,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_8); assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -923,7 +923,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_16); assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -935,7 +935,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_8); assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -952,7 +952,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == GOUGH_NFA_16); assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
@ -994,7 +994,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *ctxt = q->context; void *ctxt = q->context;
u8 s = *(u8 *)q->state; u8 s = *(u8 *)q->state;
u64a offset = q_cur_offset(q); u64a offset = q_cur_offset(q);
@ -1016,7 +1016,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
SomNfaCallback cb = q->som_cb; NfaCallback cb = q->cb;
void *ctxt = q->context; void *ctxt = q->context;
u16 s = *(u16 *)q->state; u16 s = *(u16 *)q->state;
const struct mstate_aux *aux = get_aux(m, s); const struct mstate_aux *aux = get_aux(m, s);
@ -1048,10 +1048,18 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
return nfaExecMcClellan16_inAccept(n, report, q); return nfaExecMcClellan16_inAccept(n, report, q);
} }
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
return nfaExecMcClellan8_inAnyAccept(n, q);
}
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
return nfaExecMcClellan16_inAnyAccept(n, q);
}
static static
char goughCheckEOD(const struct NFA *nfa, u16 s, char goughCheckEOD(const struct NFA *nfa, u16 s,
const struct gough_som_info *som, const struct gough_som_info *som,
u64a offset, SomNfaCallback cb, void *ctxt) { u64a offset, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mstate_aux *aux = get_aux(m, s); const struct mstate_aux *aux = get_aux(m, s);
@ -1062,21 +1070,19 @@ char goughCheckEOD(const struct NFA *nfa, u16 s,
} }
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset, UNUSED const char *streamState, u64a offset,
UNUSED NfaCallback callback, NfaCallback callback, void *context) {
SomNfaCallback som_callback, void *context) {
const struct gough_som_info *som = getSomInfoConst(state); const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback, return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback,
context); context);
} }
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset, UNUSED const char *streamState, u64a offset,
UNUSED NfaCallback callback, NfaCallback callback, void *context) {
SomNfaCallback som_callback, void *context) {
assert(ISALIGNED_N(state, 8)); assert(ISALIGNED_N(state, 8));
const struct gough_som_info *som = getSomInfoConst(state); const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback, return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback,
context); context);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -39,13 +39,13 @@ struct mq;
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context);
void *context);
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset, char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context);
void *context);
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset, char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);

View File

@ -79,9 +79,9 @@ namespace {
class gough_build_strat : public mcclellan_build_strat { class gough_build_strat : public mcclellan_build_strat {
public: public:
gough_build_strat( gough_build_strat(
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm, raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
const map<dstate_id_t, gough_accel_state_info> &accel_info) const map<dstate_id_t, gough_accel_state_info> &accel_info)
: mcclellan_build_strat(r, rm), rdfa(r), gg(g), : mcclellan_build_strat(r, rm_in), rdfa(r), gg(g),
accel_gough_info(accel_info) {} accel_gough_info(accel_info) {}
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */, unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
vector<u32> &reports_eod /* out */, vector<u32> &reports_eod /* out */,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
fprintf(f, "\n"); fprintf(f, "\n");
} }
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) { void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == GOUGH_NFA_8); assert(nfa->type == GOUGH_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f); dumpTextReverse(nfa, f);
} }
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) { void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == GOUGH_NFA_16); assert(nfa->type == GOUGH_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -33,12 +33,16 @@
#include "ue2common.h" #include "ue2common.h"
#include <string>
struct NFA; struct NFA;
namespace ue2 { namespace ue2 {
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file); void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file); const std::string &base);
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file); void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file); void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -293,7 +293,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
} }
DEBUG_PRINTF("firing match at %llu\n", i); DEBUG_PRINTF("firing match at %llu\n", i);
if (cb(i, l->report, ctx) == MO_HALT_MATCHING) { if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
} }
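
Reading the hunk above together with the unified-callback work in this release, the natural inference is that NfaCallback now takes a start offset in addition to the end offset, with engines that do not track start-of-match passing 0 in the first slot. The sketch below is written under that assumption only; the typedef, the MO_* values and exampleCallback are illustrative stand-ins, not the library's definitions.

typedef unsigned long long u64a;   /* sketch-local stand-ins for ue2 types */
typedef unsigned int ReportID;
#define MO_CONTINUE_MATCHING 1     /* placeholder values for this sketch */
#define MO_HALT_MATCHING     0

/* Presumed shape of the unified callback: (start, end, report, context). */
typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);

static int exampleCallback(u64a start, u64a end, ReportID id, void *context) {
    (void)start; (void)context;    /* start is 0 for engines without SOM */
    /* Return MO_HALT_MATCHING to stop the scan early. */
    return (end > 1024 && id == 42) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING;
}
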

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset, char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report, char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset, char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report, char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset, char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report, char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset, char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report, char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset, char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
const struct lbr_common *l = getImplNfa(nfa); const struct lbr_common *l = getImplNfa(nfa);
u64a offset = q_cur_offset(q); u64a offset = q_cur_offset(q);
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
q->cb(offset, l->report, q->context); q->cb(0, offset, l->report, q->context);
return 0; return 0;
} }
@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
return lbrInAccept(l, lstate, q->streamState, offset, report); return lbrInAccept(l, lstate, q->streamState, offset, report);
} }
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
assert(nfa && q);
assert(isLbrType(nfa->type));
DEBUG_PRINTF("entry\n");
const struct lbr_common *l = getImplNfa(nfa);
return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
}
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa, char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
struct mq *q) { struct mq *q) {
assert(nfa && q); assert(nfa && q);
@ -206,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
if (q->report_current) { if (q->report_current) {
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
int rv = q->cb(q_cur_offset(q), l->report, q->context); int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
q->report_current = 0; q->report_current = 0;
if (rv == MO_HALT_MATCHING) { if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -49,23 +49,28 @@
namespace ue2 { namespace ue2 {
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl // No impl
} }
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl // No impl
} }
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl // No impl
} }
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl // No impl
} }
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl // No impl
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -32,16 +32,22 @@
#ifdef DUMP_SUPPORT #ifdef DUMP_SUPPORT
#include <cstdio> #include <cstdio>
#include <string>
struct NFA; struct NFA;
namespace ue2 { namespace ue2 {
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file); void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file); const std::string &base);
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file); void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file); const std::string &base);
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file); void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);

View File

@ -30,6 +30,7 @@
#define LIMEX_H #define LIMEX_H
#ifdef __cplusplus #ifdef __cplusplus
#include <string>
extern "C" extern "C"
{ {
#endif #endif
@ -40,7 +41,8 @@ extern "C"
#define GENERATE_NFA_DUMP_DECL(gf_name) \ #define GENERATE_NFA_DUMP_DECL(gf_name) \
} /* extern "C" */ \ } /* extern "C" */ \
namespace ue2 { \ namespace ue2 { \
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \ void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
const std::string &base); \
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \ void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
} /* namespace ue2 */ \ } /* namespace ue2 */ \
extern "C" { extern "C" {
@ -52,14 +54,14 @@ extern "C"
#define GENERATE_NFA_DECL(gf_name) \ #define GENERATE_NFA_DECL(gf_name) \
char gf_name##_testEOD(const struct NFA *nfa, const char *state, \ char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
const char *streamState, u64a offset, \ const char *streamState, u64a offset, \
NfaCallback callback, SomNfaCallback som_cb, \ NfaCallback callback, void *context); \
void *context); \
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
char gf_name##_inAccept(const struct NFA *n, ReportID report, \ char gf_name##_inAccept(const struct NFA *n, ReportID report, \
struct mq *q); \ struct mq *q); \
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \ char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \ char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
void *state, u8 key); \ void *state, u8 key); \
@ -74,41 +76,11 @@ extern "C"
struct mq *q, s64a loc); \ struct mq *q, s64a loc); \
GENERATE_NFA_DUMP_DECL(gf_name) GENERATE_NFA_DUMP_DECL(gf_name)
GENERATE_NFA_DECL(nfaExecLimEx32_1) GENERATE_NFA_DECL(nfaExecLimEx32)
GENERATE_NFA_DECL(nfaExecLimEx32_2) GENERATE_NFA_DECL(nfaExecLimEx128)
GENERATE_NFA_DECL(nfaExecLimEx32_3) GENERATE_NFA_DECL(nfaExecLimEx256)
GENERATE_NFA_DECL(nfaExecLimEx32_4) GENERATE_NFA_DECL(nfaExecLimEx384)
GENERATE_NFA_DECL(nfaExecLimEx32_5) GENERATE_NFA_DECL(nfaExecLimEx512)
GENERATE_NFA_DECL(nfaExecLimEx32_6)
GENERATE_NFA_DECL(nfaExecLimEx32_7)
GENERATE_NFA_DECL(nfaExecLimEx128_1)
GENERATE_NFA_DECL(nfaExecLimEx128_2)
GENERATE_NFA_DECL(nfaExecLimEx128_3)
GENERATE_NFA_DECL(nfaExecLimEx128_4)
GENERATE_NFA_DECL(nfaExecLimEx128_5)
GENERATE_NFA_DECL(nfaExecLimEx128_6)
GENERATE_NFA_DECL(nfaExecLimEx128_7)
GENERATE_NFA_DECL(nfaExecLimEx256_1)
GENERATE_NFA_DECL(nfaExecLimEx256_2)
GENERATE_NFA_DECL(nfaExecLimEx256_3)
GENERATE_NFA_DECL(nfaExecLimEx256_4)
GENERATE_NFA_DECL(nfaExecLimEx256_5)
GENERATE_NFA_DECL(nfaExecLimEx256_6)
GENERATE_NFA_DECL(nfaExecLimEx256_7)
GENERATE_NFA_DECL(nfaExecLimEx384_1)
GENERATE_NFA_DECL(nfaExecLimEx384_2)
GENERATE_NFA_DECL(nfaExecLimEx384_3)
GENERATE_NFA_DECL(nfaExecLimEx384_4)
GENERATE_NFA_DECL(nfaExecLimEx384_5)
GENERATE_NFA_DECL(nfaExecLimEx384_6)
GENERATE_NFA_DECL(nfaExecLimEx384_7)
GENERATE_NFA_DECL(nfaExecLimEx512_1)
GENERATE_NFA_DECL(nfaExecLimEx512_2)
GENERATE_NFA_DECL(nfaExecLimEx512_3)
GENERATE_NFA_DECL(nfaExecLimEx512_4)
GENERATE_NFA_DECL(nfaExecLimEx512_5)
GENERATE_NFA_DECL(nfaExecLimEx512_6)
GENERATE_NFA_DECL(nfaExecLimEx512_7)
#undef GENERATE_NFA_DECL #undef GENERATE_NFA_DECL
#undef GENERATE_NFA_DUMP_DECL #undef GENERATE_NFA_DUMP_DECL

View File

@ -35,6 +35,7 @@
#include "accel.h" #include "accel.h"
#include "limex_internal.h" #include "limex_internal.h"
#include "limex_limits.h" #include "limex_limits.h"
#include "limex_shuffle.h"
#include "nfa_internal.h" #include "nfa_internal.h"
#include "shufti.h" #include "shufti.h"
#include "truffle.h" #include "truffle.h"
@ -44,10 +45,7 @@
#include "ue2common.h" #include "ue2common.h"
#include "vermicelli.h" #include "vermicelli.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/shuffle.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include "util/shuffle_ssse3.h"
static really_inline static really_inline
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
@ -80,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i, const union AccelAux *aux, const u8 *input, size_t i,
size_t end) { size_t end) {
u32 idx = shuffleDynamic32(s, accel); u32 idx = packedExtract32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end); return accelScanWrapper(accelTable, aux, input, idx, i, end);
} }
@ -92,7 +90,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n"); DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
m128 accelPerm = limex->accelPermute; m128 accelPerm = limex->accelPermute;
m128 accelComp = limex->accelCompare; m128 accelComp = limex->accelCompare;
idx = shufflePshufb128(s, accelPerm, accelComp); idx = packedExtract128(s, accelPerm, accelComp);
return accelScanWrapper(accelTable, aux, input, idx, i, end); return accelScanWrapper(accelTable, aux, input, idx, i, end);
} }
@ -105,17 +103,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
m256 accelPerm = limex->accelPermute; m256 accelPerm = limex->accelPermute;
m256 accelComp = limex->accelCompare; m256 accelComp = limex->accelCompare;
#if !defined(__AVX2__) #if !defined(__AVX2__)
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo); u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi); u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
#else
// TODO: learn you some avx2 shuffles for great good
u32 idx1 = shufflePshufb128(movdq_lo(s), movdq_lo(accelPerm),
movdq_lo(accelComp));
u32 idx2 = shufflePshufb128(movdq_hi(s), movdq_hi(accelPerm),
movdq_hi(accelComp));
#endif
assert((idx1 & idx2) == 0); // should be no shared bits assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2; idx = idx1 | idx2;
#else
idx = packedExtract256(s, accelPerm, accelComp);
#endif
return accelScanWrapper(accelTable, aux, input, idx, i, end); return accelScanWrapper(accelTable, aux, input, idx, i, end);
} }
@ -127,9 +121,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n"); DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
m384 accelPerm = limex->accelPermute; m384 accelPerm = limex->accelPermute;
m384 accelComp = limex->accelCompare; m384 accelComp = limex->accelCompare;
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo); u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = shufflePshufb128(s.mid, accelPerm.mid, accelComp.mid); u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid);
u32 idx3 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi); u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2 & idx3) == 0); // should be no shared bits assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
idx = idx1 | idx2 | idx3; idx = idx1 | idx2 | idx3;
return accelScanWrapper(accelTable, aux, input, idx, i, end); return accelScanWrapper(accelTable, aux, input, idx, i, end);
@ -144,21 +138,17 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
m512 accelPerm = limex->accelPermute; m512 accelPerm = limex->accelPermute;
m512 accelComp = limex->accelCompare; m512 accelComp = limex->accelCompare;
#if !defined(__AVX2__) #if !defined(__AVX2__)
u32 idx1 = shufflePshufb128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
u32 idx2 = shufflePshufb128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
u32 idx3 = shufflePshufb128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
u32 idx4 = shufflePshufb128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
#else
u32 idx1 = shufflePshufb128(movdq_lo(s.lo), movdq_lo(accelPerm.lo),
movdq_lo(accelComp.lo));
u32 idx2 = shufflePshufb128(movdq_hi(s.lo), movdq_hi(accelPerm.lo),
movdq_hi(accelComp.lo));
u32 idx3 = shufflePshufb128(movdq_lo(s.hi), movdq_lo(accelPerm.hi),
movdq_lo(accelComp.hi));
u32 idx4 = shufflePshufb128(movdq_hi(s.hi), movdq_hi(accelPerm.hi),
movdq_hi(accelComp.hi));
#endif
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
idx = idx1 | idx2 | idx3 | idx4; idx = idx1 | idx2 | idx3 | idx4;
#else
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
#endif
return accelScanWrapper(accelTable, aux, input, idx, i, end); return accelScanWrapper(accelTable, aux, input, idx, i, end);
} }
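
For readers unfamiliar with the packed-extract helpers now called above: the idea is to gather the bytes holding accelerable state bits with PSHUFB, compare against an expected pattern, and collapse the result to a small index with a byte movemask. The standalone SSSE3 sketch below is illustrative only, not the implementation in limex_shuffle.h; the permute/compare masks would come from the compile-time accel info, and each selected lane is assumed to have a non-zero compare byte.

#include <emmintrin.h>   // SSE2
#include <tmmintrin.h>   // SSSE3 (_mm_shuffle_epi8)
#include <cstdint>

// Illustrative 128-bit packed extract: returns one bit per "interesting"
// byte of 'state', packed into the low bits of the result.
static inline uint32_t packedExtractSketch(__m128i state, __m128i permute,
                                           __m128i compare) {
    __m128i gathered = _mm_shuffle_epi8(state, permute);  // gather bytes
    __m128i masked   = _mm_and_si128(gathered, compare);  // keep wanted bits
    __m128i hits     = _mm_cmpeq_epi8(masked, compare);   // 0xff where set
    return (uint32_t)_mm_movemask_epi8(hits);             // one bit per byte
}
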

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -40,6 +40,7 @@
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE) #define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE) #define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) #define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
#define INITIAL_FN JOIN(moNfaInitial, SIZE) #define INITIAL_FN JOIN(moNfaInitial, SIZE)
@ -118,7 +119,7 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
if (TESTBIT_STATE(s, a->state)) { if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset); a->state, a->externalId, offset);
int rv = callback(offset, a->externalId, context); int rv = callback(0, offset, a->externalId, context);
if (unlikely(rv == MO_HALT_MATCHING)) { if (unlikely(rv == MO_HALT_MATCHING)) {
return 1; return 1;
} }
@ -149,7 +150,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
if (TESTBIT_STATE(s, a->state)) { if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset); a->state, a->externalId, offset);
int rv = callback(offset, a->externalId, context); int rv = callback(0, offset, a->externalId, context);
if (unlikely(rv == MO_HALT_MATCHING)) { if (unlikely(rv == MO_HALT_MATCHING)) {
return 1; return 1;
} }
@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
return 0; return 0;
} }
static really_inline
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
union RepeatControl *repeat_ctrl, char *repeat_state,
u64a offset) {
assert(limex);
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
STATE_T accstate = AND_STATE(state, acceptMask);
// Are we in an accept state?
if (ISZERO_STATE(accstate)) {
DEBUG_PRINTF("no accept states are on\n");
return 0;
}
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
return ISNONZERO_STATE(accstate);
}
#undef TESTEOD_FN #undef TESTEOD_FN
#undef TESTEOD_REV_FN #undef TESTEOD_REV_FN
#undef REPORTCURRENT_FN #undef REPORTCURRENT_FN
#undef EXPIRE_ESTATE_FN #undef EXPIRE_ESTATE_FN
#undef LIMEX_INACCEPT_FN #undef LIMEX_INACCEPT_FN
#undef LIMEX_INANYACCEPT_FN
#undef INITIAL_FN #undef INITIAL_FN
#undef TOP_FN #undef TOP_FN
#undef TOPN_FN #undef TOPN_FN

View File

@ -167,12 +167,10 @@ struct build_info {
limex_accel_info accel; limex_accel_info accel;
}; };
#define LAST_LIMEX_NFA LIMEX_NFA_512
// Constants for scoring mechanism // Constants for scoring mechanism
const int SHIFT_COST = 10; // limex: cost per shift mask
#define LAST_LIMEX_NFA LIMEX_NFA_512_7
const int LIMEX_INITIAL_SCORE = 2000;
const int SHIFT_COST = 20; // limex: cost per shift mask
const int EXCEPTION_COST = 4; // limex: per exception const int EXCEPTION_COST = 4; // limex: per exception
template<NFAEngineType t> struct NFATraits { }; template<NFAEngineType t> struct NFATraits { };
@ -261,6 +259,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
} }
} }
template<class Mask>
bool isMaskZero(Mask &m) {
u8 *m8 = (u8 *)&m;
for (u32 i = 0; i < sizeof(m); i++) {
if (m8[i]) {
return false;
}
}
return true;
}
// Sets an entire byte in a mask to the given value // Sets an entire byte in a mask to the given value
template<class Mask> template<class Mask>
void maskSetByte(Mask &m, const unsigned int idx, const char val) { void maskSetByte(Mask &m, const unsigned int idx, const char val) {
@ -336,7 +345,7 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
} }
struct AccelBuild { struct AccelBuild {
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0), AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v; NFAVertex v;
u32 state; u32 state;
@ -999,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
// Suffixes and outfixes can mask out leaf states, which should all be // Suffixes and outfixes can mask out leaf states, which should all be
// accepts. Right now we can only do this when there is nothing in initDs, // accepts. Right now we can only do this when there is nothing in initDs,
// as we switch that on unconditionally in the expand call. // as we switch that on unconditionally in the expand call.
if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) { if (!inspects_states_for_accepts(h)
&& !hasInitDsStates(h, args.state_ids)) {
NFAStateSet nonleaf(args.num_states); NFAStateSet nonleaf(args.num_states);
for (const auto &e : edges_range(h)) { for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h)); u32 from = args.state_ids.at(source(e, h));
@ -1162,12 +1172,13 @@ u32 getReportListIndex(const flat_set<ReportID> &reports,
} }
static static
void buildExceptionMap(const build_info &args, u32 buildExceptionMap(const build_info &args,
const ue2::unordered_set<NFAEdge> &exceptional, const ue2::unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32> > &exceptionMap, map<ExceptionProto, vector<u32> > &exceptionMap,
vector<ReportID> &exceptionReports) { vector<ReportID> &exceptionReports) {
const NGHolder &h = args.h; const NGHolder &h = args.h;
const u32 num_states = args.num_states; const u32 num_states = args.num_states;
u32 exceptionCount = 0;
ue2::unordered_map<NFAVertex, u32> pos_trigger; ue2::unordered_map<NFAVertex, u32> pos_trigger;
ue2::unordered_map<NFAVertex, u32> tug_trigger; ue2::unordered_map<NFAVertex, u32> tug_trigger;
@ -1297,10 +1308,13 @@ void buildExceptionMap(const build_info &args,
assert(e.succ_states.size() == num_states); assert(e.succ_states.size() == num_states);
assert(e.squash_states.size() == num_states); assert(e.squash_states.size() == num_states);
exceptionMap[e].push_back(i); exceptionMap[e].push_back(i);
exceptionCount++;
} }
} }
DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size()); DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount,
exceptionMap.size());
return exceptionCount;
} }
static static
@ -1315,6 +1329,92 @@ u32 depth_to_u32(const depth &d) {
return d_val; return d_val;
} }
static
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
const build_info &args, u32 maxShift) {
NFAVertex from = source(e, h);
NFAVertex to = target(e, h);
u32 f = args.state_ids.at(from);
u32 t = args.state_ids.at(to);
if (!isLimitedTransition(f, t, maxShift)) {
return true;
}
// All transitions out of a tug trigger are exceptional.
if (contains(args.tugs, from)) {
return true;
}
return false;
}
static
u32 findMaxVarShift(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h;
u32 shiftMask = 0;
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
shiftMask |= (1UL << (to - from));
}
}
u32 maxVarShift = 0;
for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) {
maxVarShift = findAndClearLSB_32(&shiftMask);
}
return maxVarShift;
}
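
As a concrete illustration of findMaxVarShift(): if the limited transitions use shift distances {0, 1, 3, 6} and two shift masks are allowed, the two smallest distances are kept and 1 is reported as the largest covered shift. The throwaway sketch below uses invented distances, and findAndClearLSB32 is a local stand-in for the real findAndClearLSB_32.

#include <cstdint>
#include <cstdio>

// Return the index of the lowest set bit and clear it. Uses a GCC/Clang
// builtin; *v must be non-zero.
static uint32_t findAndClearLSB32(uint32_t *v) {
    uint32_t bit = (uint32_t)__builtin_ctz(*v);
    *v &= *v - 1;
    return bit;
}

int main() {
    uint32_t shiftMask = (1u << 0) | (1u << 1) | (1u << 3) | (1u << 6);
    uint32_t nShifts = 2, maxVarShift = 0;
    for (uint32_t cnt = 0; shiftMask != 0 && cnt < nShifts; cnt++) {
        maxVarShift = findAndClearLSB32(&shiftMask);
    }
    std::printf("largest covered shift: %u\n", maxVarShift); // prints 1
    return 0;
}
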
static
int getLimexScore(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h;
u32 maxVarShift = nShifts;
int score = 0;
score += SHIFT_COST * nShifts;
maxVarShift = findMaxVarShift(args, nShifts);
NFAStateSet exceptionalStates(args.num_states);
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args, maxVarShift)) {
exceptionalStates.set(from);
}
}
score += EXCEPTION_COST * exceptionalStates.count();
return score;
}
// This function finds the best shift scheme, i.e. the one with the lowest
// (cheapest) score, where the score is SHIFT_COST per shift mask plus
// EXCEPTION_COST per exceptional state.
// Returns the number of shifts for that scheme (and its score via
// bestScoreRet); returns zero if no appropriate scheme was found.
static
u32 findBestNumOfVarShifts(const build_info &args,
int *bestScoreRet = nullptr) {
u32 bestNumOfVarShifts = 0;
int bestScore = INT_MAX;
for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) {
int score = getLimexScore(args, shiftCount);
if (score < bestScore) {
bestScore = score;
bestNumOfVarShifts = shiftCount;
}
}
if (bestScoreRet != nullptr) {
*bestScoreRet = bestScore;
}
return bestNumOfVarShifts;
}
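
A worked example of the trade-off encoded by these constants: with SHIFT_COST = 10 and EXCEPTION_COST = 4, an extra shift mask only pays off if it converts at least three exceptional states into limited transitions. The exception counts in the sketch below are invented purely to show the arithmetic.

#include <climits>
#include <cstdio>

int main() {
    const int SHIFT_COST = 10;     // mirrors the constant above
    const int EXCEPTION_COST = 4;  // mirrors the constant above
    // Hypothetical exceptional-state counts when 1..4 shift masks are allowed.
    const int exceptional[] = {12, 7, 5, 5};

    int bestScore = INT_MAX;
    unsigned bestShifts = 0;
    for (unsigned n = 1; n <= 4; n++) {
        int score = SHIFT_COST * (int)n + EXCEPTION_COST * exceptional[n - 1];
        std::printf("%u shift mask(s): score %d\n", n, score);
        if (score < bestScore) {   // lower score wins
            bestScore = score;
            bestShifts = n;
        }
    }
    std::printf("chosen: %u shift masks (score %d)\n", bestShifts, bestScore);
    return 0;
}
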
template<NFAEngineType dtype> template<NFAEngineType dtype>
struct Factory { struct Factory {
// typedefs for readability, for types derived from traits // typedefs for readability, for types derived from traits
@ -1322,25 +1422,6 @@ struct Factory {
typedef typename NFATraits<dtype>::implNFA_t implNFA_t; typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
typedef typename NFATraits<dtype>::tableRow_t tableRow_t; typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
static
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const ue2::unordered_set<NFAVertex> &tugs) {
NFAVertex from = source(e, h);
NFAVertex to = target(e, h);
u32 f = state_ids.at(from);
u32 t = state_ids.at(to);
if (!isLimitedTransition(f, t, NFATraits<dtype>::maxShift)) {
return true;
}
// All transitions out of a tug trigger are exceptional.
if (contains(tugs, from)) {
return true;
}
return false;
}
static static
void allocState(NFA *nfa, u32 repeatscratchStateSize, void allocState(NFA *nfa, u32 repeatscratchStateSize,
u32 repeatStreamState) { u32 repeatStreamState) {
@ -1504,6 +1585,9 @@ struct Factory {
static static
void writeShiftMasks(const build_info &args, implNFA_t *limex) { void writeShiftMasks(const build_info &args, implNFA_t *limex) {
const NGHolder &h = args.h; const NGHolder &h = args.h;
u32 maxShift = findMaxVarShift(args, limex->shiftCount);
u32 shiftMask = 0;
int shiftMaskIdx = 0;
for (const auto &e : edges_range(h)) { for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h)); u32 from = args.state_ids.at(source(e, h));
@ -1515,15 +1599,32 @@ struct Factory {
// We check for exceptional transitions here, as we don't want tug // We check for exceptional transitions here, as we don't want tug
// trigger transitions emitted as limited transitions (even if they // trigger transitions emitted as limited transitions (even if they
// could be in this model). // could be in this model).
if (!isExceptionalTransition(h, e, args.state_ids, args.tugs)) { if (!isExceptionalTransition(h, e, args, maxShift)) {
maskSetBit(limex->shift[to - from], from); u32 shift = to - from;
if ((shiftMask & (1UL << shift)) == 0UL) {
shiftMask |= (1UL << shift);
limex->shiftAmount[shiftMaskIdx++] = (u8)shift;
}
assert(limex->shiftCount <= MAX_SHIFT_COUNT);
for (u32 i = 0; i < limex->shiftCount; i++) {
if (limex->shiftAmount[i] == (u8)shift) {
maskSetBit(limex->shift[i], from);
break;
}
}
}
}
if (maxShift && limex->shiftCount > 1) {
for (u32 i = 0; i < limex->shiftCount; i++) {
assert(!isMaskZero(limex->shift[i]));
} }
} }
} }
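
To see the resulting layout, the sketch below (with invented transitions and a plain 64-bit integer standing in for the engine's mask type) reproduces the construction above: shiftAmount[] collects the distinct shift distances in order of first appearance, and shift[i] gets a bit for every source state whose limited transition moves by shiftAmount[i].

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
    // Invented limited (non-exceptional) transitions as (from, to) state ids.
    std::vector<std::pair<unsigned, unsigned>> edges = {
        {0, 0}, {0, 1}, {1, 2}, {2, 5}, {3, 4}};

    const unsigned MAX_SHIFT_COUNT = 8;        // sketch value
    uint8_t  shiftAmount[MAX_SHIFT_COUNT] = {};
    uint64_t shift[MAX_SHIFT_COUNT] = {};      // stand-in for the state masks
    uint32_t seen = 0, shiftCount = 0;

    for (const auto &e : edges) {
        unsigned from = e.first, dist = e.second - e.first;
        if (!(seen & (1u << dist))) {          // first time at this distance
            seen |= 1u << dist;
            shiftAmount[shiftCount++] = (uint8_t)dist;
        }
        for (uint32_t i = 0; i < shiftCount; i++) {
            if (shiftAmount[i] == dist) {
                shift[i] |= 1ULL << from;      // set the source-state bit
                break;
            }
        }
    }

    for (uint32_t i = 0; i < shiftCount; i++) {
        std::printf("shift[%u]: amount %u, mask 0x%llx\n", i, shiftAmount[i],
                    (unsigned long long)shift[i]);
    }
    return 0;
}
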
static static
void findExceptionalTransitions(const build_info &args, void findExceptionalTransitions(const build_info &args,
ue2::unordered_set<NFAEdge> &exceptional) { ue2::unordered_set<NFAEdge> &exceptional,
u32 maxShift) {
const NGHolder &h = args.h; const NGHolder &h = args.h;
for (const auto &e : edges_range(h)) { for (const auto &e : edges_range(h)) {
@ -1533,7 +1634,7 @@ struct Factory {
continue; continue;
} }
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) { if (isExceptionalTransition(h, e, args, maxShift)) {
exceptional.insert(e); exceptional.insert(e);
} }
} }
@ -1545,19 +1646,25 @@ struct Factory {
implNFA_t *limex, const u32 exceptionsOffset) { implNFA_t *limex, const u32 exceptionsOffset) {
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
// to make testing easier, we pre-set the exceptionMap to all invalid
// values
memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap));
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
assert(ISALIGNED(etable)); assert(ISALIGNED(etable));
u32 ecount = 0; map<u32, ExceptionProto> exception_by_state;
for (const auto &m : exceptionMap) { for (const auto &m : exceptionMap) {
const ExceptionProto &proto = m.first; const ExceptionProto &proto = m.first;
const vector<u32> &states = m.second; const vector<u32> &states = m.second;
DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount, for (u32 i : states) {
states.size()); assert(!contains(exception_by_state, i));
exception_by_state.emplace(i, proto);
}
}
u32 ecount = 0;
for (const auto &m : exception_by_state) {
const ExceptionProto &proto = m.second;
u32 state_id = m.first;
DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount,
state_id);
// Write the exception entry. // Write the exception entry.
exception_t &e = etable[ecount]; exception_t &e = etable[ecount];
@ -1571,13 +1678,10 @@ struct Factory {
: repeatOffsets[proto.repeat_index]; : repeatOffsets[proto.repeat_index];
e.repeatOffset = repeat_offset; e.repeatOffset = repeat_offset;
// for each state that can switch it on // for the state that can switch it on
for (auto state_id : states) { // set this bit in the exception mask
// set this bit in the exception mask maskSetBit(limex->exceptionMask, state_id);
maskSetBit(limex->exceptionMask, state_id);
// set this index in the exception map
limex->exceptionMap[state_id] = ecount;
}
ecount++; ecount++;
} }
@ -1778,16 +1882,17 @@ struct Factory {
} }
ue2::unordered_set<NFAEdge> exceptional; ue2::unordered_set<NFAEdge> exceptional;
findExceptionalTransitions(args, exceptional); u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
findExceptionalTransitions(args, exceptional, maxShift);
map<ExceptionProto, vector<u32> > exceptionMap; map<ExceptionProto, vector<u32> > exceptionMap;
vector<ReportID> exceptionReports; vector<ReportID> exceptionReports;
buildExceptionMap(args, exceptional, exceptionMap, exceptionReports); u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
exceptionReports);
if (exceptionMap.size() > ~0U) { assert(exceptionCount <= args.num_states);
DEBUG_PRINTF("too many exceptions!\n");
return nullptr;
}
// Build reach table and character mapping. // Build reach table and character mapping.
vector<NFAStateSet> reach; vector<NFAStateSet> reach;
@ -1842,7 +1947,7 @@ struct Factory {
offset = ROUNDUP_CL(offset); offset = ROUNDUP_CL(offset);
const u32 exceptionsOffset = offset; const u32 exceptionsOffset = offset;
offset += sizeof(exception_t) * exceptionMap.size(); offset += sizeof(exception_t) * exceptionCount;
const u32 exceptionReportsOffset = offset; const u32 exceptionReportsOffset = offset;
offset += sizeof(ReportID) * exceptionReports.size(); offset += sizeof(ReportID) * exceptionReports.size();
@ -1874,6 +1979,7 @@ struct Factory {
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
limex, acceptsOffset, acceptsEodOffset, squashOffset); limex, acceptsOffset, acceptsEodOffset, squashOffset);
limex->shiftCount = shiftCount;
writeShiftMasks(args, limex); writeShiftMasks(args, limex);
// Determine the state required for our state vector. // Determine the state required for our state vector.
@ -1907,8 +2013,6 @@ struct Factory {
} }
static int score(const build_info &args) { static int score(const build_info &args) {
const NGHolder &h = args.h;
// LimEx NFAs are available in sizes from 32 to 512-bit. // LimEx NFAs are available in sizes from 32 to 512-bit.
size_t num_states = args.num_states; size_t num_states = args.num_states;
@ -1928,45 +2032,17 @@ struct Factory {
sz = args.cc.grey.nfaForceSize; sz = args.cc.grey.nfaForceSize;
} }
if (args.cc.grey.nfaForceShifts &&
NFATraits<dtype>::maxShift != args.cc.grey.nfaForceShifts) {
return -1;
}
if (sz != NFATraits<dtype>::maxStates) { if (sz != NFATraits<dtype>::maxStates) {
return -1; // fail, size not appropriate return -1; // fail, size not appropriate
} }
// We are of the right size, calculate a score based on the number // We are of the right size, calculate a score based on the number
// of exceptions and the number of shifts used by this LimEx. // of exceptions and the number of shifts used by this LimEx.
int score = LIMEX_INITIAL_SCORE; int score;
if (NFATraits<dtype>::maxShift != 0) { u32 shiftCount = findBestNumOfVarShifts(args, &score);
score -= SHIFT_COST / 2; // first shift mask is cheap if (shiftCount == 0) {
score -= SHIFT_COST * (NFATraits<dtype>::maxShift - 1); return -1;
} }
NFAStateSet exceptionalStates(num_states); // outbound exc trans
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
exceptionalStates.set(from);
}
}
DEBUG_PRINTF("%zu exceptional states\n", exceptionalStates.count());
score -= EXCEPTION_COST * exceptionalStates.count();
/* ensure that we always report a valid score if have the right number
* of states */
if (score < 0) {
score = 0;
}
return score; return score;
} }
}; };
@ -1985,50 +2061,19 @@ struct scoreNfa {
} }
}; };
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \ #define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \ template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
typedef LimExNFA##mlt_size implNFA_t; \ typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \ typedef u_##mlt_size tableRow_t; \
typedef NFAException##mlt_size exception_t; \ typedef NFAException##mlt_size exception_t; \
static const size_t maxStates = mlt_size; \ static const size_t maxStates = mlt_size; \
static const u32 maxShift = mlt_shift; \ };
}; \
MAKE_LIMEX_TRAITS(32, 1) MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(32, 2) MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(32, 3) MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(32, 4) MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(32, 5) MAKE_LIMEX_TRAITS(512)
MAKE_LIMEX_TRAITS(32, 6)
MAKE_LIMEX_TRAITS(32, 7)
MAKE_LIMEX_TRAITS(128, 1)
MAKE_LIMEX_TRAITS(128, 2)
MAKE_LIMEX_TRAITS(128, 3)
MAKE_LIMEX_TRAITS(128, 4)
MAKE_LIMEX_TRAITS(128, 5)
MAKE_LIMEX_TRAITS(128, 6)
MAKE_LIMEX_TRAITS(128, 7)
MAKE_LIMEX_TRAITS(256, 1)
MAKE_LIMEX_TRAITS(256, 2)
MAKE_LIMEX_TRAITS(256, 3)
MAKE_LIMEX_TRAITS(256, 4)
MAKE_LIMEX_TRAITS(256, 5)
MAKE_LIMEX_TRAITS(256, 6)
MAKE_LIMEX_TRAITS(256, 7)
MAKE_LIMEX_TRAITS(384, 1)
MAKE_LIMEX_TRAITS(384, 2)
MAKE_LIMEX_TRAITS(384, 3)
MAKE_LIMEX_TRAITS(384, 4)
MAKE_LIMEX_TRAITS(384, 5)
MAKE_LIMEX_TRAITS(384, 6)
MAKE_LIMEX_TRAITS(384, 7)
MAKE_LIMEX_TRAITS(512, 1)
MAKE_LIMEX_TRAITS(512, 2)
MAKE_LIMEX_TRAITS(512, 3)
MAKE_LIMEX_TRAITS(512, 4)
MAKE_LIMEX_TRAITS(512, 5)
MAKE_LIMEX_TRAITS(512, 6)
MAKE_LIMEX_TRAITS(512, 7)
} // namespace } // namespace
@ -2133,20 +2178,18 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
// Acceleration analysis. // Acceleration analysis.
fillAccelInfo(arg); fillAccelInfo(arg);
typedef pair<int, NFAEngineType> EngineScore; vector<pair<int, NFAEngineType>> scores;
vector<EngineScore> scores;
if (hint != INVALID_NFA) { if (hint != INVALID_NFA) {
// The caller has told us what to (attempt to) build. // The caller has told us what to (attempt to) build.
scores.push_back(make_pair(0, (NFAEngineType)hint)); scores.emplace_back(0, (NFAEngineType)hint);
} else { } else {
for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
NFAEngineType ntype = (NFAEngineType)i; NFAEngineType ntype = (NFAEngineType)i;
int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
if (score >= 0) { if (score >= 0) {
DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score); DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
scores.push_back(make_pair(score, ntype)); scores.emplace_back(score, ntype);
} }
} }
} }
@ -2156,22 +2199,22 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
return nullptr; return nullptr;
} }
sort(scores.begin(), scores.end(), greater<EngineScore>()); // Sort acceptable models in priority order, lowest score first.
sort(scores.begin(), scores.end());
aligned_unique_ptr<NFA> nfa; for (const auto &elem : scores) {
for (auto i = scores.begin(); !nfa && i != scores.end(); ++i) { assert(elem.first >= 0);
assert(i->first >= 0); NFAEngineType limex_model = elem.second;
nfa = DISPATCH_BY_LIMEX_TYPE(i->second, generateNfa, arg); auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg);
if (nfa) {
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name(limex_model));
return nfa;
}
} }
if (!nfa) { DEBUG_PRINTF("NFA build failed.\n");
DEBUG_PRINTF("NFA build failed.\n"); return nullptr;
return nullptr;
}
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name((NFAEngineType)nfa->type));
return nfa;
} }
u32 countAccelStates(NGHolder &h, u32 countAccelStates(NGHolder &h,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -80,6 +80,23 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str()); fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
} }
template<typename mask_t>
static
u32 rank_in_mask(mask_t mask, u32 bit) {
assert(bit < 8 * sizeof(mask));
u32 chunks[sizeof(mask)/sizeof(u32)];
memcpy(chunks, &mask, sizeof(mask));
u32 base_rank = 0;
for (u32 i = 0; i < bit / 32; i++) {
base_rank += popcount32(chunks[i]);
}
u32 chunk = chunks[bit / 32];
u32 local_bit = bit % 32;
assert(chunk & (1U << local_bit));
return base_rank + popcount32(chunk & ((1U << local_bit) - 1));
}
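
The rank computation above is what replaces the old per-state exceptionMap: a state's exception index is simply the number of exception-mask bits set below its own bit. Below is a tiny self-check of that idea on a 64-bit mask (standalone sketch; the template above does the same over the engine's wider mask types).

#include <bitset>
#include <cassert>
#include <cstdint>

static uint32_t rank64(uint64_t mask, uint32_t bit) {
    assert(mask & (1ULL << bit));              // the state must be exceptional
    // Count exception bits strictly below 'bit': that is this state's index
    // into the packed exception table.
    return (uint32_t)std::bitset<64>(mask & ((1ULL << bit) - 1)).count();
}

int main() {
    const uint64_t exceptionMask = 0xB2;       // states 1, 4, 5 and 7
    assert(rank64(exceptionMask, 1) == 0);
    assert(rank64(exceptionMask, 4) == 1);
    assert(rank64(exceptionMask, 5) == 2);
    assert(rank64(exceptionMask, 7) == 3);
    return 0;
}
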
template <typename limex_type> template <typename limex_type>
static static
void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) { void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
@ -244,6 +261,16 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) {
} }
} }
template<typename limex_type>
static
void dumpLimexShifts(const limex_type *limex, FILE *f) {
u32 size = limex_traits<limex_type>::size;
fprintf(f, "Shift Masks:\n");
for(u32 i = 0; i < limex->shiftCount; i++) {
fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
dumpMask((const u8 *)&limex->shift[i], size).c_str());
}
}
template<typename limex_type> template<typename limex_type>
static static
void dumpLimexText(const limex_type *limex, FILE *f) { void dumpLimexText(const limex_type *limex, FILE *f) {
@ -270,6 +297,9 @@ void dumpLimexText(const limex_type *limex, FILE *f) {
topMask += size / 8; topMask += size / 8;
} }
// Dump shift masks
dumpLimexShifts(limex, f);
dumpSquash(limex, f); dumpSquash(limex, f);
dumpLimexReachMap(limex->reachMap, f); dumpLimexReachMap(limex->reachMap, f);
@ -325,7 +355,7 @@ struct limex_labeller : public nfa_labeller {
return; return;
} }
u32 ex_index = limex->exceptionMap[state]; u32 ex_index = rank_in_mask(limex->exceptionMask, state);
const typename limex_traits<limex_type>::exception_type *e const typename limex_traits<limex_type>::exception_type *e
= &exceptions[ex_index]; = &exceptions[ex_index];
@ -396,7 +426,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
const typename limex_traits<limex_type>::exception_type *exceptions const typename limex_traits<limex_type>::exception_type *exceptions
= getExceptionTable(limex); = getExceptionTable(limex);
u32 ex_index = limex->exceptionMap[state]; u32 ex_index = rank_in_mask(limex->exceptionMask, state);
const typename limex_traits<limex_type>::exception_type *e const typename limex_traits<limex_type>::exception_type *e
= &exceptions[ex_index]; = &exceptions[ex_index];
@ -420,78 +450,45 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
template<typename limex_type> template<typename limex_type>
static static
void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
for (u32 j = 0; j < MAX_MAX_SHIFT; j++) { for (u32 j = 0; j < limex->shiftCount; j++) {
const u32 shift_amount = limex->shiftAmount[j];
if (testbit((const u8 *)&limex->shift[j], if (testbit((const u8 *)&limex->shift[j],
limex_traits<limex_type>::size, state)) { limex_traits<limex_type>::size, state)) {
fprintf(f, "%u -> %u;\n", state, state + j); fprintf(f, "%u -> %u;\n", state, state + shift_amount);
} }
} }
} }
#define DUMP_TEXT_FN(ddf_u, ddf_n, ddf_s) \ #define DUMP_TEXT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpText(const NFA *nfa, FILE *f) { \ void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \ dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
} }
#define DUMP_DOT_FN(ddf_u, ddf_n, ddf_s) \ #define DUMP_DOT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpDot(const NFA *nfa, FILE *f) { \ void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
UNUSED const string &base) { \
const LimExNFA##ddf_n *limex = \ const LimExNFA##ddf_n *limex = \
(const LimExNFA##ddf_n *)getImplNfa(nfa); \ (const LimExNFA##ddf_n *)getImplNfa(nfa); \
\ \
dumpDotPreamble(f); \ dumpDotPreamble(f); \
u32 state_count = nfa->nPositions; \ u32 state_count = nfa->nPositions; \
dumpVertexDotInfo(limex, state_count, f, \ dumpVertexDotInfo(limex, state_count, f, \
limex_labeller<LimExNFA##ddf_n>(limex)); \ limex_labeller<LimExNFA##ddf_n>(limex)); \
for (u32 i = 0; i < state_count; i++) { \ for (u32 i = 0; i < state_count; i++) { \
dumpLimDotInfo(limex, i, f); \ dumpLimDotInfo(limex, i, f); \
dumpExDotInfo(limex, i, f); \ dumpExDotInfo(limex, i, f); \
} \ } \
\
dumpDotTrailer(f); \ dumpDotTrailer(f); \
} }
#define LIMEX_DUMP_FNS(ntype, size, shifts) \ #define LIMEX_DUMP_FNS(size) \
DUMP_TEXT_FN(ntype, size, shifts) \ DUMP_TEXT_FN(size) \
DUMP_DOT_FN(ntype, size, shifts) DUMP_DOT_FN(size)
LIMEX_DUMP_FNS(u32, 32, 1) LIMEX_DUMP_FNS(32)
LIMEX_DUMP_FNS(u32, 32, 2) LIMEX_DUMP_FNS(128)
LIMEX_DUMP_FNS(u32, 32, 3) LIMEX_DUMP_FNS(256)
LIMEX_DUMP_FNS(u32, 32, 4) LIMEX_DUMP_FNS(384)
LIMEX_DUMP_FNS(u32, 32, 5) LIMEX_DUMP_FNS(512)
LIMEX_DUMP_FNS(u32, 32, 6)
LIMEX_DUMP_FNS(u32, 32, 7)
LIMEX_DUMP_FNS(m128, 128, 1)
LIMEX_DUMP_FNS(m128, 128, 2)
LIMEX_DUMP_FNS(m128, 128, 3)
LIMEX_DUMP_FNS(m128, 128, 4)
LIMEX_DUMP_FNS(m128, 128, 5)
LIMEX_DUMP_FNS(m128, 128, 6)
LIMEX_DUMP_FNS(m128, 128, 7)
LIMEX_DUMP_FNS(m256, 256, 1)
LIMEX_DUMP_FNS(m256, 256, 2)
LIMEX_DUMP_FNS(m256, 256, 3)
LIMEX_DUMP_FNS(m256, 256, 4)
LIMEX_DUMP_FNS(m256, 256, 5)
LIMEX_DUMP_FNS(m256, 256, 6)
LIMEX_DUMP_FNS(m256, 256, 7)
LIMEX_DUMP_FNS(m384, 384, 1)
LIMEX_DUMP_FNS(m384, 384, 2)
LIMEX_DUMP_FNS(m384, 384, 3)
LIMEX_DUMP_FNS(m384, 384, 4)
LIMEX_DUMP_FNS(m384, 384, 5)
LIMEX_DUMP_FNS(m384, 384, 6)
LIMEX_DUMP_FNS(m384, 384, 7)
LIMEX_DUMP_FNS(m512, 512, 1)
LIMEX_DUMP_FNS(m512, 512, 2)
LIMEX_DUMP_FNS(m512, 512, 3)
LIMEX_DUMP_FNS(m512, 512, 4)
LIMEX_DUMP_FNS(m512, 512, 5)
LIMEX_DUMP_FNS(m512, 512, 6)
LIMEX_DUMP_FNS(m512, 512, 7)
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -79,9 +79,13 @@
#ifdef ARCH_64_BIT #ifdef ARCH_64_BIT
#define CHUNK_T u64a #define CHUNK_T u64a
#define FIND_AND_CLEAR_FN findAndClearLSB_64 #define FIND_AND_CLEAR_FN findAndClearLSB_64
#define POPCOUNT_FN popcount64
#define RANK_IN_MASK_FN rank_in_mask64
#else #else
#define CHUNK_T u32 #define CHUNK_T u32
#define FIND_AND_CLEAR_FN findAndClearLSB_32 #define FIND_AND_CLEAR_FN findAndClearLSB_32
#define POPCOUNT_FN popcount32
#define RANK_IN_MASK_FN rank_in_mask32
#endif #endif
/** \brief Process a single exception. Returns 1 if exception handling should /** \brief Process a single exception. Returns 1 if exception handling should
@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
#ifndef RUN_EXCEPTION_FN_ONLY #ifndef RUN_EXCEPTION_FN_ONLY
/** \brief Process all of the exceptions associated with the states in the \a estate. */ /** \brief Process all of the exceptions associated with the states in the \a
* estate. */
static really_inline static really_inline
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex, const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const u32 *exceptionMap, const EXCEPTION_T *exceptions, const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx,
const ReportID *exReports, char in_rev, char flags) {
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro assert(diffmask > 0); // guaranteed by caller macro
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
// A copy of the estate as an array of GPR-sized chunks. // A copy of the estate as an array of GPR-sized chunks.
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
#ifdef ESTATE_ON_STACK #ifdef ESTATE_ON_STACK
memcpy(chunks, &estate, sizeof(STATE_T)); memcpy(chunks, &estate, sizeof(STATE_T));
#else #else
memcpy(chunks, estatep, sizeof(STATE_T)); memcpy(chunks, estatep, sizeof(STATE_T));
#endif #endif
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
struct proto_cache new_cache = {0, NULL}; struct proto_cache new_cache = {0, NULL};
enum CacheResult cacheable = CACHE_RESULT; enum CacheResult cacheable = CACHE_RESULT;
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}
do { do {
u32 t = findAndClearLSB_32(&diffmask); u32 t = findAndClearLSB_32(&diffmask);
#ifdef ARCH_64_BIT #ifdef ARCH_64_BIT
@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
assert(t < ARRAY_LENGTH(chunks)); assert(t < ARRAY_LENGTH(chunks));
CHUNK_T word = chunks[t]; CHUNK_T word = chunks[t];
assert(word != 0); assert(word != 0);
u32 base = t * sizeof(CHUNK_T) * 8;
do { do {
u32 bit = FIND_AND_CLEAR_FN(&word) + base; u32 bit = FIND_AND_CLEAR_FN(&word);
u32 idx = exceptionMap[bit]; u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
u32 idx = local_index + base_index[t];
const EXCEPTION_T *e = &exceptions[idx]; const EXCEPTION_T *e = &exceptions[idx];
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
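
The do/while above is where the old per-state exceptionMap[] array goes away: the exception index for a state bit is now its rank inside exceptionMask, computed as a per-chunk prefix sum (base_index[]) plus a popcount of the lower bits in the current chunk. A hedged sketch of that indexing for a 64-bit state split into two 32-bit chunks; the widths, names, and GCC/Clang builtins are illustrative only:

#include <stdint.h>

/* For every set bit of 'estate', compute the index of its exception record,
 * i.e. the rank of that bit within 'emask'. Mirrors the base_index[] plus
 * rank-in-chunk scheme above; not the library's implementation. */
static void walk_exception_indices(uint64_t estate, uint64_t emask) {
    uint32_t chunks[2]       = { (uint32_t)estate, (uint32_t)(estate >> 32) };
    uint32_t emask_chunks[2] = { (uint32_t)emask,  (uint32_t)(emask >> 32) };

    unsigned base_index[2];
    base_index[0] = 0;
    base_index[1] = (unsigned)__builtin_popcount(emask_chunks[0]);

    for (unsigned t = 0; t < 2; t++) {
        uint32_t word = chunks[t];
        while (word) {
            unsigned bit = (unsigned)__builtin_ctz(word); /* find LSB */
            word &= word - 1;                             /* ...and clear it */
            unsigned idx = base_index[t] +
                (unsigned)__builtin_popcount(emask_chunks[t] &
                                             ((1U << bit) - 1));
            (void)idx; /* in the real code, idx selects exceptions[idx] */
        }
    }
}
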

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -68,6 +68,9 @@
The value of NFA.stateSize gives the total state size in bytes (the sum of The value of NFA.stateSize gives the total state size in bytes (the sum of
all the above). all the above).
The number of shifts should always be greater than or equal to 1. A shift
count of 0 means that no appropriate NFA engine was found.
*/ */
#ifndef LIMEX_INTERNAL_H #ifndef LIMEX_INTERNAL_H
@ -77,7 +80,8 @@
#include "repeat_internal.h" #include "repeat_internal.h"
// Constants // Constants
#define MAX_MAX_SHIFT 8 /**< largest maxshift used by a LimEx NFA */ #define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */
#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ #define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
@ -95,24 +99,6 @@ enum LimExSquash {
LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
}; };
struct LimExNFABase {
u8 reachMap[N_CHARS];
u32 reachSize;
u32 accelCount;
u32 accelTableOffset;
u32 accelAuxCount;
u32 accelAuxOffset;
u32 acceptCount;
u32 acceptOffset;
u32 acceptEodCount;
u32 acceptEodOffset;
u32 exceptionCount;
u32 exceptionOffset;
u32 exReportOffset;
u32 repeatCount;
u32 repeatOffset;
};
/* uniform looking types for the macros */ /* uniform looking types for the macros */
typedef u8 u_8; typedef u8 u_8;
typedef u16 u_16; typedef u16 u_16;
@ -133,7 +119,7 @@ struct NFAException##size { \
u8 trigger; /**< from enum LimExTrigger */ \ u8 trigger; /**< from enum LimExTrigger */ \
}; \ }; \
\ \
struct LimExNFA##size { /* MUST align with LimExNFABase */ \ struct LimExNFA##size { \
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \ u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
u32 reachSize; /**< number of reach masks */ \ u32 reachSize; /**< number of reach masks */ \
u32 accelCount; /**< number of entries in accel table */ \ u32 accelCount; /**< number of entries in accel table */ \
@ -149,7 +135,6 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
u32 exReportOffset; /* rel. to start of LimExNFA */ \ u32 exReportOffset; /* rel. to start of LimExNFA */ \
u32 repeatCount; \ u32 repeatCount; \
u32 repeatOffset; \ u32 repeatOffset; \
u32 exceptionMap[size]; \
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
u32 squashCount; \ u32 squashCount; \
u32 topCount; \ u32 topCount; \
@ -168,8 +153,10 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
u_##size compressMask; /**< switch off before compress */ \ u_##size compressMask; /**< switch off before compress */ \
u_##size exceptionMask; \ u_##size exceptionMask; \
u_##size repeatCyclicMask; \ u_##size repeatCyclicMask; \
u_##size shift[MAX_MAX_SHIFT]; \
u_##size zombieMask; /**< zombie if in any of the set states */ \ u_##size zombieMask; /**< zombie if in any of the set states */ \
u_##size shift[MAX_SHIFT_COUNT]; \
u32 shiftCount; /**< number of shift masks used */ \
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
}; };
CREATE_NFA_LIMEX(32) CREATE_NFA_LIMEX(32)
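
To make the new fields concrete: shift[j] selects the states that participate in shift j, shiftAmount[j] says how far those states advance, and shiftCount masks are in use. A small sketch of how a 32-bit limited-successor function falls out of that layout (this mirrors the NFA_EXEC_GET_LIM_SUCC macro changed later in this diff; the struct and function names here are illustrative):

#include <stdint.h>

#define SKETCH_MAX_SHIFT_COUNT 8

struct limex32_sketch {                      /* illustrative subset of LimExNFA32 */
    uint32_t shift[SKETCH_MAX_SHIFT_COUNT];  /* states taking part in shift j */
    uint8_t  shiftAmount[SKETCH_MAX_SHIFT_COUNT];
    uint32_t shiftCount;
};

/* Limited-model successors: OR together each masked, shifted copy of the
 * current state vector. */
static uint32_t limited_successors32(const struct limex32_sketch *l, uint32_t s) {
    uint32_t succ = 0;
    for (uint32_t j = 0; j < l->shiftCount; j++) {
        succ |= (s & l->shift[j]) << l->shiftAmount[j];
    }
    return succ;
}
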

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -74,7 +74,6 @@
static really_inline static really_inline
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
const struct LimExNFA32 *limex, const struct LimExNFA32 *limex,
const u32 *exceptionMap,
const struct NFAException32 *exceptions, const struct NFAException32 *exceptions,
const ReportID *exReports, u64a offset, const ReportID *exReports, u64a offset,
struct NFAContext32 *ctx, char in_rev, char flags) { struct NFAContext32 *ctx, char in_rev, char flags) {
@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
do { do {
u32 bit = findAndClearLSB_32(&estate); u32 bit = findAndClearLSB_32(&estate);
u32 idx = exceptionMap[bit]; u32 idx = rank_in_mask32(limex->exceptionMask, bit);
const struct NFAException32 *e = &exceptions[idx]; const struct NFAException32 *e = &exceptions[idx];
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset, if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
ctx, &new_cache, &cacheable, in_rev, flags)) { ctx, &new_cache, &cacheable, in_rev, flags)) {
@ -132,35 +131,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
#define SIZE 32 #define SIZE 32
#define STATE_T u32 #define STATE_T u32
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 7
#include "limex_runtime_impl.h" #include "limex_runtime_impl.h"

View File

@ -73,34 +73,35 @@ struct proto_cache {
}; };
// Shift macros for Limited NFAs. Defined in terms of uniform ops. // Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \ #define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
(JOIN(shift_, nels_type)( \ (JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \ JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \ JOIN(load_, nels_type)(&limex->shift[nels_i])), \
nels_i)) limex->shiftAmount[nels_i]))
// Calculate the (limited model) successors for a given max shift. Assumes // Calculate the (limited model) successors for a number of variable shifts.
// LimExNFAxxx ptr in 'l', current state in 's' and successors in 'succ'. // Assumes current state in 's' and successors in 'succ'.
#define NFA_EXEC_GET_LIM_SUCC(gls_type, gls_shift) \ #define NFA_EXEC_GET_LIM_SUCC(gls_type) \
do { \ do { \
succ = \ succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
JOIN(and_, gls_type)(s, JOIN(load_, gls_type)(&limex->shift[0])); \ switch (limex->shiftCount) { \
switch (gls_shift) { \ case 8: \
case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
case 6: \ case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
case 5: \ case 6: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
case 4: \ case 5: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
case 3: \ case 4: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
case 2: \ case 3: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
case 1: \ case 2: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
case 1: \
case 0: \ case 0: \
; \ ; \
} \ } \
@ -129,7 +130,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
for (; *reports != MO_INVALID_IDX; ++reports) { for (; *reports != MO_INVALID_IDX; ++reports) {
DEBUG_PRINTF("firing report for id %u at offset %llu\n", DEBUG_PRINTF("firing report for id %u at offset %llu\n",
*reports, offset); *reports, offset);
int rv = callback(offset, *reports, context); int rv = callback(0, offset, *reports, context);
if (rv == MO_HALT_MATCHING) { if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
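
Every report callback in this change gains a leading offset argument, passed as 0 at these call sites. Assuming the updated callback shape is (start, end, report id, context), which is only an inference from the call sites shown here and not the real NfaCallback typedef, a user callback could look like this sketch:

#include <stdio.h>

typedef unsigned long long u64a;   /* matches the u64a convention used above */

/* Hypothetical callback matching the four-argument call sites in this diff.
 * Return 0 to keep matching; the engine compares the result against
 * MO_HALT_MATCHING to decide whether to stop. */
static int print_match(u64a start, u64a end, unsigned id, void *context) {
    (void)context;
    printf("report %u matched over [%llu, %llu]\n", id, start, end);
    return 0;
}
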

View File

@ -37,11 +37,11 @@
* Version 2.0: now with X-Macros, so you get line numbers in your debugger. * Version 2.0: now with X-Macros, so you get line numbers in your debugger.
*/ */
#if !defined(SIZE) || !defined(STATE_T) || !defined(SHIFT) #if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T and SHIFT in includer. # error Must define SIZE and STATE_T in includer.
#endif #endif
#define LIMEX_API_ROOT JOIN(JOIN(JOIN(nfaExecLimEx, SIZE), _), SHIFT) #define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
@ -73,6 +73,7 @@
#define ANDNOT_STATE JOIN(andnot_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T) #define OR_STATE JOIN(or_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
#define ZERO_STATE JOIN(zero_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T)
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) #define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
#define ISZERO_STATE JOIN(isZero_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T)
@ -104,8 +105,8 @@
// continue, 1 if an accept was fired and the user instructed us to halt. // continue, 1 if an accept was fired and the user instructed us to halt.
static really_inline static really_inline
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, const u32 *exceptionMap, const ReportID *exReports, STATE_T s,
STATE_T s, const STATE_T emask, size_t i, u64a offset, const STATE_T emask, size_t i, u64a offset,
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
const char flags, const char in_rev, const char flags, const char in_rev,
const char first_match) { const char first_match) {
@ -132,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
int rv = JOIN(processExceptional, SIZE)( int rv = JOIN(processExceptional, SIZE)(
pass_state, pass_estate, diffmask, succ, limex, exceptionMap, pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports,
exceptions, exReports, callback_offset, ctx, in_rev, localflags); callback_offset, ctx, in_rev, localflags);
if (rv == PE_RV_HALT) { if (rv == PE_RV_HALT) {
return 1; // Halt matching. return 1; // Halt matching.
} }
@ -175,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset); (const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex); const ReportID *exReports = getExReports(limex);
const u32 *exceptionMap = limex->exceptionMap;
STATE_T s = LOAD_STATE(&ctx->s); STATE_T s = LOAD_STATE(&ctx->s);
/* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(exceptions)); */
@ -201,11 +201,11 @@ without_accel:
u8 c = input[i]; u8 c = input[i];
STATE_T succ; STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, i, offset, &succ, final_loc, ctx, flags, 0,
flags, 0, first_match)) { first_match)) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
@ -252,11 +252,11 @@ with_accel:
u8 c = input[i]; u8 c = input[i];
STATE_T succ; STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, i, offset, &succ, final_loc, ctx, flags, 0,
flags, 0, first_match)) { first_match)) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
@ -300,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
#endif #endif
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex); const ReportID *exReports = getExReports(limex);
const u32 *exceptionMap = limex->exceptionMap;
STATE_T s = LOAD_STATE(&ctx->s); STATE_T s = LOAD_STATE(&ctx->s);
/* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(exceptions)); */
@ -318,9 +317,9 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
u8 c = input[i-1]; u8 c = input[i-1];
STATE_T succ; STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
flags, 1, 0)) { flags, 1, 0)) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
@ -349,36 +348,57 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
} }
static really_inline static really_inline
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
u64a offset) { u64a offset) {
if (!limex->repeatCount) { if (!limex->repeatCount) {
return; return;
} }
// Note: we compress all repeats, as they may have *just* had their STATE_T s = LOAD_STATE(src);
// cyclic states switched off a moment ago. TODO: is this required
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
const union RepeatControl *ctrl = const union RepeatControl *ctrl =
getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
char *state_base = (char *)dest + limex->stateSize; char *state_base = (char *)dest + limex->stateSize;
for (u32 i = 0; i < limex->repeatCount; i++) { for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&s, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
const struct RepeatInfo *repeat = getRepeatInfo(info); const struct RepeatInfo *repeat = getRepeatInfo(info);
if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset,
offset) == REPEAT_STALE) {
DEBUG_PRINTF("is stale, clearing state\n");
CLEARBIT_STATE(&s, info->cyclicState);
continue;
}
DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
offset); offset);
} }
STORE_STATE(src, s);
} }
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
const struct mq *q, const struct mq *q, s64a loc) {
s64a loc) {
void *dest = q->streamState; void *dest = q->streamState;
const void *src = q->state; void *src = q->state;
u8 key = queue_prev_byte(q, loc); u8 key = queue_prev_byte(q, loc);
const IMPL_NFA_T *limex = getImplNfa(n); const IMPL_NFA_T *limex = getImplNfa(n);
COMPRESS_FN(limex, dest, src, key);
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc); COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
COMPRESS_FN(limex, dest, src, key);
return 0; return 0;
} }
@ -389,15 +409,29 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
return; return;
} }
// Note: we expand all repeats, as they may have *just* had their // Note: state has already been expanded into 'dest'.
// cyclic states switched off a moment ago. TODO: is this required? const STATE_T cyclics =
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
union RepeatControl *ctrl = union RepeatControl *ctrl =
getRepeatControlBase((char *)dest, sizeof(STATE_T)); getRepeatControlBase((char *)dest, sizeof(STATE_T));
const char *state_base = (const char *)src + limex->stateSize; const char *state_base = (const char *)src + limex->stateSize;
for (u32 i = 0; i < limex->repeatCount; i++) { for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
const struct RepeatInfo *repeat = getRepeatInfo(info); const struct RepeatInfo *repeat = getRepeatInfo(info);
repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
&ctrl[i]); &ctrl[i]);
@ -650,7 +684,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
ep = MIN(ep, end_abs); ep = MIN(ep, end_abs);
assert(ep >= sp); assert(ep >= sp);
assert(sp >= offset); // We no longer do history buffer scans here. if (sp < offset) {
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
assert(offset - sp <= q->hlength);
u64a local_ep = MIN(offset, ep);
u64a final_look = 0;
/* we are starting inside the history buffer */
if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
local_ep - sp, &ctx, sp,
&final_look) == MO_HALT_MATCHING) {
DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
"offset:%llu\n", final_look, sp, end_abs, offset);
assert(q->cur);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
return MO_MATCHES_PENDING;
}
sp = local_ep;
}
if (sp >= ep) { if (sp >= ep) {
goto scan_done; goto scan_done;
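
The new history branch above starts the scan inside q->history when the requested start point sp lies before the current buffer. A tiny worked sketch of the pointer arithmetic, assuming q->history holds the last hlength bytes that precede offset (the same assumption the assert encodes); the helper is illustrative, not part of the library:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uint64_t u64a;

/* Returns where in the history buffer scanning begins; *len_out gets the
 * number of history bytes scanned before the main buffer takes over. */
static const uint8_t *history_scan_start(const uint8_t *history, size_t hlength,
                                         u64a sp, u64a offset, size_t *len_out) {
    assert(sp < offset && offset - sp <= hlength);
    *len_out = (size_t)(offset - sp);          /* history scanning stops at offset */
    return history + hlength - (offset - sp);  /* i.e. the last (offset - sp) bytes */
}
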
@ -789,10 +843,8 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
} }
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, NfaCallback callback, void *context) {
UNUSED SomNfaCallback som_callback,
void *context) {
assert(n && state); assert(n && state);
const IMPL_NFA_T *limex = getImplNfa(n); const IMPL_NFA_T *limex = getImplNfa(n);
@ -868,6 +920,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
offset, report); offset, report);
} }
char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
assert(nfa && q);
assert(q->state && q->streamState);
const IMPL_NFA_T *limex = getImplNfa(nfa);
union RepeatControl *repeat_ctrl =
getRepeatControlBase(q->state, sizeof(STATE_T));
char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = LOAD_STATE(q->state);
u64a offset = q->offset + q_last_loc(q) + 1;
return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
offset);
}
enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
const struct NFA *nfa, const struct NFA *nfa,
struct mq *q, struct mq *q,
@ -920,6 +987,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef ANDNOT_STATE #undef ANDNOT_STATE
#undef OR_STATE #undef OR_STATE
#undef TESTBIT_STATE #undef TESTBIT_STATE
#undef CLEARBIT_STATE
#undef ZERO_STATE #undef ZERO_STATE
#undef ISNONZERO_STATE #undef ISNONZERO_STATE
#undef ISZERO_STATE #undef ISZERO_STATE
@ -935,5 +1003,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
// Parameters. // Parameters.
#undef SIZE #undef SIZE
#undef STATE_T #undef STATE_T
#undef SHIFT
#undef LIMEX_API_ROOT #undef LIMEX_API_ROOT

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,20 +34,19 @@
* be faster and actually correct if these assumptions don't hold true. * be faster and actually correct if these assumptions don't hold true.
*/ */
#ifndef SHUFFLE_H #ifndef LIMEX_SHUFFLE_H
#define SHUFFLE_H #define LIMEX_SHUFFLE_H
#include "config.h"
#include "bitutils.h"
#include "simd_utils.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) #if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT #define HAVE_PEXT
#endif #endif
static really_inline static really_inline
u32 shuffleDynamic32(u32 x, u32 mask) { u32 packedExtract32(u32 x, u32 mask) {
#if defined(HAVE_PEXT) #if defined(HAVE_PEXT)
// Intel BMI2 can do this operation in one instruction. // Intel BMI2 can do this operation in one instruction.
return _pext_u32(x, mask); return _pext_u32(x, mask);
@ -67,7 +66,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
} }
static really_inline static really_inline
u32 shuffleDynamic64(u64a x, u64a mask) { u32 packedExtract64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) #if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Intel BMI2 can do this operation in one instruction. // Intel BMI2 can do this operation in one instruction.
return _pext_u64(x, mask); return _pext_u64(x, mask);
@ -88,4 +87,24 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
#undef HAVE_PEXT #undef HAVE_PEXT
#endif // SHUFFLE_H static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);
m128 compared = and128(shuffled, compare);
u16 rv = ~movemask128(eq128(compared, shuffled));
return (u32)rv;
}
#if defined(__AVX2__)
static really_inline
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
// vpshufb doesn't cross lanes, so this is a bit of a cheat
m256 shuffled = vpshufb(s, permute);
m256 compared = and256(shuffled, compare);
u32 rv = ~movemask256(eq256(compared, shuffled));
// stitch the lane-wise results back together
return (u32)((rv >> 16) | (rv & 0xffffU));
}
#endif // AVX2
#endif // LIMEX_SHUFFLE_H
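
For readers cross-checking the SIMD variants above: packedExtract gathers the bits of x selected by mask into the low-order bits of the result, which is exactly what BMI2 PEXT does in hardware. A slow but obvious reference version (illustrative only, not the library's fallback path):

#include <stdint.h>

/* Software PEXT: collect the bits of x at positions where mask is set,
 * packed contiguously from bit 0. */
static uint32_t pext32_ref(uint32_t x, uint32_t mask) {
    uint32_t out = 0;
    unsigned k = 0;
    for (unsigned i = 0; i < 32; i++) {
        if (mask & (1U << i)) {
            if (x & (1U << i)) {
                out |= 1U << k;
            }
            k++;
        }
    }
    return out;
}
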

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -61,37 +61,6 @@
#define INLINE_ATTR really_inline #define INLINE_ATTR really_inline
#include "limex_common_impl.h" #include "limex_common_impl.h"
#define SIZE 128 #define SIZE 128
#define STATE_T m128 #define STATE_T m128
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 7
#include "limex_runtime_impl.h" #include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -58,37 +58,6 @@
#define INLINE_ATTR really_inline #define INLINE_ATTR really_inline
#include "limex_common_impl.h" #include "limex_common_impl.h"
#define SIZE 256 #define SIZE 256
#define STATE_T m256 #define STATE_T m256
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 7
#include "limex_runtime_impl.h" #include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -58,37 +58,6 @@
#define INLINE_ATTR really_inline #define INLINE_ATTR really_inline
#include "limex_common_impl.h" #include "limex_common_impl.h"
#define SIZE 384 #define SIZE 384
#define STATE_T m384 #define STATE_T m384
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 7
#include "limex_runtime_impl.h" #include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -58,12 +58,6 @@
#define INLINE_ATTR really_inline #define INLINE_ATTR really_inline
#include "limex_common_impl.h" #include "limex_common_impl.h"
#define SIZE 512 #define SIZE 512
#define STATE_T m512 #define STATE_T m512
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 512
#define STATE_T m512
#define SHIFT 5
#include "limex_runtime_impl.h" #include "limex_runtime_impl.h"

View File

@ -42,13 +42,13 @@
static really_inline static really_inline
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
u16 s, u64a loc, char eod, u16 * const cached_accept_state, u16 s, u64a loc, char eod, u16 *const cached_accept_state,
u32 * const cached_accept_id) { u32 *const cached_accept_id) {
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
(u16)(s & STATE_MASK), loc, eod); (u16)(s & STATE_MASK), loc, eod);
if (!eod && s == *cached_accept_state) { if (!eod && s == *cached_accept_state) {
if (cb(loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */ return MO_HALT_MATCHING; /* termination requested */
} }
@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
*cached_accept_id = rl->report[0]; *cached_accept_id = rl->report[0];
DEBUG_PRINTF("reporting %u\n", rl->report[0]); DEBUG_PRINTF("reporting %u\n", rl->report[0]);
if (cb(loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */ return MO_HALT_MATCHING; /* termination requested */
} }
@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
for (u32 i = 0; i < count; i++) { for (u32 i = 0; i < count; i++) {
DEBUG_PRINTF("reporting %u\n", rl->report[i]); DEBUG_PRINTF("reporting %u\n", rl->report[i]);
if (cb(loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */ return MO_HALT_MATCHING; /* termination requested */
} }
} }
@ -146,7 +146,7 @@ without_accel:
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */ return MO_HALT_MATCHING; /* termination requested */
} }
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -186,7 +186,7 @@ with_accel:
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */ return MO_HALT_MATCHING; /* termination requested */
} }
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -328,7 +328,7 @@ without_accel:
u64a loc = (c - 1) - buf + offAdj + 1; u64a loc = (c - 1) - buf + offAdj + 1;
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
} else if (doComplexReport(cb, ctxt, m, s, loc, 0, } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -360,7 +360,7 @@ with_accel:
u64a loc = (c - 1) - buf + offAdj + 1; u64a loc = (c - 1) - buf + offAdj + 1;
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
} else if (doComplexReport(cb, ctxt, m, s, loc, 0, } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -475,7 +475,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv; int rv;
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context); rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else { } else {
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
@ -632,7 +632,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv; int rv;
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context); rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else { } else {
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
@ -836,7 +836,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
if (s >= m->accept_limit_8) { if (s >= m->accept_limit_8) {
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt); cb(0, offset, m->arb_report, ctxt);
} else { } else {
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
} }
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *ctxt = q->context; void *ctxt = q->context;
u16 s = *(u16 *)q->state; u16 s = *(u16 *)q->state;
@ -864,7 +864,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
if (aux->accept) { if (aux->accept) {
if (single) { if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report); DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt); cb(0, offset, m->arb_report, ctxt);
} else { } else {
u32 cached_accept_id = 0; u32 cached_accept_id = 0;
u16 cached_accept_state = 0; u16 cached_accept_state = 0;
@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q) { struct mq *q) {
assert(n && q); assert(n && q);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state; u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s); DEBUG_PRINTF("checking accepts for %hhu\n", s);
if (s < m->accept_limit_8) { if (s < m->accept_limit_8) {
@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
return mcclellanHasAccept(m, get_aux(m, s), report); return mcclellanHasAccept(m, get_aux(m, s), report);
} }
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
return s >= m->accept_limit_8;
}
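
The 8-bit inAnyAccept above works because the McClellan 8 state numbering places accepting states at or above accept_limit_8, so membership is a single compare. A toy illustration of that numbering convention (the struct and its fields are invented for illustration, not the real mcclellan layout):

#include <assert.h>
#include <stdint.h>

/* Toy layout: states [0, accept_limit) never accept, states
 * [accept_limit, nstates) always accept. */
struct toy_dfa8 {
    uint8_t accept_limit;
    uint8_t nstates;
};

static int toy_in_any_accept(const struct toy_dfa8 *d, uint8_t s) {
    assert(s < d->nstates);
    return s >= d->accept_limit;
}
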
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q) { struct mq *q) {
assert(n && q); assert(n && q);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state; u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s); DEBUG_PRINTF("checking accepts for %hu\n", s);
return mcclellanHasAccept(m, get_aux(m, s), report); return mcclellanHasAccept(m, get_aux(m, s), report);
} }
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);
return !!get_aux(m, s)->accept;
}
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset; u64a offset = q->offset;
const u8 *buffer = q->buffer; const u8 *buffer = q->buffer;
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8); assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16); assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8); assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb; NfaCallback cb = q->cb;
void *context = q->context; void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16); assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength; const u8 *hend = q->history + q->hlength;
char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) { void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mcclellan *m = getImplNfa(nfa);
u8 s = offset ? m->start_floating : m->start_anchored; u8 s = offset ? m->start_floating : m->start_anchored;
if (s) { if (s) {
*(u8 *)state = s; *(u8 *)state = s;
@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) { void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mcclellan *m = getImplNfa(nfa);
u16 s = offset ? m->start_floating : m->start_anchored; u16 s = offset ? m->start_floating : m->start_anchored;
if (s) { if (s) {
unaligned_store_u16(state, s); unaligned_store_u16(state, s);
@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off, const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) { size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mcclellan *m = getImplNfa(nfa);
u8 s = top ? m->start_anchored : *(u8 *)state; u8 s = top ? m->start_anchored : *(u8 *)state;
@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off, const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) { size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mcclellan *m = getImplNfa(nfa);
u16 s = top ? m->start_anchored : unaligned_load_u16(state); u16 s = top ? m->start_anchored : unaligned_load_u16(state);
@ -1053,17 +1073,15 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
} }
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, UNUSED const char *streamState, u64a offset,
u64a offset, NfaCallback callback, NfaCallback callback, void *context) {
UNUSED SomNfaCallback som_cb, void *context) {
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
context); context);
} }
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, UNUSED const char *streamState, u64a offset,
u64a offset, NfaCallback callback, NfaCallback callback, void *context) {
UNUSED SomNfaCallback som_cb, void *context) {
assert(ISALIGNED_N(state, 2)); assert(ISALIGNED_N(state, 2));
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
context); context);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -39,14 +39,14 @@ struct NFA;
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context);
void *context);
char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset, char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context);
void *context);
char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q); struct mq *q);
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset, char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);

View File

@ -32,7 +32,6 @@
#include "accelcompile.h" #include "accelcompile.h"
#include "grey.h" #include "grey.h"
#include "mcclellan_internal.h" #include "mcclellan_internal.h"
#include "mcclellancompile_accel.h"
#include "mcclellancompile_util.h" #include "mcclellancompile_util.h"
#include "nfa_internal.h" #include "nfa_internal.h"
#include "shufticompile.h" #include "shufticompile.h"
@ -65,6 +64,17 @@
using namespace std; using namespace std;
using boost::adaptors::map_keys; using boost::adaptors::map_keys;
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than the NFA limit, as we don't have a budget and the NFA
 * cheats on stop characters for sets of states. */
#define ACCEL_DFA_MAX_STOP_CHAR 160
/** Maximum tolerated number of escape characters from an sds accel state.
 * Larger than for normal states, as accelerating sds is important. Matches the
 * NFA value. */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
namespace ue2 { namespace ue2 {
namespace /* anon */ { namespace /* anon */ {
@ -75,7 +85,7 @@ struct dstate_extra {
}; };
struct dfa_info { struct dfa_info {
dfa_build_strat &strat; accel_dfa_build_strat &strat;
raw_dfa &raw; raw_dfa &raw;
vector<dstate> &states; vector<dstate> &states;
vector<dstate_extra> extra; vector<dstate_extra> extra;
@ -85,7 +95,7 @@ struct dfa_info {
u8 getAlphaShift() const; u8 getAlphaShift() const;
explicit dfa_info(dfa_build_strat &s) explicit dfa_info(accel_dfa_build_strat &s)
: strat(s), : strat(s),
raw(s.get_raw()), raw(s.get_raw()),
states(raw.states), states(raw.states),
@ -128,13 +138,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
return aux; return aux;
} }
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}
static static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
assert((size_t)succ_table % 2 == 0); assert((size_t)succ_table % 2 == 0);
@ -190,120 +193,12 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH; return ACCEL_DFA_MAX_OFFSET_DEPTH;
} }
AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) u32 mcclellan_build_strat::max_stop_char() const {
const { return ACCEL_DFA_MAX_STOP_CHAR;
return find_mcclellan_escape_info(rdfa, this_idx,
max_allowed_offset_accel());
} }
/** builds acceleration schemes for states */ u32 mcclellan_build_strat::max_floating_stop_char() const {
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
const AccelScheme &info,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.double_offset);
accel->generic.offset = verify_u8(info.offset);
if (double_byte_ok(info) && info.double_cr.none()
&& info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}
if (double_byte_ok(info) && info.double_cr.none()
&& (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true;
assert(!info.double_byte.empty());
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC
|| (p.second & CASE_CLEAR) != secondC) {
ok = false;
break;
}
}
if (ok) {
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}
u8 m1;
u8 m2;
if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1;
accel->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
accel->dverm.c1, accel->dverm.c2);
return;
}
}
if (double_byte_ok(info)
&& shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}
if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n", this_idx);
return;
}
if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM;
accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return;
}
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return;
}
if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return;
}
accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
&accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}
assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
} }
static static
@ -343,15 +238,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
} }
} }
raw_dfa::~raw_dfa() {
}
raw_report_info::raw_report_info() {
}
raw_report_info::~raw_report_info() {
}
namespace { namespace {
struct raw_report_list { struct raw_report_list {
@ -592,7 +478,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey); = info.strat.getAccelInfo(cc.grey);
size_t tran_size = (1 << info.getAlphaShift()) size_t tran_size = (1 << info.getAlphaShift())
* sizeof(u16) * count_real_states; * sizeof(u16) * count_real_states;
@ -811,7 +697,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey); = info.strat.getAccelInfo(cc.grey);
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
size_t aux_size = sizeof(mstate_aux) * info.size(); size_t aux_size = sizeof(mstate_aux) * info.size();
@ -1053,7 +939,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
return false; return false;
} }
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc, const CompileContext &cc,
set<dstate_id_t> *accel_states) { set<dstate_id_t> *accel_states) {
u16 total_daddy = 0; u16 total_daddy = 0;
@ -1123,12 +1009,9 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) {
return out.count(); return out.count();
} }
bool has_accel_dfa(const NFA *nfa) { bool has_accel_mcclellan(const NFA *nfa) {
const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
return m->has_accel; return m->has_accel;
} }
dfa_build_strat::~dfa_build_strat() {
}
} // namespace ue2 } // namespace ue2

View File

@ -29,6 +29,7 @@
#ifndef MCCLELLANCOMPILE_H #ifndef MCCLELLANCOMPILE_H
#define MCCLELLANCOMPILE_H #define MCCLELLANCOMPILE_H
#include "accel_dfa_build_strat.h"
#include "rdfa.h" #include "rdfa.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/accel_scheme.h" #include "util/accel_scheme.h"
@ -47,48 +48,20 @@ namespace ue2 {
class ReportManager; class ReportManager;
struct CompileContext; struct CompileContext;
struct raw_report_info { class mcclellan_build_strat : public accel_dfa_build_strat {
raw_report_info();
virtual ~raw_report_info();
virtual u32 getReportListSize() const = 0; /* in bytes */
virtual size_t size() const = 0; /* number of lists */
virtual void fillReportLists(NFA *n, size_t base_offset,
std::vector<u32> &ro /* out */) const = 0;
};
class dfa_build_strat {
public:
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
virtual ~dfa_build_strat();
virtual raw_dfa &get_raw() const = 0;
virtual std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0;
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
virtual size_t accelSize(void) const = 0;
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) = 0;
protected:
const ReportManager &rm;
};
class mcclellan_build_strat : public dfa_build_strat {
public: public:
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
: dfa_build_strat(rm_in), rdfa(rdfa_in) {} : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
raw_dfa &get_raw() const override { return rdfa; } raw_dfa &get_raw() const override { return rdfa; }
std::unique_ptr<raw_report_info> gatherReports( std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */, std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */, std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */, u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override; ReportID *arbReport /* out */) const override;
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override; size_t accelSize(void) const override;
void buildAccel(dstate_id_t this_idx,const AccelScheme &info, u32 max_allowed_offset_accel() const override;
void *accel_out) override; u32 max_stop_char() const override;
virtual u32 max_allowed_offset_accel() const; u32 max_floating_stop_char() const override;
private: private:
raw_dfa &rdfa; raw_dfa &rdfa;
@ -103,7 +76,7 @@ mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
/* used internally by mcclellan/haig/gough compile process */ /* used internally by mcclellan/haig/gough compile process */
ue2::aligned_unique_ptr<NFA> ue2::aligned_unique_ptr<NFA>
mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc, const CompileContext &cc,
std::set<dstate_id_t> *accel_states = nullptr); std::set<dstate_id_t> *accel_states = nullptr);
@ -114,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw);
std::set<ReportID> all_reports(const raw_dfa &rdfa); std::set<ReportID> all_reports(const raw_dfa &rdfa);
bool has_accel_dfa(const NFA *nfa); bool has_accel_mcclellan(const NFA *nfa);
} // namespace ue2 } // namespace ue2

View File

@ -337,62 +337,35 @@ size_t hash_dfa(const raw_dfa &rdfa) {
} }
static static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { bool can_die_early(const raw_dfa &raw, dstate_id_t s,
u16 top_remap = raw.alpha_remap[TOP]; map<dstate_id_t, u32> &visited, u32 age_limit) {
for (u32 i = 0; i < raw.states[s].next.size(); i++) { if (contains(visited, s) && visited[s] >= age_limit) {
if (i != top_remap && raw.states[s].next[i] == s) { /* we have already visited (or are in the process of visiting) here with
* a looser limit. */
return false;
}
visited[s] = age_limit;
if (s == DEAD_STATE) {
return true;
}
if (age_limit == 0) {
return false;
}
for (const auto &next : raw.states[s].next) {
if (can_die_early(raw, next, visited, age_limit - 1)) {
return true; return true;
} }
} }
return false; return false;
} }
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { bool can_die_early(const raw_dfa &raw, u32 age_limit) {
if (raw.start_floating != DEAD_STATE) { map<dstate_id_t, u32> visited;
DEBUG_PRINTF("has floating start\n"); return can_die_early(raw, raw.start_anchored, visited, age_limit);
return raw.start_floating;
}
DEBUG_PRINTF("looking for SDS proxy\n");
dstate_id_t s = raw.start_anchored;
if (has_self_loop(s, raw)) {
return s;
}
u16 top_remap = raw.alpha_remap[TOP];
ue2::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);
/* check if we are connected to a state with a self loop */
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t t = raw.states[s].next[i];
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
return t;
}
}
/* find a neighbour to use as a basis for looking for the sds proxy */
dstate_id_t t = DEAD_STATE;
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t tt = raw.states[s].next[i];
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
t = tt;
break;
}
}
if (t == DEAD_STATE) {
/* we were unable to find a state to use as a SDS proxy */
return DEAD_STATE;
}
s = t;
}
} }
} // namespace ue2 } // namespace ue2
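The new can_die_early() above is a depth-bounded reachability check with memoisation: it asks whether the dead state can be reached from the anchored start within age_limit steps, recording the loosest budget each state has already been explored with so no state is revisited with an equal or smaller budget. A standalone sketch of the same idea on a toy adjacency list (hypothetical types, not the Hyperscan ones):

    #include <cstdio>
    #include <map>
    #include <vector>

    // Toy illustration of the depth-bounded search with "loosest budget" memoisation.
    static bool can_reach_dead(const std::vector<std::vector<int>> &next, int s,
                               std::map<int, int> &visited, int budget) {
        auto it = visited.find(s);
        if (it != visited.end() && it->second >= budget) {
            return false; // already explored here with at least this much budget
        }
        visited[s] = budget;
        if (s == 0) { // state 0 plays the role of DEAD_STATE
            return true;
        }
        if (budget == 0) {
            return false;
        }
        for (int t : next[s]) {
            if (can_reach_dead(next, t, visited, budget - 1)) {
                return true;
            }
        }
        return false;
    }

    int main() {
        // State 1 -> {2, 0}, state 2 -> {2}: the dead state is one step away.
        std::map<int, int> seen;
        std::printf("%d\n", can_reach_dead({{}, {2, 0}, {2}}, 1, seen, 1)); // prints 1
        return 0;
    }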

View File

@ -55,7 +55,7 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
/** \brief Compute a simple hash of this raw_dfa, including its reports. */ /** \brief Compute a simple hash of this raw_dfa, including its reports. */
size_t hash_dfa(const raw_dfa &rdfa); size_t hash_dfa(const raw_dfa &rdfa);
dstate_id_t get_sds_or_proxy(const raw_dfa &raw); bool can_die_early(const raw_dfa &raw, u32 age_limit);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) {
fprintf(f, "0 [style=invis];\n"); fprintf(f, "0 [style=invis];\n");
} }
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_16); assert(nfa->type == MCCLELLAN_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
fprintf(f, "}\n"); fprintf(f, "}\n");
} }
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_8); assert(nfa->type == MCCLELLAN_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa); const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "rdfa.h" #include "rdfa.h"
#include <cstdio> #include <cstdio>
#include <string>
struct mcclellan; struct mcclellan;
struct mstate_aux; struct mstate_aux;
@ -42,8 +43,10 @@ union AccelAux;
namespace ue2 { namespace ue2 {
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file); void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file); const std::string &base);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file); void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file); void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);

View File

@ -131,7 +131,8 @@ char processReports(const struct mpv *m, u8 *reporters,
rl_count++; rl_count++;
} }
if (cb(report_offset, curr->report, ctxt) == MO_HALT_MATCHING) { if (cb(0, report_offset, curr->report, ctxt) ==
MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n"); DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
@ -180,7 +181,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters,
for (size_t i = 2; i <= length; i++) { for (size_t i = 2; i <= length; i++) {
for (u32 j = 0; j < rl_count; j++) { for (u32 j = 0; j < rl_count; j++) {
if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n"); DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING; return MO_HALT_MATCHING;
} }
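The report-processing calls above now pass an extra leading argument to the callback (0 here, as the MPV has no start-of-match offset to report). A hypothetical callback under this unified interface; the parameter meanings are inferred from the cb(0, offset, report, ctxt) call sites above, not from a definitive API description:

    #include <cstdio>
    #include "ue2common.h" // u64a, ReportID, MO_HALT_MATCHING etc. (assumed path)

    // Hypothetical sketch: assumes the first parameter is a start offset and the
    // second the end (match) offset, as the call sites above suggest.
    static int toy_match_cb(u64a start, u64a end, ReportID id, void *ctx) {
        (void)start;
        (void)ctx;
        std::printf("report %u at offset %llu\n", id, (unsigned long long)end);
        return MO_CONTINUE_MATCHING; // MO_HALT_MATCHING would stop the scan
    }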

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,7 +36,6 @@ struct NFA;
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key); void *state, u8 key);
@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
#define nfaExecMpv0_testEOD NFA_API_NO_IMPL #define nfaExecMpv0_testEOD NFA_API_NO_IMPL
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL #define nfaExecMpv0_inAccept NFA_API_NO_IMPL
#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
#define nfaExecMpv0_QR NFA_API_NO_IMPL #define nfaExecMpv0_QR NFA_API_NO_IMPL
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL

View File

@ -48,7 +48,8 @@
namespace ue2 { namespace ue2 {
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) { void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
UNUSED const std::string &base) {
} }
static really_inline static really_inline

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -32,12 +32,14 @@
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
#include <cstdio> #include <cstdio>
#include <string>
struct NFA; struct NFA;
namespace ue2 { namespace ue2 {
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file); void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file); void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -347,9 +347,9 @@ void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
} }
} }
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off, MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
unsigned max_len) : u32 off, unsigned max_length)
cr(ref_cr), offset(off), max_len(max_len) { : cr(ref_cr), offset(off), max_len(max_length) {
int accel_num = (int) MultibyteAccelInfo::MAT_MAX; int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
accels.resize(accel_num); accels.resize(accel_num);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h" #include "ue2common.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
static really_inline static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h" #include "ue2common.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/* Normal SSSE3 shufti */ /* Normal SSSE3 shufti */

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -32,7 +32,6 @@
#include "multitruffle.h" #include "multitruffle.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include "multiaccel_common.h" #include "multiaccel_common.h"

View File

@ -120,6 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
*/ */
char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExec() and just dispatches directly to the nfa implementations. It is
* intended to be used by the Tamarama engine.
*/
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
/** Return value indicating that the engine is dead. */
#define MO_DEAD 0
/** Return value indicating that the engine is alive. */ /** Return value indicating that the engine is alive. */
#define MO_ALIVE 1 #define MO_ALIVE 1
@ -155,6 +165,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
*/ */
char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExecToMatch() and just dispatches directly to the nfa
* implementations. It is intended to be used by the Tamarama engine.
*/
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);
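These _raw entry points let a container such as Tamarama drive its sub-engines without repeating the validation the public wrappers perform on externally supplied queues. A hypothetical driver loop, assuming the MO_ALIVE/MO_DEAD return convention defined above (the sub-engine array, queue array and count are illustrative names, not real Tamarama fields):

    #include "nfa_api.h" // nfaQueueExec_raw(), MO_ALIVE, MO_DEAD (assumed path)

    // Hypothetical sketch only: advance each sub-engine's queue directly.
    static char run_subengines(const struct NFA **subs, struct mq *qs,
                               u32 num_subs, s64a end) {
        char any_alive = MO_DEAD;
        for (u32 i = 0; i < num_subs; i++) {
            if (nfaQueueExec_raw(subs[i], &qs[i], end) != MO_DEAD) {
                any_alive = MO_ALIVE;
            }
        }
        return any_alive;
    }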
/** /**
* Report matches at the current queue location. * Report matches at the current queue location.
* *
@ -175,10 +192,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
*/ */
char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
/**
* Returns non-zero if the NFA is in any accept state regardless of report
* ID.
*/
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
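A small hypothetical illustration of the two accept-state queries: the existing one asks about a specific report ID, while the new one asks whether any accept state is reached at the current queue location.

    // Hypothetical sketch: `nfa` and `q` are assumed to sit at a point where
    // MO_MATCHES_PENDING was returned.
    static void inspect_accepts(const struct NFA *nfa, struct mq *q,
                                ReportID my_report) {
        if (nfaInAnyAcceptState(nfa, q)) {
            // some report (we do not yet know which) would fire here
        }
        if (nfaInAcceptState(nfa, my_report, q)) {
            // the specific report `my_report` would fire here
        }
    }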
/** /**
* Process the queued commands on the given NFA up to end or the first match. * Process the queued commands on the given NFA up to end or the first match.
* *
* Note: This version is meant for rose prefix NFAs: * Note: This version is meant for rose prefix/infix NFAs:
* - never uses a callback * - never uses a callback
* - loading of state at a point in history is not special cased * - loading of state at a point in history is not special cased
* *
@ -187,9 +210,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
* end with some variant of end. The location field of the events must * end with some variant of end. The location field of the events must
* be monotonically increasing. If not all the data was processed during * be monotonically increasing. If not all the data was processed during
* the call, the queue is updated to reflect the remaining work. * the call, the queue is updated to reflect the remaining work.
* @param report we are interested in, if set at the end of the scan returns * @param report we are interested in. If the given report will be raised at
* @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should * the end location, the function returns @ref MO_MATCHES_PENDING. If no
* be passed in. * match information is desired, MO_INVALID_IDX should be passed in.
* @return @ref MO_ALIVE if the nfa is still active with no matches pending, * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
* alive * alive
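As an aside on the semantics documented above, a hedged sketch of how a caller might act on the return value; the names are illustrative, and MO_MATCHES_PENDING/MO_ALIVE are the constants referenced in this header:

    // Hypothetical sketch: run a rose prefix/infix engine and ask about one report.
    static void check_prefix(const struct NFA *nfa, struct mq *q,
                             ReportID report) {
        char rv = nfaQueueExecRose(nfa, q, report);
        if (rv == MO_MATCHES_PENDING) {
            // `report` will be raised at the end location
        } else if (rv == MO_ALIVE) {
            // still alive, but `report` is not pending at the end location
        } else {
            // no longer alive (MO_DEAD)
        }
    }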
@ -205,6 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
* Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen) * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
* to hbuf (main buffer and history buffer). * to hbuf (main buffer and history buffer).
* *
* Note: provides the match location as the "end" offset when the callback is
* called.
*
* @param nfa engine to run * @param nfa engine to run
* @param offset base offset of buf * @param offset base offset of buf
* @param buf main buffer * @param buf main buffer
@ -229,7 +255,6 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
* (including br region) * (including br region)
* @param offset the offset to return (via the callback) with each match * @param offset the offset to return (via the callback) with each match
* @param callback the callback to call for each match raised * @param callback the callback to call for each match raised
* @param som_cb the callback to call for each match raised (Haig)
* @param context context pointer passed to each callback * @param context context pointer passed to each callback
* *
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
@ -237,8 +262,7 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
*/ */
char nfaCheckFinalState(const struct NFA *nfa, const char *state, char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context);
void *context);
/** /**
* Indicates if an engine is a zombie. * Indicates if an engine is a zombie.

View File

@ -42,6 +42,8 @@
#include "limex.h" #include "limex.h"
#include "mcclellan.h" #include "mcclellan.h"
#include "mpv.h" #include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
case dc_ltype##_NFA_##dc_subtype: \ case dc_ltype##_NFA_##dc_subtype: \
@ -52,41 +54,11 @@
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ #define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \ switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
@ -98,21 +70,22 @@
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \ default: \
assert(0); \ assert(0); \
} }
char nfaCheckFinalState(const struct NFA *nfa, const char *state, char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset, const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb, NfaCallback callback, void *context) {
void *context) {
assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
// Caller should avoid calling us if we can never produce matches. // Caller should avoid calling us if we can never produce matches.
assert(nfaAcceptsEod(nfa)); assert(nfaAcceptsEod(nfa));
DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback, DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
som_cb, context)); context));
return 0; return 0;
} }
@ -135,6 +108,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
return 0; return 0;
} }
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec_i(nfa, q, end);
}
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec2_i(nfa, q, end);
}
static really_inline static really_inline
char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
@ -258,7 +239,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
assert(q); assert(q);
assert(end >= 0); assert(end >= 0);
assert(q->context);
assert(q->state); assert(q->state);
assert(q->cur < q->end); assert(q->cur < q->end);
assert(q->end <= MAX_MQE_LEN); assert(q->end <= MAX_MQE_LEN);
@ -315,6 +295,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
return 0; return 0;
} }
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
return 0;
}
char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
DEBUG_PRINTF("nfa=%p\n", nfa); DEBUG_PRINTF("nfa=%p\n", nfa);
#ifdef DEBUG #ifdef DEBUG

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -91,12 +91,12 @@ struct mq {
* history buffer; (logically) immediately before the * history buffer; (logically) immediately before the
* main buffer */ * main buffer */
size_t hlength; /**< length of the history buffer */ size_t hlength; /**< length of the history buffer */
struct hs_scratch *scratch; /**< global scratch space */
char report_current; /**< char report_current; /**<
* report_current matches at starting offset through * report_current matches at starting offset through
* callback. If true, the queue must be located at a * callback. If true, the queue must be located at a
* point where MO_MATCHES_PENDING was returned */ * point where MO_MATCHES_PENDING was returned */
NfaCallback cb; /**< callback to trigger on matches */ NfaCallback cb; /**< callback to trigger on matches */
SomNfaCallback som_cb; /**< callback with som info; used by haig */
void *context; /**< context to pass along with a callback */ void *context; /**< context to pass along with a callback */
struct mq_item items[MAX_MQE_LEN]; /**< queue items */ struct mq_item items[MAX_MQE_LEN]; /**< queue items */
}; };

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@
#include "limex_internal.h" #include "limex_internal.h"
#include "mcclellancompile.h" #include "mcclellancompile.h"
#include "shengcompile.h"
#include "nfa_internal.h" #include "nfa_internal.h"
#include "repeat_internal.h" #include "repeat_internal.h"
#include "ue2common.h" #include "ue2common.h"
@ -78,7 +79,7 @@ struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
decltype(arg), (NFAEngineType)0>::doOp(i, arg) decltype(arg), (NFAEngineType)0>::doOp(i, arg)
} }
typedef bool (*has_accel_fn)(const NFA *nfa); typedef bool (*nfa_dispatch_fn)(const NFA *nfa);
template<typename T> template<typename T>
static static
@ -87,8 +88,37 @@ bool has_accel_limex(const NFA *nfa) {
return limex->accelCount; return limex->accelCount;
} }
template<typename T>
static static
bool has_accel_generic(const NFA *) { bool has_repeats_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
return limex->repeatCount;
}
template<typename T>
static
bool has_repeats_other_than_firsts_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
const char *ptr = (const char *)limex;
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}
return false;
}
static
bool dispatch_false(const NFA *) {
return false; return false;
} }
@ -140,72 +170,53 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
#define DO_IF_DUMP_SUPPORT(a) #define DO_IF_DUMP_SUPPORT(a)
#endif #endif
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \ #define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \ template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
static UNUSED const char *name; \ static UNUSED const char *name; \
static const NFACategory category = NFA_LIMEX; \ static const NFACategory category = NFA_LIMEX; \
typedef LimExNFA##mlt_size implNFA_t; \ typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \ typedef u_##mlt_size tableRow_t; \
static const has_accel_fn has_accel; \ static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \ static const u32 stateAlign = \
MAX(alignof(tableRow_t), alignof(RepeatControl)); \ MAX(alignof(tableRow_t), alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \ static const bool fast = mlt_size <= 64; \
}; \ }; \
const has_accel_fn NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::has_accel \ const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
= has_accel_limex<LimExNFA##mlt_size>; \ = has_accel_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \
= has_repeats_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn \
NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \
= has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \
DO_IF_DUMP_SUPPORT( \ DO_IF_DUMP_SUPPORT( \
const char *NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::name \ const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx (0-"#mlt_shift") "#mlt_size; \ = "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size##_##mlt_shift> { \ template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *ptr) { \ static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size##_##mlt_shift>((const NFA *)ptr); \ return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
} \ } \
};) };)
MAKE_LIMEX_TRAITS(32, 1) MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(32, 2) MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(32, 3) MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(32, 4) MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(32, 5) MAKE_LIMEX_TRAITS(512)
MAKE_LIMEX_TRAITS(32, 6)
MAKE_LIMEX_TRAITS(32, 7)
MAKE_LIMEX_TRAITS(128, 1)
MAKE_LIMEX_TRAITS(128, 2)
MAKE_LIMEX_TRAITS(128, 3)
MAKE_LIMEX_TRAITS(128, 4)
MAKE_LIMEX_TRAITS(128, 5)
MAKE_LIMEX_TRAITS(128, 6)
MAKE_LIMEX_TRAITS(128, 7)
MAKE_LIMEX_TRAITS(256, 1)
MAKE_LIMEX_TRAITS(256, 2)
MAKE_LIMEX_TRAITS(256, 3)
MAKE_LIMEX_TRAITS(256, 4)
MAKE_LIMEX_TRAITS(256, 5)
MAKE_LIMEX_TRAITS(256, 6)
MAKE_LIMEX_TRAITS(256, 7)
MAKE_LIMEX_TRAITS(384, 1)
MAKE_LIMEX_TRAITS(384, 2)
MAKE_LIMEX_TRAITS(384, 3)
MAKE_LIMEX_TRAITS(384, 4)
MAKE_LIMEX_TRAITS(384, 5)
MAKE_LIMEX_TRAITS(384, 6)
MAKE_LIMEX_TRAITS(384, 7)
MAKE_LIMEX_TRAITS(512, 1)
MAKE_LIMEX_TRAITS(512, 2)
MAKE_LIMEX_TRAITS(512, 3)
MAKE_LIMEX_TRAITS(512, 4)
MAKE_LIMEX_TRAITS(512, 5)
MAKE_LIMEX_TRAITS(512, 6)
MAKE_LIMEX_TRAITS(512, 7)
template<> struct NFATraits<MCCLELLAN_NFA_8> { template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name; UNUSED static const char *name;
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1; static const u32 stateAlign = 1;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_dfa; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8"; const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
#endif #endif
@ -215,9 +226,13 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2; static const u32 stateAlign = 2;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_dfa; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16"; const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
#endif #endif
@ -227,9 +242,13 @@ template<> struct NFATraits<GOUGH_NFA_8> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_dfa; const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8"; const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
#endif #endif
@ -239,9 +258,13 @@ template<> struct NFATraits<GOUGH_NFA_16> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_dfa; const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16"; const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
#endif #endif
@ -251,9 +274,13 @@ template<> struct NFATraits<MPV_NFA_0> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<MPV_NFA_0>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac"; const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
#endif #endif
@ -263,9 +290,13 @@ template<> struct NFATraits<CASTLE_NFA_0> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<CASTLE_NFA_0>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<CASTLE_NFA_0>::name = "Castle"; const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
#endif #endif
@ -275,9 +306,13 @@ template<> struct NFATraits<LBR_NFA_Dot> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<LBR_NFA_Dot>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)"; const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
#endif #endif
@ -287,9 +322,13 @@ template<> struct NFATraits<LBR_NFA_Verm> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<LBR_NFA_Verm>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)"; const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
#endif #endif
@ -299,9 +338,13 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<LBR_NFA_NVerm>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)"; const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
#endif #endif
@ -311,9 +354,13 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<LBR_NFA_Shuf>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)"; const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
#endif #endif
@ -323,13 +370,49 @@ template<> struct NFATraits<LBR_NFA_Truf> {
static const NFACategory category = NFA_OTHER; static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8; static const u32 stateAlign = 8;
static const bool fast = true; static const bool fast = true;
static const has_accel_fn has_accel; static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
}; };
const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic; const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)"; const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
#endif #endif
template<> struct NFATraits<SHENG_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
#endif
template<> struct NFATraits<TAMARAMA_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 32;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
#endif
} // namespace } // namespace
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
@ -380,42 +463,39 @@ struct is_limex {
}; };
} }
namespace {
template<NFAEngineType t>
struct has_repeats_other_than_firsts_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats_other_than_firsts;
}
};
}
bool has_bounded_repeats_other_than_firsts(const NFA &nfa) { bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
return false; has_repeats_other_than_firsts_dispatch,
&nfa)(&nfa);
}
namespace {
template<NFAEngineType t>
struct has_repeats_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats;
} }
};
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
const char *ptr = (const char *)limex;
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}
return false;
} }
bool has_bounded_repeats(const NFA &nfa) { bool has_bounded_repeats(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch,
return false; &nfa)(&nfa);
}
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
return limex->repeatCount;
} }
namespace { namespace {
template<NFAEngineType t> template<NFAEngineType t>
struct has_accel_dispatch { struct has_accel_dispatch {
static has_accel_fn call(const void *) { static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_accel; return NFATraits<t>::has_accel;
} }
}; };
@ -423,8 +503,7 @@ struct has_accel_dispatch {
bool has_accel(const NFA &nfa) { bool has_accel(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch, return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
&nfa) &nfa)(&nfa);
(&nfa);
} }
bool requires_decompress_key(const NFA &nfa) { bool requires_decompress_key(const NFA &nfa) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
#include <cstdio> #include <cstdio>
#include <string>
struct NFA; struct NFA;
@ -45,7 +46,7 @@ namespace ue2 {
* \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the * \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
* file pointed to by dotFile. * file pointed to by dotFile.
*/ */
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile); void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);
/** \brief Dump a textual representation of the NFA. */ /** \brief Dump a textual representation of the NFA. */
void nfaDumpText(const struct NFA *fact, FILE *textFile); void nfaDumpText(const struct NFA *fact, FILE *textFile);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -40,6 +40,8 @@
#include "limex.h" #include "limex.h"
#include "mcclellandump.h" #include "mcclellandump.h"
#include "mpv_dump.h" #include "mpv_dump.h"
#include "shengdump.h"
#include "tamarama_dump.h"
#ifndef DUMP_SUPPORT #ifndef DUMP_SUPPORT
#error "no dump support" #error "no dump support"
@ -57,41 +59,11 @@ namespace ue2 {
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ #define DISPATCH_BY_NFA_TYPE(dbnt_func) \
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
switch (nfa->type) { \ switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \ DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
@ -103,12 +75,15 @@ namespace ue2 {
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \ default: \
assert(0); \ assert(0); \
} }
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) { void nfaDumpDot(const struct NFA *nfa, FILE *dotFile,
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile)); const std::string &base) {
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base));
} }
void nfaDumpText(const struct NFA *nfa, FILE *txtFile) { void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {

View File

@ -51,41 +51,11 @@ extern "C"
// Common data structures for NFAs // Common data structures for NFAs
enum NFAEngineType { enum NFAEngineType {
LIMEX_NFA_32_1, LIMEX_NFA_32,
LIMEX_NFA_32_2, LIMEX_NFA_128,
LIMEX_NFA_32_3, LIMEX_NFA_256,
LIMEX_NFA_32_4, LIMEX_NFA_384,
LIMEX_NFA_32_5, LIMEX_NFA_512,
LIMEX_NFA_32_6,
LIMEX_NFA_32_7,
LIMEX_NFA_128_1,
LIMEX_NFA_128_2,
LIMEX_NFA_128_3,
LIMEX_NFA_128_4,
LIMEX_NFA_128_5,
LIMEX_NFA_128_6,
LIMEX_NFA_128_7,
LIMEX_NFA_256_1,
LIMEX_NFA_256_2,
LIMEX_NFA_256_3,
LIMEX_NFA_256_4,
LIMEX_NFA_256_5,
LIMEX_NFA_256_6,
LIMEX_NFA_256_7,
LIMEX_NFA_384_1,
LIMEX_NFA_384_2,
LIMEX_NFA_384_3,
LIMEX_NFA_384_4,
LIMEX_NFA_384_5,
LIMEX_NFA_384_6,
LIMEX_NFA_384_7,
LIMEX_NFA_512_1,
LIMEX_NFA_512_2,
LIMEX_NFA_512_3,
LIMEX_NFA_512_4,
LIMEX_NFA_512_5,
LIMEX_NFA_512_6,
LIMEX_NFA_512_7,
MCCLELLAN_NFA_8, /**< magic pseudo nfa */ MCCLELLAN_NFA_8, /**< magic pseudo nfa */
MCCLELLAN_NFA_16, /**< magic pseudo nfa */ MCCLELLAN_NFA_16, /**< magic pseudo nfa */
GOUGH_NFA_8, /**< magic pseudo nfa */ GOUGH_NFA_8, /**< magic pseudo nfa */
@ -97,6 +67,8 @@ enum NFAEngineType {
LBR_NFA_Shuf, /**< magic pseudo nfa */ LBR_NFA_Shuf, /**< magic pseudo nfa */
LBR_NFA_Truf, /**< magic pseudo nfa */ LBR_NFA_Truf, /**< magic pseudo nfa */
CASTLE_NFA_0, /**< magic pseudo nfa */ CASTLE_NFA_0, /**< magic pseudo nfa */
SHENG_NFA_0, /**< magic pseudo nfa */
TAMARAMA_NFA_0, /**< magic nfa container */
/** \brief bogus NFA - not used */ /** \brief bogus NFA - not used */
INVALID_NFA INVALID_NFA
}; };
@ -175,50 +147,27 @@ static really_inline int isGoughType(u8 t) {
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
} }
/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA. /** \brief True if the given type (from NFA::type) is a Sheng DFA. */
* */ static really_inline int isShengType(u8 t) {
return t == SHENG_NFA_0;
}
/**
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) { static really_inline int isDfaType(u8 t) {
return isMcClellanType(t) || isGoughType(t); return isMcClellanType(t) || isGoughType(t) || isShengType(t);
} }
/** \brief True if the given type (from NFA::type) is an NFA. */ /** \brief True if the given type (from NFA::type) is an NFA. */
static really_inline int isNfaType(u8 t) { static really_inline int isNfaType(u8 t) {
switch (t) { switch (t) {
case LIMEX_NFA_32_1: case LIMEX_NFA_32:
case LIMEX_NFA_32_2: case LIMEX_NFA_128:
case LIMEX_NFA_32_3: case LIMEX_NFA_256:
case LIMEX_NFA_32_4: case LIMEX_NFA_384:
case LIMEX_NFA_32_5: case LIMEX_NFA_512:
case LIMEX_NFA_32_6:
case LIMEX_NFA_32_7:
case LIMEX_NFA_128_1:
case LIMEX_NFA_128_2:
case LIMEX_NFA_128_3:
case LIMEX_NFA_128_4:
case LIMEX_NFA_128_5:
case LIMEX_NFA_128_6:
case LIMEX_NFA_128_7:
case LIMEX_NFA_256_1:
case LIMEX_NFA_256_2:
case LIMEX_NFA_256_3:
case LIMEX_NFA_256_4:
case LIMEX_NFA_256_5:
case LIMEX_NFA_256_6:
case LIMEX_NFA_256_7:
case LIMEX_NFA_384_1:
case LIMEX_NFA_384_2:
case LIMEX_NFA_384_3:
case LIMEX_NFA_384_4:
case LIMEX_NFA_384_5:
case LIMEX_NFA_384_6:
case LIMEX_NFA_384_7:
case LIMEX_NFA_512_1:
case LIMEX_NFA_512_2:
case LIMEX_NFA_512_3:
case LIMEX_NFA_512_4:
case LIMEX_NFA_512_5:
case LIMEX_NFA_512_6:
case LIMEX_NFA_512_7:
return 1; return 1;
default: default:
break; break;
@ -233,6 +182,12 @@ int isLbrType(u8 t) {
t == LBR_NFA_Shuf || t == LBR_NFA_Truf; t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
} }
/** \brief True if the given type (from NFA::type) is a container engine. */
static really_inline
int isContainerType(u8 t) {
return t == TAMARAMA_NFA_0;
}
static really_inline static really_inline
int isMultiTopType(u8 t) { int isMultiTopType(u8 t) {
return !isDfaType(t) && !isLbrType(t); return !isDfaType(t) && !isLbrType(t);


@ -37,6 +37,8 @@
#include "ue2common.h" #include "ue2common.h"
#include <string>
namespace ue2 { namespace ue2 {
/** \brief Specify the use-case for an nfa engine. */ /** \brief Specify the use-case for an nfa engine. */
@ -47,6 +49,7 @@ enum nfa_kind {
NFA_OUTFIX, //!< "outfix" nfa not triggered by external events NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
NFA_REV_PREFIX, //! reverse running prefixes (for som) NFA_REV_PREFIX, //! reverse running prefixes (for som)
NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
}; };
/** \brief True if this kind of engine is triggered by a top event. */ /** \brief True if this kind of engine is triggered by a top event. */
@ -63,8 +66,10 @@ bool is_triggered(enum nfa_kind k) {
} }
/** /**
 * \brief True if this kind of engine generates callback events when it * \brief True if this kind of engine actively checks for accept states,
 * enters accept states. * either to halt matching or to raise a callback. Only engines generated
 * with this property should call nfaQueueExec() or
 * nfaQueueExecToMatch().
*/ */
inline inline
bool generates_callbacks(enum nfa_kind k) { bool generates_callbacks(enum nfa_kind k) {
@ -73,6 +78,24 @@ bool generates_callbacks(enum nfa_kind k) {
case NFA_OUTFIX: case NFA_OUTFIX:
case NFA_OUTFIX_RAW: case NFA_OUTFIX_RAW:
case NFA_REV_PREFIX: case NFA_REV_PREFIX:
case NFA_EAGER_PREFIX:
return true;
default:
return false;
}
}
/**
* \brief True if this kind of engine has its state inspected to see if it is in
* an accept state. Engines generated with this property will commonly call
* nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
*/
inline
bool inspects_states_for_accepts(enum nfa_kind k) {
switch (k) {
case NFA_PREFIX:
case NFA_INFIX:
case NFA_EAGER_PREFIX:
return true; return true;
default: default:
return false; return false;
@ -94,6 +117,32 @@ bool has_managed_reports(enum nfa_kind k) {
} }
} }
#if defined(DEBUG) || defined(DUMP_SUPPORT)
inline
std::string to_string(nfa_kind k) {
switch (k) {
case NFA_PREFIX:
return "PREFIX";
case NFA_INFIX:
return "INFIX";
case NFA_SUFFIX:
return "SUFFIX";
case NFA_OUTFIX:
return "OUTFIX";
case NFA_REV_PREFIX:
return "REV_PREFIX";
case NFA_OUTFIX_RAW:
return "OUTFIX_RAW";
case NFA_EAGER_PREFIX:
return "EAGER_PREFIX";
}
assert(0);
return "?";
}
#endif
} // namespace ue2 } // namespace ue2
#endif #endif

src/nfa/sheng.c (new file, 676 lines)

@ -0,0 +1,676 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "sheng.h"
#include "accel.h"
#include "sheng_internal.h"
#include "nfa_api.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/join.h"
#include "util/simd_utils.h"
enum MatchMode {
CALLBACK_OUTPUT,
STOP_AT_MATCH,
NO_MATCHES
};
static really_inline
const struct sheng *get_sheng(const struct NFA *n) {
return (const struct sheng *)getImplNfa(n);
}
static really_inline
const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) {
u32 offset = sh->aux_offset - sizeof(struct NFA) +
(id & SHENG_STATE_MASK) * sizeof(struct sstate_aux);
DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA));
return (const struct sstate_aux *)((const char *) sh + offset);
}
static really_inline
const union AccelAux *get_accel(const struct sheng *sh, u8 id) {
const struct sstate_aux *saux = get_aux(sh, id);
DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
const union AccelAux *aux = (const union AccelAux *)
((const char *)sh + saux->accel - sizeof(struct NFA));
return aux;
}
static really_inline
const struct report_list *get_rl(const struct sheng *sh,
const struct sstate_aux *aux) {
DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
return (const struct report_list *)
((const char *)sh + aux->accept - sizeof(struct NFA));
}
static really_inline
const struct report_list *get_eod_rl(const struct sheng *sh,
const struct sstate_aux *aux) {
DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
return (const struct report_list *)
((const char *)sh + aux->accept_eod - sizeof(struct NFA));
}
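The accessors above resolve their tables with byte offsets stored in the sheng header, added to the engine's base pointer (the sizeof(struct NFA) subtraction compensates for the offsets being recorded relative to the enclosing NFA header). A small standalone sketch of that single-allocation, offset-addressed layout is below; the struct and helper names (toy_header, toy_aux, toy_get_aux) are hypothetical and only illustrate the pointer arithmetic.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_aux { uint32_t accept; uint32_t top; };

struct toy_header {
    uint32_t aux_offset; /* byte offset of the aux array from the blob start */
    uint32_t n_states;
};

/* one contiguous allocation: a header immediately followed by per-state aux */
static struct toy_header *toy_blob_build(uint32_t n_states) {
    size_t bytes = sizeof(struct toy_header) +
                   n_states * sizeof(struct toy_aux);
    struct toy_header *h = calloc(1, bytes);
    if (!h) {
        return NULL;
    }
    h->aux_offset = sizeof(struct toy_header);
    h->n_states = n_states;
    return h;
}

static const struct toy_aux *toy_get_aux(const struct toy_header *h,
                                         uint32_t state) {
    /* base pointer plus a stored byte offset, analogous to
     * (const char *)sh + offset in get_aux() above */
    return (const struct toy_aux *)((const char *)h + h->aux_offset) + state;
}

int main(void) {
    struct toy_header *h = toy_blob_build(4);
    if (!h) {
        return 1;
    }
    printf("aux[2] lives %zu bytes into the blob\n",
           (size_t)((const char *)toy_get_aux(h, 2) - (const char *)h));
    free(h);
    return 0;
}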
static really_inline
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
ReportID report) {
assert(sh && aux);
const struct report_list *rl = get_rl(sh, aux);
assert(ISALIGNED_N(rl, 4));
DEBUG_PRINTF("report list has %u entries\n", rl->count);
for (u32 i = 0; i < rl->count; i++) {
if (rl->report[i] == report) {
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
return 1;
}
}
return 0;
}
static really_inline
char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) {
DEBUG_PRINTF("reporting %u\n", r);
if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
static really_inline
char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
const u8 state, u64a loc, u8 *const cached_accept_state,
ReportID *const cached_accept_id, char eod) {
DEBUG_PRINTF("reporting matches @ %llu\n", loc);
if (!eod && state == *cached_accept_state) {
DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
const struct sstate_aux *aux = get_aux(sh, state);
const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux);
assert(ISALIGNED(rl));
DEBUG_PRINTF("report list has %u entries\n", rl->count);
u32 count = rl->count;
if (!eod && count == 1) {
*cached_accept_state = state;
*cached_accept_id = rl->report[0];
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
for (u32 i = 0; i < count; i++) {
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
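fireReports() above memoises the common case of an accept state with exactly one report: once seen, the (state, report) pair is cached so later accepts in the same state fire without re-walking the report list. A toy model of that caching idea, with invented names (toy_report_list, toy_fire), is sketched here.

#include <stdint.h>
#include <stdio.h>

struct toy_report_list {
    uint32_t count;
    uint32_t report[4];
};

static void toy_fire(const struct toy_report_list *rl, uint8_t state,
                     uint8_t *cached_state, uint32_t *cached_report) {
    if (state == *cached_state) {
        printf("cached report %u\n", *cached_report); /* fast path */
        return;
    }
    if (rl->count == 1) {
        *cached_state = state; /* remember the sole report for this state */
        *cached_report = rl->report[0];
    }
    for (uint32_t i = 0; i < rl->count; i++) {
        printf("report %u\n", rl->report[i]);
    }
}

int main(void) {
    struct toy_report_list rl = { 1, { 1234, 0, 0, 0 } };
    uint8_t cached_state = 0xff; /* sentinel: nothing cached yet */
    uint32_t cached_report = 0;
    toy_fire(&rl, 3, &cached_state, &cached_report); /* walks the list */
    toy_fire(&rl, 3, &cached_state, &cached_report); /* hits the cache */
    return 0;
}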
/* include Sheng function definitions */
#include "sheng_defs.h"
static really_inline
char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
u8 *const cached_accept_state, ReportID *const cached_accept_id,
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
(u64a)(end - start), offset);
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
(s64a)(end - cur_buf));
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
!!has_accel, !!single);
int rv;
/* scan and report all matches */
if (can_die) {
if (has_accel) {
rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
} else {
rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, *scanned, end,
scanned);
} else {
if (has_accel) {
rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
} else {
rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
rv = sheng_co(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, *scanned, end,
scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
return MO_ALIVE;
}
static really_inline
void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
u8 *const cached_accept_state, ReportID *const cached_accept_id,
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
(u64a)(end - start), offset);
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
(s64a)(end - cur_buf));
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
!!has_accel, !!single);
/* just scan the buffer */
if (can_die) {
if (has_accel) {
sheng4_nmda(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start, end,
scanned);
} else {
sheng4_nmd(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start, end,
scanned);
}
sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
single, offset, cur_buf, *scanned, end, scanned);
} else {
sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
single, offset, cur_buf, start, end, scanned);
sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
single, offset, cur_buf, *scanned, end, scanned);
}
}
static really_inline
char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt,
u64a offset, u8 *const cached_accept_state,
ReportID *const cached_accept_id, const u8 *cur_buf,
const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
u8 single, const u8 **scanned, u8 *state) {
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
(u64a)(end - start), offset);
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
(s64a)(end - cur_buf));
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
!!has_accel, !!single);
int rv;
/* scan until first match */
if (can_die) {
if (has_accel) {
rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
} else {
rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
/* if we stopped before we expected, we found a match */
if (rv == MO_MATCHES_PENDING) {
return MO_MATCHES_PENDING;
}
rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, *scanned,
end, scanned);
} else {
if (has_accel) {
rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
} else {
rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, start,
end, scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
/* if we stopped before we expected, we found a match */
if (rv == MO_MATCHES_PENDING) {
return MO_MATCHES_PENDING;
}
rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
cached_accept_id, single, offset, cur_buf, *scanned, end,
scanned);
}
if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
}
/* if we stopped before we expected, we found a match */
if (rv == MO_MATCHES_PENDING) {
return MO_MATCHES_PENDING;
}
return MO_ALIVE;
}
static never_inline
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
enum MatchMode mode) {
u8 state = *(u8 *)q->state;
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
u8 cached_accept_state = 0;
ReportID cached_accept_id = 0;
DEBUG_PRINTF("starting Sheng execution in state %u\n",
state & SHENG_STATE_MASK);
if (q->report_current) {
DEBUG_PRINTF("reporting current pending matches\n");
assert(sh);
q->report_current = 0;
int rv;
if (single) {
rv = fireSingleReport(q->cb, q->context, sh->report,
q_cur_offset(q));
} else {
rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
&cached_accept_state, &cached_accept_id, 0);
}
if (rv == MO_HALT_MATCHING) {
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
return MO_DEAD;
}
DEBUG_PRINTF("proceeding with matching\n");
}
assert(q_cur_type(q) == MQE_START);
s64a start = q_cur_loc(q);
DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
mode == NO_MATCHES ? "NO MATCHES" :
mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
q_cur_type(q) == MQE_START ? "START" :
q_cur_type(q) == MQE_TOP ? "TOP" :
q_cur_type(q) == MQE_END ? "END" : "???");
const u8* cur_buf;
if (start < 0) {
DEBUG_PRINTF("negative location, scanning history\n");
DEBUG_PRINTF("min location: %zd\n", -q->hlength);
cur_buf = q->history + q->hlength;
} else {
DEBUG_PRINTF("positive location, scanning buffer\n");
DEBUG_PRINTF("max location: %lli\n", b_end);
cur_buf = q->buffer;
}
/* if our queue event is past our end */
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
DEBUG_PRINTF("current location past buffer end\n");
DEBUG_PRINTF("setting q location to %llu\n", b_end);
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
q->items[q->cur].location = b_end;
return MO_ALIVE;
}
q->cur++;
s64a cur_start = start;
while (1) {
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
q_cur_type(q) == MQE_START ? "START" :
q_cur_type(q) == MQE_TOP ? "TOP" :
q_cur_type(q) == MQE_END ? "END" : "???");
s64a end = q_cur_loc(q);
if (mode != NO_MATCHES) {
end = MIN(end, b_end);
}
assert(end <= (s64a) q->length);
s64a cur_end = end;
/* we may cross the border between history and current buffer */
if (cur_start < 0) {
cur_end = MIN(0, cur_end);
}
DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
/* don't scan zero length buffer */
if (cur_start != cur_end) {
const u8 * scanned = cur_buf;
char rv;
/* if we're in nomatch mode or if we're scanning history buffer */
if (mode == NO_MATCHES ||
(cur_start < 0 && mode == CALLBACK_OUTPUT)) {
runShengNm(sh, q->cb, q->context, q->offset,
&cached_accept_state, &cached_accept_id, cur_buf,
cur_buf + cur_start, cur_buf + cur_end, can_die,
has_accel, single, &scanned, &state);
} else if (mode == CALLBACK_OUTPUT) {
rv = runShengCb(sh, q->cb, q->context, q->offset,
&cached_accept_state, &cached_accept_id,
cur_buf, cur_buf + cur_start, cur_buf + cur_end,
can_die, has_accel, single, &scanned, &state);
if (rv == MO_DEAD) {
DEBUG_PRINTF("exiting in state %u\n",
state & SHENG_STATE_MASK);
return MO_DEAD;
}
} else if (mode == STOP_AT_MATCH) {
rv = runShengSam(sh, q->cb, q->context, q->offset,
&cached_accept_state, &cached_accept_id,
cur_buf, cur_buf + cur_start,
cur_buf + cur_end, can_die, has_accel, single,
&scanned, &state);
if (rv == MO_DEAD) {
DEBUG_PRINTF("exiting in state %u\n",
state & SHENG_STATE_MASK);
return rv;
} else if (rv == MO_MATCHES_PENDING) {
assert(q->cur);
DEBUG_PRINTF("found a match, setting q location to %zd\n",
scanned - cur_buf + 1);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location =
scanned - cur_buf + 1; /* due to exiting early */
*(u8 *)q->state = state;
DEBUG_PRINTF("exiting in state %u\n",
state & SHENG_STATE_MASK);
return rv;
}
} else {
assert(!"invalid scanning mode!");
}
assert(scanned == cur_buf + cur_end);
cur_start = cur_end;
}
/* if our queue event is past our end */
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
DEBUG_PRINTF("current location past buffer end\n");
DEBUG_PRINTF("setting q location to %llu\n", b_end);
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = b_end;
*(u8 *)q->state = state;
return MO_ALIVE;
}
/* crossing over into actual buffer */
if (cur_start == 0) {
DEBUG_PRINTF("positive location, scanning buffer\n");
DEBUG_PRINTF("max offset: %lli\n", b_end);
cur_buf = q->buffer;
}
/* continue scanning the same buffer */
if (end != cur_end) {
continue;
}
switch (q_cur_type(q)) {
case MQE_END:
*(u8 *)q->state = state;
q->cur++;
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
if (can_die) {
return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
}
return MO_ALIVE;
case MQE_TOP:
if (q->offset + cur_start == 0) {
DEBUG_PRINTF("Anchored start, going to state %u\n",
sh->anchored);
state = sh->anchored;
} else {
u8 new_state = get_aux(sh, state)->top;
DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
new_state & SHENG_STATE_MASK);
state = new_state;
}
break;
default:
assert(!"invalid queue event");
break;
}
q->cur++;
}
}
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context) {
DEBUG_PRINTF("smallwrite Sheng\n");
assert(n->type == SHENG_NFA_0);
const struct sheng *sh = getImplNfa(n);
u8 state = sh->anchored;
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
u8 cached_accept_state = 0;
ReportID cached_accept_id = 0;
/* scan and report all matches */
int rv;
s64a end = length;
const u8 *scanned;
rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
&cached_accept_id, buffer, buffer, buffer + end, can_die,
has_accel, single, &scanned, &state);
if (rv == MO_DEAD) {
DEBUG_PRINTF("exiting in state %u\n",
state & SHENG_STATE_MASK);
return MO_DEAD;
}
DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);
const struct sstate_aux *aux = get_aux(sh, state);
if (aux->accept_eod) {
DEBUG_PRINTF("Reporting EOD matches\n");
fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
&cached_accept_id, 1);
}
return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE;
}
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) {
const struct sheng *sh = get_sheng(n);
char rv = runSheng(sh, q, end, CALLBACK_OUTPUT);
return rv;
}
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) {
const struct sheng *sh = get_sheng(n);
char rv = runSheng(sh, q, end, STOP_AT_MATCH);
return rv;
}
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) {
assert(q_cur_type(q) == MQE_START);
const struct sheng *sh = get_sheng(n);
char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES);
if (rv && nfaExecSheng0_inAccept(n, report, q)) {
return MO_MATCHES_PENDING;
}
return rv;
}
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);
const struct sheng *sh = get_sheng(n);
u8 s = *(const u8 *)q->state;
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
const struct sstate_aux *aux = get_aux(sh, s);
if (!aux->accept) {
return 0;
}
return shengHasAccept(sh, aux, report);
}
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
const struct sheng *sh = get_sheng(n);
u8 s = *(const u8 *)q->state;
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
const struct sstate_aux *aux = get_aux(sh, s);
return !!aux->accept;
}
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
NfaCallback cb, void *ctxt) {
assert(nfa);
const struct sheng *sh = get_sheng(nfa);
u8 s = *(const u8 *)state;
DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
const struct sstate_aux *aux = get_aux(sh, s);
if (!aux->accept_eod) {
return MO_CONTINUE_MATCHING;
}
return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1);
}
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
NfaCallback cb = q->cb;
void *ctxt = q->context;
u8 s = *(u8 *)q->state;
const struct sstate_aux *aux = get_aux(sh, s);
u64a offset = q_cur_offset(q);
u8 cached_state_id = 0;
ReportID cached_report_id = 0;
assert(q_cur_type(q) == MQE_START);
if (aux->accept) {
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
fireSingleReport(cb, ctxt, sh->report, offset);
} else {
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
&cached_report_id, 1);
}
}
return 0;
}
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct sheng *sh = get_sheng(nfa);
u8 *s = (u8 *)state;
*s = offset ? sh->floating : sh->anchored;
return !(*s & SHENG_STATE_DEAD);
}
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) {
assert(nfa->scratchStateSize == 1);
/* starting in floating state */
const struct sheng *sh = get_sheng(nfa);
*(u8 *)q->state = sh->floating;
DEBUG_PRINTF("starting in floating state\n");
return 0;
}
char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa,
const struct mq *q, UNUSED s64a loc) {
void *dest = q->streamState;
const void *src = q->state;
assert(nfa->scratchStateSize == 1);
assert(nfa->streamStateSize == 1);
*(u8 *)dest = *(const u8 *)src;
return 0;
}
char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest,
const void *src, UNUSED u64a offset,
UNUSED u8 key) {
assert(nfa->scratchStateSize == 1);
assert(nfa->streamStateSize == 1);
*(u8 *)dest = *(const u8 *)src;
return 0;
}

src/nfa/sheng.h (new file, 61 lines)

@ -0,0 +1,61 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENG_H_
#define SHENG_H_
#include "callback.h"
#include "ue2common.h"
struct mq;
struct NFA;
#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q);
char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, u8 key);
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
#endif /* SHENG_H_ */

src/nfa/sheng_defs.h (new file, 353 lines)

@ -0,0 +1,353 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENG_DEFS_H
#define SHENG_DEFS_H
/*
* Utility functions used by the various versions of the Sheng engine
*/
static really_inline
u8 isDeadState(const u8 a) {
return a & SHENG_STATE_DEAD;
}
static really_inline
u8 isAcceptState(const u8 a) {
return a & SHENG_STATE_ACCEPT;
}
static really_inline
u8 isAccelState(const u8 a) {
return a & SHENG_STATE_ACCEL;
}
static really_inline
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
}
/* these functions should be optimized out, used by NO_MATCHES mode */
static really_inline
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
UNUSED const u8 d) {
return 0;
}
static really_inline
u8 dummyFunc(UNUSED const u8 a) {
return 0;
}
/*
* Sheng function definitions for single byte loops
*/
/* callback output, can die */
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* callback output, can't die */
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can die */
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can't die */
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* no match, can die */
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* no match, can't die */
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/*
* Sheng function definitions for 4-byte loops
*/
/* callback output, can die, accelerated */
#define SHENG_IMPL sheng4_coda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* callback output, can die, not accelerated */
#define SHENG_IMPL sheng4_cod
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* callback output, can't die, accelerated */
#define SHENG_IMPL sheng4_coa
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* callback output, can't die, not accelerated */
#define SHENG_IMPL sheng4_co
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can die, accelerated */
#define SHENG_IMPL sheng4_samda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can die, not accelerated */
#define SHENG_IMPL sheng4_samd
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can't die, accelerated */
#define SHENG_IMPL sheng4_sama
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* stop at match, can't die, not accelerated */
#define SHENG_IMPL sheng4_sam
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* no-match variants use a dummy interesting func, and the die/accel checks are outer */
/* no match, can die, accelerated */
#define SHENG_IMPL sheng4_nmda
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* no match, can die, not accelerated */
#define SHENG_IMPL sheng4_nmd
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/* there is no performance benefit in accelerating a no-match case that can't
* die */
/* no match, can't die */
#define SHENG_IMPL sheng4_nm
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#endif // SHENG_DEFS_H
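This header stamps out every scanning variant by redefining a handful of macros (SHENG_IMPL, DEAD_FUNC, ACCEPT_FUNC, STOP_AT_MATCH, and so on) and re-including the implementation header each time, so each specialised function is compiled with only the checks it needs and the dummy predicates optimised away. The single-file sketch below shows the same idea with a generator macro rather than a re-included header; all names (TOY_DEFINE_SCAN, toyIsDead, toyNever, toy_scan_*) are hypothetical.

#include <stddef.h>
#include <stdio.h>

#define TOY_DEAD 0x20

static int toyIsDead(unsigned s) { return s & TOY_DEAD; }
static int toyNever(unsigned s) { (void)s; return 0; } /* branch folds away */

/* one specialised loop per (name, dead-check) pair; passing the dummy
 * predicate lets the compiler delete the check, as dummyFunc does above */
#define TOY_DEFINE_SCAN(NAME, DEAD_FUNC)                                      \
    static size_t NAME(const unsigned char *buf, size_t len,                  \
                       unsigned *state) {                                     \
        size_t i;                                                             \
        for (i = 0; i < len; i++) {                                           \
            *state = (*state + buf[i]) & 0x3f; /* stand-in transition */      \
            if (DEAD_FUNC(*state)) {                                          \
                break;                                                        \
            }                                                                 \
        }                                                                     \
        return i;                                                             \
    }

TOY_DEFINE_SCAN(toy_scan_can_die, toyIsDead) /* like sheng_cod */
TOY_DEFINE_SCAN(toy_scan_no_die, toyNever)   /* like sheng_co  */

int main(void) {
    unsigned s = 0;
    const unsigned char buf[] = "abcdef";
    printf("scanned %zu bytes\n", toy_scan_no_die(buf, 6, &s));
    s = 0;
    printf("scanned %zu bytes\n", toy_scan_can_die(buf, 6, &s));
    return 0;
}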

src/nfa/sheng_impl.h (new file, 97 lines)

@ -0,0 +1,97 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* In order to use this macro, the following things need to be defined:
*
* - SHENG_IMPL (name of the Sheng implementation function)
* - DEAD_FUNC (name of the function checking for dead states)
* - ACCEPT_FUNC (name of the function checking for accept state)
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
*/
/* byte-by-byte version. we don't do byte-by-byte death checking as it's
* pretty pointless to do it over a buffer that's at most 3 bytes long */
static really_inline
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
u8 *const cached_accept_state, ReportID *const cached_accept_id,
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
const u8 *end, const u8 **scan_end) {
DEBUG_PRINTF("Starting DFA execution in state %u\n",
*state & SHENG_STATE_MASK);
const u8 *cur_buf = start;
if (DEAD_FUNC(*state)) {
DEBUG_PRINTF("Dead on arrival\n");
*scan_end = end;
return MO_CONTINUE_MATCHING;
}
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
m128 cur_state = set16x8(*state);
const m128 *masks = s->shuffle_masks;
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
const m128 shuffle_mask = masks[c];
cur_state = pshufb(shuffle_mask, cur_state);
const u8 tmp = movd(cur_state);
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4,
tmp & 0xF);
if (unlikely(ACCEPT_FUNC(tmp))) {
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK);
u64a match_offset = base_offset + (cur_buf - buf) + 1;
DEBUG_PRINTF("Match @ %llu\n", match_offset);
if (STOP_AT_MATCH) {
DEBUG_PRINTF("Stopping at match @ %lli\n",
(u64a)(cur_buf - start));
*state = tmp;
*scan_end = cur_buf;
return MO_MATCHES_PENDING;
}
if (single) {
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else {
if (fireReports(s, cb, ctxt, tmp, match_offset,
cached_accept_state, cached_accept_id,
0) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
cur_buf++;
}
*state = movd(cur_state);
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
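The loop above keeps the current state broadcast across a 128-bit register and performs one PSHUFB per input byte: the shuffle mask selected by that byte is the state-to-state transition row, so the shuffled result holds the next state in every lane, and movd pulls it back out as a scalar. A minimal standalone model of that step, for a toy three-state DFA that matches the literal "ab", is shown below; everything here (toy_masks, toy_build, toy_scan) is illustrative and unrelated to Hyperscan's real data layout, and it assumes SSSE3 (compile with e.g. -mssse3).

#include <emmintrin.h>
#include <tmmintrin.h>
#include <stdio.h>
#include <string.h>

static __m128i toy_masks[256]; /* one 16-byte transition row per input byte */

/* toy DFA over states 0..2: state 2 is reached after seeing "ab" */
static void toy_build(void) {
    unsigned char row[256][16];
    memset(row, 0, sizeof(row)); /* unused lanes and misses go to state 0 */
    for (int c = 0; c < 256; c++) {
        row[c][0] = (c == 'a') ? 1 : 0;                  /* from state 0 */
        row[c][1] = (c == 'b') ? 2 : (c == 'a') ? 1 : 0; /* from state 1 */
        row[c][2] = 2;                                   /* accept is sticky */
        toy_masks[c] = _mm_loadu_si128((const __m128i *)row[c]);
    }
}

/* one PSHUFB per byte: every lane of cur holds the current state, so
 * shuffling the row for byte c by cur yields the next state in every lane,
 * mirroring cur_state = pshufb(masks[c], cur_state) above */
static int toy_scan(const unsigned char *buf, size_t len) {
    __m128i cur = _mm_set1_epi8(0); /* start state broadcast to all lanes */
    for (size_t i = 0; i < len; i++) {
        cur = _mm_shuffle_epi8(toy_masks[buf[i]], cur);
        int s = _mm_cvtsi128_si32(cur) & 0xff;
        if (s == 2) {
            return (int)(i + 1); /* offset just past the match */
        }
    }
    return -1;
}

int main(void) {
    toy_build();
    const char *text = "xxabyy";
    printf("match ends at offset %d\n",
           toy_scan((const unsigned char *)text, strlen(text)));
    return 0;
}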

src/nfa/sheng_impl4.h (new file, 284 lines)

@ -0,0 +1,284 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* In order to use this macro, the following things need to be defined:
*
* - SHENG_IMPL (name of the Sheng implementation function)
* - INTERESTING_FUNC (name of the function checking for accept, accel or dead
* states)
* - INNER_DEAD_FUNC (name of the inner function checking for dead states)
* - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
* - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
* - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
* - ACCEPT_FUNC (name of the function checking for accept state)
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
*/
/* unrolled 4-byte-at-a-time version.
*
 * we put innerDeadFunc inside the interestingFunc() block so that we don't
 * pay for dead-state checking. However, if interestingFunc is a dummy,
 * innerDeadFunc gets lost with it, so we need an additional check outside the
 * interestingFunc() branch - it's normally a dummy so we don't pay for it, but
 * when interestingFunc is a dummy, outerDeadFunc should be set if we want to
 * check for dead states.
 *
 * Also, deadFunc only checks the last known state, but since we can never get
 * out of the dead state and we don't really care where we died, this is not a
 * problem.
*/
static really_inline
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
u8 *const cached_accept_state, ReportID *const cached_accept_id,
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
const u8 *end, const u8 **scan_end) {
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
*state & SHENG_STATE_MASK);
const u8 *cur_buf = start;
const u8 *min_accel_dist = start;
base_offset++;
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
DEBUG_PRINTF("Accel state reached @ 0\n");
const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
const u8 *new_offset = run_accel(aaux, cur_buf, end);
if (new_offset < cur_buf + BAD_ACCEL_DIST) {
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
} else {
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
}
DEBUG_PRINTF("Next accel chance: %llu\n",
(u64a)(min_accel_dist - start));
DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
cur_buf = new_offset;
DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
}
if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
DEBUG_PRINTF("Dead on arrival\n");
*scan_end = end;
return MO_CONTINUE_MATCHING;
}
m128 cur_state = set16x8(*state);
const m128 *masks = s->shuffle_masks;
while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf;
const u8 *b2 = cur_buf + 1;
const u8 *b3 = cur_buf + 2;
const u8 *b4 = cur_buf + 3;
const u8 c1 = *b1;
const u8 c2 = *b2;
const u8 c3 = *b3;
const u8 c4 = *b4;
const m128 shuffle_mask1 = masks[c1];
cur_state = pshufb(shuffle_mask1, cur_state);
const u8 a1 = movd(cur_state);
const m128 shuffle_mask2 = masks[c2];
cur_state = pshufb(shuffle_mask2, cur_state);
const u8 a2 = movd(cur_state);
const m128 shuffle_mask3 = masks[c3];
cur_state = pshufb(shuffle_mask3, cur_state);
const u8 a3 = movd(cur_state);
const m128 shuffle_mask4 = masks[c4];
cur_state = pshufb(shuffle_mask4, cur_state);
const u8 a4 = movd(cur_state);
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
if (ACCEPT_FUNC(a1)) {
u64a match_offset = base_offset + b1 - buf;
DEBUG_PRINTF("Accept state %u reached\n",
a1 & SHENG_STATE_MASK);
DEBUG_PRINTF("Match @ %llu\n", match_offset);
if (STOP_AT_MATCH) {
DEBUG_PRINTF("Stopping at match @ %lli\n",
(s64a)(b1 - start));
*scan_end = b1;
*state = a1;
return MO_MATCHES_PENDING;
}
if (single) {
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else {
if (fireReports(s, cb, ctxt, a1, match_offset,
cached_accept_state, cached_accept_id,
0) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
if (ACCEPT_FUNC(a2)) {
u64a match_offset = base_offset + b2 - buf;
DEBUG_PRINTF("Accept state %u reached\n",
a2 & SHENG_STATE_MASK);
DEBUG_PRINTF("Match @ %llu\n", match_offset);
if (STOP_AT_MATCH) {
DEBUG_PRINTF("Stopping at match @ %lli\n",
(s64a)(b2 - start));
*scan_end = b2;
*state = a2;
return MO_MATCHES_PENDING;
}
if (single) {
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else {
if (fireReports(s, cb, ctxt, a2, match_offset,
cached_accept_state, cached_accept_id,
0) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
if (ACCEPT_FUNC(a3)) {
u64a match_offset = base_offset + b3 - buf;
DEBUG_PRINTF("Accept state %u reached\n",
a3 & SHENG_STATE_MASK);
DEBUG_PRINTF("Match @ %llu\n", match_offset);
if (STOP_AT_MATCH) {
DEBUG_PRINTF("Stopping at match @ %lli\n",
(s64a)(b3 - start));
*scan_end = b3;
*state = a3;
return MO_MATCHES_PENDING;
}
if (single) {
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else {
if (fireReports(s, cb, ctxt, a3, match_offset,
cached_accept_state, cached_accept_id,
0) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
if (ACCEPT_FUNC(a4)) {
u64a match_offset = base_offset + b4 - buf;
DEBUG_PRINTF("Accept state %u reached\n",
a4 & SHENG_STATE_MASK);
DEBUG_PRINTF("Match @ %llu\n", match_offset);
if (STOP_AT_MATCH) {
DEBUG_PRINTF("Stopping at match @ %lli\n",
(s64a)(b4 - start));
*scan_end = b4;
*state = a4;
return MO_MATCHES_PENDING;
}
if (single) {
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else {
if (fireReports(s, cb, ctxt, a4, match_offset,
cached_accept_state, cached_accept_id,
0) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
if (INNER_DEAD_FUNC(a4)) {
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
*scan_end = end;
*state = a4;
return MO_CONTINUE_MATCHING;
}
if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
const union AccelAux *aaux =
get_accel(s, a4 & SHENG_STATE_MASK);
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
} else {
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
}
DEBUG_PRINTF("Next accel chance: %llu\n",
(u64a)(min_accel_dist - start));
DEBUG_PRINTF("Accel scanned %llu bytes\n",
(u64a)(new_offset - cur_buf - 4));
cur_buf = new_offset;
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
continue;
}
}
if (OUTER_DEAD_FUNC(a4)) {
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
*scan_end = end;
*state = a4;
return MO_CONTINUE_MATCHING;
}
if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
} else {
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
}
DEBUG_PRINTF("Next accel chance: %llu\n",
(u64a)(min_accel_dist - start));
DEBUG_PRINTF("Accel scanned %llu bytes\n",
(u64a)(new_offset - cur_buf - 4));
cur_buf = new_offset;
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
continue;
}
cur_buf += 4;
}
*state = movd(cur_state);
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
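The unrolled loop above takes at most one branch per four input bytes: each computed next state carries its accept/dead/accel flags in its high nibble, so hasInterestingStates() simply ORs the four states and masks with SHENG_STATE_FLAG_MASK, and only when that test fires are the four states inspected individually. A tiny self-contained illustration of that trick follows; the flag values mirror sheng_internal.h, while the toy_* names are invented.

#include <stdio.h>

#define TOY_FLAG_ACCEPT 0x10
#define TOY_FLAG_DEAD   0x20
#define TOY_FLAG_ACCEL  0x40
#define TOY_FLAG_MASK   0x70

static int toy_block_interesting(unsigned a1, unsigned a2, unsigned a3,
                                 unsigned a4) {
    return (a1 | a2 | a3 | a4) & TOY_FLAG_MASK; /* one test for four bytes */
}

int main(void) {
    /* four successive next states; only the third carries the accept flag */
    unsigned s[4] = { 0x03, 0x05, TOY_FLAG_ACCEPT | 0x02, 0x04 };
    if (toy_block_interesting(s[0], s[1], s[2], s[3])) {
        /* slow path: inspect each of the four states individually */
        for (int i = 0; i < 4; i++) {
            if (s[i] & TOY_FLAG_ACCEPT) {
                printf("accept at position %d in the block\n", i);
            }
        }
    }
    return 0;
}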


@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2016, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -26,44 +26,45 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
*/ */
/** \file #ifndef SHENG_INTERNAL_H_
* \brief LimEx NFA: 512-bit SIMD runtime implementations. #define SHENG_INTERNAL_H_
*/
//#define DEBUG_INPUT
//#define DEBUG_EXCEPTIONS
#include "limex.h"
#include "accel.h"
#include "limex_internal.h"
#include "nfa_internal.h"
#include "ue2common.h" #include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
// Common code #define SHENG_STATE_ACCEPT 0x10
#include "limex_runtime.h" #define SHENG_STATE_DEAD 0x20
#define SHENG_STATE_ACCEL 0x40
#define SHENG_STATE_MASK 0xF
#define SHENG_STATE_FLAG_MASK 0x70
#define SIZE 512 #define SHENG_FLAG_SINGLE_REPORT 0x1
#define STATE_T m512 #define SHENG_FLAG_CAN_DIE 0x2
#include "limex_exceptional.h" #define SHENG_FLAG_HAS_ACCEL 0x4
#define SIZE 512 struct report_list {
#define STATE_T m512 u32 count;
#include "limex_state_impl.h" ReportID report[];
};
#define SIZE 512 struct sstate_aux {
#define STATE_T m512 u32 accept;
#define INLINE_ATTR really_inline u32 accept_eod;
#include "limex_common_impl.h" u32 accel;
u32 top;
};
#define SIZE 512 struct sheng {
#define STATE_T m512 m128 shuffle_masks[256];
#define SHIFT 6 u32 length;
#include "limex_runtime_impl.h" u32 aux_offset;
u32 report_offset;
u32 accel_offset;
u8 n_states;
u8 anchored;
u8 floating;
u8 flags;
ReportID report;
};
#define SIZE 512 #endif /* SHENG_INTERNAL_H_ */
#define STATE_T m512
#define SHIFT 7
#include "limex_runtime_impl.h"

src/nfa/shengcompile.cpp (new file, 541 lines)

@ -0,0 +1,541 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "shengcompile.h"
#include "accel.h"
#include "accelcompile.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/unaligned.h"
#include "grey.h"
#include "nfa_internal.h"
#include "sheng_internal.h"
#include "ue2common.h"
#include "util/compile_context.h"
#include "util/make_unique.h"
#include "util/verify_types.h"
#include "util/simd_utils.h"
#include <map>
#include <vector>
#include <sstream>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using boost::adaptors::map_keys;
namespace ue2 {
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than for the NFA, as we don't have a budget and the NFA
 * cheats on stop characters for sets of states */
#define ACCEL_DFA_MAX_STOP_CHAR 160
/** Maximum tolerated number of escape characters from an sds accel state.
 * Larger than for normal states, as accelerating sds is important. Matches the
 * NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
struct dfa_info {
accel_dfa_build_strat &strat;
raw_dfa &raw;
vector<dstate> &states;
dstate &floating;
dstate &anchored;
bool can_die;
explicit dfa_info(accel_dfa_build_strat &s)
: strat(s), raw(strat.get_raw()), states(raw.states),
floating(states[raw.start_floating]),
anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
// returns adjusted size
size_t size() const {
return can_die ? states.size() : states.size() - 1;
}
// expects adjusted index
dstate &operator[](dstate_id_t idx) {
return states[raw_id(idx)];
}
dstate &top(dstate_id_t idx) {
if (isDead(idx)) {
return floating;
}
return next(idx, TOP);
}
dstate &next(dstate_id_t idx, u16 chr) {
auto &src = (*this)[idx];
auto next_id = src.next[raw.alpha_remap[chr]];
return states[next_id];
}
// get original idx from adjusted idx
dstate_id_t raw_id(dstate_id_t idx) {
assert(idx < size());
// if DFA can't die, shift all indices left by 1
return can_die ? idx : idx + 1;
}
bool isDead(dstate &state) {
return raw_id(state.impl_id) == DEAD_STATE;
}
bool isDead(dstate_id_t idx) {
return raw_id(idx) == DEAD_STATE;
}
private:
static bool dfaCanDie(raw_dfa &rdfa) {
for (unsigned chr = 0; chr < 256; chr++) {
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
if (succ == DEAD_STATE) {
return true;
}
}
}
return false;
}
};
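/* Worked example of the index adjustment above (derived from raw_id() and
 * size()): for a five-state raw DFA that cannot die, raw state 0 (DEAD_STATE)
 * is dropped, so adjusted ids 0..3 map to raw ids 1..4 and size() reports 4;
 * if the DFA can die, the mapping is the identity and size() reports 5. */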
namespace {
struct raw_report_list {
flat_set<ReportID> reports;
raw_report_list(const flat_set<ReportID> &reports_in,
const ReportManager &rm, bool do_remap) {
if (do_remap) {
for (auto &id : reports_in) {
reports.insert(rm.getProgramOffset(id));
}
} else {
reports = reports_in;
}
}
bool operator<(const raw_report_list &b) const {
return reports < b.reports;
}
};
struct raw_report_info_impl : public raw_report_info {
vector<raw_report_list> rl;
u32 getReportListSize() const override;
size_t size() const override;
void fillReportLists(NFA *n, size_t base_offset,
std::vector<u32> &ro /* out */) const override;
};
}
u32 raw_report_info_impl::getReportListSize() const {
u32 rv = 0;
for (const auto &reps : rl) {
rv += sizeof(report_list);
rv += sizeof(ReportID) * reps.reports.size();
}
return rv;
}
size_t raw_report_info_impl::size() const {
return rl.size();
}
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
vector<u32> &ro) const {
for (const auto &reps : rl) {
ro.push_back(base_offset);
report_list *p = (report_list *)((char *)n + base_offset);
u32 i = 0;
for (const ReportID report : reps.reports) {
p->report[i++] = report;
}
p->count = verify_u32(reps.reports.size());
base_offset += sizeof(report_list);
base_offset += sizeof(ReportID) * reps.reports.size();
}
}
unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
vector<u32> &reports,
vector<u32> &reports_eod,
u8 *isSingleReport,
ReportID *arbReport) const {
DEBUG_PRINTF("gathering reports\n");
const bool remap_reports = has_managed_reports(rdfa.kind);
auto ri = ue2::make_unique<raw_report_info_impl>();
map<raw_report_list, u32> rev;
for (const dstate &s : rdfa.states) {
if (s.reports.empty()) {
reports.push_back(MO_INVALID_IDX);
continue;
}
raw_report_list rrl(s.reports, rm, remap_reports);
DEBUG_PRINTF("non empty r\n");
if (rev.find(rrl) != rev.end()) {
reports.push_back(rev[rrl]);
} else {
DEBUG_PRINTF("adding to rl %zu\n", ri->size());
rev[rrl] = ri->size();
reports.push_back(ri->size());
ri->rl.push_back(rrl);
}
}
for (const dstate &s : rdfa.states) {
if (s.reports_eod.empty()) {
reports_eod.push_back(MO_INVALID_IDX);
continue;
}
DEBUG_PRINTF("non empty r eod\n");
raw_report_list rrl(s.reports_eod, rm, remap_reports);
if (rev.find(rrl) != rev.end()) {
reports_eod.push_back(rev[rrl]);
continue;
}
DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
rev[rrl] = ri->size();
reports_eod.push_back(ri->size());
ri->rl.push_back(rrl);
}
assert(!ri->rl.empty()); /* all components should be able to generate
reports */
if (!ri->rl.empty()) {
*arbReport = *ri->rl.begin()->reports.begin();
} else {
*arbReport = 0;
}
/* if we have only a single report id generated from all accepts (not eod)
* we can take some short cuts */
set<ReportID> reps;
for (u32 rl_index : reports) {
if (rl_index == MO_INVALID_IDX) {
continue;
}
assert(rl_index < ri->size());
insert(&reps, ri->rl[rl_index].reports);
}
if (reps.size() == 1) {
*isSingleReport = 1;
*arbReport = *reps.begin();
DEBUG_PRINTF("single -- %u\n", *arbReport);
} else {
*isSingleReport = 0;
}
return move(ri);
}
u32 sheng_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
u32 sheng_build_strat::max_stop_char() const {
return ACCEL_DFA_MAX_STOP_CHAR;
}
u32 sheng_build_strat::max_floating_stop_char() const {
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
size_t sheng_build_strat::accelSize() const {
return sizeof(AccelAux);
}
#ifdef DEBUG
static really_inline
void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
stringstream o;
for (unsigned i = 0; i < sz; i++) {
o.width(2);
o << (buf[i] & SHENG_STATE_MASK) << " ";
}
DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
}
#endif
static
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
set<dstate_id_t> *accel_states) {
for (dstate_id_t i : accel_escape_info | map_keys) {
accel_states->insert(i);
}
}
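/* getShengState() below packs a state into the single byte used by the Sheng
 * runtime: the low bits (SHENG_STATE_MASK) hold the implementation state id,
 * while the remaining bits carry the accept/dead/accel flags, so one shuffle
 * lookup yields both the successor state and its properties. */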
static
u8 getShengState(dstate &state, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) {
u8 s = state.impl_id;
if (!state.reports.empty()) {
s |= SHENG_STATE_ACCEPT;
}
if (info.isDead(state)) {
s |= SHENG_STATE_DEAD;
}
if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
s |= SHENG_STATE_ACCEL;
}
return s;
}
static
void fillAccelAux(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) {
DEBUG_PRINTF("Filling accel aux structures\n");
sheng *s = (sheng *)getMutableImplNfa(n);
u32 offset = s->accel_offset;
for (dstate_id_t i = 0; i < info.size(); i++) {
dstate_id_t state_id = info.raw_id(i);
if (accelInfo.find(state_id) != accelInfo.end()) {
s->flags |= SHENG_FLAG_HAS_ACCEL;
AccelAux *aux = (AccelAux *)((char *)n + offset);
info.strat.buildAccel(state_id, accelInfo[state_id], aux);
sstate_aux *saux =
(sstate_aux *)((char *)n + s->aux_offset) + state_id;
saux->accel = offset;
DEBUG_PRINTF("Accel offset: %u\n", offset);
offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
}
}
}
static
void populateBasicInfo(struct NFA *n, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
u32 report_offset, u32 accel_offset, u32 total_size,
u32 dfa_size) {
n->length = total_size;
n->scratchStateSize = 1;
n->streamStateSize = 1;
n->nPositions = info.size();
n->type = SHENG_NFA_0;
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
sheng *s = (sheng *)getMutableImplNfa(n);
s->aux_offset = aux_offset;
s->report_offset = report_offset;
s->accel_offset = accel_offset;
s->n_states = info.size();
s->length = dfa_size;
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
s->anchored = getShengState(info.anchored, info, accelInfo);
s->floating = getShengState(info.floating, info, accelInfo);
}
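/* Note on the two size-1 fields set above: the entire Sheng state is a single
 * packed byte (state id plus flags), so both the scratch state and the stream
 * state occupy one byte each. */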
static
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
map<dstate_id_t, AccelScheme> &accelInfo) {
sheng *s = (sheng *)getMutableImplNfa(n);
u32 aux_base = s->aux_offset;
DEBUG_PRINTF("Filling tops for state %u\n", id);
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
(char *)aux - (char *)n);
/* we could conceivably end up in an accept/dead state on a top event,
* so mark top as accept/dead state if it indeed is.
*/
auto &top_state = info.top(id);
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
aux->top = getShengState(top_state, info, accelInfo);
}
static
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
vector<u32> &reports_eod, vector<u32> &report_offsets) {
sheng *s = (sheng *)getMutableImplNfa(n);
u32 aux_base = s->aux_offset;
auto raw_id = info.raw_id(id);
auto &state = info[id];
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
(char *)aux - (char *)n);
aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]];
aux->accept_eod =
state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]];
DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
}
static
void fillSingleReport(NFA *n, ReportID r_id) {
sheng *s = (sheng *)getMutableImplNfa(n);
DEBUG_PRINTF("Single report ID: %u\n", r_id);
s->report = r_id;
s->flags |= SHENG_FLAG_SINGLE_REPORT;
}
static
void createShuffleMasks(sheng *s, dfa_info &info,
map<dstate_id_t, AccelScheme> &accelInfo) {
for (u16 chr = 0; chr < 256; chr++) {
u8 buf[16] = {0};
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
auto &succ_state = info.next(idx, chr);
buf[idx] = getShengState(succ_state, info, accelInfo);
}
#ifdef DEBUG
dumpShuffleMask(chr, buf, sizeof(buf));
#endif
m128 mask = loadu128(buf);
s->shuffle_masks[chr] = mask;
}
}
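/* For orientation only -- a sketch of how the masks built above are meant to
 * be consumed, not the actual Sheng runtime. Each of the 256 masks maps
 * "current state byte" to "next state byte" in a single shuffle, roughly:
 *
 *     m128 cur = set16x8(state);                  // broadcast current state
 *     cur = pshufb(s->shuffle_masks[c], cur);     // transition on byte c
 *     state = movd(cur) & SHENG_STATE_MASK;       // strip flag bits
 *
 * which is why each mask entry already contains the flag bits produced by
 * getShengState(). */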
bool has_accel_sheng(const NFA *nfa) {
const sheng *s = (const sheng *)getImplNfa(nfa);
return s->flags & SHENG_FLAG_HAS_ACCEL;
}
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
const CompileContext &cc,
const ReportManager &rm,
set<dstate_id_t> *accel_states) {
if (!cc.grey.allowSheng) {
DEBUG_PRINTF("Sheng is not allowed!\n");
return nullptr;
}
sheng_build_strat strat(raw, rm);
dfa_info info(strat);
DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
raw.start_anchored, raw.start_floating);
DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
info.can_die ? "can" : "cannot", info.size());
if (info.size() > 16) {
DEBUG_PRINTF("Too many states\n");
return nullptr;
}
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
* mode with our semantics */
raw.stripExtraEodReports();
}
auto accelInfo = strat.getAccelInfo(cc.grey);
// set impl_id of each dfa state
for (dstate_id_t i = 0; i < info.size(); i++) {
info[i].impl_id = i;
}
DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
info.anchored.impl_id, info.floating.impl_id);
u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
vector<u32> reports, eod_reports, report_offsets;
u8 isSingle = 0;
ReportID single_report = 0;
auto ri =
strat.gatherReports(reports, eod_reports, &isSingle, &single_report);
u32 total_aux = sizeof(sstate_aux) * info.size();
u32 total_accel = strat.accelSize() * accelInfo.size();
u32 total_reports = ri->getReportListSize();
u32 reports_offset = nfa_size + total_aux;
u32 accel_offset =
ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);
DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
nfa_size, total_aux, total_reports, total_accel, total_size);
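/* Resulting layout, in offset order (a summary of the computation above):
 * NFA header + sheng struct, then one sstate_aux per state, then the report
 * lists, then the AccelAux entries, with the total size rounded up to a
 * 64-byte boundary. */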
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset,
accel_offset, total_size, total_size - sizeof(NFA));
DEBUG_PRINTF("Setting up aux and report structures\n");
ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
fillTops(nfa.get(), info, idx, accelInfo);
fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets);
}
if (isSingle) {
fillSingleReport(nfa.get(), single_report);
}
fillAccelAux(nfa.get(), info, accelInfo);
if (accel_states) {
fillAccelOut(accelInfo, accel_states);
}
createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo);
return nfa;
}
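/* Note for callers (an observation, not from the original sources): a nullptr
 * return simply means Sheng was not suitable here (disabled in the grey box,
 * or more than 16 effective states), and the caller is expected to fall back
 * to another DFA implementation. */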
} // namespace ue2

80
src/nfa/shengcompile.h Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENGCOMPILE_H_
#define SHENGCOMPILE_H_
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
struct NFA;
namespace ue2 {
class ReportManager;
struct CompileContext;
struct raw_dfa;
class sheng_build_strat : public accel_dfa_build_strat {
public:
sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
raw_dfa &get_raw() const override { return rdfa; }
std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override;
size_t accelSize(void) const override;
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;
private:
raw_dfa &rdfa;
};
aligned_unique_ptr<NFA>
shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm,
std::set<dstate_id_t> *accel_states = nullptr);
struct sheng_escape_info {
CharReach outs;
CharReach outs2_single;
flat_set<std::pair<u8, u8>> outs2;
bool outs2_broken = false;
};
bool has_accel_sheng(const NFA *nfa);
} // namespace ue2
#endif /* SHENGCOMPILE_H_ */

265
src/nfa/shengdump.cpp Normal file
View File

@ -0,0 +1,265 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "shengdump.h"
#include "accel_dump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "sheng_internal.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/simd_utils.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
static
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
assert(n && isShengType(n->type));
const sheng *s = (const sheng *)getImplNfa(n);
const sstate_aux *aux_base =
(const sstate_aux *)((const char *)n + s->aux_offset);
const sstate_aux *aux = aux_base + i;
assert((const char *)aux < (const char *)s + s->length);
return aux;
}
static
void dumpHeader(FILE *f, const sheng *s) {
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
s->length);
fprintf(f, "aux base offset: %u, reports base offset: %u, "
"accel offset: %u\n",
s->aux_offset, s->report_offset, s->accel_offset);
fprintf(f, "anchored start state: %u, floating start state: %u\n",
s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK);
fprintf(f, "has accel: %u can die: %u single report: %u\n",
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
!!(s->flags & SHENG_FLAG_CAN_DIE),
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
}
static
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
"accel offset: %u, top: %u\n",
state, aux->accept, aux->accept_eod, aux->accel,
aux->top & SHENG_STATE_MASK);
}
static
void dumpReports(FILE *f, const report_list *rl) {
fprintf(f, "reports count: %u\n", rl->count);
for (u32 i = 0; i < rl->count; i++) {
fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]);
}
}
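/* Note on the mask dump produced by dumpMasks() below: each row is one input
 * byte, each column one current state, and entries printed with a trailing '*'
 * have at least one of the SHENG_STATE_FLAG_MASK bits (accept/dead/accel) set
 * on the successor. */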
static
void dumpMasks(FILE *f, const sheng *s) {
for (u32 chr = 0; chr < 256; chr++) {
u8 buf[16];
m128 shuffle_mask = s->shuffle_masks[chr];
store128(buf, shuffle_mask);
fprintf(f, "%3u: ", chr);
for (u32 pos = 0; pos < 16; pos++) {
u8 c = buf[pos];
if (c & SHENG_STATE_FLAG_MASK) {
fprintf(f, "%2u* ", c & SHENG_STATE_MASK);
} else {
fprintf(f, "%2u ", c & SHENG_STATE_MASK);
}
}
fprintf(f, "\n");
}
}
void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) {
assert(nfa->type == SHENG_NFA_0);
const sheng *s = (const sheng *)getImplNfa(nfa);
fprintf(f, "sheng DFA\n");
dumpHeader(f, s);
for (u32 state = 0; state < s->n_states; state++) {
const sstate_aux *aux = get_aux(nfa, state);
dumpAux(f, state, aux);
if (aux->accept) {
fprintf(f, "report list:\n");
const report_list *rl =
(const report_list *)((const char *)nfa + aux->accept);
dumpReports(f, rl);
}
if (aux->accept_eod) {
fprintf(f, "EOD report list:\n");
const report_list *rl =
(const report_list *)((const char *)nfa + aux->accept_eod);
dumpReports(f, rl);
}
if (aux->accel) {
fprintf(f, "accel:\n");
const AccelAux *accel =
(const AccelAux *)((const char *)nfa + aux->accel);
dumpAccelInfo(f, *accel);
}
}
fprintf(f, "\n");
dumpMasks(f, s);
fprintf(f, "\n");
}
static
void dumpDotPreambleDfa(FILE *f) {
dumpDotPreamble(f);
// DFA specific additions.
fprintf(f, "STARTF [style=invis];\n");
fprintf(f, "STARTA [style=invis];\n");
fprintf(f, "0 [style=invis];\n");
}
static
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
const sstate_aux *aux = get_aux(n, i);
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
"label = \"%u\" ]; \n",
i, i);
if (aux->accept_eod) {
fprintf(f, "%u [ color = darkorchid ];\n", i);
}
if (aux->accept) {
fprintf(f, "%u [ shape = doublecircle ];\n", i);
}
if (aux->top && (aux->top & SHENG_STATE_MASK) != i) {
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
aux->top & SHENG_STATE_MASK);
}
if (i == (s->anchored & SHENG_STATE_MASK)) {
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
}
if (i == (s->floating & SHENG_STATE_MASK)) {
fprintf(f, "STARTF -> %u [color = red ]\n", i);
}
}
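/* describeEdge() below merges the 256 per-byte transitions of a state into one
 * labelled edge per distinct successor (transitions to state 0 are omitted):
 * for each successor first seen at byte s, it collects every byte with the
 * same target into a CharReach and prints a single edge labelled with that
 * character class. */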
static
void describeEdge(FILE *f, const u16 *t, u16 i) {
for (u16 s = 0; s < N_CHARS; s++) {
if (!t[s]) {
continue;
}
u16 ss;
for (ss = 0; ss < s; ss++) {
if (t[s] == t[ss]) {
break;
}
}
if (ss != s) {
continue;
}
CharReach reach;
for (ss = s; ss < 256; ss++) {
if (t[s] == t[ss]) {
reach.set(ss);
}
}
fprintf(f, "%u -> %u [ label = \"", i, t[s]);
describeClass(f, reach, 5, CC_OUT_DOT);
fprintf(f, "\" ];\n");
}
}
static
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
assert(isShengType(n->type));
const sheng *s = (const sheng *)getImplNfa(n);
const sstate_aux *aux = get_aux(n, state);
for (unsigned i = 0; i < N_CHARS; i++) {
u8 buf[16];
m128 shuffle_mask = s->shuffle_masks[i];
store128(buf, shuffle_mask);
t[i] = buf[state] & SHENG_STATE_MASK;
}
t[TOP] = aux->top & SHENG_STATE_MASK;
}
void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) {
assert(nfa->type == SHENG_NFA_0);
const sheng *s = (const sheng *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < s->n_states; i++) {
describeNode(nfa, s, i, f);
u16 t[ALPHABET_SIZE];
shengGetTransitions(nfa, i, t);
describeEdge(f, t, i);
}
fprintf(f, "}\n");
}
} // namespace ue2

View File

@ -26,15 +26,24 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "simd_utils_ssse3.h"
-const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-};
+#ifndef SHENGDUMP_H_
+#define SHENGDUMP_H_
+#ifdef DUMP_SUPPORT
+#include <cstdio>
+#include <string>
+struct NFA;
+namespace ue2 {
+void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file,
+                           const std::string &base);
+void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file);
+} // namespace ue2
+#endif // DUMP_SUPPORT
+#endif /* SHENGDUMP_H_ */

src/nfa/shufti.c
View File

@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,8 +40,6 @@
#include "shufti_common.h" #include "shufti_common.h"
#include "util/simd_utils_ssse3.h"
/** \brief Naive byte-by-byte implementation. */ /** \brief Naive byte-by-byte implementation. */
static really_inline static really_inline
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
@ -235,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
m128 c2_lo = pshufb(mask2_lo, chars_lo);
m128 c2_hi = pshufb(mask2_hi, chars_hi);
-m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi)));
+m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
@ -472,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
-m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi)));
+m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");

src/nfa/shufti_common.h
View File

@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,7 +34,6 @@
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/simd_utils.h" #include "util/simd_utils.h"
#include "util/unaligned.h" #include "util/unaligned.h"
#include "util/simd_utils_ssse3.h"
/* /*
* Common stuff for all versions of shufti (single, multi and multidouble) * Common stuff for all versions of shufti (single, multi and multidouble)
@ -94,7 +93,7 @@ DUMP_MSK(128)
#endif
#define GET_LO_4(chars) and128(chars, low4bits)
-#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
+#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
@ -120,7 +119,7 @@ DUMP_MSK(256)
#endif
#define GET_LO_4(chars) and256(chars, low4bits)
-#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4)
+#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,

Some files were not shown because too many files have changed in this diff.