Mirror of https://github.com/VectorCamp/vectorscan.git
Synced 2025-06-28 16:41:01 +03:00

commit bf99ad00eb: Merge branch develop into master

CHANGELOG.md (30 lines changed)
@@ -2,6 +2,36 @@

This is a list of notable changes to Hyperscan, in reverse chronological order.

## [4.3.0] 2016-08-24
- Introduce a new analysis pass ("Violet") used for decomposition of patterns
  into literals and smaller engines.
- Introduce a new container engine ("Tamarama") for infix and suffix engines
  that can be proven to run exclusively of one another. This reduces stream
  state for pattern sets with many such engines.
- Introduce a new shuffle-based DFA engine ("Sheng"). This improves scanning
  performance for pattern sets where small engines are generated.
- Improve the analysis used to extract extra mask information from short
  literals.
- Reduced compile time spent in equivalence class analysis.
- Build: frame pointers are now only omitted for 32-bit release builds.
- Build: Workaround for C++ issues reported on FreeBSD/libc++ platforms.
  (github issue #27)
- Simplify the LimEx NFA with a unified "variable shift" model, which reduces
  the number of different NFA code paths to one per model size.
- Allow some anchored prefixes that may squash the literal to which they are
  attached to run eagerly. This improves scanning performance for some
  patterns.
- Simplify and improve EOD ("end of data") matching, using the interpreter for
  all operations.
- Elide unnecessary instructions in the Rose interpreter at compile time.
- Reduce the number of inlined instantiations of the Rose interpreter in order
  to reduce instruction cache pressure.
- Small improvements to literal matcher acceleration.
- Parser: ignore `\E` metacharacters that are not preceded by `\Q`. This
  conforms to PCRE's behaviour, rather than returning a compile error.
- Check for misaligned memory when allocating an error structure in Hyperscan's
  compile path and return an appropriate error if detected (see the sketch
  below).

## [4.2.0] 2016-05-31
- Introduce an interpreter for many complex actions to replace the use of
  internal reports within the core of Hyperscan (the "Rose" engine). This
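The last 4.3.0 note above concerns error reporting on Hyperscan's compile path. For reference, a minimal caller-side sketch of how such errors surface through the public API (`hs_compile`, `hs_free_compile_error`); this is an illustration, not code from this commit.

```cpp
// Illustration: how a compile error (including the new "misaligned allocator"
// case) reaches a caller through the public API. Not code from this commit.
#include <hs.h>   // installed as <hs/hs.h> in some packagings
#include <cstdio>

int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *err = nullptr;
    // An unbalanced parenthesis forces the compile path to report an error.
    if (hs_compile("foo(", HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr, &db,
                   &err) != HS_SUCCESS) {
        std::fprintf(stderr, "compile failed: %s (expression %d)\n",
                     err->message, err->expression);
        hs_free_compile_error(err); // also safe for the static error objects
        return 1;
    }
    hs_free_database(db);
    return 0;
}
```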
CMakeLists.txt (153 lines changed)
@@ -1,12 +1,18 @@
cmake_minimum_required (VERSION 2.8.11)

# don't use the built-in default configs
set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE)

project (Hyperscan C CXX)

set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 2)
set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
# since we are doing this manually, we only have three types
set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo"
    CACHE STRING "" FORCE)

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag)
@@ -24,7 +30,7 @@ find_package(PkgConfig QUIET)

if (NOT CMAKE_BUILD_TYPE)
    message(STATUS "Default build type 'Release with debug info'")
    set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
    set(CMAKE_BUILD_TYPE RELWITHDEBINFO CACHE STRING "" FORCE )
else()
    string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
    message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
@@ -90,6 +96,18 @@ else()
    message(FATAL_ERROR "No python interpreter found")
endif()

# allow for reproducible builds - python for portability
if (DEFINED ENV{SOURCE_DATE_EPOCH})
    execute_process(
        COMMAND "${PYTHON}" "${CMAKE_MODULE_PATH}/formatdate.py" "$ENV{SOURCE_DATE_EPOCH}"
        OUTPUT_VARIABLE BUILD_DATE
        OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
    string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
endif ()
message(STATUS "Build date: ${BUILD_DATE}")


if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
    message(FATAL_ERROR "Ragel state machine compiler not found")
endif()
@@ -121,13 +139,7 @@ endif()

CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)

option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE)

if (DISABLE_ASSERTS)
    if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
        add_definitions(-DNDEBUG)
    endif()
endif()
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)

option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)

@@ -139,18 +151,26 @@ if(MSVC OR MSVC_IDE)
    if (MSVC_VERSION LESS 1700)
        message(FATAL_ERROR "The project requires C++11 features.")
    else()
        # set base flags
        set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3")
        set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
        set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
        set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")

        set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc")
        set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
        set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")

        if (WINDOWS_ICC)
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
        else()
            #TODO: don't hardcode arch
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
        endif()
        string(REGEX REPLACE "/RTC1" ""
            CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" )
        string(REGEX REPLACE "/RTC1" ""
            CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" )


    endif()

@@ -172,16 +192,34 @@ else()
    unset(_GXX_OUTPUT)
endif()

# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")
if (NOT RELEASE_BUILD)
    # -Werror is most useful during development, don't potentially break
    # release builds
    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
if(OPTIMISE)
    set(OPT_C_FLAG "-O3")
    set(OPT_CXX_FLAG "-O2")
else()
    set(OPT_C_FLAG "-O0")
    set(OPT_CXX_FLAG "-O0")
endif(OPTIMISE)

# set up base flags for build types
set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}")
set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}")

set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}")
set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}")

if (DISABLE_ASSERTS)
    # usually true for release builds, false for debug
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
endif()


# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")

if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
    message(STATUS "Building for current host CPU")
    set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
@@ -199,15 +237,7 @@ else()
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
endif()

if(OPTIMISE)
    set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
    set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
else()
    set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
    set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
endif(OPTIMISE)

if(NOT RELEASE_BUILD)
    if (NOT(ARCH_IA32 AND RELEASE_BUILD))
        set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
    endif()
@@ -297,6 +327,11 @@ if (CXX_UNUSED_CONST_VAR)
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
endif()

# gcc 6 complains about type attributes that get ignored, like alignment
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
if (CXX_IGNORED_ATTR)
    set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()

# note this for later
# g++ doesn't have this flag but clang does
@@ -438,15 +473,14 @@ set (hs_exec_SRCS
    src/nfa/limex_simd128.c
    src/nfa/limex_simd256.c
    src/nfa/limex_simd384.c
    src/nfa/limex_simd512a.c
    src/nfa/limex_simd512b.c
    src/nfa/limex_simd512c.c
    src/nfa/limex_simd512.c
    src/nfa/limex.h
    src/nfa/limex_common_impl.h
    src/nfa/limex_context.h
    src/nfa/limex_internal.h
    src/nfa/limex_runtime.h
    src/nfa/limex_runtime_impl.h
    src/nfa/limex_shuffle.h
    src/nfa/limex_state_impl.h
    src/nfa/mpv.h
    src/nfa/mpv.c
@@ -477,9 +511,18 @@ set (hs_exec_SRCS
    src/nfa/repeat.c
    src/nfa/repeat.h
    src/nfa/repeat_internal.h
    src/nfa/sheng.c
    src/nfa/sheng.h
    src/nfa/sheng_defs.h
    src/nfa/sheng_impl.h
    src/nfa/sheng_impl4.h
    src/nfa/sheng_internal.h
    src/nfa/shufti_common.h
    src/nfa/shufti.c
    src/nfa/shufti.h
    src/nfa/tamarama.c
    src/nfa/tamarama.h
    src/nfa/tamarama_internal.h
    src/nfa/truffle_common.h
    src/nfa/truffle.c
    src/nfa/truffle.h
@@ -495,7 +538,6 @@ set (hs_exec_SRCS
    src/rose/block.c
    src/rose/catchup.h
    src/rose/catchup.c
    src/rose/eod.c
    src/rose/infix.h
    src/rose/init.h
    src/rose/init.c
@@ -503,6 +545,7 @@ set (hs_exec_SRCS
    src/rose/match.h
    src/rose/match.c
    src/rose/miracle.h
    src/rose/program_runtime.c
    src/rose/program_runtime.h
    src/rose/runtime.h
    src/rose/rose.h
@@ -510,6 +553,7 @@ set (hs_exec_SRCS
    src/rose/rose_program.h
    src/rose/rose_types.h
    src/rose/rose_common.h
    src/rose/validate_mask.h
    src/util/bitutils.h
    src/util/exhaust.h
    src/util/fatbit.h
@@ -524,11 +568,8 @@ set (hs_exec_SRCS
    src/util/pqueue.h
    src/util/scatter.h
    src/util/scatter_runtime.h
    src/util/shuffle.h
    src/util/shuffle_ssse3.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/simd_utils_ssse3.c
    src/util/simd_utils.c
    src/util/state_compress.h
    src/util/state_compress.c
    src/util/unaligned.h
@@ -597,11 +638,15 @@ SET (hs_SRCS
    src/hwlm/noodle_build.h
    src/hwlm/noodle_internal.h
    src/nfa/accel.h
    src/nfa/accel_dfa_build_strat.cpp
    src/nfa/accel_dfa_build_strat.h
    src/nfa/accelcompile.cpp
    src/nfa/accelcompile.h
    src/nfa/callback.h
    src/nfa/castlecompile.cpp
    src/nfa/castlecompile.h
    src/nfa/dfa_build_strat.cpp
    src/nfa/dfa_build_strat.h
    src/nfa/dfa_min.cpp
    src/nfa/dfa_min.h
    src/nfa/goughcompile.cpp
@@ -613,8 +658,6 @@ SET (hs_SRCS
    src/nfa/mcclellan_internal.h
    src/nfa/mcclellancompile.cpp
    src/nfa/mcclellancompile.h
    src/nfa/mcclellancompile_accel.cpp
    src/nfa/mcclellancompile_accel.h
    src/nfa/mcclellancompile_util.cpp
    src/nfa/mcclellancompile_util.h
    src/nfa/limex_compile.cpp
@@ -639,8 +682,13 @@ SET (hs_SRCS
    src/nfa/repeat_internal.h
    src/nfa/repeatcompile.cpp
    src/nfa/repeatcompile.h
    src/nfa/sheng_internal.h
    src/nfa/shengcompile.cpp
    src/nfa/shengcompile.h
    src/nfa/shufticompile.cpp
    src/nfa/shufticompile.h
    src/nfa/tamaramacompile.cpp
    src/nfa/tamaramacompile.h
    src/nfa/trufflecompile.cpp
    src/nfa/trufflecompile.h
    src/nfagraph/ng.cpp
@@ -746,6 +794,8 @@ SET (hs_SRCS
    src/nfagraph/ng_util.h
    src/nfagraph/ng_vacuous.cpp
    src/nfagraph/ng_vacuous.h
    src/nfagraph/ng_violet.cpp
    src/nfagraph/ng_violet.h
    src/nfagraph/ng_width.cpp
    src/nfagraph/ng_width.h
    src/parser/AsciiComponentClass.cpp
@@ -825,6 +875,10 @@ SET (hs_SRCS
    src/rose/rose_build_compile.cpp
    src/rose/rose_build_convert.cpp
    src/rose/rose_build_convert.h
    src/rose/rose_build_exclusive.cpp
    src/rose/rose_build_exclusive.h
    src/rose/rose_build_groups.cpp
    src/rose/rose_build_groups.h
    src/rose/rose_build_impl.h
    src/rose/rose_build_infix.cpp
    src/rose/rose_build_infix.h
@@ -853,6 +907,8 @@ SET (hs_SRCS
    src/util/charreach.cpp
    src/util/charreach.h
    src/util/charreach_util.h
    src/util/clique.cpp
    src/util/clique.h
    src/util/compare.h
    src/util/compile_context.cpp
    src/util/compile_context.h
@@ -878,7 +934,6 @@ SET (hs_SRCS
    src/util/report_manager.cpp
    src/util/report_manager.h
    src/util/simd_utils.h
    src/util/simd_utils_ssse3.h
    src/util/target_info.cpp
    src/util/target_info.h
    src/util/ue2_containers.h
@@ -916,6 +971,10 @@ set(hs_dump_SRCS
    src/nfa/nfa_dump_dispatch.cpp
    src/nfa/nfa_dump_internal.cpp
    src/nfa/nfa_dump_internal.h
    src/nfa/shengdump.cpp
    src/nfa/shengdump.h
    src/nfa/tamarama_dump.cpp
    src/nfa/tamarama_dump.h
    src/parser/dump.cpp
    src/parser/dump.h
    src/parser/position_dump.h
@@ -941,7 +1000,7 @@ endif()
# choose which ones to build

set (LIB_VERSION ${HS_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION})

add_library(hs_exec OBJECT ${hs_exec_SRCS})

cmake/formatdate.py (new executable file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env python
from __future__ import print_function
import os
import sys
import datetime

def usage():
    print("Usage:", os.path.basename(sys.argv[0]), "<seconds from epoch>")

if len(sys.argv) != 2:
    usage()
    sys.exit(1)

ts = sys.argv[1]

build_date = datetime.datetime.utcfromtimestamp(int(ts))

print(build_date.strftime("%Y-%m-%d"))
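For reference, the same conversion sketched in C++ (it assumes the standard `std::gmtime`; the build uses the Python script above precisely to keep this portable and reproducible under SOURCE_DATE_EPOCH):

```cpp
// Sketch: the conversion formatdate.py performs, in C++. Takes seconds since
// the Unix epoch (the SOURCE_DATE_EPOCH convention) and prints a UTC date.
#include <cstdio>
#include <cstdlib>
#include <ctime>

int main(int argc, char **argv) {
    if (argc != 2) {
        std::fprintf(stderr, "Usage: %s <seconds from epoch>\n", argv[0]);
        return 1;
    }
    std::time_t ts = static_cast<std::time_t>(std::strtoll(argv[1], nullptr, 10));
    std::tm *tm_utc = std::gmtime(&ts); // UTC, like datetime.utcfromtimestamp
    char buf[11];                       // "YYYY-MM-DD" plus NUL
    std::strftime(buf, sizeof(buf), "%Y-%m-%d", tm_utc);
    std::printf("%s\n", buf);
    return 0;
}
```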
@@ -77,7 +77,7 @@ static int eventHandler(unsigned int id, unsigned long long from,
 * length with its length. Returns NULL on failure.
 */
static char *readInputData(const char *inputFN, unsigned int *length) {
    FILE *f = fopen(inputFN, "r");
    FILE *f = fopen(inputFN, "rb");
    if (!f) {
        fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
                strerror(errno));
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -52,7 +52,6 @@
#include "parser/shortcut_literal.h"
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "smallwrite/smallwrite_build.h"
#include "rose/rose_build.h"
#include "rose/rose_build_dump.h"
#include "som/slot_manager_dump.h"
@@ -304,15 +303,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
        return nullptr;
    }

    /* avoid building a smwr if just a pure floating case. */
    if (!roseIsPureLiteral(rose.get())) {
        u32 qual = roseQuality(rose.get());
        auto smwr = ng.smwr->build(qual);
        if (smwr) {
            rose = roseAddSmallWrite(rose.get(), smwr.get());
        }
    }

    dumpRose(*ng.rose, rose.get(), ng.cc.grey);
    dumpReportManager(ng.rm, ng.cc.grey);
    dumpSomSlotManager(ng.ssm, ng.cc.grey);
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -42,6 +42,7 @@ using std::string;

static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error.";
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";

extern const hs_compile_error_t hs_enomem = {
    const_cast<char *>(failureNoMemory), 0
@@ -49,6 +50,9 @@ extern const hs_compile_error_t hs_enomem = {
extern const hs_compile_error_t hs_einternal = {
    const_cast<char *>(failureInternal), 0
};
extern const hs_compile_error_t hs_badalloc = {
    const_cast<char *>(failureBadAlloc), 0
};

namespace ue2 {

@@ -56,8 +60,18 @@ hs_compile_error_t *generateCompileError(const string &err, int expression) {
    hs_compile_error_t *ret =
        (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
    if (ret) {
        hs_error_t e = hs_check_alloc(ret);
        if (e != HS_SUCCESS) {
            hs_misc_free(ret);
            return const_cast<hs_compile_error_t *>(&hs_badalloc);
        }
        char *msg = (char *)hs_misc_alloc(err.size() + 1);
        if (msg) {
            e = hs_check_alloc(msg);
            if (e != HS_SUCCESS) {
                hs_misc_free(msg);
                return const_cast<hs_compile_error_t *>(&hs_badalloc);
            }
            memcpy(msg, err.c_str(), err.size() + 1);
            ret->message = msg;
        } else {
@@ -83,7 +97,8 @@ void freeCompileError(hs_compile_error_t *error) {
    if (!error) {
        return;
    }
    if (error == &hs_enomem || error == &hs_einternal) {
    if (error == &hs_enomem || error == &hs_einternal ||
        error == &hs_badalloc) {
        // These are not allocated.
        return;
    }
@@ -458,33 +458,16 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
    }
    *info = NULL;

    if (!bytes || length < sizeof(struct hs_database)) {
        return HS_INVALID;
    // Decode and check the header
    hs_database_t header;
    hs_error_t ret = db_decode_header(&bytes, length, &header);
    if (ret != HS_SUCCESS) {
        return ret;
    }

    const u32 *buf = (const u32 *)bytes;
    u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode));

    u32 magic = unaligned_load_u32(buf++);
    if (magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }

    u32 version = unaligned_load_u32(buf++);

    buf++; /* length */

    platform_t plat;
    plat = unaligned_load_u64a(buf);
    buf += 2;

    buf++; /* crc */
    buf++; /* reserved 0 */
    buf++; /* reserved 1 */

    const char *t_raw = (const char *)buf;
    u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));

    return print_database_string(info, version, plat, mode);
    return print_database_string(info, header.version, header.platform, mode);
}

HS_PUBLIC_API
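The function reworked above is part of the public API; for context, a usage sketch of the usual serialize-then-inspect flow (an illustration only, not code from this commit):

```cpp
// Illustration: serializing a database and querying its info string through
// the public API that the hunk above reworks.
#include <hs.h>
#include <cstdio>
#include <cstdlib>

int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *err = nullptr;
    if (hs_compile("foo.*bar", HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr, &db,
                   &err) != HS_SUCCESS) {
        hs_free_compile_error(err);
        return 1;
    }
    char *bytes = nullptr;
    size_t length = 0;
    if (hs_serialize_database(db, &bytes, &length) == HS_SUCCESS) {
        char *info = nullptr;
        if (hs_serialized_database_info(bytes, length, &info) == HS_SUCCESS) {
            std::printf("%s\n", info); // version/platform/mode summary
            std::free(info);           // default misc allocator is malloc
        }
        std::free(bytes);
    }
    hs_free_database(db);
    return 0;
}
```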
src/fdr/fdr.c (102 lines changed)
@@ -36,7 +36,6 @@
#include "teddy.h"
#include "teddy_internal.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

/** \brief number of bytes processed in each iteration */
#define ITER_BYTES 16
@@ -132,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
        u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
        tmp &= fdr->domainMask;
        s = *((const m128 *)ft + tmp);
        s = shiftRight8Bits(s);
        s = rshiftbyte_m128(s, 1);
    } else {
        s = fdr->start;
    }
@@ -186,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    m128 st14 = *(const m128 *)(ft + v14*8);
    m128 st15 = *(const m128 *)(ft + v15*8);

    st1 = byteShiftLeft128(st1, 1);
    st2 = byteShiftLeft128(st2, 2);
    st3 = byteShiftLeft128(st3, 3);
    st4 = byteShiftLeft128(st4, 4);
    st5 = byteShiftLeft128(st5, 5);
    st6 = byteShiftLeft128(st6, 6);
    st7 = byteShiftLeft128(st7, 7);
    st9 = byteShiftLeft128(st9, 1);
    st10 = byteShiftLeft128(st10, 2);
    st11 = byteShiftLeft128(st11, 3);
    st12 = byteShiftLeft128(st12, 4);
    st13 = byteShiftLeft128(st13, 5);
    st14 = byteShiftLeft128(st14, 6);
    st15 = byteShiftLeft128(st15, 7);
    st1 = lshiftbyte_m128(st1, 1);
    st2 = lshiftbyte_m128(st2, 2);
    st3 = lshiftbyte_m128(st3, 3);
    st4 = lshiftbyte_m128(st4, 4);
    st5 = lshiftbyte_m128(st5, 5);
    st6 = lshiftbyte_m128(st6, 6);
    st7 = lshiftbyte_m128(st7, 7);
    st9 = lshiftbyte_m128(st9, 1);
    st10 = lshiftbyte_m128(st10, 2);
    st11 = lshiftbyte_m128(st11, 3);
    st12 = lshiftbyte_m128(st12, 4);
    st13 = lshiftbyte_m128(st13, 5);
    st14 = lshiftbyte_m128(st14, 6);
    st15 = lshiftbyte_m128(st15, 7);

    *s = or128(*s, st0);
    *s = or128(*s, st1);
@@ -210,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    *s = or128(*s, st6);
    *s = or128(*s, st7);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
@@ -222,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    *s = or128(*s, st14);
    *s = or128(*s, st15);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf8 ^= ~0ULL;
}

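These hunks rename the byte-granularity SIMD shifts (byteShiftLeft128/byteShiftRight128 become lshiftbyte_m128/rshiftbyte_m128). As an illustration of what such helpers do with SSE2 intrinsics — not the project's actual simd_utils implementation:

```cpp
// Illustration: byte-wise shifts of a 128-bit vector with SSE2. The shift
// count is an immediate, so real code wraps these in macros or a switch.
#include <emmintrin.h>

static inline __m128i lshift_by_3_bytes(__m128i v) {
    return _mm_slli_si128(v, 3); // shift left by 3 bytes, zero-filling
}

static inline __m128i rshift_by_8_bytes(__m128i v) {
    return _mm_srli_si128(v, 8); // shift right by 8 bytes, zero-filling
}
```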
@@ -253,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    m128 st12 = *(const m128 *)(ft + v12*8);
    m128 st14 = *(const m128 *)(ft + v14*8);

    st2 = byteShiftLeft128(st2, 2);
    st4 = byteShiftLeft128(st4, 4);
    st6 = byteShiftLeft128(st6, 6);
    st10 = byteShiftLeft128(st10, 2);
    st12 = byteShiftLeft128(st12, 4);
    st14 = byteShiftLeft128(st14, 6);
    st2 = lshiftbyte_m128(st2, 2);
    st4 = lshiftbyte_m128(st4, 4);
    st6 = lshiftbyte_m128(st6, 6);
    st10 = lshiftbyte_m128(st10, 2);
    st12 = lshiftbyte_m128(st12, 4);
    st14 = lshiftbyte_m128(st14, 6);

    *s = or128(*s, st0);
    *s = or128(*s, st2);
    *s = or128(*s, st4);
    *s = or128(*s, st6);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
@@ -273,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    *s = or128(*s, st12);
    *s = or128(*s, st14);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf8 ^= ~0ULL;
}

@@ -296,27 +295,26 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
    m128 st8 = *(const m128 *)(ft + v8*8);
    m128 st12 = *(const m128 *)(ft + v12*8);

    st4 = byteShiftLeft128(st4, 4);
    st12 = byteShiftLeft128(st12, 4);
    st4 = lshiftbyte_m128(st4, 4);
    st12 = lshiftbyte_m128(st12, 4);

    *s = or128(*s, st0);
    *s = or128(*s, st4);
    *conf0 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf0 ^= ~0ULL;

    *s = or128(*s, st8);
    *s = or128(*s, st12);
    *conf8 = movq(*s);
    *s = byteShiftRight128(*s, 8);
    *s = rshiftbyte_m128(*s, 8);
    *conf8 ^= ~0ULL;
}

static really_inline
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
                    const u32 *confBase, const struct FDR_Runtime_Args *a,
                    const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id,
                    struct zone *z) {
                    const u8 *ptr, u32 *last_match_id, struct zone *z) {
    const u8 bucket = 8;
    const u8 pullback = 1;

@@ -352,13 +350,13 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
            continue;
        }
        *last_match_id = id;
        *controlVal = a->cb(ptr_main + byte - a->buf,
                            ptr_main + byte - a->buf, id, a->ctxt);
        *control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
                         id, a->ctxt);
        continue;
    }
    u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
    confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback,
                control, last_match_id, confVal);
    confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
                last_match_id, confVal);
    } while (unlikely(!!*conf));
}

@@ -681,9 +679,9 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
         itPtr += ITER_BYTES) {                                          \
        if (unlikely(itPtr > tryFloodDetect)) {                          \
            tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect, \
                                         &floodBackoff, &controlVal,     \
                                         &floodBackoff, &control,        \
                                         ITER_BYTES);                    \
            if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {       \
            if (unlikely(control == HWLM_TERMINATE_MATCHING)) {          \
                return HWLM_TERMINATED;                                  \
            }                                                            \
        }                                                                \
@@ -692,11 +690,11 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
        u64a conf8;                                                      \
        get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted,     \
                    ft, &conf0, &conf8, &s);                             \
        do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr,       \
                       control, &last_match_id, zz);                     \
        do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr,       \
                       control, &last_match_id, zz);                     \
        if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) {           \
        do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr,          \
                       &last_match_id, zz);                              \
        do_confirm_fdr(&conf8, 8, &control, confBase, a, itPtr,          \
                       &last_match_id, zz);                              \
        if (unlikely(control == HWLM_TERMINATE_MATCHING)) {              \
            return HWLM_TERMINATED;                                      \
        }                                                                \
    } /* end for loop */                                                 \
@@ -704,9 +702,8 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,

static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
                             const struct FDR_Runtime_Args *a) {
    hwlmcb_rv_t controlVal = *a->groups;
    hwlmcb_rv_t *control = &controlVal;
                             const struct FDR_Runtime_Args *a,
                             hwlm_group_t control) {
    u32 floodBackoff = FLOOD_BACKOFF_START;
    u32 last_match_id = INVALID_MATCH_ID;
    u64a domain_mask_adjusted = fdr->domainMask << 1;
@@ -771,7 +768,10 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
#define ONLY_AVX2(func) NULL
#endif

typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
                                    const struct FDR_Runtime_Args *a,
                                    hwlm_group_t control);

static const FDRFUNCTYPE funcs[] = {
    fdr_engine_exec,
    ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
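The group/control word is now passed directly into the engine entry points and dispatched through funcs[fdr->engineID]. A reduced sketch of that dispatch shape (illustrative names only, not the real engine list):

```cpp
// Reduced sketch: the engine variant is an index into a table of entry
// points, and the group/control word is passed straight through.
#include <cstdint>

struct Args { /* runtime arguments, elided */ };
using EngineFn = int (*)(const void *eng, const Args *a, std::uint64_t control);

static int exec_generic(const void *, const Args *, std::uint64_t) { return 0; }
static int exec_avx2(const void *, const Args *, std::uint64_t) { return 0; }

static const EngineFn table[] = { exec_generic, exec_avx2 };

int run(unsigned engineID, const void *eng, const Args *a, std::uint64_t groups) {
    return table[engineID](eng, a, groups); // single indirect call per scan
}
```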
@@ -814,7 +814,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        0
    };
@@ -822,7 +821,7 @@
        return HWLM_SUCCESS;
    } else {
        assert(funcs[fdr->engineID]);
        return funcs[fdr->engineID](fdr, &a);
        return funcs[fdr->engineID](fdr, &a, groups);
    }
}

@@ -840,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        /* we are guaranteed to always have 16 initialised bytes at the end of
         * the history buffer (they may be garbage). */
@@ -853,7 +851,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
        ret = HWLM_SUCCESS;
    } else {
        assert(funcs[fdr->engineID]);
        ret = funcs[fdr->engineID](fdr, &a);
        ret = funcs[fdr->engineID](fdr, &a, groups);
    }

    fdrPackState(fdr, &a, stream_state);
@ -81,7 +81,7 @@ private:
|
||||
void dumpMasks(const u8 *defaultMask);
|
||||
#endif
|
||||
void setupTab();
|
||||
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
|
||||
aligned_unique_ptr<FDR> setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||
void createInitialState(FDR *fdr);
|
||||
|
||||
public:
|
||||
@ -90,7 +90,7 @@ public:
|
||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
||||
make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
|
||||
};
|
||||
|
||||
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
|
||||
@ -124,10 +124,8 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
||||
// Find the minimum length for the literals in this bucket.
|
||||
const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
|
||||
u32 min_len = ~0U;
|
||||
for (vector<LiteralIndex>::const_iterator it = bucket_lits.begin(),
|
||||
ite = bucket_lits.end();
|
||||
it != ite; ++it) {
|
||||
min_len = min(min_len, verify_u32(lits[*it].s.length()));
|
||||
for (const LiteralIndex &lit_idx : bucket_lits) {
|
||||
min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
|
||||
@ -141,13 +139,12 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
||||
}
|
||||
}
|
||||
|
||||
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
||||
aligned_unique_ptr<FDR>
|
||||
FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||
size_t tabSize = eng.getTabSizeBytes();
|
||||
|
||||
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||
|
||||
pair<u8 *, size_t> confirmTmp =
|
||||
setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
assert(ISALIGNED_16(tabSize));
|
||||
assert(ISALIGNED_16(confirmTmp.second));
|
||||
@ -175,14 +172,12 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
||||
copy(tab.begin(), tab.end(), ptr);
|
||||
ptr += tabSize;
|
||||
|
||||
memcpy(ptr, confirmTmp.first, confirmTmp.second);
|
||||
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
||||
ptr += confirmTmp.second;
|
||||
aligned_free(confirmTmp.first);
|
||||
|
||||
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
||||
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
|
||||
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
||||
ptr += floodControlTmp.second;
|
||||
aligned_free(floodControlTmp.first);
|
||||
|
||||
/* we are allowing domains 9 to 15 only */
|
||||
assert(eng.bits > 8 && eng.bits < 16);
|
||||
@ -193,8 +188,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
|
||||
|
||||
if (link.first) {
|
||||
fdr->link = verify_u32(ptr - fdr_base);
|
||||
memcpy(ptr, link.first, link.second);
|
||||
aligned_free(link.first);
|
||||
memcpy(ptr, link.first.get(), link.second);
|
||||
} else {
|
||||
fdr->link = 0;
|
||||
}
|
||||
@@ -217,13 +211,11 @@ struct LitOrder {
        if (len1 != len2) {
            return len1 < len2;
        } else {
            string::const_reverse_iterator it1, it2;
            tie(it1, it2) =
                std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
            if (it1 == i1s.rend()) {
            auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
            if (p.first == i1s.rend()) {
                return false;
            }
            return *it1 < *it2;
            return *p.first < *p.second;
        }
    }

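The comparator above orders equal-length literals by their reversed characters; a small stand-alone sketch of the same std::mismatch idiom (it assumes, as the code above does, that both strings have the same length when this branch is reached):

```cpp
// Stand-alone sketch of the reverse-iterator std::mismatch comparison above.
// Assumes a.size() == b.size(), mirroring the branch in which LitOrder uses it.
#include <algorithm>
#include <string>

static bool less_by_reversed_chars(const std::string &a, const std::string &b) {
    auto p = std::mismatch(a.rbegin(), a.rend(), b.rbegin());
    if (p.first == a.rend()) {
        return false; // identical when read from the end: not "less"
    }
    return *p.first < *p.second;
}
```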
@ -266,9 +258,8 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
|
||||
i != e; ++i) {
|
||||
printf("l<%d>:%d ", i->first, i->second);
|
||||
for (const auto &m : lenCounts) {
|
||||
printf("l<%u>:%u ", m.first, m.second);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
@ -324,12 +315,12 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
for (u32 k = j; k < nChunks; ++k) {
|
||||
cnt += count[k];
|
||||
}
|
||||
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
|
||||
t[j][0] = {getScoreUtil(length[j], cnt), 0};
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < nb; i++) {
|
||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
||||
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
|
||||
SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
|
||||
u32 cnt = count[j];
|
||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
||||
SCORE score = getScoreUtil(length[j], cnt);
|
||||
@ -338,12 +329,12 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
}
|
||||
score += t[k][i-1].first;
|
||||
if (score < best.first) {
|
||||
best = make_pair(score, k);
|
||||
best = {score, k};
|
||||
}
|
||||
}
|
||||
t[j][i] = best;
|
||||
}
|
||||
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
|
||||
t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
@ -405,8 +396,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
||||
distance = 4;
|
||||
}
|
||||
|
||||
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
|
||||
i != e; ++i) {
|
||||
for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
|
||||
if (e - i > 5) {
|
||||
__builtin_prefetch(&lits[*(i + 5)]);
|
||||
}
|
||||
@ -460,31 +450,25 @@ void FDRCompiler::setupTab() {
|
||||
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
||||
}
|
||||
|
||||
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
|
||||
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
const vector<LiteralIndex> &vl = bucketToLits[b];
|
||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||
u32 bit = eng.getSchemeBit(b, pos);
|
||||
M2SET m2;
|
||||
map<u32, ue2::unordered_set<u32>> m2;
|
||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||
if (done) {
|
||||
clearbit(&defaultMask[0], bit);
|
||||
continue;
|
||||
}
|
||||
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
|
||||
++i) {
|
||||
u32 dc = i->first;
|
||||
const ue2::unordered_set<u32> &mskSet = i->second;
|
||||
for (const auto &elem : m2) {
|
||||
u32 dc = elem.first;
|
||||
const ue2::unordered_set<u32> &mskSet = elem.second;
|
||||
u32 v = ~dc;
|
||||
do {
|
||||
u32 b2 = v & dc;
|
||||
for (ue2::unordered_set<u32>::const_iterator
|
||||
i2 = mskSet.begin(),
|
||||
e2 = mskSet.end();
|
||||
i2 != e2; ++i2) {
|
||||
u32 val = (*i2 & ~dc) | b2;
|
||||
for (const u32 &mskVal : mskSet) {
|
||||
u32 val = (mskVal & ~dc) | b2;
|
||||
clearbit(tabIndexToMask(val), bit);
|
||||
}
|
||||
v = (v + (dc & -dc)) | ~dc;
|
||||
@ -502,7 +486,8 @@ void FDRCompiler::setupTab() {
|
||||
#endif
|
||||
}
|
||||
|
||||
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
|
||||
aligned_unique_ptr<FDR>
|
||||
FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||
assignStringsToBuckets();
|
||||
setupTab();
|
||||
return setupFDR(link);
|
||||
@ -515,16 +500,15 @@ aligned_unique_ptr<FDR>
|
||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey, u32 hint,
|
||||
hwlmStreamingControl *stream_control) {
|
||||
pair<u8 *, size_t> link(nullptr, 0);
|
||||
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
|
||||
if (stream_control) {
|
||||
link = fdrBuildTableStreaming(lits, stream_control);
|
||||
link = fdrBuildTableStreaming(lits, *stream_control);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||
|
||||
if (grey.fdrAllowTeddy) {
|
||||
aligned_unique_ptr<FDR> fdr
|
||||
= teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||
if (fdr) {
|
||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||
return fdr;
|
||||
|
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, Intel Corporation
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,7 @@

#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
#include "util/alloc.h"

#include <map>
#include <utility>
@@ -44,7 +45,6 @@ namespace ue2 {
// a pile of decorative typedefs
// good for documentation purposes more than anything else
typedef u32 LiteralIndex;
typedef u32 ConfirmIndex;
typedef u32 SuffixPositionInString; // zero is last byte, counting back
                                    // into the string
typedef u32 BucketIndex;
@@ -56,25 +56,22 @@ class EngineDescription;
class FDREngineDescription;
struct hwlmStreamingControl;

size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
                     bool make_small);

std::pair<u8 *, size_t> setupFullMultiConfs(
std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
    const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
    std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
    std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
    bool make_small);

// all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our
// state; if we don't have a flood that's long enough we won't be in the
// right state yet to allow blindly advancing
std::pair<u8 *, size_t>
std::pair<aligned_unique_ptr<u8>, size_t>
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
                     const EngineDescription &eng);

std::pair<u8 *, size_t>
std::pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
                       hwlmStreamingControl *stream_control);
                       hwlmStreamingControl &stream_control);

static constexpr u32 HINT_INVALID = 0xffffffff;
@ -45,9 +45,10 @@ using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef u8 ConfSplitType;
|
||||
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
|
||||
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
|
||||
using ConfSplitType = u8;
|
||||
using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
|
||||
using BC2CONF = map<BucketSplitPair,
|
||||
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
|
||||
|
||||
// return the number of bytes beyond a length threshold in all strings in lits
|
||||
static
|
||||
@ -149,9 +150,9 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
|
||||
//#define FDR_CONFIRM_DUMP 1
|
||||
|
||||
static
|
||||
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool applyOneCharOpt, bool make_small, bool make_confirm) {
|
||||
static pair<aligned_unique_ptr<FDRConfirm>, size_t>
|
||||
getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
|
||||
bool make_small, bool make_confirm) {
|
||||
vector<LitInfo> tmpLitInfo(lits.size());
|
||||
CONF_TYPE andmsk;
|
||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||
@ -220,55 +221,61 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
// print out the literals reversed - makes it easier to line up analyses
|
||||
// that are end-offset based
|
||||
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
|
||||
e = res2lits.end(); i != e; ++i) {
|
||||
u32 hash = i->first;
|
||||
vector<LiteralIndex> & vlidx = i->second;
|
||||
if (vlidx.size() > 1) {
|
||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||
u32 min_len = lits[vlidx.front()].s.size();
|
||||
vector<set<u8> > vsl; // contains the set of chars at each location
|
||||
// reversed from the end
|
||||
vsl.resize(1024);
|
||||
u32 total_string_size = 0;
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
total_string_size += lits[litIdx].s.size();
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
|
||||
}
|
||||
min_len = MIN(min_len, lits[litIdx].s.size());
|
||||
for (const auto &m : res2lits) {
|
||||
const u32 &hash = m.first;
|
||||
const vector<LiteralIndex> &vlidx = m.second;
|
||||
if (vlidx.size() <= 1) {
|
||||
continue;
|
||||
}
|
||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||
size_t min_len = lits[vlidx.front()].s.size();
|
||||
|
||||
vector<set<u8>> vsl; // contains the set of chars at each location
|
||||
// reversed from the end
|
||||
|
||||
for (const auto &litIdx : vlidx) {
|
||||
const auto &lit = lits[litIdx];
|
||||
if (lit.s.size() > vsl.size()) {
|
||||
vsl.resize(lit.s.size());
|
||||
}
|
||||
printf("common ");
|
||||
for (u32 j = 0; j < min_len; j++) {
|
||||
if (vsl[j].size() == 1) {
|
||||
printf("%02x", (u32)*vsl[j].begin());
|
||||
} else {
|
||||
for (size_t j = lit.s.size(); j != 0; j--) {
|
||||
vsl[lit.s.size() - j].insert(lit.s[j - 1]);
|
||||
}
|
||||
min_len = min(min_len, lit.s.size());
|
||||
}
|
||||
printf("common ");
|
||||
for (size_t j = 0; j < min_len; j++) {
|
||||
if (vsl[j].size() == 1) {
|
||||
printf("%02x", *vsl[j].begin());
|
||||
} else {
|
||||
printf("__");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (const auto &litIdx : vlidx) {
|
||||
const auto &lit = lits[litIdx];
|
||||
printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
|
||||
for (size_t j = lit.s.size(); j != 0; j--) {
|
||||
size_t dist_from_end = lit.s.size() - j;
|
||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||
printf("__");
|
||||
} else {
|
||||
printf("%02x", lit.s[j - 1]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
u32 dist_from_end = lits[litIdx].s.size() - j;
|
||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||
printf("__");
|
||||
} else {
|
||||
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
u32 total_compares = 0;
|
||||
for (u32 j = 0; j < 1024; j++) { // naughty
|
||||
total_compares += vsl[j].size();
|
||||
}
|
||||
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
|
||||
}
|
||||
size_t total_compares = 0;
|
||||
for (const auto &v : vsl) {
|
||||
total_compares += v.size();
|
||||
}
|
||||
size_t total_string_size = 0;
|
||||
for (const auto &litIdx : vlidx) {
|
||||
const auto &lit = lits[litIdx];
|
||||
total_string_size += lit.s.size();
|
||||
}
|
||||
printf("Total compare load: %zu Total string size: %zu\n\n",
|
||||
total_compares, total_string_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -281,7 +288,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||
|
||||
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
|
||||
auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
|
||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
fdrc->andmsk = andmsk;
|
||||
@ -295,7 +302,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
fdrc->groups = gm;
|
||||
|
||||
// After the FDRConfirm, we have the lit index array.
|
||||
u8 *fdrc_base = (u8 *)fdrc;
|
||||
u8 *fdrc_base = (u8 *)fdrc.get();
|
||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
||||
@ -307,14 +314,12 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
|
||||
// Walk the map by hash value assigning indexes and laying out the
|
||||
// elements (and their associated string confirm material) in memory.
|
||||
for (std::map<u32, vector<LiteralIndex> >::const_iterator
|
||||
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
|
||||
const u32 hash = i->first;
|
||||
const vector<LiteralIndex> &vlidx = i->second;
|
||||
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
|
||||
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
for (const auto &m : res2lits) {
|
||||
const u32 hash = m.first;
|
||||
const vector<LiteralIndex> &vlidx = m.second;
|
||||
bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
|
||||
for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
|
||||
LiteralIndex litIdx = *i;
|
||||
|
||||
// Write LitInfo header.
|
||||
u8 *oldPtr = ptr;
|
||||
@ -333,7 +338,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
}
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
if (i2 + 1 == e2) {
|
||||
if (next(i) == e) {
|
||||
finalLI.next = 0x0;
|
||||
} else {
|
||||
// our next field represents an adjustment on top of
|
||||
@ -348,14 +353,13 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
assert((size_t)(ptr - fdrc_base) <= size);
|
||||
}
|
||||
|
||||
*fdrc_p = fdrc;
|
||||
|
||||
// Return actual used size, not worst-case size. Must be rounded up to
|
||||
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||
alignof(FDRConfirm));
|
||||
assert(actual_size <= size);
|
||||
return actual_size;
|
||||
|
||||
return {move(fdrc), actual_size};
|
||||
}
|
||||
|
||||
static
|
||||
@ -377,12 +381,9 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
||||
u32 totalConfirmSize = 0;
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
|
||||
for (vector<LiteralIndex>::const_iterator
|
||||
i = bucketToLits[b].begin(),
|
||||
e = bucketToLits[b].end();
|
||||
i != e; ++i) {
|
||||
hwlmLiteral lit = lits[*i]; // copy
|
||||
vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
|
||||
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
||||
hwlmLiteral lit = lits[lit_idx]; // copy
|
||||
// c is last char of this literal
|
||||
u8 c = *(lit.s.rbegin());
|
||||
|
||||
@ -424,26 +425,27 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
||||
}
|
||||
|
||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
||||
if (!vl[c].empty()) {
|
||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||
FDRConfirm *fdrc;
|
||||
size_t size = getFDRConfirm(vl[c], &fdrc,
|
||||
eng.typicallyHoldsOneCharLits(),
|
||||
make_small, makeConfirm);
|
||||
BucketSplitPair p = make_pair(b, c);
|
||||
bc2Conf[p] = make_pair(fdrc, size);
|
||||
totalConfirmSize += size;
|
||||
if (vl[c].empty()) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||
auto key = make_pair(b, c);
|
||||
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
|
||||
make_small, makeConfirm);
|
||||
totalConfirmSize += fc.second;
|
||||
assert(bc2Conf.find(key) == end(bc2Conf));
|
||||
bc2Conf.emplace(key, move(fc));
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalConfirmSize;
|
||||
}
|
||||
|
||||
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng,
|
||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small) {
|
||||
pair<aligned_unique_ptr<u8>, size_t>
|
||||
setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng,
|
||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||
bool make_small) {
|
||||
BC2CONF bc2Conf;
|
||||
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
|
||||
make_small);
|
||||
@ -453,26 +455,24 @@ pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
|
||||
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
|
||||
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
||||
|
||||
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
|
||||
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
u32 *confBase = (u32 *)buf;
|
||||
u8 *ptr = buf + totalConfSwitchSize;
|
||||
u32 *confBase = (u32 *)buf.get();
|
||||
u8 *ptr = buf.get() + totalConfSwitchSize;
|
||||
|
||||
for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e;
|
||||
++i) {
|
||||
const pair<FDRConfirm *, size_t> &p = i->second;
|
||||
for (const auto &m : bc2Conf) {
|
||||
const BucketIndex &b = m.first.first;
|
||||
const u8 &c = m.first.second;
|
||||
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
|
||||
// confirm offset is relative to the base of this structure, now
|
||||
u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
|
||||
memcpy(ptr, p.first, p.second);
|
||||
u32 confirm_offset = verify_u32(ptr - buf.get());
|
||||
memcpy(ptr, p.first.get(), p.second);
|
||||
ptr += p.second;
|
||||
aligned_free(p.first);
|
||||
BucketIndex b = i->first.first;
|
||||
u8 c = i->first.second;
|
||||
u32 idx = c * nBuckets + b;
|
||||
confBase[idx] = confirm_offset;
|
||||
}
|
||||
return make_pair(buf, totalSize);
|
||||
return {move(buf), totalSize};
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@@ -105,7 +105,6 @@ struct FDR_Runtime_Args {
    size_t start_offset;
    HWLMCallback cb;
    void *ctxt;
    hwlm_group_t *groups;
    const u8 *firstFloodDetect;
    const u64a histBytes;
};
@ -94,14 +94,13 @@ static
|
||||
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
||||
long_lits.reserve(lits.size());
|
||||
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
|
||||
it != lits.end(); ++it) {
|
||||
if (it->s.length() > max_len) {
|
||||
hwlmLiteral tmp = *it; // copy
|
||||
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.length() > max_len) {
|
||||
hwlmLiteral tmp = lit; // copy
|
||||
tmp.s.pop_back();
|
||||
tmp.id = 0; // recalc later
|
||||
tmp.groups = 0; // filled in later by hash bucket(s)
|
||||
long_lits.push_back(tmp);
|
||||
long_lits.push_back(move(tmp));
|
||||
}
|
||||
}
|
||||
|
||||
@ -112,15 +111,12 @@ bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||
// sort long_literals by caseful/caseless and in lexicographical order,
|
||||
// remove duplicates
|
||||
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
||||
vector<hwlmLiteral>::iterator new_end =
|
||||
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||
auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||
long_lits.erase(new_end, long_lits.end());
|
||||
|
||||
// fill in ids; not currently used
|
||||
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
i->id = i - long_lits.begin();
|
||||
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||
i->id = distance(long_lits.begin(), i);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -143,23 +139,19 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
hashedPositions[m] = 0;
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||
if (i->nocase) {
|
||||
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
|
||||
boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
MODES m = i->nocase ? CASELESS : CASEFUL;
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
for (const auto &lit : long_lits) {
|
||||
Modes m = lit.nocase ? CASELESS : CASEFUL;
|
||||
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
|
||||
hashedPositions[m]++;
|
||||
}
|
||||
positions[m] += i->s.size();
|
||||
positions[m] += lit.s.size();
|
||||
}
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
||||
@ -170,7 +162,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
|
||||
#ifdef DEBUG_COMPILE
|
||||
printf("analyzeLits:\n");
|
||||
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
|
||||
for (Modes m = CASEFUL; m < MAX_MODES; m++) {
|
||||
printf("mode %s boundary %d positions %d hashedPositions %d "
|
||||
"hashEntries %d\n",
|
||||
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
||||
@ -181,7 +173,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
}
|
||||
|
||||
static
|
||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
|
||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
|
||||
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
||||
}
|
||||
|
||||
@ -203,24 +195,21 @@ struct OffsetIDFromEndOrder {
|
||||
|
||||
static
|
||||
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
FDRSHashEntry *tab, size_t numEntries, MODES m,
|
||||
FDRSHashEntry *tab, size_t numEntries, Modes mode,
|
||||
map<u32, u32> &litToOffsetVal) {
|
||||
const u32 nbits = lg2(numEntries);
|
||||
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
||||
map<u32, u64a> bucketToBitfield;
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
const hwlmLiteral &l = *i;
|
||||
if ((m == CASELESS) != i->nocase) {
|
||||
for (const auto &lit : long_lits) {
|
||||
if ((mode == CASELESS) != lit.nocase) {
|
||||
continue;
|
||||
}
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
u32 h = hashLit(l, j, max_len, m);
|
||||
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
|
||||
u32 h = hashLit(lit, j, max_len, mode);
|
||||
u32 h_ent = h & ((1U << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
|
||||
bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
|
||||
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
||||
}
|
||||
}
|
||||
@ -231,11 +220,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
|
||||
// sweep out bitfield entries and save the results swapped accordingly
|
||||
// also, anything with bitfield entries is put in filledBuckets
|
||||
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
|
||||
e = bucketToBitfield.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
u64a contents = i->second;
|
||||
for (const auto &m : bucketToBitfield) {
|
||||
const u32 &bucket = m.first;
|
||||
const u64a &contents = m.second;
|
||||
tab[bucket].bitfield = contents;
|
||||
filledBuckets.set(bucket);
|
||||
}
|
||||
@ -243,12 +230,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (map<u32, deque<pair<u32, u32> > >::iterator
i = bucketToLitOffPairs.begin(),
e = bucketToLitOffPairs.end();
i != e; ++i) {
u32 bucket = i->first;
deque<pair<u32, u32> > &d = i->second;
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;

// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
|
||||
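The comments above describe parking each chain at the nearest free hash-table slot; the table is sized so a free slot always exists. The real placement code is not shown in full in this hunk, so the following is only a rough sketch of a nearest-free-slot probe over a power-of-two table, with findNearestFree as an invented name:

    #include <cstdint>
    #include <vector>

    // Probe outward from the preferred bucket until an unused slot is found.
    static uint32_t findNearestFree(const std::vector<bool> &used,
                                    uint32_t bucket) {
        const uint32_t size = uint32_t(used.size()); // assumed power of two
        for (uint32_t delta = 0; delta < size; delta++) {
            uint32_t up = (bucket + delta) & (size - 1);
            if (!used[up]) {
                return up;
            }
            uint32_t down = (bucket - delta) & (size - 1);
            if (!used[down]) {
                return down;
            }
        }
        return bucket; // unreachable if the table really has spare capacity
    }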
@ -299,31 +283,30 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
static
|
||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
vector<hwlmLiteral>::const_iterator it, ite;
|
||||
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
|
||||
rv = max(rv, it->msk.size());
|
||||
for (const auto &lit : lits) {
|
||||
rv = max(rv, lit.msk.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
pair<u8 *, size_t>
|
||||
pair<aligned_unique_ptr<u8>, size_t>
|
||||
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control) {
|
||||
hwlmStreamingControl &stream_control) {
|
||||
// refuse to compile if we are forced to have smaller than minimum
|
||||
// history required for long-literal support, full stop
|
||||
// otherwise, choose the maximum of the preferred history quantity
|
||||
// (currently a fairly extravagant 32) or the already used history
|
||||
// quantity - subject to the limitation of stream_control->history_max
|
||||
// quantity - subject to the limitation of stream_control.history_max
|
||||
|
||||
const size_t MIN_HISTORY_REQUIRED = 32;
|
||||
|
||||
if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
|
||||
if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
|
||||
throw std::logic_error("Cannot set history to minimum history required");
|
||||
}
|
||||
|
||||
size_t max_len =
|
||||
MIN(stream_control->history_max,
|
||||
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
|
||||
MIN(stream_control.history_max,
|
||||
MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
|
||||
assert(max_len >= MIN_HISTORY_REQUIRED);
|
||||
size_t max_mask_len = maxMaskLen(lits);
|
||||
|
||||
@ -334,10 +317,10 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||
|
||||
// we want enough history to manage the longest literal and the longest
|
||||
// mask.
|
||||
stream_control->literal_history_required =
|
||||
stream_control.literal_history_required =
|
||||
max(maxLen(lits), max_mask_len) - 1;
|
||||
stream_control->literal_stream_state_required = 0;
|
||||
return make_pair(nullptr, size_t{0});
|
||||
stream_control.literal_stream_state_required = 0;
|
||||
return {nullptr, size_t{0}};
|
||||
}
|
||||
|
||||
// Ensure that we have enough room for the longest mask.
|
||||
@ -381,11 +364,11 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
|
||||
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
|
||||
|
||||
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
|
||||
auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
|
||||
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
// then fill it in
|
||||
u8 * ptr = secondaryTable;
|
||||
u8 * ptr = secondaryTable.get();
|
||||
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
|
||||
// fill in header
|
||||
header->pseudoEngineID = (u32)0xffffffff;
|
||||
@ -407,11 +390,9 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||
ptr += litTabSize;
|
||||
|
||||
map<u32, u32> litToOffsetVal;
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
|
||||
u32 entry = verify_u32(i - long_lits.begin());
|
||||
u32 offset = verify_u32(ptr - secondaryTable);
|
||||
u32 offset = verify_u32(ptr - secondaryTable.get());
|
||||
|
||||
// point the table entry to the string location
|
||||
litTabPtr[entry].offset = offset;
|
||||
@ -425,20 +406,20 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
|
||||
}
|
||||
|
||||
// fill in final lit table entry with current ptr (serves as end value)
|
||||
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);
|
||||
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
|
||||
|
||||
// fill hash tables
|
||||
ptr = secondaryTable + htOffset[CASEFUL];
|
||||
ptr = secondaryTable.get() + htOffset[CASEFUL];
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
||||
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
|
||||
(MODES)m, litToOffsetVal);
|
||||
(Modes)m, litToOffsetVal);
|
||||
ptr += htSize[m];
|
||||
}
|
||||
|
||||
// tell the world what we did
|
||||
stream_control->literal_history_required = max_len;
|
||||
stream_control->literal_stream_state_required = tot_state_bytes;
|
||||
return make_pair(secondaryTable, tabSize);
|
||||
stream_control.literal_history_required = max_len;
|
||||
stream_control.literal_stream_state_required = tot_state_bytes;
|
||||
return {move(secondaryTable), tabSize};
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,11 +41,11 @@
|
||||
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)

typedef enum {
enum Modes {
CASEFUL = 0,
CASELESS = 1,
MAX_MODES = 2
} MODES;
};

// We have one of these structures hanging off the 'link' of our secondary
|
||||
// FDR table that handles streaming strings
|
||||
@ -91,12 +91,12 @@ struct FDRSHashEntry {
|
||||
};
|
||||
|
||||
static really_inline
|
||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
|
||||
return m == CASEFUL ? 0 : h->boundary[m-1];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
|
||||
return h->boundary[m];
|
||||
}
|
||||
|
||||
@ -107,17 +107,17 @@ const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
|
||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
|
||||
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
|
||||
return v - getBaseOffsetOfLits(h, m) + 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
|
||||
return v + getBaseOffsetOfLits(h, m) - 1;
|
||||
}
|
||||
|
||||
@ -127,7 +127,7 @@ u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
|
||||
}
|
||||
|
||||
static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
assert(len >= 32);
|
||||
|
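streaming_hash() itself is truncated at the hunk boundary, so only its constants are visible here. As a rough analogue (not the actual Hyperscan hash body), the constants suggest a multiplicative hash over 64-bit chunks in which caseless mode folds case by clearing bit 0x20 of every ASCII byte via CASEMASK:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static uint32_t toy_streaming_hash(const uint8_t *ptr, size_t len,
                                       bool nocase) {
        const uint64_t CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
        const uint64_t MULTIPLIER = 0x0b4e0ef37bc32127ULL;
        uint64_t h = 0;
        for (size_t i = 0; i + 8 <= len; i += 8) {
            uint64_t chunk;
            std::memcpy(&chunk, ptr + i, sizeof(chunk));
            if (nocase) {
                chunk &= CASEMASK; // 'a'..'z' -> 'A'..'Z' (clears the 0x20 bit)
            }
            h = (h + chunk) * MULTIPLIER;
        }
        return uint32_t(h >> 32);
    }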
@ -143,7 +143,7 @@ u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
|
||||
// binary search for the literal index that contains the current state
static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
u32 stateValue, MODES m) {
u32 stateValue, enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 lo = get_start_lit_idx(streamingTable, m);
u32 hi = get_end_lit_idx(streamingTable, m);
|
||||
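findLitTabEntry() is cut off by the hunk boundary; it binary-searches the literal table between the caseful/caseless boundaries for the entry that owns a packed state value. One plausible shape of that search, with OffsetEntry and findEntry as invented stand-ins for FDRSLiteral and the real routine:

    #include <cassert>
    #include <cstdint>

    struct OffsetEntry {
        uint32_t offset; // ascending within [lo, hi)
    };

    // Return the index of the last entry whose offset is <= stateValue.
    static uint32_t findEntry(const OffsetEntry *litTab, uint32_t lo,
                              uint32_t hi, uint32_t stateValue) {
        assert(lo < hi);
        while (lo + 1 < hi) {
            uint32_t mid = lo + (hi - lo) / 2;
            if (litTab[mid].offset <= stateValue) {
                lo = mid;
            } else {
                hi = mid;
            }
        }
        return lo;
    }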
@ -175,7 +175,7 @@ void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSLiteral * litTab,
|
||||
const u32 *state_table,
|
||||
const MODES m) {
|
||||
const enum Modes m) {
|
||||
if (!state_table[m]) {
|
||||
return;
|
||||
}
|
||||
@ -213,8 +213,9 @@ void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
|
||||
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
|
||||
u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDR_Runtime_Args *a, u32 hashState,
|
||||
enum Modes m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
@ -279,7 +280,7 @@ void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
|
||||
|
||||
static really_inline
|
||||
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||
u32 h, const MODES m) {
|
||||
u32 h, const enum Modes m) {
|
||||
u32 nbits = streamingTable->hashNBits[m];
|
||||
if (!nbits) {
|
||||
return NULL;
|
||||
@ -303,7 +304,7 @@ const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||
static really_inline
|
||||
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSHashEntry *ent, const MODES m) {
|
||||
const struct FDRSHashEntry *ent, const enum Modes m) {
|
||||
assert(ent);
|
||||
assert(streamingTable->hashNBits[m]);
|
||||
|
||||
|
@ -69,7 +69,7 @@ static
|
||||
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
FDRFlood &fl = tmpFlood[c];
fl.suffix = MAX(fl.suffix, suffix + 1);
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix);
}

static
|
||||
@ -90,8 +90,9 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
||||
}
|
||||
}
|
||||
|
||||
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng) {
|
||||
pair<aligned_unique_ptr<u8>, size_t>
|
||||
setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng) {
|
||||
vector<FDRFlood> tmpFlood(N_CHARS);
|
||||
u32 default_suffix = eng.getDefaultFloodSuffixLength();
|
||||
|
||||
@ -124,8 +125,9 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
for (u32 i = 0; i < iEnd; i++) {
|
||||
if (i < litSize) {
|
||||
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
|
||||
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
|
||||
i, c, lit.s[litSize - i - 1]);
|
||||
DEBUG_PRINTF("non-flood char in literal[%u]: "
|
||||
"0x%02x != 0x%02x\n",
|
||||
i, c, lit.s[litSize - i - 1]);
|
||||
upSuffix = MIN(upSuffix, i);
|
||||
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
|
||||
break;
|
||||
@ -195,11 +197,12 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
|
||||
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
|
||||
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
|
||||
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
|
||||
|
||||
auto buf = aligned_zmalloc_unique<u8>(totalSize);
|
||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
u32 *floodHeader = (u32 *)buf;
|
||||
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);
|
||||
u32 *floodHeader = (u32 *)buf.get();
|
||||
FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
|
||||
|
||||
u32 currentFloodIndex = 0;
|
||||
for (const auto &m : flood2chars) {
|
||||
@ -215,7 +218,7 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
|
||||
floodHeaderSize, floodStructSize, totalSize);
|
||||
|
||||
return make_pair((u8 *)buf, totalSize);
|
||||
return {move(buf), totalSize};
|
||||
}

} // namespace ue2
|
||||
|
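Both return paths in this file now hand back an aligned_unique_ptr<u8> built by aligned_zmalloc_unique() instead of a raw pointer from aligned_zmalloc(). The real helpers live in Hyperscan's util allocation code; the sketch below only illustrates the general pattern, assuming C++17 aligned operator new and a 64-byte alignment:

    #include <cstddef>
    #include <cstring>
    #include <memory>
    #include <new>

    struct AlignedDeleter {
        void operator()(unsigned char *p) const noexcept {
            ::operator delete[](p, std::align_val_t{64});
        }
    };

    using aligned_bytes = std::unique_ptr<unsigned char[], AlignedDeleter>;

    // Zeroed, 64-byte-aligned buffer whose ownership can be moved to the caller.
    static aligned_bytes make_aligned_zeroed(size_t len) {
        auto *p = static_cast<unsigned char *>(
            ::operator new[](len, std::align_val_t{64}));
        std::memset(p, 0, len);
        return aligned_bytes(p);
    }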
209
src/fdr/teddy.c
@ -36,7 +36,6 @@
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_runtime_common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@ -80,15 +79,15 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
|
||||
do { \
|
||||
if (unlikely(isnonzero128(var))) { \
|
||||
u64a lo = movq(var); \
|
||||
u64a hi = movq(byteShiftRight128(var, 8)); \
|
||||
u64a hi = movq(rshiftbyte_m128(var, 8)); \
|
||||
if (unlikely(lo)) { \
|
||||
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(hi)) { \
|
||||
conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} \
|
||||
@ -98,27 +97,27 @@ do { \
|
||||
do { \
|
||||
if (unlikely(isnonzero128(var))) { \
|
||||
u32 part1 = movd(var); \
|
||||
u32 part2 = movd(byteShiftRight128(var, 4)); \
|
||||
u32 part3 = movd(byteShiftRight128(var, 8)); \
|
||||
u32 part4 = movd(byteShiftRight128(var, 12)); \
|
||||
u32 part2 = movd(rshiftbyte_m128(var, 4)); \
|
||||
u32 part3 = movd(rshiftbyte_m128(var, 8)); \
|
||||
u32 part4 = movd(rshiftbyte_m128(var, 12)); \
|
||||
if (unlikely(part1)) { \
|
||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part2)) { \
|
||||
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part3)) { \
|
||||
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part4)) { \
|
||||
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} \
|
||||
@ -126,36 +125,34 @@ do { \
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
|
||||
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
return and128(and128(pshufb(maskBase[0*2], lo),
|
||||
pshufb(maskBase[0*2+1], hi)), p_mask);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
|
||||
m128 val) {
|
||||
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m1(maskBase, val);
|
||||
|
||||
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
|
||||
pshufb(maskBase[1*2+1], hi));
|
||||
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
|
||||
*old_1 = res_1;
|
||||
return and128(and128(r, p_mask), res_shifted_1);
|
||||
return and128(r, res_shifted_1);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
m128 p_mask, m128 val) {
|
||||
m128 val) {
|
||||
m128 mask = set16x8(0xf);
|
||||
m128 lo = and128(val, mask);
|
||||
m128 hi = and128(rshift2x64(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
|
||||
m128 hi = and128(rshift64_m128(val, 4), mask);
|
||||
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
|
||||
|
||||
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
|
||||
pshufb(maskBase[2*2+1], hi));
|
||||
@ -166,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
|
||||
|
||||
static really_inline
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 *old_3, m128 p_mask, m128 val) {
m128 *old_3, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);

m128 res_3 = and128(pshufb(maskBase[3*2], lo),
pshufb(maskBase[3*2+1], hi));
@ -180,11 +177,10 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
}
|
||||
|
||||
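The rewritten prep_conf_teddy_m1..m4 above no longer take a p_mask argument; only the cautious boundary blocks AND the result with p_mask afterwards (r_0 = and128(r_0, p_mask)), and the main loop stops passing ones128(). A scalar analogue of that hoisting, with invented names and a trivial stand-in for the per-block work:

    #include <cstdint>

    // stand-in for the shuffle/AND work done on each 16-byte block
    static uint64_t prep_conf(uint64_t block) {
        return block ^ (block >> 4);
    }

    // boundary path: a partial load happened, so zero the invalid lanes after
    static uint64_t scan_cautious(uint64_t block, uint64_t valid_mask) {
        return prep_conf(block) & valid_mask;
    }

    // hot-loop path: the whole block is in bounds, no mask is needed at all
    static uint64_t scan_fast(uint64_t block) {
        return prep_conf(block);
    }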
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -203,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -217,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
}
|
||||
|
||||
@ -227,19 +224,19 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -258,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -272,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -282,19 +280,19 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -314,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -329,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@ -341,19 +337,19 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -373,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -388,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr));
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
|
||||
load128(ptr + 16));
|
||||
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -400,19 +394,19 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -434,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -450,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@ -461,20 +456,19 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -496,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -512,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr));
|
||||
load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones128(), load128(ptr + 16));
|
||||
load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -523,20 +518,19 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
m128 p_mask;
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -559,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -575,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
&res_old_3, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@ -587,19 +582,19 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -622,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -638,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr));
|
||||
&res_old_3, load128(ptr));
|
||||
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones128(), load128(ptr + 16));
|
||||
&res_old_3, load128(ptr + 16));
|
||||
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -650,9 +646,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and128(r_0, p_mask);
|
||||
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
@ -33,64 +33,85 @@
|
||||
#ifndef TEDDY_H_
|
||||
#define TEDDY_H_
|
||||
|
||||
#include "hwlm/hwlm.h" // for hwlm_group_t
|
||||
|
||||
struct FDR; // forward declaration from fdr_internal.h
|
||||
struct FDR_Runtime_Args;
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a);
|
||||
hwlm_error_t
fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);

#endif /* __AVX2__ */
|
||||
|
||||
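Every exec entry point in this header gains an hwlm_group_t control parameter, so the current group mask is passed in by value rather than re-read through a->groups inside each engine. A toy sketch of that calling-convention change (group_t, RuntimeArgs and the scan functions are illustrative, not the real hwlm types):

    #include <cstdint>

    using group_t = uint64_t;

    struct RuntimeArgs {
        group_t *groups; // shared location the caller writes back at the end
    };

    // before: each engine dereferences a->groups itself
    static int scan_old(const RuntimeArgs *a) {
        group_t control = *a->groups;
        return control != 0;
    }

    // after: the caller loads the mask once and passes it by value
    static int scan_new(const RuntimeArgs *a, group_t control) {
        (void)a;
        return control != 0;
    }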
|
@ -36,7 +36,6 @@
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_runtime_common.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
@ -122,22 +121,22 @@ do { \
|
||||
u64a part4 = extract64from256(r, 1); \
|
||||
if (unlikely(part1)) { \
|
||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part2)) { \
|
||||
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part3)) { \
|
||||
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part4)) { \
|
||||
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} \
|
||||
@ -159,41 +158,41 @@ do { \
|
||||
u32 part8 = extract32from256(r, 3); \
|
||||
if (unlikely(part1)) { \
|
||||
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part2)) { \
|
||||
conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
} \
|
||||
if (unlikely(part3)) { \
|
||||
conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part4)) { \
|
||||
conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part5)) { \
|
||||
conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part6)) { \
|
||||
conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part7)) { \
|
||||
conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
if (unlikely(part8)) { \
|
||||
conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \
|
||||
control, &last_match); \
|
||||
&control, &last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
} \
|
||||
@ -205,11 +204,11 @@ do { \
|
||||
if (unlikely(isnonzero256(var))) { \
|
||||
u32 arrCnt = 0; \
|
||||
m128 lo = cast256to128(var); \
|
||||
m128 hi = cast256to128(swap128in256(var)); \
|
||||
m128 hi = movdq_hi(var); \
|
||||
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
|
||||
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
|
||||
for (u32 i = 0; i < arrCnt; i++) { \
|
||||
conf_fn(bitArr[i], confBase, reason, a, ptr, control, \
|
||||
conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \
|
||||
&last_match); \
|
||||
CHECK_HWLM_TERMINATE_MATCHING; \
|
||||
} \
|
||||
@ -372,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
||||
64 * (offset);
|
||||
*arrCnt += 1;
|
||||
}
|
||||
u64a part_1 = movq(byteShiftRight128(var, 8));
|
||||
u64a part_1 = movq(rshiftbyte_m128(var, 8));
|
||||
while (unlikely(part_1)) {
|
||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
||||
64 * (offset + 1);
|
||||
@ -385,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
||||
32 * (offset * 2);
|
||||
*arrCnt += 1;
|
||||
}
|
||||
u32 part_1 = movd(byteShiftRight128(var, 4));
|
||||
u32 part_1 = movd(rshiftbyte_m128(var, 4));
|
||||
while (unlikely(part_1)) {
|
||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
|
||||
32 * (offset * 2 + 1);
|
||||
*arrCnt += 1;
|
||||
}
|
||||
u32 part_2 = movd(byteShiftRight128(var, 8));
|
||||
u32 part_2 = movd(rshiftbyte_m128(var, 8));
|
||||
while (unlikely(part_2)) {
|
||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
|
||||
32 * (offset * 2 + 2);
|
||||
*arrCnt += 1;
|
||||
}
|
||||
u32 part_3 = movd(byteShiftRight128(var, 12));
|
||||
u32 part_3 = movd(rshiftbyte_m128(var, 12));
|
||||
while (unlikely(part_3)) {
|
||||
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
|
||||
32 * (offset * 2 + 3);
|
||||
@ -408,36 +407,35 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) {
|
||||
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
|
||||
m256 mask = set32x8(0xf);
|
||||
m256 lo = and256(val, mask);
|
||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
||||
return and256(and256(vpshufb(maskBase[0*2], lo),
|
||||
vpshufb(maskBase[0*2+1], hi)), p_mask);
|
||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||
return and256(vpshufb(maskBase[0*2], lo),
|
||||
vpshufb(maskBase[0*2+1], hi));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask,
|
||||
m256 val) {
|
||||
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) {
|
||||
m256 mask = set32x8(0xf);
|
||||
m256 lo = and256(val, mask);
|
||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val);
|
||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m1(maskBase, val);
|
||||
|
||||
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
|
||||
vpshufb(maskBase[1*2+1], hi));
|
||||
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
|
||||
*old_1 = res_1;
|
||||
return and256(and256(r, p_mask), res_shifted_1);
|
||||
return and256(r, res_shifted_1);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||
m256 p_mask, m256 val) {
|
||||
m256 val) {
|
||||
m256 mask = set32x8(0xf);
|
||||
m256 lo = and256(val, mask);
|
||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val);
|
||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
|
||||
|
||||
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
|
||||
vpshufb(maskBase[2*2+1], hi));
|
||||
@ -448,11 +446,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||
|
||||
static really_inline
|
||||
m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||
m256 *old_3, m256 p_mask, m256 val) {
|
||||
m256 *old_3, m256 val) {
|
||||
m256 mask = set32x8(0xf);
|
||||
m256 lo = and256(val, mask);
|
||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val);
|
||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
|
||||
|
||||
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
|
||||
vpshufb(maskBase[3*2+1], hi));
|
||||
@ -462,12 +460,10 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi,
|
||||
m256 p_mask) {
|
||||
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) {
|
||||
m256 lo = and256(val, mask);
|
||||
m256 hi = and256(rshift4x64(val, 4), mask);
|
||||
m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
|
||||
return and256(res, p_mask);
|
||||
m256 hi = and256(rshift64_m256(val, 4), mask);
|
||||
return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -482,11 +478,10 @@ const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -505,13 +500,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -519,10 +515,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
|
||||
load2x128(ptr + 16));
|
||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
|
||||
}
|
||||
|
||||
@ -530,19 +525,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -561,13 +556,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -575,10 +571,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
|
||||
load2x128(ptr + 16));
|
||||
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -586,19 +581,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 1);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -618,14 +613,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -633,10 +628,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
|
||||
load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
@ -645,19 +639,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -677,25 +671,24 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
load2x128(ptr));
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
|
||||
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
|
||||
load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
@ -704,19 +697,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 2);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
|
||||
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -738,14 +731,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr));
|
||||
load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -754,10 +748,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr));
|
||||
load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr + 16));
|
||||
load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@ -766,19 +760,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -800,14 +794,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr));
|
||||
load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -816,10 +811,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr));
|
||||
load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
ones256(), load2x128(ptr + 16));
|
||||
load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -828,19 +823,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 3);
|
||||
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
|
||||
p_mask, val_0);
|
||||
val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -863,15 +858,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr));
|
||||
&res_old_3, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -880,12 +875,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr));
|
||||
&res_old_3, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr + 16));
|
||||
&res_old_3, load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
|
||||
}
|
||||
|
||||
@ -894,19 +887,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -929,15 +922,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
|
||||
if (ptr + 16 < buf_end) {
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr));
|
||||
&res_old_3, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
ptr += 16;
|
||||
}
|
||||
@ -946,12 +939,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
__builtin_prefetch(ptr + (iterBytes*4));
|
||||
CHECK_FLOOD;
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr));
|
||||
&res_old_3, load2x128(ptr));
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, ones256(),
|
||||
load2x128(ptr + 16));
|
||||
&res_old_3, load2x128(ptr + 16));
|
||||
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
|
||||
}
|
||||
|
||||
@ -960,19 +951,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
|
||||
a->buf_history, a->len_history, 4);
|
||||
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
|
||||
&res_old_3, p_mask, val_0);
|
||||
&res_old_3, val_0);
|
||||
r_0 = and256(r_0, p_mask);
|
||||
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -996,16 +987,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||
buf_end, a->buf_history, a->len_history);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
p_mask);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
res_0 = and256(res_0, p_mask);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||
ptr += 32;
|
||||
}
|
||||
|
||||
if (ptr + 32 < buf_end) {
|
||||
m256 val_0 = load256(ptr + 0);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||
ptr += 32;
|
||||
}
|
||||
@ -1015,13 +1005,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||
CHECK_FLOOD;
|
||||
|
||||
m256 val_0 = load256(ptr + 0);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
||||
|
||||
m256 val_1 = load256(ptr + 32);
|
||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
|
||||
}
|
||||
|
||||
@ -1029,20 +1017,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||
buf_end, a->buf_history, a->len_history);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
p_mask);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
res_0 = and256(res_0, p_mask);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a) {
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
const u8 *buf_end = a->buf + a->len;
|
||||
const u8 *ptr = a->buf + a->start_offset;
|
||||
hwlmcb_rv_t controlVal = *a->groups;
|
||||
hwlmcb_rv_t *control = &controlVal;
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
const u8 *tryFloodDetect = a->firstFloodDetect;
|
||||
u32 last_match = (u32)-1;
|
||||
@ -1066,16 +1053,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||
buf_end, a->buf_history, a->len_history);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
p_mask);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
res_0 = and256(res_0, p_mask);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||
ptr += 32;
|
||||
}
|
||||
|
||||
if (ptr + 32 < buf_end) {
|
||||
m256 val_0 = load256(ptr + 0);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||
ptr += 32;
|
||||
}
|
||||
@ -1085,13 +1071,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||
CHECK_FLOOD;
|
||||
|
||||
m256 val_0 = load256(ptr + 0);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
||||
|
||||
m256 val_1 = load256(ptr + 32);
|
||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
|
||||
ones256());
|
||||
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
|
||||
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
|
||||
}
|
||||
|
||||
@ -1099,11 +1083,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
|
||||
m256 p_mask;
|
||||
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
|
||||
buf_end, a->buf_history, a->len_history);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
|
||||
p_mask);
|
||||
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
|
||||
res_0 = and256(res_0, p_mask);
|
||||
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
|
||||
}
|
||||
*a->groups = controlVal;
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -74,12 +74,11 @@ public:
                   const TeddyEngineDescription &eng_in, bool make_small_in)
         : eng(eng_in), lits(lits_in), make_small(make_small_in) {}
 
-    aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
+    aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
     bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
 };
 
 class TeddySet {
-    const vector<hwlmLiteral> &lits;
     u32 len;
     // nibbleSets is a series of bitfields over 16 predicates
     // that represent the whether shufti nibble set
@ -89,8 +88,7 @@ class TeddySet {
     vector<u16> nibbleSets;
     set<u32> litIds;
 public:
-    TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
-        : lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
+    explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
     const set<u32> & getLits() const { return litIds; }
     size_t litCount() const { return litIds.size(); }
 
@ -106,8 +104,8 @@ public:
         }
         printf("\nnlits: %zu\nLit ids: ", litCount());
         printf("Prob: %llu\n", probability());
-        for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
-            printf("%u ", *i);
+        for (const auto &id : litIds) {
+            printf("%u ", id);
         }
         printf("\n");
         printf("Flood prone : %s\n", isRunProne()?"yes":"no");
@ -118,15 +116,15 @@ public:
         return nibbleSets == ts.nibbleSets;
     }
 
-    void addLiteral(u32 lit_id) {
-        const string &s = lits[lit_id].s;
+    void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
+        const string &s = lit.s;
         for (u32 i = 0; i < len; i++) {
             if (i < s.size()) {
                 u8 c = s[s.size() - i - 1];
                 u8 c_hi = (c >> 4) & 0xf;
                 u8 c_lo = c & 0xf;
                 nibbleSets[i*2] = 1 << c_lo;
-                if (lits[lit_id].nocase && ourisalpha(c)) {
+                if (lit.nocase && ourisalpha(c)) {
                     nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
                 } else {
                     nibbleSets[i*2+1] = 1 << c_hi;
@ -185,28 +183,26 @@ bool TeddyCompiler::pack(map<BucketIndex,
     set<TeddySet> sts;
 
     for (u32 i = 0; i < lits.size(); i++) {
-        TeddySet ts(lits, eng.numMasks);
-        ts.addLiteral(i);
+        TeddySet ts(eng.numMasks);
+        ts.addLiteral(i, lits[i]);
         sts.insert(ts);
     }
 
     while (1) {
 #ifdef TEDDY_DEBUG
         printf("Size %zu\n", sts.size());
-        for (set<TeddySet>::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
-            printf("\n"); i1->dump();
+        for (const TeddySet &ts : sts) {
+            printf("\n"); ts.dump();
         }
         printf("\n===============================================\n");
 #endif
 
-        set<TeddySet>::iterator m1 = sts.end(), m2 = sts.end();
+        auto m1 = sts.end(), m2 = sts.end();
         u64a best = 0xffffffffffffffffULL;
 
-        for (set<TeddySet>::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
-            set<TeddySet>::iterator i2 = i1;
-            ++i2;
+        for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
             const TeddySet &s1 = *i1;
-            for (set<TeddySet>::iterator e2 = sts.end(); i2 != e2; ++i2) {
+            for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
                 const TeddySet &s2 = *i2;
 
                 // be more conservative if we don't absolutely need to
@ -216,7 +212,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
continue;
|
||||
}
|
||||
|
||||
TeddySet tmpSet(lits, eng.numMasks);
|
||||
TeddySet tmpSet(eng.numMasks);
|
||||
tmpSet.merge(s1);
|
||||
tmpSet.merge(s2);
|
||||
u64a newScore = tmpSet.heuristic();
|
||||
@ -246,7 +242,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
}
|
||||
|
||||
// do the merge
|
||||
TeddySet nts(lits, eng.numMasks);
|
||||
TeddySet nts(eng.numMasks);
|
||||
nts.merge(*m1);
|
||||
nts.merge(*m2);
|
||||
#ifdef TEDDY_DEBUG
|
||||
@ -263,25 +259,23 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
sts.erase(m2);
|
||||
sts.insert(nts);
|
||||
}
|
||||
u32 cnt = 0;
|
||||
|
||||
if (sts.size() > eng.getNumBuckets()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (set<TeddySet>::const_iterator i = sts.begin(), e = sts.end(); i != e;
|
||||
++i) {
|
||||
for (set<u32>::const_iterator i2 = i->getLits().begin(),
|
||||
e2 = i->getLits().end();
|
||||
i2 != e2; ++i2) {
|
||||
bucketToLits[cnt].push_back(*i2);
|
||||
}
|
||||
cnt++;
|
||||
u32 bucket_id = 0;
|
||||
for (const TeddySet &ts : sts) {
|
||||
const auto &ts_lits = ts.getLits();
|
||||
auto &bucket_lits = bucketToLits[bucket_id];
|
||||
bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
|
||||
bucket_id++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
||||
aligned_unique_ptr<FDR>
|
||||
TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
|
||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
||||
return nullptr;
|
||||
@ -314,9 +308,8 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
||||
|
||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||
|
||||
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||
pair<u8 *, size_t> confirmTmp
|
||||
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||
auto floodControlTmp = setupFDRFloodControl(lits, eng);
|
||||
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
||||
maskLen +
|
||||
@ -334,38 +327,29 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
|
||||
teddy->maxStringLen = verify_u32(maxLen(lits));
|
||||
|
||||
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
||||
memcpy(ptr, confirmTmp.first, confirmTmp.second);
|
||||
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
|
||||
ptr += confirmTmp.second;
|
||||
aligned_free(confirmTmp.first);
|
||||
|
||||
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
||||
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
|
||||
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
|
||||
ptr += floodControlTmp.second;
|
||||
aligned_free(floodControlTmp.first);
|
||||
|
||||
if (link.first) {
|
||||
teddy->link = verify_u32(ptr - teddy_base);
|
||||
memcpy(ptr, link.first, link.second);
|
||||
aligned_free(link.first);
|
||||
memcpy(ptr, link.first.get(), link.second);
|
||||
} else {
|
||||
teddy->link = 0;
|
||||
}
|
||||
|
||||
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
||||
|
||||
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator
|
||||
i = bucketToLits.begin(),
|
||||
e = bucketToLits.end();
|
||||
i != e; ++i) {
|
||||
const u32 bucket_id = i->first;
|
||||
const vector<LiteralIndex> &ids = i->second;
|
||||
for (const auto &b2l : bucketToLits) {
|
||||
const u32 &bucket_id = b2l.first;
|
||||
const vector<LiteralIndex> &ids = b2l.second;
|
||||
const u8 bmsk = 1U << (bucket_id % 8);
|
||||
|
||||
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(),
|
||||
e2 = ids.end();
|
||||
i2 != e2; ++i2) {
|
||||
LiteralIndex lit_id = *i2;
|
||||
const hwlmLiteral & l = lits[lit_id];
|
||||
for (const LiteralIndex &lit_id : ids) {
|
||||
const hwlmLiteral &l = lits[lit_id];
|
||||
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
|
||||
const u32 sz = verify_u32(l.s.size());
|
||||
|
||||
@ -439,10 +423,10 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
 
 } // namespace
 
-aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
-                                              bool make_small, u32 hint,
-                                              const target_t &target,
-                                              pair<u8 *, size_t> link) {
+aligned_unique_ptr<FDR>
+teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
+                      u32 hint, const target_t &target,
+                      pair<aligned_unique_ptr<u8>, size_t> &link) {
     unique_ptr<TeddyEngineDescription> des;
     if (hint == HINT_INVALID) {
         des = chooseTeddyEngine(target, lits);
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@ -49,7 +49,7 @@ struct hwlmLiteral;
 ue2::aligned_unique_ptr<FDR>
 teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
                       u32 hint, const target_t &target,
-                      std::pair<u8 *, size_t> link);
+                      std::pair<aligned_unique_ptr<u8>, size_t> &link);
 
 } // namespace ue2
 
@ -51,8 +51,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
 
 #define CHECK_HWLM_TERMINATE_MATCHING \
 do { \
-    if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
-        *a->groups = controlVal; \
+    if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
         return HWLM_TERMINATED; \
     } \
 } while (0);
@ -61,8 +60,7 @@ do { \
 do { \
     if (unlikely(ptr > tryFloodDetect)) { \
-        tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
-                                     &floodBackoff, &controlVal, \
-                                     iterBytes); \
+        tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
+                                     &floodBackoff, &control, iterBytes); \
         CHECK_HWLM_TERMINATE_MATCHING; \
     } \
 } while (0);
42
src/grey.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@ -34,7 +34,7 @@
 #include <string>
 #include <vector>
 
-#define DEFAULT_MAX_HISTORY 60
+#define DEFAULT_MAX_HISTORY 110
 
 using namespace std;
 
@ -50,8 +50,11 @@ Grey::Grey(void) :
|
||||
allowLitHaig(true),
|
||||
allowLbr(true),
|
||||
allowMcClellan(true),
|
||||
allowSheng(true),
|
||||
allowPuff(true),
|
||||
allowLiteral(true),
|
||||
allowRose(true),
|
||||
allowViolet(true),
|
||||
allowExtendedNFA(true), /* bounded repeats of course */
|
||||
allowLimExNFA(true),
|
||||
allowAnchoredAcyclic(true),
|
||||
@ -60,6 +63,13 @@ Grey::Grey(void) :
|
||||
allowDecoratedLiteral(true),
|
||||
allowNoodle(true),
|
||||
fdrAllowTeddy(true),
|
||||
violetAvoidSuffixes(true),
|
||||
violetAvoidWeakInfixes(true),
|
||||
violetDoubleCut(true),
|
||||
violetExtractStrongLiterals(true),
|
||||
violetLiteralChains(true),
|
||||
violetDoubleCutLiteralLen(3),
|
||||
violetEarlyCleanLiteralLen(6),
|
||||
puffImproveHead(true),
|
||||
castleExclusive(true),
|
||||
mergeSEP(true), /* short exhaustible passthroughs */
|
||||
@ -81,7 +91,6 @@ Grey::Grey(void) :
|
||||
allowZombies(true),
|
||||
floodAsPuffette(false),
|
||||
nfaForceSize(0),
|
||||
nfaForceShifts(0),
|
||||
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
|
||||
minHistoryAvailable(0), /* debugging only */
|
||||
maxAnchoredRegion(63), /* for rose's atable to run over */
|
||||
@ -119,6 +128,7 @@ Grey::Grey(void) :
|
||||
equivalenceEnable(true),
|
||||
|
||||
allowSmallWrite(true), // McClellan dfas for small patterns
|
||||
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
|
||||
|
||||
smallWriteLargestBuffer(70), // largest buffer that can be
|
||||
// considered a small write
|
||||
@ -126,6 +136,10 @@ Grey::Grey(void) :
|
||||
// are given to rose &co
|
||||
smallWriteLargestBufferBad(35),
|
||||
limitSmallWriteOutfixSize(1048576), // 1 MB
|
||||
smallWriteMaxPatterns(10000),
|
||||
smallWriteMaxLiterals(10000),
|
||||
allowTamarama(true), // Tamarama engine
|
||||
tamaChunkSize(100),
|
||||
dumpFlags(0),
|
||||
limitPatternCount(8000000), // 8M patterns
|
||||
limitPatternLength(16000), // 16K bytes
|
||||
@ -202,8 +216,11 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(allowLitHaig);
|
||||
G_UPDATE(allowLbr);
|
||||
G_UPDATE(allowMcClellan);
|
||||
G_UPDATE(allowSheng);
|
||||
G_UPDATE(allowPuff);
|
||||
G_UPDATE(allowLiteral);
|
||||
G_UPDATE(allowRose);
|
||||
G_UPDATE(allowViolet);
|
||||
G_UPDATE(allowExtendedNFA);
|
||||
G_UPDATE(allowLimExNFA);
|
||||
G_UPDATE(allowAnchoredAcyclic);
|
||||
@ -212,6 +229,13 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(allowDecoratedLiteral);
|
||||
G_UPDATE(allowNoodle);
|
||||
G_UPDATE(fdrAllowTeddy);
|
||||
G_UPDATE(violetAvoidSuffixes);
|
||||
G_UPDATE(violetAvoidWeakInfixes);
|
||||
G_UPDATE(violetDoubleCut);
|
||||
G_UPDATE(violetExtractStrongLiterals);
|
||||
G_UPDATE(violetLiteralChains);
|
||||
G_UPDATE(violetDoubleCutLiteralLen);
|
||||
G_UPDATE(violetEarlyCleanLiteralLen);
|
||||
G_UPDATE(puffImproveHead);
|
||||
G_UPDATE(castleExclusive);
|
||||
G_UPDATE(mergeSEP);
|
||||
@ -232,7 +256,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(allowZombies);
|
||||
G_UPDATE(floodAsPuffette);
|
||||
G_UPDATE(nfaForceSize);
|
||||
G_UPDATE(nfaForceShifts);
|
||||
G_UPDATE(highlanderSquash);
|
||||
G_UPDATE(maxHistoryAvailable);
|
||||
G_UPDATE(minHistoryAvailable);
|
||||
@ -270,9 +293,14 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(miracleHistoryBonus);
|
||||
G_UPDATE(equivalenceEnable);
|
||||
G_UPDATE(allowSmallWrite);
|
||||
G_UPDATE(allowSmallWriteSheng);
|
||||
G_UPDATE(smallWriteLargestBuffer);
|
||||
G_UPDATE(smallWriteLargestBufferBad);
|
||||
G_UPDATE(limitSmallWriteOutfixSize);
|
||||
G_UPDATE(smallWriteMaxPatterns);
|
||||
G_UPDATE(smallWriteMaxLiterals);
|
||||
G_UPDATE(allowTamarama);
|
||||
G_UPDATE(tamaChunkSize);
|
||||
G_UPDATE(limitPatternCount);
|
||||
G_UPDATE(limitPatternLength);
|
||||
G_UPDATE(limitGraphVertices);
|
||||
@ -309,7 +337,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
g->allowLitHaig = false;
|
||||
g->allowMcClellan = false;
|
||||
g->allowPuff = false;
|
||||
g->allowLiteral = false;
|
||||
g->allowRose = false;
|
||||
g->allowViolet = false;
|
||||
g->allowSmallLiteralSet = false;
|
||||
g->roseMasks = false;
|
||||
done = true;
|
||||
@ -325,7 +355,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
g->allowLitHaig = false;
|
||||
g->allowMcClellan = true;
|
||||
g->allowPuff = false;
|
||||
g->allowLiteral = false;
|
||||
g->allowRose = false;
|
||||
g->allowViolet = false;
|
||||
g->allowSmallLiteralSet = false;
|
||||
g->roseMasks = false;
|
||||
done = true;
|
||||
@ -341,7 +373,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
g->allowLitHaig = false;
|
||||
g->allowMcClellan = true;
|
||||
g->allowPuff = false;
|
||||
g->allowLiteral = false;
|
||||
g->allowRose = false;
|
||||
g->allowViolet = false;
|
||||
g->allowSmallLiteralSet = false;
|
||||
g->roseMasks = false;
|
||||
done = true;
|
||||
|
21
src/grey.h
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -50,8 +50,11 @@ struct Grey {
|
||||
bool allowLitHaig;
|
||||
bool allowLbr;
|
||||
bool allowMcClellan;
|
||||
bool allowSheng;
|
||||
bool allowPuff;
|
||||
bool allowLiteral;
|
||||
bool allowRose;
|
||||
bool allowViolet;
|
||||
bool allowExtendedNFA;
|
||||
bool allowLimExNFA;
|
||||
bool allowAnchoredAcyclic;
|
||||
@ -62,6 +65,14 @@ struct Grey {
|
||||
bool allowNoodle;
|
||||
bool fdrAllowTeddy;
|
||||
|
||||
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
|
||||
bool violetAvoidWeakInfixes;
|
||||
bool violetDoubleCut;
|
||||
bool violetExtractStrongLiterals;
|
||||
bool violetLiteralChains;
|
||||
u32 violetDoubleCutLiteralLen;
|
||||
u32 violetEarlyCleanLiteralLen;
|
||||
|
||||
bool puffImproveHead;
|
||||
bool castleExclusive; // enable castle mutual exclusion analysis
|
||||
|
||||
@ -88,7 +99,6 @@ struct Grey {
|
||||
bool floodAsPuffette;
|
||||
|
||||
u32 nfaForceSize;
|
||||
u32 nfaForceShifts;
|
||||
|
||||
u32 maxHistoryAvailable;
|
||||
u32 minHistoryAvailable;
|
||||
@ -140,9 +150,16 @@ struct Grey {
|
||||
|
||||
// SmallWrite engine
|
||||
bool allowSmallWrite;
|
||||
bool allowSmallWriteSheng;
|
||||
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
||||
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
|
||||
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
|
||||
|
||||
// Tamarama engine
|
||||
bool allowTamarama;
|
||||
u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama
|
||||
|
||||
enum DumpFlags {
|
||||
DUMP_NONE = 0,
|
||||
|
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@ -219,7 +219,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
                                       : get_current_target();
 
     CompileContext cc(isStreaming, isVectored, target_info, g);
-    NG ng(cc, somPrecision);
+    NG ng(cc, elements, somPrecision);
 
     try {
         for (unsigned int i = 0; i < elements; i++) {
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@ -98,6 +98,12 @@ extern "C"
  * The library was unable to allocate temporary storage used during
  * compilation time.
  *
+ * - *Allocator returned misaligned memory*
+ *
+ *   The memory allocator (either malloc() or the allocator set with @ref
+ *   hs_set_allocator()) did not correctly return memory suitably aligned
+ *   for the largest representable data type on this platform.
+ *
  * - *Internal error*
  *
  * An unexpected error occurred: if this error is reported, please contact
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@ -37,6 +37,7 @@
 #include "fdr/fdr.h"
 #include "nfa/accel.h"
 #include "nfa/shufti.h"
+#include "nfa/truffle.h"
 #include "nfa/vermicelli.h"
 #include <string.h>
 
@ -64,8 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
     case ACCEL_SHUFTI:
         DEBUG_PRINTF("single shufti\n");
         return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
+    case ACCEL_TRUFFLE:
+        DEBUG_PRINTF("truffle\n");
+        return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
     default:
         /* no acceleration, fall through and return current ptr */
         DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
         assert(aux->accel_type == ACCEL_NONE);
         return ptr;
     }
 }
@ -35,9 +35,11 @@
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_build.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_compile.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
@ -62,6 +64,28 @@ namespace ue2 {
|
||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
||||
|
||||
static
|
||||
size_t mask_overhang(const hwlmLiteral &lit) {
|
||||
size_t msk_true_size = lit.msk.size();
|
||||
assert(msk_true_size <= HWLM_MASKLEN);
|
||||
assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
|
||||
for (u8 c : lit.msk) {
|
||||
if (!c) {
|
||||
msk_true_size--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (lit.s.length() >= msk_true_size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* only short literals should be able to have a mask which overhangs */
|
||||
assert(lit.s.length() < MAX_ACCEL_OFFSET);
|
||||
return msk_true_size - lit.s.length();
|
||||
}
|
||||
|
||||
static
|
||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
const hwlmLiteral &first = *lits.front();
|
||||
@ -167,7 +191,8 @@ bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -288,8 +313,8 @@ bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
break;
|
||||
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
|
||||
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -346,6 +371,25 @@ void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
|
||||
u32 max_offset) {
|
||||
for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
|
||||
unsigned char c = lit.s[i];
|
||||
if (lit.nocase) {
|
||||
if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (cr.test(c)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
hwlm_group_t expected_groups, AccelAux *aux) {
|
||||
@ -363,29 +407,45 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
return;
|
||||
}
|
||||
|
||||
/* look for shufti/truffle */
|
||||
|
||||
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
||||
for (const auto &lit : lits) {
|
||||
if (!(lit.groups & expected_groups)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
|
||||
unsigned char c = lit.s[i];
|
||||
u32 overhang = mask_overhang(lit);
|
||||
for (u32 i = 0; i < overhang; i++) {
|
||||
/* this offset overhangs the start of the real literal; look at the
|
||||
* msk/cmp */
|
||||
for (u32 j = 0; j < N_CHARS; j++) {
|
||||
if ((j & lit.msk[i]) == lit.cmp[i]) {
|
||||
reach[i].set(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
|
||||
CharReach &reach_i = reach[i];
|
||||
u32 i_effective = i - overhang;
|
||||
|
||||
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
|
||||
continue;
|
||||
}
|
||||
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
|
||||
: lit.s.back();
|
||||
if (lit.nocase) {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
|
||||
reach[i].set(mytoupper(c));
|
||||
reach[i].set(mytolower(c));
|
||||
reach_i.set(mytoupper(c));
|
||||
reach_i.set(mytolower(c));
|
||||
} else {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
|
||||
reach[i].set(c);
|
||||
reach_i.set(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 min_count = ~0U;
|
||||
u32 min_offset = ~0U;
|
||||
for (u32 i = 0; i < min_len; i++) {
|
||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
|
||||
size_t count = reach[i].count();
|
||||
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
||||
describeClass(reach[i]).c_str(), count);
|
||||
@ -394,10 +454,9 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
min_offset = i;
|
||||
}
|
||||
}
|
||||
assert(min_offset <= min_len);
|
||||
|
||||
if (min_count > MAX_SHUFTI_WIDTH) {
|
||||
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
|
||||
DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -410,7 +469,11 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("fail\n");
|
||||
truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
|
||||
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
|
||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||
aux->truffle.accel_type = ACCEL_TRUFFLE;
|
||||
aux->truffle.offset = verify_u8(min_offset);
|
||||
}
|
||||
|
||||
static
|
||||
@ -466,6 +529,10 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
|
||||
stream_control->history_max);
|
||||
return false;
|
||||
}
|
||||
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!lits.front().msk.empty()) {
|
||||
|
@ -37,7 +37,6 @@
#include "util/compare.h"
#include "util/masked_move.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

#include <ctype.h>
#include <stdbool.h>
@ -115,7 +115,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
         v = and128(v, caseMask);
     }
 
-    u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
+    u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
+                               eq128(mask2, v)));
 
     // mask out where we can't match
     u32 mask = (0xFFFF >> (16 - l));
@ -142,7 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
         v = and128(v, caseMask);
     }
 
-    u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
+    u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
+                               eq128(mask2, v)));
 
     // mask out where we can't match
     u32 buf_off = start - offset;
413
src/nfa/mcclellancompile_accel.cpp → src/nfa/accel_dfa_build_strat.cpp
Normal file → Executable file
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -26,18 +26,20 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "mcclellancompile_accel.h"
|
||||
|
||||
#include "mcclellancompile_util.h"
|
||||
#include "accel_dfa_build_strat.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "grey.h"
|
||||
#include "nfagraph/ng_limex_accel.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#define PATHS_LIMIT 500
|
||||
|
||||
@ -46,14 +48,13 @@ using namespace std;
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
struct path {
|
||||
vector<CharReach> reach;
|
||||
dstate_id_t dest = DEAD_STATE;
|
||||
explicit path(dstate_id_t base) : dest(base) {}
|
||||
explicit path(dstate_id_t base) : dest(base) {
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
static UNUSED
|
||||
string describeClasses(const vector<CharReach> &v) {
|
||||
@ -85,8 +86,8 @@ bool is_useful_path(const vector<path> &good, const path &p) {
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("better: [%s] -> %u\n",
|
||||
describeClasses(g.reach).c_str(), g.dest);
|
||||
DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
|
||||
g.dest);
|
||||
|
||||
return false;
|
||||
next:;
|
||||
@ -106,8 +107,7 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
|
||||
|
||||
static
|
||||
void extend(const raw_dfa &rdfa, const path &p,
|
||||
map<u32, vector<path> > &all,
|
||||
vector<path> &out) {
|
||||
map<u32, vector<path>> &all, vector<path> &out) {
|
||||
dstate s = rdfa.states[p.dest];
|
||||
|
||||
if (!p.reach.empty() && p.reach.back().none()) {
|
||||
@ -147,17 +147,17 @@ void extend(const raw_dfa &rdfa, const path &p,
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("----good: [%s] -> %u\n",
|
||||
describeClasses(pp.reach).c_str(), pp.dest);
|
||||
describeClasses(pp.reach).c_str(), pp.dest);
|
||||
all[e.first].push_back(pp);
|
||||
out.push_back(pp);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
|
||||
u32 len) {
|
||||
vector<path> paths{ path(base) };
|
||||
map<u32, vector<path> > all;
|
||||
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
|
||||
dstate_id_t base, u32 len) {
|
||||
vector<path> paths{path(base)};
|
||||
map<u32, vector<path>> all;
|
||||
all[base].push_back(path(base));
|
||||
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
|
||||
vector<path> next_gen;
|
||||
@ -170,7 +170,7 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
|
||||
|
||||
dump_paths(paths);
|
||||
|
||||
vector<vector<CharReach> > rv;
|
||||
vector<vector<CharReach>> rv;
|
||||
for (auto &p : paths) {
|
||||
rv.push_back(move(p.reach));
|
||||
}
|
||||
@ -181,16 +181,58 @@ static
|
||||
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
|
||||
u32 max_allowed_accel_offset) {
|
||||
DEBUG_PRINTF("looking for accel for %hu\n", base);
|
||||
vector<vector<CharReach> > paths = generate_paths(rdfa, base,
|
||||
max_allowed_accel_offset + 1);
|
||||
vector<vector<CharReach>> paths =
|
||||
generate_paths(rdfa, base, max_allowed_accel_offset + 1);
|
||||
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
|
||||
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
|
||||
return as;
|
||||
}
|
||||
|
||||
static UNUSED
|
||||
bool better(const AccelScheme &a, const AccelScheme &b) {
|
||||
if (!a.double_byte.empty() && b.double_byte.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!b.double_byte.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return a.cr.count() < b.cr.count();
|
||||
}
|
||||
|
||||
static
|
||||
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
rv.at(rdfa.alpha_remap[i]).set(i);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
bool double_byte_ok(const AccelScheme &info) {
|
||||
return !info.double_byte.empty() &&
|
||||
info.double_cr.count() < info.double_byte.size() &&
|
||||
info.double_cr.count() <= 2 && !info.double_byte.empty();
|
||||
}
|
||||
|
||||
static
|
||||
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
|
||||
u16 top_remap = raw.alpha_remap[TOP];
|
||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||
if (i != top_remap && raw.states[s].next[i] == s) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||
const CharReach &escape) {
|
||||
const CharReach &escape) {
|
||||
set<u16> rv;
|
||||
CharReach nonexit = ~escape;
|
||||
for (auto i = nonexit.find_first(); i != CharReach::npos;
|
||||
@ -201,9 +243,58 @@ vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||
return vector<u16>(rv.begin(), rv.end());
|
||||
}
|
||||
|
||||
static
|
||||
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
|
||||
if (raw.start_floating != DEAD_STATE) {
|
||||
DEBUG_PRINTF("has floating start\n");
|
||||
return raw.start_floating;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("looking for SDS proxy\n");
|
||||
|
||||
dstate_id_t s = raw.start_anchored;
|
||||
|
||||
if (has_self_loop(s, raw)) {
|
||||
return s;
|
||||
}
|
||||
|
||||
u16 top_remap = raw.alpha_remap[TOP];
|
||||
|
||||
ue2::unordered_set<dstate_id_t> seen;
|
||||
while (true) {
|
||||
seen.insert(s);
|
||||
DEBUG_PRINTF("basis %hu\n", s);
|
||||
|
||||
/* check if we are connected to a state with a self loop */
|
||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||
dstate_id_t t = raw.states[s].next[i];
|
||||
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
/* find a neighbour to use as a basis for looking for the sds proxy */
|
||||
dstate_id_t t = DEAD_STATE;
|
||||
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
|
||||
dstate_id_t tt = raw.states[s].next[i];
|
||||
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
|
||||
t = tt;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (t == DEAD_STATE) {
|
||||
/* we were unable to find a state to use as a SDS proxy */
|
||||
return DEAD_STATE;
|
||||
}
|
||||
|
||||
s = t;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
||||
const AccelScheme &ei) {
|
||||
const AccelScheme &ei) {
|
||||
DEBUG_PRINTF("looking for region around %hu\n", base);
|
||||
|
||||
set<dstate_id_t> region = {base};
|
||||
@ -236,98 +327,10 @@ set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
||||
return region;
|
||||
}
|
||||
|
||||
static
|
||||
bool better(const AccelScheme &a, const AccelScheme &b) {
|
||||
if (!a.double_byte.empty() && b.double_byte.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!b.double_byte.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return a.cr.count() < b.cr.count();
|
||||
}
|
||||
|
||||
static
|
||||
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
rv.at(rdfa.alpha_remap[i]).set(i);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
|
||||
const dfa_build_strat &strat,
|
||||
const Grey &grey) {
|
||||
map<dstate_id_t, AccelScheme> rv;
|
||||
if (!grey.accelerateDFA) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
||||
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
||||
|
||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||
if (i == DEAD_STATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Note on report acceleration states: While we can't accelerate while we
|
||||
* are spamming out callbacks, the QR code paths don't raise reports
|
||||
* during scanning so they can accelerate report states. */
|
||||
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
|
||||
: ACCEL_DFA_MAX_STOP_CHAR;
|
||||
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
|
||||
|
||||
AccelScheme ei = strat.find_escape_strings(i);
|
||||
if (ei.cr.count() > single_limit) {
|
||||
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
||||
ei.cr.count());
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("state %zu should be accelerable %zu\n",
|
||||
i, ei.cr.count());
|
||||
|
||||
rv[i] = ei;
|
||||
}
|
||||
|
||||
/* provide accleration states to states in the region of sds */
|
||||
if (contains(rv, sds_proxy)) {
|
||||
AccelScheme sds_ei = rv[sds_proxy];
|
||||
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
||||
* may differ from double byte */
|
||||
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
|
||||
sds_ei.cr.count());
|
||||
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
|
||||
for (auto s : sds_region) {
|
||||
if (!contains(rv, s) || better(sds_ei, rv[s])) {
|
||||
rv[s] = sds_ei;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
bool double_byte_ok(const AccelScheme &info) {
    return !info.double_byte.empty()
           && info.double_cr.count() < info.double_byte.size()
           && info.double_cr.count() <= 2;
}
|
||||
|
||||
AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
|
||||
u32 max_allowed_accel_offset) {
|
||||
AccelScheme
|
||||
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
|
||||
AccelScheme rv;
|
||||
const raw_dfa &rdfa = get_raw();
|
||||
rv.cr.clear();
|
||||
rv.offset = 0;
|
||||
const dstate &raw = rdfa.states[this_idx];
|
||||
@ -354,7 +357,7 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
|
||||
|
||||
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
|
||||
DEBUG_PRINTF("leads to report\n");
|
||||
outs2_broken = true; /* cannot accelerate over reports */
|
||||
continue;
|
||||
}
|
||||
succs[next_id] |= cr_i;
|
||||
@ -402,14 +405,12 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
|
||||
|
||||
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
|
||||
DEBUG_PRINTF("broken %d\n", outs2_broken);
|
||||
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
|
||||
&& this_idx == rdfa.start_floating
|
||||
&& this_idx != DEAD_STATE) {
|
||||
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
|
||||
this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
|
||||
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
|
||||
auto offset = look_for_offset_accel(rdfa, this_idx,
|
||||
max_allowed_accel_offset);
|
||||
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(),
|
||||
rv.cr.count());
|
||||
auto offset =
|
||||
look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
|
||||
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
|
||||
if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
|
||||
DEBUG_PRINTF("using offset accel\n");
|
||||
rv = offset;
|
||||
@ -419,4 +420,172 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
|
||||
return rv;
|
||||
}
|
||||
|
||||
void
|
||||
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
||||
const AccelScheme &info,
|
||||
void *accel_out) {
|
||||
AccelAux *accel = (AccelAux *)accel_out;
|
||||
|
||||
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
|
||||
info.double_offset);
|
||||
accel->generic.offset = verify_u8(info.offset);
|
||||
|
||||
if (double_byte_ok(info) && info.double_cr.none() &&
|
||||
info.double_byte.size() == 1) {
|
||||
accel->accel_type = ACCEL_DVERM;
|
||||
accel->dverm.c1 = info.double_byte.begin()->first;
|
||||
accel->dverm.c2 = info.double_byte.begin()->second;
|
||||
accel->dverm.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (double_byte_ok(info) && info.double_cr.none() &&
|
||||
(info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
|
||||
bool ok = true;
|
||||
|
||||
assert(!info.double_byte.empty());
|
||||
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
|
||||
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
|
||||
|
||||
for (const pair<u8, u8> &p : info.double_byte) {
|
||||
if ((p.first & CASE_CLEAR) != firstC ||
|
||||
(p.second & CASE_CLEAR) != secondC) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ok) {
|
||||
accel->accel_type = ACCEL_DVERM_NOCASE;
|
||||
accel->dverm.c1 = firstC;
|
||||
accel->dverm.c2 = secondC;
|
||||
accel->dverm.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
u8 m1;
|
||||
u8 m2;
|
||||
if (buildDvermMask(info.double_byte, &m1, &m2)) {
|
||||
accel->accel_type = ACCEL_DVERM_MASKED;
|
||||
accel->dverm.offset = verify_u8(info.double_offset);
|
||||
accel->dverm.c1 = info.double_byte.begin()->first & m1;
|
||||
accel->dverm.c2 = info.double_byte.begin()->second & m2;
|
||||
accel->dverm.m1 = m1;
|
||||
accel->dverm.m2 = m2;
|
||||
DEBUG_PRINTF(
|
||||
"building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
|
||||
accel->dverm.c1, accel->dverm.c2);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (double_byte_ok(info) &&
|
||||
shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
|
||||
&accel->dshufti.lo1, &accel->dshufti.hi1,
|
||||
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
|
||||
accel->accel_type = ACCEL_DSHUFTI;
|
||||
accel->dshufti.offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.cr.none()) {
|
||||
accel->accel_type = ACCEL_RED_TAPE;
|
||||
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
|
||||
" from which there is no escape\n",
|
||||
this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.cr.count() == 1) {
|
||||
accel->accel_type = ACCEL_VERM;
|
||||
accel->verm.c = info.cr.find_first();
|
||||
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
|
||||
accel->accel_type = ACCEL_VERM_NOCASE;
|
||||
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
|
||||
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.cr.count() > max_floating_stop_char()) {
|
||||
accel->accel_type = ACCEL_NONE;
|
||||
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
accel->accel_type = ACCEL_SHUFTI;
|
||||
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
|
||||
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!info.cr.none());
|
||||
accel->accel_type = ACCEL_TRUFFLE;
|
||||
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
|
||||
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
|
||||
}
|
||||
|
||||
map<dstate_id_t, AccelScheme>
|
||||
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
|
||||
map<dstate_id_t, AccelScheme> rv;
|
||||
raw_dfa &rdfa = get_raw();
|
||||
if (!grey.accelerateDFA) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
|
||||
DEBUG_PRINTF("sds %hu\n", sds_proxy);
|
||||
|
||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||
if (i == DEAD_STATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Note on report acceleration states: While we can't accelerate while we
 * are spamming out callbacks, the QR code paths don't raise reports
 * during scanning so they can accelerate report states. */
|
||||
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t single_limit =
|
||||
i == sds_proxy ? max_floating_stop_char() : max_stop_char();
|
||||
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
|
||||
|
||||
AccelScheme ei = find_escape_strings(i);
|
||||
if (ei.cr.count() > single_limit) {
|
||||
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
|
||||
ei.cr.count());
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());
|
||||
|
||||
rv[i] = ei;
|
||||
}
|
||||
|
||||
/* provide acceleration states to states in the region of sds */
|
||||
if (contains(rv, sds_proxy)) {
|
||||
AccelScheme sds_ei = rv[sds_proxy];
|
||||
sds_ei.double_byte.clear(); /* region based on single byte scheme
|
||||
* may differ from double byte */
|
||||
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
|
||||
sds_ei.cr.count());
|
||||
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
|
||||
for (auto s : sds_region) {
|
||||
if (!contains(rv, s) || better(sds_ei, rv[s])) {
|
||||
rv[s] = sds_ei;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
};
|
src/nfa/accel_dfa_build_strat.h (new executable file, 60 lines)
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ACCEL_DFA_BUILD_STRAT_H
|
||||
#define ACCEL_DFA_BUILD_STRAT_H
|
||||
|
||||
#include "rdfa.h"
|
||||
#include "dfa_build_strat.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/accel_scheme.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ReportManager;
|
||||
struct Grey;
|
||||
|
||||
class accel_dfa_build_strat : public dfa_build_strat {
|
||||
public:
|
||||
explicit accel_dfa_build_strat(const ReportManager &rm_in)
|
||||
: dfa_build_strat(rm_in) {}
|
||||
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
|
||||
virtual size_t accelSize(void) const = 0;
|
||||
virtual u32 max_allowed_offset_accel() const = 0;
|
||||
virtual u32 max_stop_char() const = 0;
|
||||
virtual u32 max_floating_stop_char() const = 0;
|
||||
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
|
||||
void *accel_out);
|
||||
virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
|
||||
};
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ACCEL_DFA_BUILD_STRAT_H
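
As an illustrative sketch (not part of this diff), a concrete strategy derived
from this interface might look like the following. The class name
toy_build_strat and the numeric limits are invented for the example; the real
subclasses (e.g. mcclellan_build_strat) differ in detail, and accel.h and
dfa_build_strat.h are assumed to be included alongside this header.

namespace ue2 {

class toy_build_strat : public accel_dfa_build_strat {
public:
    toy_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
        : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}

    raw_dfa &get_raw() const override { return rdfa; }

    /* Accel tuning knobs consumed by getAccelInfo()/buildAccel(); the values
     * here are placeholders for the sketch, not the real engine limits. */
    size_t accelSize(void) const override { return sizeof(AccelAux); }
    u32 max_allowed_offset_accel() const override { return 0; }
    u32 max_stop_char() const override { return 24; }
    u32 max_floating_stop_char() const override { return 192; }

    std::unique_ptr<raw_report_info>
    gatherReports(std::vector<u32> &reports, std::vector<u32> &reports_eod,
                  u8 *isSingleReport, ReportID *arbReport) const override {
        /* report plumbing elided in this sketch */
        return nullptr;
    }

private:
    raw_dfa &rdfa;
};

} // namespace ue2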
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -37,30 +37,26 @@
|
||||
|
||||
/** \brief The type for an NFA callback.
|
||||
*
|
||||
* This is a function that takes as arguments the current offset where the
|
||||
* match occurs, the id of the match and the context pointer that was passed
|
||||
* into the NFA API function that executed the NFA.
|
||||
* This is a function that takes as arguments the current start and end offsets
|
||||
* where the match occurs, the id of the match and the context pointer that was
|
||||
* passed into the NFA API function that executed the NFA.
|
||||
*
|
||||
* The offset where the match occurs will be the offset after the character
|
||||
* that caused the match. Thus, if we have a buffer containing 'abc', then a
|
||||
* pattern that matches an empty string will have an offset of 0, a pattern
|
||||
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc'
|
||||
* will have an offset of 3, which will be a value that is 'beyond' the size of
|
||||
* the buffer. That is, if we have n characters in the buffer, there are n+1
|
||||
* different potential offsets for matches.
|
||||
* The start offset is the "start of match" (SOM) offset for the match. It is
|
||||
* only provided by engines that natively support SOM tracking (e.g. Gough).
|
||||
*
|
||||
* The end offset will be the offset after the character that caused the match.
|
||||
* Thus, if we have a buffer containing 'abc', then a pattern that matches an
|
||||
* empty string will have an offset of 0, a pattern that matches 'a' will have
|
||||
* an offset of 1, and a pattern that matches 'abc' will have an offset of 3,
|
||||
* which will be a value that is 'beyond' the size of the buffer. That is, if
|
||||
* we have n characters in the buffer, there are n+1 different potential
|
||||
* offsets for matches.
|
||||
*
|
||||
* This function should return an int - currently the possible return values
|
||||
* are 0, which means 'stop running the engine' or non-zero, which means
|
||||
* 'continue matching'.
|
||||
*/
|
||||
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context);
|
||||
|
||||
/** \brief The type for an NFA callback which also tracks start of match.
|
||||
*
|
||||
* see \ref NfaCallback
|
||||
*/
|
||||
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
|
||||
void *context);
|
||||
typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);
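
As an illustrative sketch (not part of this diff), a minimal handler for the
unified callback signature above might look like the following. The names
logMatch and MatchContext are invented for the example; u64a and ReportID come
from ue2common.h, <stdio.h> is assumed for printf, and engines without native
SOM support pass 0 as the start offset.

struct MatchContext {
    unsigned seen; /* number of matches observed so far */
};

static
int logMatch(u64a start, u64a end, ReportID id, void *context) {
    struct MatchContext *mc = (struct MatchContext *)context;
    mc->seen++;
    /* start is the SOM offset (0 if the engine does not track SOM); end is
     * one past the character that caused the match, as described above. */
    printf("report %u matched over [%llu, %llu)\n", id, start, end);
    return 1; /* non-zero: continue matching; 0 (MO_HALT_MATCHING) stops */
}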
|
||||
|
||||
/**
|
||||
* standard \ref NfaCallback return value indicating that engine execution
|
||||
|
@ -98,7 +98,7 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
|
||||
if (match == REPEAT_MATCH) {
|
||||
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
|
||||
subIdx, sub->report);
|
||||
if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) {
|
||||
if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state,
|
||||
i = mmbit_iterate(matching, c->numRepeats, i)) {
|
||||
const struct SubCastle *sub = getSubCastle(c, i);
|
||||
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
|
||||
if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
|
||||
if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("caller told us to halt\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
||||
return castleInAccept(c, q, report, q_cur_offset(q));
|
||||
}
|
||||
|
||||
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
assert(n && q);
|
||||
assert(n->type == CASTLE_NFA_0);
|
||||
DEBUG_PRINTF("entry\n");
|
||||
|
||||
const struct Castle *c = getImplNfa(n);
|
||||
const u64a offset = q_cur_offset(q);
|
||||
DEBUG_PRINTF("offset=%llu\n", offset);
|
||||
|
||||
if (c->exclusive) {
|
||||
u8 *active = (u8 *)q->streamState;
|
||||
u8 *groups = active + c->groupIterOffset;
|
||||
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
|
||||
u8 *cur = active + i * c->activeIdxSize;
|
||||
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
|
||||
DEBUG_PRINTF("subcastle %u\n", activeIdx);
|
||||
const struct SubCastle *sub = getSubCastle(c, activeIdx);
|
||||
if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (c->exclusive != PURE_EXCLUSIVE) {
|
||||
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
|
||||
for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
|
||||
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
|
||||
DEBUG_PRINTF("subcastle %u\n", i);
|
||||
const struct SubCastle *sub = getSubCastle(c, i);
|
||||
if (subCastleInAccept(c, q, sub->report, offset, i)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
|
||||
assert(n && q);
|
||||
assert(n->type == CASTLE_NFA_0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -48,7 +48,8 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) {
|
||||
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
|
||||
UNUSED const std::string &base) {
|
||||
// No GraphViz output for Castles.
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,12 +32,14 @@
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
|
||||
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
|
||||
|
||||
} // namespace ue2
|
||||
|
src/nfa/dfa_build_strat.cpp (new executable file, 40 lines)
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "dfa_build_strat.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
|
||||
raw_report_info::~raw_report_info() {}
|
||||
|
||||
dfa_build_strat::~dfa_build_strat() {}
|
||||
|
||||
raw_dfa::~raw_dfa() {}
|
||||
|
||||
} // namespace ue2
|
src/nfa/dfa_build_strat.h (new file, 68 lines)
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DFA_BUILD_STRAT_H
|
||||
#define DFA_BUILD_STRAT_H
|
||||
|
||||
#include "rdfa.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ReportManager;
|
||||
|
||||
struct raw_report_info {
|
||||
virtual ~raw_report_info();
|
||||
virtual u32 getReportListSize() const = 0; /* in bytes */
|
||||
virtual size_t size() const = 0; /* number of lists */
|
||||
virtual void fillReportLists(NFA *n, size_t base_offset,
|
||||
std::vector<u32> &ro /* out */) const = 0;
|
||||
};
|
||||
|
||||
class dfa_build_strat {
|
||||
public:
|
||||
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
|
||||
virtual ~dfa_build_strat();
|
||||
virtual raw_dfa &get_raw() const = 0;
|
||||
virtual std::unique_ptr<raw_report_info> gatherReports(
|
||||
std::vector<u32> &reports /* out */,
|
||||
std::vector<u32> &reports_eod /* out */,
|
||||
u8 *isSingleReport /* out */,
|
||||
ReportID *arbReport /* out */) const = 0;
|
||||
protected:
|
||||
const ReportManager &rm;
|
||||
};
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DFA_BUILD_STRAT_H
|
@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char doReports(SomNfaCallback cb, void *ctxt, const struct mcclellan *m,
|
||||
char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m,
|
||||
const struct gough_som_info *som, u16 s, u64a loc,
|
||||
char eod, u16 * const cached_accept_state,
|
||||
u32 * const cached_accept_id, u32 * const cached_accept_som) {
|
||||
@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
|
||||
static really_inline
|
||||
char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
|
||||
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||
SomNfaCallback cb, void *ctxt, const u8 **c_final,
|
||||
NfaCallback cb, void *ctxt, const u8 **c_final,
|
||||
enum MatchMode mode) {
|
||||
assert(ISALIGNED_N(state, 2));
|
||||
|
||||
@ -461,7 +461,7 @@ with_accel:
|
||||
static really_inline
|
||||
char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
|
||||
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||
SomNfaCallback cb, void *ctxt, const u8 **c_final,
|
||||
NfaCallback cb, void *ctxt, const u8 **c_final,
|
||||
enum MatchMode mode) {
|
||||
u8 s = *state;
|
||||
const u8 *c = buf, *c_end = buf + len;
|
||||
@ -595,7 +595,7 @@ with_accel:
|
||||
static never_inline
|
||||
char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
||||
u8 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||
SomNfaCallback cb, void *ctxt, const u8 **final_point,
|
||||
NfaCallback cb, void *ctxt, const u8 **final_point,
|
||||
enum MatchMode mode) {
|
||||
return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
||||
mode);
|
||||
@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
||||
static never_inline
|
||||
char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
|
||||
u16 *state, const u8 *buf, size_t len, u64a offAdj,
|
||||
SomNfaCallback cb, void *ctxt, const u8 **final_point,
|
||||
NfaCallback cb, void *ctxt, const u8 **final_point,
|
||||
enum MatchMode mode) {
|
||||
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
|
||||
mode);
|
||||
@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) {
|
||||
|
||||
static really_inline
|
||||
char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
const u8 *hend, SomNfaCallback cb, void *context,
|
||||
const u8 *hend, NfaCallback cb, void *context,
|
||||
struct mq *q, s64a end, enum MatchMode mode) {
|
||||
DEBUG_PRINTF("enter\n");
|
||||
struct gough_som_info *som = getSomInfo(q->state);
|
||||
@ -755,7 +755,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
|
||||
static really_inline
|
||||
char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
const u8 *hend, SomNfaCallback cb, void *context,
|
||||
const u8 *hend, NfaCallback cb, void *context,
|
||||
struct mq *q, s64a end, enum MatchMode mode) {
|
||||
struct gough_som_info *som = getSomInfo(q->state);
|
||||
assert(n->type == GOUGH_NFA_16);
|
||||
@ -887,7 +887,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_8);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -899,7 +899,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_16);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -911,7 +911,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_8);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -923,7 +923,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_16);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -935,7 +935,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_8);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -952,7 +952,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||
u64a offset = q->offset;
|
||||
const u8 *buffer = q->buffer;
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *context = q->context;
|
||||
assert(n->type == GOUGH_NFA_16);
|
||||
const u8 *hend = q->history + q->hlength;
|
||||
@ -994,7 +994,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||
|
||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u8 s = *(u8 *)q->state;
|
||||
u64a offset = q_cur_offset(q);
|
||||
@ -1016,7 +1016,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
|
||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
|
||||
SomNfaCallback cb = q->som_cb;
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u16 s = *(u16 *)q->state;
|
||||
const struct mstate_aux *aux = get_aux(m, s);
|
||||
@ -1048,10 +1048,18 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
|
||||
return nfaExecMcClellan16_inAccept(n, report, q);
|
||||
}
|
||||
|
||||
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
return nfaExecMcClellan8_inAnyAccept(n, q);
|
||||
}
|
||||
|
||||
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
return nfaExecMcClellan16_inAnyAccept(n, q);
|
||||
}
|
||||
|
||||
static
|
||||
char goughCheckEOD(const struct NFA *nfa, u16 s,
|
||||
const struct gough_som_info *som,
|
||||
u64a offset, SomNfaCallback cb, void *ctxt) {
|
||||
u64a offset, NfaCallback cb, void *ctxt) {
|
||||
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
|
||||
const struct mstate_aux *aux = get_aux(m, s);
|
||||
|
||||
@ -1062,21 +1070,19 @@ char goughCheckEOD(const struct NFA *nfa, u16 s,
|
||||
}
|
||||
|
||||
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
UNUSED NfaCallback callback,
|
||||
SomNfaCallback som_callback, void *context) {
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
NfaCallback callback, void *context) {
|
||||
const struct gough_som_info *som = getSomInfoConst(state);
|
||||
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback,
|
||||
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback,
|
||||
context);
|
||||
}
|
||||
|
||||
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
UNUSED NfaCallback callback,
|
||||
SomNfaCallback som_callback, void *context) {
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
NfaCallback callback, void *context) {
|
||||
assert(ISALIGNED_N(state, 8));
|
||||
const struct gough_som_info *som = getSomInfoConst(state);
|
||||
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback,
|
||||
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback,
|
||||
context);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -39,13 +39,13 @@ struct mq;
|
||||
|
||||
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback, SomNfaCallback som_cb,
|
||||
void *context);
|
||||
NfaCallback callback, void *context);
|
||||
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
|
||||
|
||||
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback, SomNfaCallback som_cb,
|
||||
void *context);
|
||||
NfaCallback callback, void *context);
|
||||
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
|
@ -79,9 +79,9 @@ namespace {
|
||||
class gough_build_strat : public mcclellan_build_strat {
|
||||
public:
|
||||
gough_build_strat(
|
||||
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm,
|
||||
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
|
||||
const map<dstate_id_t, gough_accel_state_info> &accel_info)
|
||||
: mcclellan_build_strat(r, rm), rdfa(r), gg(g),
|
||||
: mcclellan_build_strat(r, rm_in), rdfa(r), gg(g),
|
||||
accel_gough_info(accel_info) {}
|
||||
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
|
||||
vector<u32> &reports_eod /* out */,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
|
||||
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
|
||||
UNUSED const string &base) {
|
||||
assert(nfa->type == GOUGH_NFA_8);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
|
||||
@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
|
||||
dumpTextReverse(nfa, f);
|
||||
}
|
||||
|
||||
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
|
||||
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
|
||||
UNUSED const string &base) {
|
||||
assert(nfa->type == GOUGH_NFA_16);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -33,12 +33,16 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file);
|
||||
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file);
|
||||
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
|
||||
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -293,7 +293,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("firing match at %llu\n", i);
|
||||
if (cb(i, l->report, ctx) == MO_HALT_MATCHING) {
|
||||
if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||
char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
|
||||
const struct lbr_common *l = getImplNfa(nfa);
|
||||
u64a offset = q_cur_offset(q);
|
||||
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
|
||||
q->cb(offset, l->report, q->context);
|
||||
q->cb(0, offset, l->report, q->context);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
|
||||
return lbrInAccept(l, lstate, q->streamState, offset, report);
|
||||
}
|
||||
|
||||
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
||||
assert(nfa && q);
|
||||
assert(isLbrType(nfa->type));
|
||||
DEBUG_PRINTF("entry\n");
|
||||
|
||||
const struct lbr_common *l = getImplNfa(nfa);
|
||||
return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
|
||||
}
|
||||
|
||||
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
|
||||
struct mq *q) {
|
||||
assert(nfa && q);
|
||||
@ -206,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
|
||||
|
||||
if (q->report_current) {
|
||||
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
|
||||
int rv = q->cb(q_cur_offset(q), l->report, q->context);
|
||||
int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
|
||||
q->report_current = 0;
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -49,23 +49,28 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
||||
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||
UNUSED const std::string &base) {
|
||||
// No impl
|
||||
}
|
||||
|
||||
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
||||
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||
UNUSED const std::string &base) {
|
||||
// No impl
|
||||
}
|
||||
|
||||
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
||||
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||
UNUSED const std::string &base) {
|
||||
// No impl
|
||||
}
|
||||
|
||||
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
||||
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||
UNUSED const std::string &base) {
|
||||
// No impl
|
||||
}
|
||||
|
||||
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
|
||||
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
|
||||
UNUSED const std::string &base) {
|
||||
// No impl
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,16 +32,22 @@
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
|
||||
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);
|
||||
|
@ -30,6 +30,7 @@
|
||||
#define LIMEX_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <string>
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
@ -40,7 +41,8 @@ extern "C"
|
||||
#define GENERATE_NFA_DUMP_DECL(gf_name) \
|
||||
} /* extern "C" */ \
|
||||
namespace ue2 { \
|
||||
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \
|
||||
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
|
||||
const std::string &base); \
|
||||
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
|
||||
} /* namespace ue2 */ \
|
||||
extern "C" {
|
||||
@ -52,14 +54,14 @@ extern "C"
|
||||
#define GENERATE_NFA_DECL(gf_name) \
|
||||
char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
|
||||
const char *streamState, u64a offset, \
|
||||
NfaCallback callback, SomNfaCallback som_cb, \
|
||||
void *context); \
|
||||
NfaCallback callback, void *context); \
|
||||
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
|
||||
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
|
||||
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
|
||||
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
|
||||
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
|
||||
struct mq *q); \
|
||||
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
|
||||
char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
|
||||
char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
|
||||
void *state, u8 key); \
|
||||
@ -74,41 +76,11 @@ extern "C"
|
||||
struct mq *q, s64a loc); \
|
||||
GENERATE_NFA_DUMP_DECL(gf_name)
|
||||
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_1)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_2)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_3)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_4)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_5)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_6)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32_7)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_1)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_2)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_3)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_4)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_5)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_6)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128_7)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_1)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_2)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_3)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_4)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_5)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_6)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256_7)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_1)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_2)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_3)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_4)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_5)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_6)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384_7)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_1)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_2)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_3)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_4)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_5)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_6)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512_7)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx32)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx128)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx256)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx384)
|
||||
GENERATE_NFA_DECL(nfaExecLimEx512)
|
||||
|
||||
#undef GENERATE_NFA_DECL
|
||||
#undef GENERATE_NFA_DUMP_DECL
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "accel.h"
|
||||
#include "limex_internal.h"
|
||||
#include "limex_limits.h"
|
||||
#include "limex_shuffle.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "shufti.h"
|
||||
#include "truffle.h"
|
||||
@ -44,10 +45,7 @@
|
||||
#include "ue2common.h"
|
||||
#include "vermicelli.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/shuffle.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
#include "util/shuffle_ssse3.h"
|
||||
|
||||
static really_inline
|
||||
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
|
||||
@ -80,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
|
||||
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
|
||||
const union AccelAux *aux, const u8 *input, size_t i,
|
||||
size_t end) {
|
||||
u32 idx = shuffleDynamic32(s, accel);
|
||||
u32 idx = packedExtract32(s, accel);
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
}
|
||||
|
||||
@ -92,7 +90,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
|
||||
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
|
||||
m128 accelPerm = limex->accelPermute;
|
||||
m128 accelComp = limex->accelCompare;
|
||||
idx = shufflePshufb128(s, accelPerm, accelComp);
|
||||
idx = packedExtract128(s, accelPerm, accelComp);
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
}
|
||||
|
||||
@ -105,17 +103,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
|
||||
m256 accelPerm = limex->accelPermute;
|
||||
m256 accelComp = limex->accelCompare;
|
||||
#if !defined(__AVX2__)
|
||||
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
|
||||
#else
|
||||
// TODO: learn you some avx2 shuffles for great good
|
||||
u32 idx1 = shufflePshufb128(movdq_lo(s), movdq_lo(accelPerm),
|
||||
movdq_lo(accelComp));
|
||||
u32 idx2 = shufflePshufb128(movdq_hi(s), movdq_hi(accelPerm),
|
||||
movdq_hi(accelComp));
|
||||
#endif
|
||||
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2;
|
||||
#else
|
||||
idx = packedExtract256(s, accelPerm, accelComp);
|
||||
#endif
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
}
|
||||
|
||||
@ -127,9 +121,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
|
||||
DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
|
||||
m384 accelPerm = limex->accelPermute;
|
||||
m384 accelComp = limex->accelCompare;
|
||||
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = shufflePshufb128(s.mid, accelPerm.mid, accelComp.mid);
|
||||
u32 idx3 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
|
||||
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid);
|
||||
u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
|
||||
assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2 | idx3;
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
@ -144,21 +138,17 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
|
||||
m512 accelPerm = limex->accelPermute;
|
||||
m512 accelComp = limex->accelCompare;
|
||||
#if !defined(__AVX2__)
|
||||
u32 idx1 = shufflePshufb128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
||||
u32 idx2 = shufflePshufb128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
||||
u32 idx3 = shufflePshufb128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
||||
u32 idx4 = shufflePshufb128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
||||
#else
|
||||
u32 idx1 = shufflePshufb128(movdq_lo(s.lo), movdq_lo(accelPerm.lo),
|
||||
movdq_lo(accelComp.lo));
|
||||
u32 idx2 = shufflePshufb128(movdq_hi(s.lo), movdq_hi(accelPerm.lo),
|
||||
movdq_hi(accelComp.lo));
|
||||
u32 idx3 = shufflePshufb128(movdq_lo(s.hi), movdq_lo(accelPerm.hi),
|
||||
movdq_lo(accelComp.hi));
|
||||
u32 idx4 = shufflePshufb128(movdq_hi(s.hi), movdq_hi(accelPerm.hi),
|
||||
movdq_hi(accelComp.hi));
|
||||
#endif
|
||||
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
|
||||
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
|
||||
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
|
||||
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
|
||||
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2 | idx3 | idx4;
|
||||
#else
|
||||
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
|
||||
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
|
||||
assert((idx1 & idx2) == 0); // should be no shared bits
|
||||
idx = idx1 | idx2;
|
||||
#endif
|
||||
return accelScanWrapper(accelTable, aux, input, idx, i, end);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -40,6 +40,7 @@
|
||||
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
|
||||
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
|
||||
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
|
||||
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
|
||||
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
|
||||
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
|
||||
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
|
||||
@ -118,7 +119,7 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
|
||||
if (TESTBIT_STATE(s, a->state)) {
|
||||
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
||||
a->state, a->externalId, offset);
|
||||
int rv = callback(offset, a->externalId, context);
|
||||
int rv = callback(0, offset, a->externalId, context);
|
||||
if (unlikely(rv == MO_HALT_MATCHING)) {
|
||||
return 1;
|
||||
}
|
||||
@ -149,7 +150,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
|
||||
if (TESTBIT_STATE(s, a->state)) {
|
||||
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
|
||||
a->state, a->externalId, offset);
|
||||
int rv = callback(offset, a->externalId, context);
|
||||
int rv = callback(0, offset, a->externalId, context);
|
||||
if (unlikely(rv == MO_HALT_MATCHING)) {
|
||||
return 1;
|
||||
}
|
||||
@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
|
||||
union RepeatControl *repeat_ctrl, char *repeat_state,
|
||||
u64a offset) {
|
||||
assert(limex);
|
||||
|
||||
const STATE_T acceptMask = LOAD_STATE(&limex->accept);
|
||||
STATE_T accstate = AND_STATE(state, acceptMask);
|
||||
|
||||
// Are we in an accept state?
|
||||
if (ISZERO_STATE(accstate)) {
|
||||
DEBUG_PRINTF("no accept states are on\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
|
||||
|
||||
return ISNONZERO_STATE(accstate);
|
||||
}
|
||||
|
||||
#undef TESTEOD_FN
|
||||
#undef TESTEOD_REV_FN
|
||||
#undef REPORTCURRENT_FN
|
||||
#undef EXPIRE_ESTATE_FN
|
||||
#undef LIMEX_INACCEPT_FN
|
||||
#undef LIMEX_INANYACCEPT_FN
|
||||
#undef INITIAL_FN
|
||||
#undef TOP_FN
|
||||
#undef TOPN_FN
|
||||
|
@ -167,12 +167,10 @@ struct build_info {
|
||||
limex_accel_info accel;
|
||||
};
|
||||
|
||||
#define LAST_LIMEX_NFA LIMEX_NFA_512
|
||||
|
||||
// Constants for scoring mechanism
|
||||
|
||||
#define LAST_LIMEX_NFA LIMEX_NFA_512_7
|
||||
|
||||
const int LIMEX_INITIAL_SCORE = 2000;
|
||||
const int SHIFT_COST = 20; // limex: cost per shift mask
|
||||
const int SHIFT_COST = 10; // limex: cost per shift mask
|
||||
const int EXCEPTION_COST = 4; // limex: per exception
|
||||
|
||||
template<NFAEngineType t> struct NFATraits { };
|
||||
@ -261,6 +259,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
|
||||
}
|
||||
}
|
||||
|
||||
template<class Mask>
|
||||
bool isMaskZero(Mask &m) {
|
||||
u8 *m8 = (u8 *)&m;
|
||||
for (u32 i = 0; i < sizeof(m); i++) {
|
||||
if (m8[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sets an entire byte in a mask to the given value
|
||||
template<class Mask>
|
||||
void maskSetByte(Mask &m, const unsigned int idx, const char val) {
|
||||
@ -336,7 +345,7 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
|
||||
}
|
||||
|
||||
struct AccelBuild {
|
||||
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
|
||||
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
|
||||
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
|
||||
NFAVertex v;
|
||||
u32 state;
|
||||
@ -999,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
|
||||
// Suffixes and outfixes can mask out leaf states, which should all be
|
||||
// accepts. Right now we can only do this when there is nothing in initDs,
|
||||
// as we switch that on unconditionally in the expand call.
|
||||
if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
|
||||
if (!inspects_states_for_accepts(h)
|
||||
&& !hasInitDsStates(h, args.state_ids)) {
|
||||
NFAStateSet nonleaf(args.num_states);
|
||||
for (const auto &e : edges_range(h)) {
|
||||
u32 from = args.state_ids.at(source(e, h));
|
||||
@ -1162,12 +1172,13 @@ u32 getReportListIndex(const flat_set<ReportID> &reports,
|
||||
}
|
||||
|
||||
static
|
||||
void buildExceptionMap(const build_info &args,
|
||||
const ue2::unordered_set<NFAEdge> &exceptional,
|
||||
map<ExceptionProto, vector<u32> > &exceptionMap,
|
||||
vector<ReportID> &exceptionReports) {
|
||||
u32 buildExceptionMap(const build_info &args,
|
||||
const ue2::unordered_set<NFAEdge> &exceptional,
|
||||
map<ExceptionProto, vector<u32> > &exceptionMap,
|
||||
vector<ReportID> &exceptionReports) {
|
||||
const NGHolder &h = args.h;
|
||||
const u32 num_states = args.num_states;
|
||||
u32 exceptionCount = 0;
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> pos_trigger;
|
||||
ue2::unordered_map<NFAVertex, u32> tug_trigger;
|
||||
@ -1297,10 +1308,13 @@ void buildExceptionMap(const build_info &args,
|
||||
assert(e.succ_states.size() == num_states);
|
||||
assert(e.squash_states.size() == num_states);
|
||||
exceptionMap[e].push_back(i);
|
||||
exceptionCount++;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size());
|
||||
DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount,
|
||||
exceptionMap.size());
|
||||
return exceptionCount;
|
||||
}
|
||||
|
||||
static
|
||||
@ -1315,6 +1329,92 @@ u32 depth_to_u32(const depth &d) {
|
||||
return d_val;
|
||||
}
|
||||
|
||||
static
|
||||
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
|
||||
const build_info &args, u32 maxShift) {
|
||||
NFAVertex from = source(e, h);
|
||||
NFAVertex to = target(e, h);
|
||||
u32 f = args.state_ids.at(from);
|
||||
u32 t = args.state_ids.at(to);
|
||||
if (!isLimitedTransition(f, t, maxShift)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// All transitions out of a tug trigger are exceptional.
|
||||
if (contains(args.tugs, from)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
u32 findMaxVarShift(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h;
u32 shiftMask = 0;
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
shiftMask |= (1UL << (to - from));
}
}

u32 maxVarShift = 0;
for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) {
maxVarShift = findAndClearLSB_32(&shiftMask);
}

return maxVarShift;
}

static
int getLimexScore(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h;
u32 maxVarShift = nShifts;
int score = 0;

score += SHIFT_COST * nShifts;
maxVarShift = findMaxVarShift(args, nShifts);

NFAStateSet exceptionalStates(args.num_states);
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args, maxVarShift)) {
exceptionalStates.set(from);
}
}
score += EXCEPTION_COST * exceptionalStates.count();
return score;
}

// This function finds the best shift scheme, i.e. the one with the lowest
// (cheapest) score.
// Returns the number of shifts and the score calculated for that scheme.
// Returns zero if no appropriate scheme was found.
static
u32 findBestNumOfVarShifts(const build_info &args,
int *bestScoreRet = nullptr) {
u32 bestNumOfVarShifts = 0;
int bestScore = INT_MAX;
for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) {
int score = getLimexScore(args, shiftCount);
if (score < bestScore) {
bestScore = score;
bestNumOfVarShifts = shiftCount;
}
}
if (bestScoreRet != nullptr) {
*bestScoreRet = bestScore;
}
return bestNumOfVarShifts;
}

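/*
 * Editorial sketch, not part of the patch: the cost model above can be read
 * as a small standalone calculation. Each shift mask costs SHIFT_COST (10 in
 * this patch) and each state with an outbound exceptional transition costs
 * EXCEPTION_COST (4). The real code derives the exceptional-state count from
 * the NFA graph; here a made-up table stands in for it, and the selection
 * simply picks the shift count with the lowest total cost, mirroring
 * findBestNumOfVarShifts().
 */
#include <limits.h>
#include <stdio.h>

#define DEMO_MAX_SHIFT_COUNT 8
#define DEMO_SHIFT_COST      10
#define DEMO_EXCEPTION_COST  4

static unsigned best_num_var_shifts(const unsigned exc_states[],
                                    int *best_score_out) {
    unsigned best = 0;
    int best_score = INT_MAX;
    for (unsigned n = 1; n <= DEMO_MAX_SHIFT_COUNT; n++) {
        int score = (int)(DEMO_SHIFT_COST * n)
                  + (int)(DEMO_EXCEPTION_COST * exc_states[n]);
        if (score < best_score) {
            best_score = score;
            best = n;
        }
    }
    if (best_score_out) {
        *best_score_out = best_score;
    }
    return best;
}

int main(void) {
    /* exc_states[n] = hypothetical exceptional-state count with n masks */
    const unsigned exc_states[DEMO_MAX_SHIFT_COUNT + 1] =
        {0, 40, 18, 6, 2, 2, 2, 2, 2};
    int score;
    unsigned n = best_num_var_shifts(exc_states, &score);
    printf("best shift count: %u (score %d)\n", n, score);
    return 0;
}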
template<NFAEngineType dtype>
|
||||
struct Factory {
|
||||
// typedefs for readability, for types derived from traits
|
||||
@ -1322,25 +1422,6 @@ struct Factory {
|
||||
typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
|
||||
typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
|
||||
|
||||
static
|
||||
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
const ue2::unordered_set<NFAVertex> &tugs) {
|
||||
NFAVertex from = source(e, h);
|
||||
NFAVertex to = target(e, h);
|
||||
u32 f = state_ids.at(from);
|
||||
u32 t = state_ids.at(to);
|
||||
if (!isLimitedTransition(f, t, NFATraits<dtype>::maxShift)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// All transitions out of a tug trigger are exceptional.
|
||||
if (contains(tugs, from)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void allocState(NFA *nfa, u32 repeatscratchStateSize,
|
||||
u32 repeatStreamState) {
|
||||
@ -1504,6 +1585,9 @@ struct Factory {
|
||||
static
|
||||
void writeShiftMasks(const build_info &args, implNFA_t *limex) {
|
||||
const NGHolder &h = args.h;
|
||||
u32 maxShift = findMaxVarShift(args, limex->shiftCount);
|
||||
u32 shiftMask = 0;
|
||||
int shiftMaskIdx = 0;
|
||||
|
||||
for (const auto &e : edges_range(h)) {
|
||||
u32 from = args.state_ids.at(source(e, h));
|
||||
@ -1515,15 +1599,32 @@ struct Factory {
|
||||
// We check for exceptional transitions here, as we don't want tug
|
||||
// trigger transitions emitted as limited transitions (even if they
|
||||
// could be in this model).
|
||||
if (!isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
||||
maskSetBit(limex->shift[to - from], from);
|
||||
if (!isExceptionalTransition(h, e, args, maxShift)) {
|
||||
u32 shift = to - from;
|
||||
if ((shiftMask & (1UL << shift)) == 0UL) {
|
||||
shiftMask |= (1UL << shift);
|
||||
limex->shiftAmount[shiftMaskIdx++] = (u8)shift;
|
||||
}
|
||||
assert(limex->shiftCount <= MAX_SHIFT_COUNT);
|
||||
for (u32 i = 0; i < limex->shiftCount; i++) {
|
||||
if (limex->shiftAmount[i] == (u8)shift) {
|
||||
maskSetBit(limex->shift[i], from);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxShift && limex->shiftCount > 1) {
|
||||
for (u32 i = 0; i < limex->shiftCount; i++) {
|
||||
assert(!isMaskZero(limex->shift[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findExceptionalTransitions(const build_info &args,
|
||||
ue2::unordered_set<NFAEdge> &exceptional) {
|
||||
ue2::unordered_set<NFAEdge> &exceptional,
|
||||
u32 maxShift) {
|
||||
const NGHolder &h = args.h;
|
||||
|
||||
for (const auto &e : edges_range(h)) {
|
||||
@ -1533,7 +1634,7 @@ struct Factory {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
||||
if (isExceptionalTransition(h, e, args, maxShift)) {
|
||||
exceptional.insert(e);
|
||||
}
|
||||
}
|
||||
@ -1545,19 +1646,25 @@ struct Factory {
|
||||
implNFA_t *limex, const u32 exceptionsOffset) {
|
||||
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
|
||||
|
||||
// to make testing easier, we pre-set the exceptionMap to all invalid
|
||||
// values
|
||||
memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap));
|
||||
|
||||
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
|
||||
assert(ISALIGNED(etable));
|
||||
|
||||
u32 ecount = 0;
|
||||
map<u32, ExceptionProto> exception_by_state;
|
||||
for (const auto &m : exceptionMap) {
|
||||
const ExceptionProto &proto = m.first;
|
||||
const vector<u32> &states = m.second;
|
||||
DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount,
|
||||
states.size());
|
||||
for (u32 i : states) {
|
||||
assert(!contains(exception_by_state, i));
|
||||
exception_by_state.emplace(i, proto);
|
||||
}
|
||||
}
|
||||
|
||||
u32 ecount = 0;
|
||||
for (const auto &m : exception_by_state) {
|
||||
const ExceptionProto &proto = m.second;
|
||||
u32 state_id = m.first;
|
||||
DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount,
|
||||
state_id);
|
||||
|
||||
// Write the exception entry.
|
||||
exception_t &e = etable[ecount];
|
||||
@ -1571,13 +1678,10 @@ struct Factory {
|
||||
: repeatOffsets[proto.repeat_index];
|
||||
e.repeatOffset = repeat_offset;
|
||||
|
||||
// for each state that can switch it on
|
||||
for (auto state_id : states) {
|
||||
// set this bit in the exception mask
|
||||
maskSetBit(limex->exceptionMask, state_id);
|
||||
// set this index in the exception map
|
||||
limex->exceptionMap[state_id] = ecount;
|
||||
}
|
||||
// for the state that can switch it on
|
||||
// set this bit in the exception mask
|
||||
maskSetBit(limex->exceptionMask, state_id);
|
||||
|
||||
ecount++;
|
||||
}
|
||||
|
||||
@ -1778,16 +1882,17 @@ struct Factory {
|
||||
}
|
||||
|
||||
ue2::unordered_set<NFAEdge> exceptional;
|
||||
findExceptionalTransitions(args, exceptional);
|
||||
u32 shiftCount = findBestNumOfVarShifts(args);
|
||||
assert(shiftCount);
|
||||
u32 maxShift = findMaxVarShift(args, shiftCount);
|
||||
findExceptionalTransitions(args, exceptional, maxShift);
|
||||
|
||||
map<ExceptionProto, vector<u32> > exceptionMap;
|
||||
vector<ReportID> exceptionReports;
|
||||
buildExceptionMap(args, exceptional, exceptionMap, exceptionReports);
|
||||
u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
|
||||
exceptionReports);
|
||||
|
||||
if (exceptionMap.size() > ~0U) {
|
||||
DEBUG_PRINTF("too many exceptions!\n");
|
||||
return nullptr;
|
||||
}
|
||||
assert(exceptionCount <= args.num_states);
|
||||
|
||||
// Build reach table and character mapping.
|
||||
vector<NFAStateSet> reach;
|
||||
@ -1842,7 +1947,7 @@ struct Factory {
|
||||
|
||||
offset = ROUNDUP_CL(offset);
|
||||
const u32 exceptionsOffset = offset;
|
||||
offset += sizeof(exception_t) * exceptionMap.size();
|
||||
offset += sizeof(exception_t) * exceptionCount;
|
||||
|
||||
const u32 exceptionReportsOffset = offset;
|
||||
offset += sizeof(ReportID) * exceptionReports.size();
|
||||
@ -1874,6 +1979,7 @@ struct Factory {
|
||||
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
|
||||
limex, acceptsOffset, acceptsEodOffset, squashOffset);
|
||||
|
||||
limex->shiftCount = shiftCount;
|
||||
writeShiftMasks(args, limex);
|
||||
|
||||
// Determine the state required for our state vector.
|
||||
@ -1907,8 +2013,6 @@ struct Factory {
|
||||
}
|
||||
|
||||
static int score(const build_info &args) {
|
||||
const NGHolder &h = args.h;
|
||||
|
||||
// LimEx NFAs are available in sizes from 32 to 512-bit.
|
||||
size_t num_states = args.num_states;
|
||||
|
||||
@ -1928,45 +2032,17 @@ struct Factory {
|
||||
sz = args.cc.grey.nfaForceSize;
|
||||
}
|
||||
|
||||
if (args.cc.grey.nfaForceShifts &&
|
||||
NFATraits<dtype>::maxShift != args.cc.grey.nfaForceShifts) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sz != NFATraits<dtype>::maxStates) {
|
||||
return -1; // fail, size not appropriate
|
||||
}
|
||||
|
||||
// We are of the right size, calculate a score based on the number
|
||||
// of exceptions and the number of shifts used by this LimEx.
|
||||
int score = LIMEX_INITIAL_SCORE;
|
||||
if (NFATraits<dtype>::maxShift != 0) {
|
||||
score -= SHIFT_COST / 2; // first shift mask is cheap
|
||||
score -= SHIFT_COST * (NFATraits<dtype>::maxShift - 1);
|
||||
int score;
|
||||
u32 shiftCount = findBestNumOfVarShifts(args, &score);
|
||||
if (shiftCount == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
NFAStateSet exceptionalStates(num_states); // outbound exc trans
|
||||
|
||||
for (const auto &e : edges_range(h)) {
|
||||
u32 from = args.state_ids.at(source(e, h));
|
||||
u32 to = args.state_ids.at(target(e, h));
|
||||
if (from == NO_STATE || to == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
|
||||
exceptionalStates.set(from);
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("%zu exceptional states\n", exceptionalStates.count());
|
||||
score -= EXCEPTION_COST * exceptionalStates.count();
|
||||
|
||||
/* ensure that we always report a valid score if have the right number
|
||||
* of states */
|
||||
if (score < 0) {
|
||||
score = 0;
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
};
|
||||
@ -1985,50 +2061,19 @@ struct scoreNfa {
|
||||
}
|
||||
};
|
||||
|
||||
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
|
||||
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
|
||||
typedef LimExNFA##mlt_size implNFA_t; \
|
||||
typedef u_##mlt_size tableRow_t; \
|
||||
typedef NFAException##mlt_size exception_t; \
|
||||
static const size_t maxStates = mlt_size; \
|
||||
static const u32 maxShift = mlt_shift; \
|
||||
}; \
|
||||
#define MAKE_LIMEX_TRAITS(mlt_size) \
|
||||
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
|
||||
typedef LimExNFA##mlt_size implNFA_t; \
|
||||
typedef u_##mlt_size tableRow_t; \
|
||||
typedef NFAException##mlt_size exception_t; \
|
||||
static const size_t maxStates = mlt_size; \
|
||||
};
|
||||
|
||||
MAKE_LIMEX_TRAITS(32, 1)
|
||||
MAKE_LIMEX_TRAITS(32, 2)
|
||||
MAKE_LIMEX_TRAITS(32, 3)
|
||||
MAKE_LIMEX_TRAITS(32, 4)
|
||||
MAKE_LIMEX_TRAITS(32, 5)
|
||||
MAKE_LIMEX_TRAITS(32, 6)
|
||||
MAKE_LIMEX_TRAITS(32, 7)
|
||||
MAKE_LIMEX_TRAITS(128, 1)
|
||||
MAKE_LIMEX_TRAITS(128, 2)
|
||||
MAKE_LIMEX_TRAITS(128, 3)
|
||||
MAKE_LIMEX_TRAITS(128, 4)
|
||||
MAKE_LIMEX_TRAITS(128, 5)
|
||||
MAKE_LIMEX_TRAITS(128, 6)
|
||||
MAKE_LIMEX_TRAITS(128, 7)
|
||||
MAKE_LIMEX_TRAITS(256, 1)
|
||||
MAKE_LIMEX_TRAITS(256, 2)
|
||||
MAKE_LIMEX_TRAITS(256, 3)
|
||||
MAKE_LIMEX_TRAITS(256, 4)
|
||||
MAKE_LIMEX_TRAITS(256, 5)
|
||||
MAKE_LIMEX_TRAITS(256, 6)
|
||||
MAKE_LIMEX_TRAITS(256, 7)
|
||||
MAKE_LIMEX_TRAITS(384, 1)
|
||||
MAKE_LIMEX_TRAITS(384, 2)
|
||||
MAKE_LIMEX_TRAITS(384, 3)
|
||||
MAKE_LIMEX_TRAITS(384, 4)
|
||||
MAKE_LIMEX_TRAITS(384, 5)
|
||||
MAKE_LIMEX_TRAITS(384, 6)
|
||||
MAKE_LIMEX_TRAITS(384, 7)
|
||||
MAKE_LIMEX_TRAITS(512, 1)
|
||||
MAKE_LIMEX_TRAITS(512, 2)
|
||||
MAKE_LIMEX_TRAITS(512, 3)
|
||||
MAKE_LIMEX_TRAITS(512, 4)
|
||||
MAKE_LIMEX_TRAITS(512, 5)
|
||||
MAKE_LIMEX_TRAITS(512, 6)
|
||||
MAKE_LIMEX_TRAITS(512, 7)
|
||||
MAKE_LIMEX_TRAITS(32)
|
||||
MAKE_LIMEX_TRAITS(128)
|
||||
MAKE_LIMEX_TRAITS(256)
|
||||
MAKE_LIMEX_TRAITS(384)
|
||||
MAKE_LIMEX_TRAITS(512)
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -2133,20 +2178,18 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
|
||||
// Acceleration analysis.
|
||||
fillAccelInfo(arg);
|
||||
|
||||
typedef pair<int, NFAEngineType> EngineScore;
|
||||
vector<EngineScore> scores;
|
||||
vector<pair<int, NFAEngineType>> scores;
|
||||
|
||||
if (hint != INVALID_NFA) {
|
||||
// The caller has told us what to (attempt to) build.
|
||||
scores.push_back(make_pair(0, (NFAEngineType)hint));
|
||||
scores.emplace_back(0, (NFAEngineType)hint);
|
||||
} else {
|
||||
for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
|
||||
NFAEngineType ntype = (NFAEngineType)i;
|
||||
|
||||
int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
|
||||
if (score >= 0) {
|
||||
DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
|
||||
scores.push_back(make_pair(score, ntype));
|
||||
scores.emplace_back(score, ntype);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2156,22 +2199,22 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
sort(scores.begin(), scores.end(), greater<EngineScore>());
|
||||
// Sort acceptable models in priority order, lowest score first.
|
||||
sort(scores.begin(), scores.end());
|
||||
|
||||
aligned_unique_ptr<NFA> nfa;
|
||||
for (auto i = scores.begin(); !nfa && i != scores.end(); ++i) {
|
||||
assert(i->first >= 0);
|
||||
nfa = DISPATCH_BY_LIMEX_TYPE(i->second, generateNfa, arg);
|
||||
for (const auto &elem : scores) {
|
||||
assert(elem.first >= 0);
|
||||
NFAEngineType limex_model = elem.second;
|
||||
auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg);
|
||||
if (nfa) {
|
||||
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
||||
nfa_type_name(limex_model));
|
||||
return nfa;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nfa) {
|
||||
DEBUG_PRINTF("NFA build failed.\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("successful build with NFA engine: %s\n",
|
||||
nfa_type_name((NFAEngineType)nfa->type));
|
||||
return nfa;
|
||||
DEBUG_PRINTF("NFA build failed.\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -80,6 +80,23 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
}

template<typename mask_t>
static
u32 rank_in_mask(mask_t mask, u32 bit) {
assert(bit < 8 * sizeof(mask));

u32 chunks[sizeof(mask)/sizeof(u32)];
memcpy(chunks, &mask, sizeof(mask));
u32 base_rank = 0;
for (u32 i = 0; i < bit / 32; i++) {
base_rank += popcount32(chunks[i]);
}
u32 chunk = chunks[bit / 32];
u32 local_bit = bit % 32;
assert(chunk & (1U << local_bit));
return base_rank + popcount32(chunk & ((1U << local_bit) - 1));
}

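/*
 * Editorial sketch, not part of the patch: rank_in_mask() returns how many
 * set bits of 'mask' lie strictly below 'bit', i.e. the dense index of 'bit'
 * among the mask's set bits. The plain-C version below assumes a 32-bit mask
 * and a GCC/Clang popcount builtin; it only exists to make the idea concrete.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t rank_in_mask32_demo(uint32_t mask, uint32_t bit) {
    assert(mask & (1U << bit));                /* 'bit' must be set in mask */
    uint32_t below = mask & ((1U << bit) - 1); /* keep only lower positions */
    return (uint32_t)__builtin_popcount(below);
}

int main(void) {
    uint32_t mask = 0x2d; /* 0b101101: set bits at positions 0, 2, 3, 5 */
    /* position 3 is the third set bit, so its rank is 2 */
    printf("rank of bit 3 = %u\n", rank_in_mask32_demo(mask, 3)); /* 2 */
    printf("rank of bit 5 = %u\n", rank_in_mask32_demo(mask, 5)); /* 3 */
    return 0;
}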
template <typename limex_type>
|
||||
static
|
||||
void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
|
||||
@ -244,6 +261,16 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename limex_type>
|
||||
static
|
||||
void dumpLimexShifts(const limex_type *limex, FILE *f) {
|
||||
u32 size = limex_traits<limex_type>::size;
|
||||
fprintf(f, "Shift Masks:\n");
|
||||
for(u32 i = 0; i < limex->shiftCount; i++) {
|
||||
fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
|
||||
dumpMask((const u8 *)&limex->shift[i], size).c_str());
|
||||
}
|
||||
}
|
||||
template<typename limex_type>
|
||||
static
|
||||
void dumpLimexText(const limex_type *limex, FILE *f) {
|
||||
@ -270,6 +297,9 @@ void dumpLimexText(const limex_type *limex, FILE *f) {
|
||||
topMask += size / 8;
|
||||
}
|
||||
|
||||
// Dump shift masks
|
||||
dumpLimexShifts(limex, f);
|
||||
|
||||
dumpSquash(limex, f);
|
||||
|
||||
dumpLimexReachMap(limex->reachMap, f);
|
||||
@ -325,7 +355,7 @@ struct limex_labeller : public nfa_labeller {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 ex_index = limex->exceptionMap[state];
|
||||
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
|
||||
const typename limex_traits<limex_type>::exception_type *e
|
||||
= &exceptions[ex_index];
|
||||
|
||||
@ -396,7 +426,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
||||
const typename limex_traits<limex_type>::exception_type *exceptions
|
||||
= getExceptionTable(limex);
|
||||
|
||||
u32 ex_index = limex->exceptionMap[state];
|
||||
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
|
||||
const typename limex_traits<limex_type>::exception_type *e
|
||||
= &exceptions[ex_index];
|
||||
|
||||
@ -420,78 +450,45 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
||||
template<typename limex_type>
|
||||
static
|
||||
void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
||||
for (u32 j = 0; j < MAX_MAX_SHIFT; j++) {
|
||||
for (u32 j = 0; j < limex->shiftCount; j++) {
|
||||
const u32 shift_amount = limex->shiftAmount[j];
|
||||
if (testbit((const u8 *)&limex->shift[j],
|
||||
limex_traits<limex_type>::size, state)) {
|
||||
fprintf(f, "%u -> %u;\n", state, state + j);
|
||||
fprintf(f, "%u -> %u;\n", state, state + shift_amount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define DUMP_TEXT_FN(ddf_u, ddf_n, ddf_s) \
|
||||
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpText(const NFA *nfa, FILE *f) { \
|
||||
#define DUMP_TEXT_FN(ddf_n) \
|
||||
void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
|
||||
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
|
||||
}
|
||||
|
||||
#define DUMP_DOT_FN(ddf_u, ddf_n, ddf_s) \
|
||||
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpDot(const NFA *nfa, FILE *f) { \
|
||||
#define DUMP_DOT_FN(ddf_n) \
|
||||
void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
|
||||
UNUSED const string &base) { \
|
||||
const LimExNFA##ddf_n *limex = \
|
||||
(const LimExNFA##ddf_n *)getImplNfa(nfa); \
|
||||
\
|
||||
dumpDotPreamble(f); \
|
||||
u32 state_count = nfa->nPositions; \
|
||||
u32 state_count = nfa->nPositions; \
|
||||
dumpVertexDotInfo(limex, state_count, f, \
|
||||
limex_labeller<LimExNFA##ddf_n>(limex)); \
|
||||
for (u32 i = 0; i < state_count; i++) { \
|
||||
dumpLimDotInfo(limex, i, f); \
|
||||
dumpExDotInfo(limex, i, f); \
|
||||
} \
|
||||
\
|
||||
dumpDotTrailer(f); \
|
||||
}
|
||||
|
||||
#define LIMEX_DUMP_FNS(ntype, size, shifts) \
|
||||
DUMP_TEXT_FN(ntype, size, shifts) \
|
||||
DUMP_DOT_FN(ntype, size, shifts)
|
||||
#define LIMEX_DUMP_FNS(size) \
|
||||
DUMP_TEXT_FN(size) \
|
||||
DUMP_DOT_FN(size)
|
||||
|
||||
LIMEX_DUMP_FNS(u32, 32, 1)
|
||||
LIMEX_DUMP_FNS(u32, 32, 2)
|
||||
LIMEX_DUMP_FNS(u32, 32, 3)
|
||||
LIMEX_DUMP_FNS(u32, 32, 4)
|
||||
LIMEX_DUMP_FNS(u32, 32, 5)
|
||||
LIMEX_DUMP_FNS(u32, 32, 6)
|
||||
LIMEX_DUMP_FNS(u32, 32, 7)
|
||||
|
||||
LIMEX_DUMP_FNS(m128, 128, 1)
|
||||
LIMEX_DUMP_FNS(m128, 128, 2)
|
||||
LIMEX_DUMP_FNS(m128, 128, 3)
|
||||
LIMEX_DUMP_FNS(m128, 128, 4)
|
||||
LIMEX_DUMP_FNS(m128, 128, 5)
|
||||
LIMEX_DUMP_FNS(m128, 128, 6)
|
||||
LIMEX_DUMP_FNS(m128, 128, 7)
|
||||
|
||||
LIMEX_DUMP_FNS(m256, 256, 1)
|
||||
LIMEX_DUMP_FNS(m256, 256, 2)
|
||||
LIMEX_DUMP_FNS(m256, 256, 3)
|
||||
LIMEX_DUMP_FNS(m256, 256, 4)
|
||||
LIMEX_DUMP_FNS(m256, 256, 5)
|
||||
LIMEX_DUMP_FNS(m256, 256, 6)
|
||||
LIMEX_DUMP_FNS(m256, 256, 7)
|
||||
|
||||
LIMEX_DUMP_FNS(m384, 384, 1)
|
||||
LIMEX_DUMP_FNS(m384, 384, 2)
|
||||
LIMEX_DUMP_FNS(m384, 384, 3)
|
||||
LIMEX_DUMP_FNS(m384, 384, 4)
|
||||
LIMEX_DUMP_FNS(m384, 384, 5)
|
||||
LIMEX_DUMP_FNS(m384, 384, 6)
|
||||
LIMEX_DUMP_FNS(m384, 384, 7)
|
||||
|
||||
LIMEX_DUMP_FNS(m512, 512, 1)
|
||||
LIMEX_DUMP_FNS(m512, 512, 2)
|
||||
LIMEX_DUMP_FNS(m512, 512, 3)
|
||||
LIMEX_DUMP_FNS(m512, 512, 4)
|
||||
LIMEX_DUMP_FNS(m512, 512, 5)
|
||||
LIMEX_DUMP_FNS(m512, 512, 6)
|
||||
LIMEX_DUMP_FNS(m512, 512, 7)
|
||||
LIMEX_DUMP_FNS(32)
|
||||
LIMEX_DUMP_FNS(128)
|
||||
LIMEX_DUMP_FNS(256)
|
||||
LIMEX_DUMP_FNS(384)
|
||||
LIMEX_DUMP_FNS(512)
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -79,9 +79,13 @@
#ifdef ARCH_64_BIT
#define CHUNK_T u64a
#define FIND_AND_CLEAR_FN findAndClearLSB_64
#define POPCOUNT_FN popcount64
#define RANK_IN_MASK_FN rank_in_mask64
#else
#define CHUNK_T u32
#define FIND_AND_CLEAR_FN findAndClearLSB_32
#define POPCOUNT_FN popcount32
#define RANK_IN_MASK_FN rank_in_mask32
#endif

/** \brief Process a single exception. Returns 1 if exception handling should
@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,

#ifndef RUN_EXCEPTION_FN_ONLY

/** \brief Process all of the exceptions associated with the states in the \a estate. */
/** \brief Process all of the exceptions associated with the states in the \a
* estate. */
static really_inline
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex,
const u32 *exceptionMap, const EXCEPTION_T *exceptions,
const ReportID *exReports,
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx,
char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro

if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,

// A copy of the estate as an array of GPR-sized chunks.
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
#ifdef ESTATE_ON_STACK
memcpy(chunks, &estate, sizeof(STATE_T));
#else
memcpy(chunks, estatep, sizeof(STATE_T));
#endif
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));

struct proto_cache new_cache = {0, NULL};
enum CacheResult cacheable = CACHE_RESULT;

u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}

do {
u32 t = findAndClearLSB_32(&diffmask);
#ifdef ARCH_64_BIT
@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
assert(t < ARRAY_LENGTH(chunks));
CHUNK_T word = chunks[t];
assert(word != 0);
u32 base = t * sizeof(CHUNK_T) * 8;
do {
u32 bit = FIND_AND_CLEAR_FN(&word) + base;
u32 idx = exceptionMap[bit];
u32 bit = FIND_AND_CLEAR_FN(&word);
u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
u32 idx = local_index + base_index[t];
const EXCEPTION_T *e = &exceptions[idx];

if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
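/*
 * Editorial sketch, not part of the patch: with the per-state exceptionMap
 * array removed, the exception table index is recovered from the exception
 * mask itself. base_index[] is a running popcount over the mask's chunks, and
 * the in-chunk rank of the bit is added to it. A 2x32-bit toy version:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t popcount32_demo(uint32_t x) {
    return (uint32_t)__builtin_popcount(x);
}

/* Index of exception bit 'bit' (0..63) within a 64-bit exception mask stored
 * as two 32-bit chunks, low chunk first. */
static uint32_t exception_index(const uint32_t emask_chunks[2], uint32_t bit) {
    uint32_t base_index[2];
    base_index[0] = 0;
    base_index[1] = popcount32_demo(emask_chunks[0]); /* exceptions in chunk 0 */

    uint32_t t = bit / 32;          /* which chunk the bit lives in */
    uint32_t local_bit = bit % 32;
    uint32_t local_rank =
        popcount32_demo(emask_chunks[t] & ((1U << local_bit) - 1));
    return base_index[t] + local_rank;
}

int main(void) {
    /* exceptional states 3 and 9 in chunk 0; states 33 and 40 in chunk 1 */
    const uint32_t emask[2] = {(1U << 3) | (1U << 9), (1U << 1) | (1U << 8)};
    printf("index of state 9  = %u\n", exception_index(emask, 9));  /* 1 */
    printf("index of state 40 = %u\n", exception_index(emask, 40)); /* 3 */
    return 0;
}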
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -68,6 +68,9 @@
|
||||
The value of NFA.stateSize gives the total state size in bytes (the sum of
|
||||
all the above).
|
||||
|
||||
Number of shifts should be always greater or equal to 1
|
||||
Number of shifts 0 means that no appropriate NFA engine was found.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef LIMEX_INTERNAL_H
|
||||
@ -77,7 +80,8 @@
|
||||
#include "repeat_internal.h"
|
||||
|
||||
// Constants
|
||||
#define MAX_MAX_SHIFT 8 /**< largest maxshift used by a LimEx NFA */
|
||||
#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */
|
||||
#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
|
||||
|
||||
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
|
||||
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
|
||||
@ -95,24 +99,6 @@ enum LimExSquash {
|
||||
LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
|
||||
};
|
||||
|
||||
struct LimExNFABase {
|
||||
u8 reachMap[N_CHARS];
|
||||
u32 reachSize;
|
||||
u32 accelCount;
|
||||
u32 accelTableOffset;
|
||||
u32 accelAuxCount;
|
||||
u32 accelAuxOffset;
|
||||
u32 acceptCount;
|
||||
u32 acceptOffset;
|
||||
u32 acceptEodCount;
|
||||
u32 acceptEodOffset;
|
||||
u32 exceptionCount;
|
||||
u32 exceptionOffset;
|
||||
u32 exReportOffset;
|
||||
u32 repeatCount;
|
||||
u32 repeatOffset;
|
||||
};
|
||||
|
||||
/* uniform looking types for the macros */
|
||||
typedef u8 u_8;
|
||||
typedef u16 u_16;
|
||||
@ -133,7 +119,7 @@ struct NFAException##size { \
|
||||
u8 trigger; /**< from enum LimExTrigger */ \
|
||||
}; \
|
||||
\
|
||||
struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
||||
struct LimExNFA##size { \
|
||||
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
|
||||
u32 reachSize; /**< number of reach masks */ \
|
||||
u32 accelCount; /**< number of entries in accel table */ \
|
||||
@ -149,7 +135,6 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
||||
u32 exReportOffset; /* rel. to start of LimExNFA */ \
|
||||
u32 repeatCount; \
|
||||
u32 repeatOffset; \
|
||||
u32 exceptionMap[size]; \
|
||||
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
|
||||
u32 squashCount; \
|
||||
u32 topCount; \
|
||||
@ -168,8 +153,10 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
|
||||
u_##size compressMask; /**< switch off before compress */ \
|
||||
u_##size exceptionMask; \
|
||||
u_##size repeatCyclicMask; \
|
||||
u_##size shift[MAX_MAX_SHIFT]; \
|
||||
u_##size zombieMask; /**< zombie if in any of the set states */ \
|
||||
u_##size shift[MAX_SHIFT_COUNT]; \
|
||||
u32 shiftCount; /**< number of shift masks used */ \
|
||||
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
|
||||
};
|
||||
|
||||
CREATE_NFA_LIMEX(32)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -74,7 +74,6 @@
|
||||
static really_inline
|
||||
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
||||
const struct LimExNFA32 *limex,
|
||||
const u32 *exceptionMap,
|
||||
const struct NFAException32 *exceptions,
|
||||
const ReportID *exReports, u64a offset,
|
||||
struct NFAContext32 *ctx, char in_rev, char flags) {
|
||||
@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
||||
|
||||
do {
|
||||
u32 bit = findAndClearLSB_32(&estate);
|
||||
u32 idx = exceptionMap[bit];
|
||||
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
|
||||
const struct NFAException32 *e = &exceptions[idx];
|
||||
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
|
||||
ctx, &new_cache, &cacheable, in_rev, flags)) {
|
||||
@ -132,35 +131,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 1
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 2
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 3
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 4
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 5
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 6
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 32
|
||||
#define STATE_T u32
|
||||
#define SHIFT 7
|
||||
#include "limex_runtime_impl.h"
|
||||
|
@ -73,34 +73,35 @@ struct proto_cache {
};

// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
(JOIN(shift_, nels_type)( \
(JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
nels_i))
limex->shiftAmount[nels_i]))

// Calculate the (limited model) successors for a given max shift. Assumes
// LimExNFAxxx ptr in 'l', current state in 's' and successors in 'succ'.
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 's' and successors in 'succ'.

#define NFA_EXEC_GET_LIM_SUCC(gls_type, gls_shift) \
#define NFA_EXEC_GET_LIM_SUCC(gls_type) \
do { \
succ = \
JOIN(and_, gls_type)(s, JOIN(load_, gls_type)(&limex->shift[0])); \
switch (gls_shift) { \
case 7: \
succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
switch (limex->shiftCount) { \
case 8: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
case 6: \
case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
case 5: \
case 6: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
case 4: \
case 5: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
case 3: \
case 4: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
case 2: \
case 3: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
case 1: \
case 2: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
case 1: \
case 0: \
; \
} \
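/*
 * Editorial sketch, not part of the patch: the fall-through switch above is
 * an unrolled form of the loop below. For each of the shiftCount masks, the
 * states selected by shift[i] are shifted left by shiftAmount[i] and OR'd
 * into the successor set. A 64-bit toy model with made-up masks:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_SHIFT_COUNT 3

static uint64_t lim_successors(uint64_t s, const uint64_t shift_mask[],
                               const uint8_t shift_amount[], unsigned count) {
    uint64_t succ = 0;
    for (unsigned i = 0; i < count; i++) {
        succ |= (s & shift_mask[i]) << shift_amount[i];
    }
    return succ;
}

int main(void) {
    /* Hypothetical masks: which states may take a shift of 1, 2 or 3. */
    const uint64_t shift_mask[DEMO_SHIFT_COUNT] = {0x0fULL, 0x05ULL, 0x01ULL};
    const uint8_t shift_amount[DEMO_SHIFT_COUNT] = {1, 2, 3};
    uint64_t s = 0x9; /* states 0 and 3 are on */
    printf("succ = 0x%llx\n",
           (unsigned long long)lim_successors(s, shift_mask, shift_amount,
                                              DEMO_SHIFT_COUNT));
    return 0;
}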
@ -129,7 +130,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
|
||||
for (; *reports != MO_INVALID_IDX; ++reports) {
|
||||
DEBUG_PRINTF("firing report for id %u at offset %llu\n",
|
||||
*reports, offset);
|
||||
int rv = callback(offset, *reports, context);
|
||||
int rv = callback(0, offset, *reports, context);
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
@ -37,11 +37,11 @@
|
||||
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
|
||||
*/
|
||||
|
||||
#if !defined(SIZE) || !defined(STATE_T) || !defined(SHIFT)
|
||||
# error Must define SIZE and STATE_T and SHIFT in includer.
|
||||
#if !defined(SIZE) || !defined(STATE_T)
|
||||
# error Must define SIZE and STATE_T in includer.
|
||||
#endif
|
||||
|
||||
#define LIMEX_API_ROOT JOIN(JOIN(JOIN(nfaExecLimEx, SIZE), _), SHIFT)
|
||||
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
|
||||
|
||||
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
|
||||
|
||||
@ -73,6 +73,7 @@
|
||||
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
|
||||
#define OR_STATE JOIN(or_, STATE_T)
|
||||
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
|
||||
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
|
||||
#define ZERO_STATE JOIN(zero_, STATE_T)
|
||||
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
|
||||
#define ISZERO_STATE JOIN(isZero_, STATE_T)
|
||||
@ -104,8 +105,8 @@
|
||||
// continue, 1 if an accept was fired and the user instructed us to halt.
|
||||
static really_inline
|
||||
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
||||
const ReportID *exReports, const u32 *exceptionMap,
|
||||
STATE_T s, const STATE_T emask, size_t i, u64a offset,
|
||||
const ReportID *exReports, STATE_T s,
|
||||
const STATE_T emask, size_t i, u64a offset,
|
||||
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
|
||||
const char flags, const char in_rev,
|
||||
const char first_match) {
|
||||
@ -132,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
|
||||
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
|
||||
|
||||
int rv = JOIN(processExceptional, SIZE)(
|
||||
pass_state, pass_estate, diffmask, succ, limex, exceptionMap,
|
||||
exceptions, exReports, callback_offset, ctx, in_rev, localflags);
|
||||
pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports,
|
||||
callback_offset, ctx, in_rev, localflags);
|
||||
if (rv == PE_RV_HALT) {
|
||||
return 1; // Halt matching.
|
||||
}
|
||||
@ -175,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
||||
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
|
||||
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||
const ReportID *exReports = getExReports(limex);
|
||||
const u32 *exceptionMap = limex->exceptionMap;
|
||||
STATE_T s = LOAD_STATE(&ctx->s);
|
||||
|
||||
/* assert(ISALIGNED_16(exceptions)); */
|
||||
@ -201,11 +201,11 @@ without_accel:
|
||||
|
||||
u8 c = input[i];
|
||||
STATE_T succ;
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
||||
flags, 0, first_match)) {
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
|
||||
i, offset, &succ, final_loc, ctx, flags, 0,
|
||||
first_match)) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
@ -252,11 +252,11 @@ with_accel:
|
||||
|
||||
u8 c = input[i];
|
||||
STATE_T succ;
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
||||
flags, 0, first_match)) {
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
|
||||
i, offset, &succ, final_loc, ctx, flags, 0,
|
||||
first_match)) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
@ -300,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
||||
#endif
|
||||
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
|
||||
const ReportID *exReports = getExReports(limex);
|
||||
const u32 *exceptionMap = limex->exceptionMap;
|
||||
STATE_T s = LOAD_STATE(&ctx->s);
|
||||
|
||||
/* assert(ISALIGNED_16(exceptions)); */
|
||||
@ -318,9 +317,9 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
||||
|
||||
u8 c = input[i-1];
|
||||
STATE_T succ;
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
|
||||
NFA_EXEC_GET_LIM_SUCC(STATE_T);
|
||||
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
|
||||
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
|
||||
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
|
||||
flags, 1, 0)) {
|
||||
return MO_HALT_MATCHING;
|
||||
@ -349,36 +348,57 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
|
||||
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
|
||||
u64a offset) {
|
||||
if (!limex->repeatCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Note: we compress all repeats, as they may have *just* had their
|
||||
// cyclic states switched off a moment ago. TODO: is this required
|
||||
STATE_T s = LOAD_STATE(src);
|
||||
|
||||
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
|
||||
DEBUG_PRINTF("no cyclics are on\n");
|
||||
return;
|
||||
}
|
||||
|
||||
const union RepeatControl *ctrl =
|
||||
getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
|
||||
char *state_base = (char *)dest + limex->stateSize;
|
||||
|
||||
for (u32 i = 0; i < limex->repeatCount; i++) {
|
||||
DEBUG_PRINTF("repeat %u\n", i);
|
||||
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
||||
|
||||
if (!TESTBIT_STATE(&s, info->cyclicState)) {
|
||||
DEBUG_PRINTF("is dead\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
||||
if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset,
|
||||
offset) == REPEAT_STALE) {
|
||||
DEBUG_PRINTF("is stale, clearing state\n");
|
||||
CLEARBIT_STATE(&s, info->cyclicState);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n",
|
||||
info->packedCtrlOffset);
|
||||
repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
|
||||
offset);
|
||||
}
|
||||
|
||||
STORE_STATE(src, s);
|
||||
}
|
||||
|
||||
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
|
||||
const struct mq *q,
|
||||
s64a loc) {
|
||||
const struct mq *q, s64a loc) {
|
||||
void *dest = q->streamState;
|
||||
const void *src = q->state;
|
||||
void *src = q->state;
|
||||
u8 key = queue_prev_byte(q, loc);
|
||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||
COMPRESS_FN(limex, dest, src, key);
|
||||
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
|
||||
COMPRESS_FN(limex, dest, src, key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -389,15 +409,29 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
|
||||
return;
|
||||
}
|
||||
|
||||
// Note: we expand all repeats, as they may have *just* had their
|
||||
// cyclic states switched off a moment ago. TODO: is this required?
|
||||
// Note: state has already been expanded into 'dest'.
|
||||
const STATE_T cyclics =
|
||||
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
|
||||
if (ISZERO_STATE(cyclics)) {
|
||||
DEBUG_PRINTF("no cyclics are on\n");
|
||||
return;
|
||||
}
|
||||
|
||||
union RepeatControl *ctrl =
|
||||
getRepeatControlBase((char *)dest, sizeof(STATE_T));
|
||||
const char *state_base = (const char *)src + limex->stateSize;
|
||||
|
||||
for (u32 i = 0; i < limex->repeatCount; i++) {
|
||||
DEBUG_PRINTF("repeat %u\n", i);
|
||||
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
|
||||
|
||||
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
|
||||
DEBUG_PRINTF("is dead\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n",
|
||||
info->packedCtrlOffset);
|
||||
const struct RepeatInfo *repeat = getRepeatInfo(info);
|
||||
repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
|
||||
&ctrl[i]);
|
||||
@ -650,7 +684,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
|
||||
ep = MIN(ep, end_abs);
|
||||
assert(ep >= sp);
|
||||
|
||||
assert(sp >= offset); // We no longer do history buffer scans here.
|
||||
if (sp < offset) {
|
||||
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
|
||||
assert(offset - sp <= q->hlength);
|
||||
u64a local_ep = MIN(offset, ep);
|
||||
u64a final_look = 0;
|
||||
/* we are starting inside the history buffer */
|
||||
if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
|
||||
local_ep - sp, &ctx, sp,
|
||||
&final_look) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
|
||||
"offset:%llu\n", final_look, sp, end_abs, offset);
|
||||
assert(q->cur);
|
||||
q->cur--;
|
||||
q->items[q->cur].type = MQE_START;
|
||||
q->items[q->cur].location = sp + final_look - offset;
|
||||
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
|
||||
sp = local_ep;
|
||||
}
|
||||
|
||||
if (sp >= ep) {
|
||||
goto scan_done;
|
||||
@ -789,10 +843,8 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
|
||||
}
|
||||
|
||||
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback,
|
||||
UNUSED SomNfaCallback som_callback,
|
||||
void *context) {
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback, void *context) {
|
||||
assert(n && state);
|
||||
|
||||
const IMPL_NFA_T *limex = getImplNfa(n);
|
||||
@ -868,6 +920,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
|
||||
offset, report);
|
||||
}
|
||||
|
||||
char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
||||
assert(nfa && q);
|
||||
assert(q->state && q->streamState);
|
||||
|
||||
const IMPL_NFA_T *limex = getImplNfa(nfa);
|
||||
union RepeatControl *repeat_ctrl =
|
||||
getRepeatControlBase(q->state, sizeof(STATE_T));
|
||||
char *repeat_state = q->streamState + limex->stateSize;
|
||||
STATE_T state = LOAD_STATE(q->state);
|
||||
u64a offset = q->offset + q_last_loc(q) + 1;
|
||||
|
||||
return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
|
||||
offset);
|
||||
}
|
||||
|
||||
enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
||||
const struct NFA *nfa,
|
||||
struct mq *q,
|
||||
@ -920,6 +987,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
||||
#undef ANDNOT_STATE
|
||||
#undef OR_STATE
|
||||
#undef TESTBIT_STATE
|
||||
#undef CLEARBIT_STATE
|
||||
#undef ZERO_STATE
|
||||
#undef ISNONZERO_STATE
|
||||
#undef ISZERO_STATE
|
||||
@ -935,5 +1003,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
|
||||
// Parameters.
|
||||
#undef SIZE
|
||||
#undef STATE_T
|
||||
#undef SHIFT
|
||||
#undef LIMEX_API_ROOT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -34,20 +34,19 @@
|
||||
* be faster and actually correct if these assumptions don't hold true.
|
||||
*/
|
||||
|
||||
#ifndef SHUFFLE_H
|
||||
#define SHUFFLE_H
|
||||
#ifndef LIMEX_SHUFFLE_H
|
||||
#define LIMEX_SHUFFLE_H
|
||||
|
||||
#include "config.h"
|
||||
#include "bitutils.h"
|
||||
#include "simd_utils.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
|
||||
#define HAVE_PEXT
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
u32 shuffleDynamic32(u32 x, u32 mask) {
|
||||
u32 packedExtract32(u32 x, u32 mask) {
|
||||
#if defined(HAVE_PEXT)
|
||||
// Intel BMI2 can do this operation in one instruction.
|
||||
return _pext_u32(x, mask);
|
||||
@ -67,7 +66,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 shuffleDynamic64(u64a x, u64a mask) {
|
||||
u32 packedExtract64(u64a x, u64a mask) {
|
||||
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
|
||||
// Intel BMI2 can do this operation in one instruction.
|
||||
return _pext_u64(x, mask);
|
||||
@ -88,4 +87,24 @@ u32 shuffleDynamic64(u64a x, u64a mask) {

#undef HAVE_PEXT

#endif // SHUFFLE_H
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
m128 shuffled = pshufb(s, permute);
m128 compared = and128(shuffled, compare);
u16 rv = ~movemask128(eq128(compared, shuffled));
return (u32)rv;
}

#if defined(__AVX2__)
static really_inline
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
// vpshufb doesn't cross lanes, so this is a bit of a cheat
m256 shuffled = vpshufb(s, permute);
m256 compared = and256(shuffled, compare);
u32 rv = ~movemask256(eq256(compared, shuffled));
// stitch the lane-wise results back together
return (u32)((rv >> 16) | (rv & 0xffffU));
}
#endif // AVX2

#endif // LIMEX_SHUFFLE_H
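/*
 * Editorial sketch, not part of the patch: the renamed packedExtract32/64
 * helpers earlier in this header map directly onto BMI2 PEXT, i.e. the bits
 * of x at positions set in mask are gathered, in order, into the low bits of
 * the result; the SIMD variants above approximate the same "packed extract"
 * via a shuffle/compare trick. A portable scalar reference, for clarity only:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t packed_extract32_ref(uint32_t x, uint32_t mask) {
    uint32_t result = 0;
    uint32_t out_bit = 0;
    for (uint32_t i = 0; i < 32; i++) {
        if (mask & (1U << i)) {          /* this source position is selected */
            if (x & (1U << i)) {
                result |= 1U << out_bit; /* pack it into the next output bit */
            }
            out_bit++;
        }
    }
    return result;
}

int main(void) {
    /* select bits 1, 4 and 7 of x; they land in result bits 0, 1 and 2 */
    printf("0x%x\n", packed_extract32_ref(0x92, 0x92)); /* -> 0x7 */
    printf("0x%x\n", packed_extract32_ref(0x90, 0x92)); /* -> 0x6 */
    return 0;
}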
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -61,37 +61,6 @@
|
||||
#define INLINE_ATTR really_inline
|
||||
#include "limex_common_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 1
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 2
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 3
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 4
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 5
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 6
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#define SHIFT 7
|
||||
#define SIZE 128
|
||||
#define STATE_T m128
|
||||
#include "limex_runtime_impl.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -58,37 +58,6 @@
|
||||
#define INLINE_ATTR really_inline
|
||||
#include "limex_common_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 1
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 2
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 3
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 4
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 5
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 6
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#define SHIFT 7
|
||||
#define SIZE 256
|
||||
#define STATE_T m256
|
||||
#include "limex_runtime_impl.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -58,37 +58,6 @@
|
||||
#define INLINE_ATTR really_inline
|
||||
#include "limex_common_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 1
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 2
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 3
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 4
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 5
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 6
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#define SHIFT 7
|
||||
#define SIZE 384
|
||||
#define STATE_T m384
|
||||
#include "limex_runtime_impl.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -58,12 +58,6 @@
|
||||
#define INLINE_ATTR really_inline
|
||||
#include "limex_common_impl.h"
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#define SHIFT 4
|
||||
#include "limex_runtime_impl.h"
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#define SHIFT 5
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#include "limex_runtime_impl.h"
|
@ -42,13 +42,13 @@

static really_inline
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
u16 s, u64a loc, char eod, u16 * const cached_accept_state,
u32 * const cached_accept_id) {
u16 s, u64a loc, char eod, u16 *const cached_accept_state,
u32 *const cached_accept_id) {
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
(u16)(s & STATE_MASK), loc, eod);

if (!eod && s == *cached_accept_state) {
if (cb(loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}

@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
*cached_accept_id = rl->report[0];

DEBUG_PRINTF("reporting %u\n", rl->report[0]);
if (cb(loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}

@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,

for (u32 i = 0; i < count; i++) {
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
if (cb(loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
}
@ -146,7 +146,7 @@ without_accel:

if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -186,7 +186,7 @@ with_accel:

if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -328,7 +328,7 @@ without_accel:
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -360,7 +360,7 @@ with_accel:
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -475,7 +475,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -632,7 +632,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -836,7 +836,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
if (s >= m->accept_limit_8) {
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt);
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
}

char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
NfaCallback cb = q->cb;
void *ctxt = q->context;
u16 s = *(u16 *)q->state;
@ -864,7 +864,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
if (aux->accept) {
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt);
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);

const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
if (s < m->accept_limit_8) {
@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
return mcclellanHasAccept(m, get_aux(m, s), report);
}

char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);

const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
assert(s < m->accept_limit_8 || get_aux(m, s)->accept);

return s >= m->accept_limit_8;
}

char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);

const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);

return mcclellanHasAccept(m, get_aux(m, s), report);
}

char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);

const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);

return !!get_aux(m, s)->accept;
}

char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;

return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;

return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;

char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;

char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {

char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u8 s = offset ? m->start_floating : m->start_anchored;
if (s) {
*(u8 *)state = s;
@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,

char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u16 s = offset ? m->start_floating : m->start_anchored;
if (s) {
unaligned_store_u16(state, s);
@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,

void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);

u8 s = top ? m->start_anchored : *(u8 *)state;

@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);

u16 s = top ? m->start_anchored : unaligned_load_u16(state);

@ -1053,17 +1073,15 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
}

char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
context);
}

char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
assert(ISALIGNED_N(state, 2));
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
context);
@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,14 +39,14 @@ struct NFA;

char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,

char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -32,7 +32,6 @@
#include "accelcompile.h"
#include "grey.h"
#include "mcclellan_internal.h"
#include "mcclellancompile_accel.h"
#include "mcclellancompile_util.h"
#include "nfa_internal.h"
#include "shufticompile.h"
@ -65,6 +64,17 @@
using namespace std;
using boost::adaptors::map_keys;

#define ACCEL_DFA_MAX_OFFSET_DEPTH 4

/** Maximum tolerated number of escape character from an accel state.
* This is larger than nfa, as we don't have a budget and the nfa cheats on stop
* characters for sets of states */
#define ACCEL_DFA_MAX_STOP_CHAR 160

/** Maximum tolerated number of escape character from a sds accel state. Larger
* than normal states as accelerating sds is important. Matches NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192

namespace ue2 {

namespace /* anon */ {
@ -75,7 +85,7 @@ struct dstate_extra {
};

struct dfa_info {
dfa_build_strat &strat;
accel_dfa_build_strat &strat;
raw_dfa &raw;
vector<dstate> &states;
vector<dstate_extra> extra;
@ -85,7 +95,7 @@ struct dfa_info {

u8 getAlphaShift() const;

explicit dfa_info(dfa_build_strat &s)
explicit dfa_info(accel_dfa_build_strat &s)
: strat(s),
raw(s.get_raw()),
states(raw.states),
@ -128,13 +138,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
return aux;
}

static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}

static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
assert((size_t)succ_table % 2 == 0);
@ -190,120 +193,12 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH;
}

AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
const {
return find_mcclellan_escape_info(rdfa, this_idx,
max_allowed_offset_accel());
u32 mcclellan_build_strat::max_stop_char() const {
return ACCEL_DFA_MAX_STOP_CHAR;
}

/** builds acceleration schemes for states */
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
const AccelScheme &info,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;

DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.double_offset);
accel->generic.offset = verify_u8(info.offset);

if (double_byte_ok(info) && info.double_cr.none()
&& info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}

if (double_byte_ok(info) && info.double_cr.none()
&& (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true;

assert(!info.double_byte.empty());
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC
|| (p.second & CASE_CLEAR) != secondC) {
ok = false;
break;
}
}

if (ok) {
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}

u8 m1;
u8 m2;
if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1;
accel->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
accel->dverm.c1, accel->dverm.c2);
return;
}
}

if (double_byte_ok(info)
&& shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}

if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n", this_idx);
return;
}

if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM;
accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return;
}

if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return;
}

if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return;
}

accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
&accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}

assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
u32 mcclellan_build_strat::max_floating_stop_char() const {
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}

static
@ -343,15 +238,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
}
}

raw_dfa::~raw_dfa() {
}

raw_report_info::raw_report_info() {
}

raw_report_info::~raw_report_info() {
}

namespace {

struct raw_report_list {
@ -592,7 +478,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,

auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
= info.strat.getAccelInfo(cc.grey);

size_t tran_size = (1 << info.getAlphaShift())
* sizeof(u16) * count_real_states;
@ -811,7 +697,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,

auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
= info.strat.getAccelInfo(cc.grey);

size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
size_t aux_size = sizeof(mstate_aux) * info.size();
@ -1053,7 +939,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
return false;
}

aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
set<dstate_id_t> *accel_states) {
u16 total_daddy = 0;
@ -1123,12 +1009,9 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) {
return out.count();
}

bool has_accel_dfa(const NFA *nfa) {
bool has_accel_mcclellan(const NFA *nfa) {
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
return m->has_accel;
}

dfa_build_strat::~dfa_build_strat() {
}

} // namespace ue2
@ -29,6 +29,7 @@
#ifndef MCCLELLANCOMPILE_H
#define MCCLELLANCOMPILE_H

#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
@ -47,48 +48,20 @@ namespace ue2 {
class ReportManager;
struct CompileContext;

struct raw_report_info {
raw_report_info();
virtual ~raw_report_info();
virtual u32 getReportListSize() const = 0; /* in bytes */
virtual size_t size() const = 0; /* number of lists */
virtual void fillReportLists(NFA *n, size_t base_offset,
std::vector<u32> &ro /* out */) const = 0;
};

class dfa_build_strat {
public:
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
virtual ~dfa_build_strat();
virtual raw_dfa &get_raw() const = 0;
virtual std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0;
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
virtual size_t accelSize(void) const = 0;
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) = 0;
protected:
const ReportManager &rm;
};

class mcclellan_build_strat : public dfa_build_strat {
class mcclellan_build_strat : public accel_dfa_build_strat {
public:
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
: dfa_build_strat(rm_in), rdfa(rdfa_in) {}
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
raw_dfa &get_raw() const override { return rdfa; }
std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override;
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override;
void buildAccel(dstate_id_t this_idx,const AccelScheme &info,
void *accel_out) override;
virtual u32 max_allowed_offset_accel() const;
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;

private:
raw_dfa &rdfa;
@ -103,7 +76,7 @@ mcclellanCompile(raw_dfa &raw, const CompileContext &cc,

/* used internally by mcclellan/haig/gough compile process */
ue2::aligned_unique_ptr<NFA>
mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
std::set<dstate_id_t> *accel_states = nullptr);

@ -114,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw);

std::set<ReportID> all_reports(const raw_dfa &rdfa);

bool has_accel_dfa(const NFA *nfa);
bool has_accel_mcclellan(const NFA *nfa);

} // namespace ue2

@ -337,62 +337,35 @@ size_t hash_dfa(const raw_dfa &rdfa) {
}

static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
if (i != top_remap && raw.states[s].next[i] == s) {
bool can_die_early(const raw_dfa &raw, dstate_id_t s,
map<dstate_id_t, u32> &visited, u32 age_limit) {
if (contains(visited, s) && visited[s] >= age_limit) {
/* we have already visited (or are in the process of visiting) here with
* a looser limit. */
return false;
}
visited[s] = age_limit;

if (s == DEAD_STATE) {
return true;
}

if (age_limit == 0) {
return false;
}

for (const auto &next : raw.states[s].next) {
if (can_die_early(raw, next, visited, age_limit - 1)) {
return true;
}
}

return false;
}

dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
if (raw.start_floating != DEAD_STATE) {
DEBUG_PRINTF("has floating start\n");
return raw.start_floating;
}

DEBUG_PRINTF("looking for SDS proxy\n");

dstate_id_t s = raw.start_anchored;

if (has_self_loop(s, raw)) {
return s;
}

u16 top_remap = raw.alpha_remap[TOP];

ue2::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);

/* check if we are connected to a state with a self loop */
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t t = raw.states[s].next[i];
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
return t;
}
}

/* find a neighbour to use as a basis for looking for the sds proxy */
dstate_id_t t = DEAD_STATE;
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t tt = raw.states[s].next[i];
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
t = tt;
break;
}
}

if (t == DEAD_STATE) {
/* we were unable to find a state to use as a SDS proxy */
return DEAD_STATE;
}

s = t;
}
bool can_die_early(const raw_dfa &raw, u32 age_limit) {
map<dstate_id_t, u32> visited;
return can_die_early(raw, raw.start_anchored, visited, age_limit);
}

} // namespace ue2

@ -55,7 +55,7 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
/** \brief Compute a simple hash of this raw_dfa, including its reports. */
size_t hash_dfa(const raw_dfa &rdfa);

dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
bool can_die_early(const raw_dfa &raw, u32 age_limit);

} // namespace ue2

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) {
fprintf(f, "0 [style=invis];\n");
}

void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
fprintf(f, "}\n");
}

void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "rdfa.h"

#include <cstdio>
#include <string>

struct mcclellan;
struct mstate_aux;
@ -42,8 +43,10 @@ union AccelAux;

namespace ue2 {

void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);

@ -131,7 +131,8 @@ char processReports(const struct mpv *m, u8 *reporters,
rl_count++;
}

if (cb(report_offset, curr->report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, report_offset, curr->report, ctxt) ==
MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING;
}
@ -180,7 +181,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters,

for (size_t i = 2; i <= length; i++) {
for (u32 j = 0; j < rl_count; j++) {
if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING;
}

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,7 +36,6 @@ struct NFA;

char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,

#define nfaExecMpv0_testEOD NFA_API_NO_IMPL
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL
#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
#define nfaExecMpv0_QR NFA_API_NO_IMPL
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL

@ -48,7 +48,8 @@

namespace ue2 {

void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) {
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
UNUSED const std::string &base) {
}

static really_inline

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,12 +32,14 @@
#if defined(DUMP_SUPPORT)

#include <cstdio>
#include <string>

struct NFA;

namespace ue2 {

void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);

} // namespace ue2

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -347,9 +347,9 @@ void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
}
}

MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
unsigned max_len) :
cr(ref_cr), offset(off), max_len(max_len) {
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
u32 off, unsigned max_length)
: cr(ref_cr), offset(off), max_len(max_length) {
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
accels.resize(accel_num);

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

/* Normal SSSE3 shufti */

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,7 +32,6 @@
#include "multitruffle.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"

#include "multiaccel_common.h"

@ -120,6 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
*/
char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);

/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExec() and just dispatches directly to the nfa implementations. It is
* intended to be used by the Tamarama engine.
*/
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);

/** Return value indicating that the engine is dead. */
#define MO_DEAD 0

/** Return value indicating that the engine is alive. */
#define MO_ALIVE 1

@ -155,6 +165,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
*/
char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);

/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExecToMatch() and just dispatches directly to the nfa
* implementations. It is intended to be used by the Tamarama engine.
*/
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);

/**
* Report matches at the current queue location.
*
@ -175,10 +192,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
*/
char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);

/**
* Returns non-zero if the NFA is in any accept state regardless of report
* ID.
*/
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);

/**
* Process the queued commands on the given NFA up to end or the first match.
*
* Note: This version is meant for rose prefix NFAs:
* Note: This version is meant for rose prefix/infix NFAs:
* - never uses a callback
* - loading of state at a point in history is not special cased
*
@ -187,9 +210,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
* end with some variant of end. The location field of the events must
* be monotonically increasing. If not all the data was processed during
* the call, the queue is updated to reflect the remaining work.
* @param report we are interested in, if set at the end of the scan returns
* @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
* be passed in.
* @param report we are interested in. If the given report will be raised at
* the end location, the function returns @ref MO_MATCHES_PENDING. If no
* match information is desired, MO_INVALID_IDX should be passed in.
* @return @ref MO_ALIVE if the nfa is still active with no matches pending,
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
* alive
@ -205,6 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
* Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
* to hbuf (main buffer and history buffer).
*
* Note: provides the match location as the "end" offset when the callback is
* called.
*
* @param nfa engine to run
* @param offset base offset of buf
* @param buf main buffer
@ -229,7 +255,6 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
* (including br region)
* @param offset the offset to return (via the callback) with each match
* @param callback the callback to call for each match raised
* @param som_cb the callback to call for each match raised (Haig)
* @param context context pointer passed to each callback
*
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
@ -237,8 +262,7 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
*/
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);

/**
* Indicates if an engine is a zombie.

@ -42,6 +42,8 @@
#include "limex.h"
#include "mcclellan.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"

#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
case dc_ltype##_NFA_##dc_subtype: \
@ -52,41 +54,11 @@

#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
@ -98,21 +70,22 @@
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \
assert(0); \
}

char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context) {
NfaCallback callback, void *context) {
assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));

// Caller should avoid calling us if we can never produce matches.
assert(nfaAcceptsEod(nfa));

DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
som_cb, context));
context));
return 0;
}

@ -135,6 +108,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
return 0;
}

char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec_i(nfa, q, end);
}

char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec2_i(nfa, q, end);
}

static really_inline
char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
@ -258,7 +239,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {

assert(q);
assert(end >= 0);
assert(q->context);
assert(q->state);
assert(q->cur < q->end);
assert(q->end <= MAX_MQE_LEN);
@ -315,6 +295,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
return 0;
}

char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
return 0;
}

char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
DEBUG_PRINTF("nfa=%p\n", nfa);
#ifdef DEBUG

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -91,12 +91,12 @@ struct mq {
* history buffer; (logically) immediately before the
* main buffer */
size_t hlength; /**< length of the history buffer */
struct hs_scratch *scratch; /**< global scratch space */
char report_current; /**<
* report_current matches at starting offset through
* callback. If true, the queue must be located at a
* point where MO_MATCHES_PENDING was returned */
NfaCallback cb; /**< callback to trigger on matches */
SomNfaCallback som_cb; /**< callback with som info; used by haig */
void *context; /**< context to pass along with a callback */
struct mq_item items[MAX_MQE_LEN]; /**< queue items */
};

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@

#include "limex_internal.h"
#include "mcclellancompile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
#include "ue2common.h"
@ -78,7 +79,7 @@ struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
decltype(arg), (NFAEngineType)0>::doOp(i, arg)
}

typedef bool (*has_accel_fn)(const NFA *nfa);
typedef bool (*nfa_dispatch_fn)(const NFA *nfa);

template<typename T>
static
@ -87,8 +88,37 @@ bool has_accel_limex(const NFA *nfa) {
return limex->accelCount;
}

template<typename T>
static
bool has_accel_generic(const NFA *) {
bool has_repeats_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
return limex->repeatCount;
}

template<typename T>
static
bool has_repeats_other_than_firsts_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
const char *ptr = (const char *)limex;

const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);

for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}

return false;
}

static
bool dispatch_false(const NFA *) {
return false;
}

@ -140,72 +170,53 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
#define DO_IF_DUMP_SUPPORT(a)
#endif

#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
#define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
static UNUSED const char *name; \
static const NFACategory category = NFA_LIMEX; \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
static const has_accel_fn has_accel; \
static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \
}; \
const has_accel_fn NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::has_accel \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
= has_accel_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \
= has_repeats_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn \
NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \
= has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \
DO_IF_DUMP_SUPPORT( \
const char *NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::name \
= "LimEx (0-"#mlt_shift") "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size##_##mlt_shift>((const NFA *)ptr); \
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
} \
};)

MAKE_LIMEX_TRAITS(32, 1)
MAKE_LIMEX_TRAITS(32, 2)
MAKE_LIMEX_TRAITS(32, 3)
MAKE_LIMEX_TRAITS(32, 4)
MAKE_LIMEX_TRAITS(32, 5)
MAKE_LIMEX_TRAITS(32, 6)
MAKE_LIMEX_TRAITS(32, 7)
MAKE_LIMEX_TRAITS(128, 1)
MAKE_LIMEX_TRAITS(128, 2)
MAKE_LIMEX_TRAITS(128, 3)
MAKE_LIMEX_TRAITS(128, 4)
MAKE_LIMEX_TRAITS(128, 5)
MAKE_LIMEX_TRAITS(128, 6)
MAKE_LIMEX_TRAITS(128, 7)
MAKE_LIMEX_TRAITS(256, 1)
MAKE_LIMEX_TRAITS(256, 2)
MAKE_LIMEX_TRAITS(256, 3)
MAKE_LIMEX_TRAITS(256, 4)
MAKE_LIMEX_TRAITS(256, 5)
MAKE_LIMEX_TRAITS(256, 6)
MAKE_LIMEX_TRAITS(256, 7)
MAKE_LIMEX_TRAITS(384, 1)
MAKE_LIMEX_TRAITS(384, 2)
MAKE_LIMEX_TRAITS(384, 3)
MAKE_LIMEX_TRAITS(384, 4)
MAKE_LIMEX_TRAITS(384, 5)
MAKE_LIMEX_TRAITS(384, 6)
MAKE_LIMEX_TRAITS(384, 7)
MAKE_LIMEX_TRAITS(512, 1)
MAKE_LIMEX_TRAITS(512, 2)
MAKE_LIMEX_TRAITS(512, 3)
MAKE_LIMEX_TRAITS(512, 4)
MAKE_LIMEX_TRAITS(512, 5)
MAKE_LIMEX_TRAITS(512, 6)
MAKE_LIMEX_TRAITS(512, 7)
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)

template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
#endif
@ -215,9 +226,13 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
#endif
@ -227,9 +242,13 @@ template<> struct NFATraits<GOUGH_NFA_8> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
#endif
@ -239,9 +258,13 @@ template<> struct NFATraits<GOUGH_NFA_16> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
#endif
@ -251,9 +274,13 @@ template<> struct NFATraits<MPV_NFA_0> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MPV_NFA_0>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
#endif
@ -263,9 +290,13 @@ template<> struct NFATraits<CASTLE_NFA_0> {
|
||||
static const NFACategory category = NFA_OTHER;
|
||||
static const u32 stateAlign = 8;
|
||||
static const bool fast = true;
|
||||
static const has_accel_fn has_accel;
|
||||
static const nfa_dispatch_fn has_accel;
|
||||
static const nfa_dispatch_fn has_repeats;
|
||||
static const nfa_dispatch_fn has_repeats_other_than_firsts;
|
||||
};
|
||||
const has_accel_fn NFATraits<CASTLE_NFA_0>::has_accel = has_accel_generic;
|
||||
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_accel = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats = dispatch_false;
|
||||
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
|
||||
#if defined(DUMP_SUPPORT)
|
||||
const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
|
||||
#endif
|
||||
@ -275,9 +306,13 @@ template<> struct NFATraits<LBR_NFA_Dot> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Dot>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
#endif
@ -287,9 +322,13 @@ template<> struct NFATraits<LBR_NFA_Verm> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Verm>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
#endif
@ -299,9 +338,13 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_NVerm>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
#endif
@ -311,9 +354,13 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Shuf>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
#endif
@ -323,13 +370,49 @@ template<> struct NFATraits<LBR_NFA_Truf> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
#endif

template<> struct NFATraits<SHENG_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
#endif

template<> struct NFATraits<TAMARAMA_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 32;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
#endif

} // namespace

#if defined(DUMP_SUPPORT)
@ -380,42 +463,39 @@ struct is_limex {
};
}

namespace {
template<NFAEngineType t>
struct has_repeats_other_than_firsts_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats_other_than_firsts;
}
};
}

bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
return false;
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
has_repeats_other_than_firsts_dispatch,
&nfa)(&nfa);
}

namespace {
template<NFAEngineType t>
struct has_repeats_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats;
}

const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
const char *ptr = (const char *)limex;

const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);

for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}

return false;
};
}

bool has_bounded_repeats(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
return false;
}

const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
return limex->repeatCount;
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch,
&nfa)(&nfa);
}

namespace {
template<NFAEngineType t>
struct has_accel_dispatch {
static has_accel_fn call(const void *) {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_accel;
}
};
@ -423,8 +503,7 @@ struct has_accel_dispatch {

bool has_accel(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
&nfa)
(&nfa);
&nfa)(&nfa);
}

bool requires_decompress_key(const NFA &nfa) {
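The change above retires the LimEx-specific repeat walks in has_bounded_repeats() and has_bounded_repeats_other_than_firsts() and answers both questions the same way has_accel() already does: through a per-type dispatch struct whose call() hands back the function pointer recorded in NFATraits<t>, selected at runtime by DISPATCH_BY_NFA_TYPE. A self-contained sketch of that idiom, using made-up names rather than the real NFATraits machinery, looks like this:

// Illustration only: the traits-table-plus-dispatch pattern in miniature.
// Every identifier here is hypothetical.
#include <cassert>

enum ExEngineType { EX_ENGINE_A, EX_ENGINE_B };
typedef bool (*ex_dispatch_fn)(const void *);

static bool ex_dispatch_false(const void *) { return false; }
static bool ex_dispatch_true(const void *) { return true; }

template<ExEngineType t> struct ExTraits;
template<> struct ExTraits<EX_ENGINE_A> { static const ex_dispatch_fn has_repeats; };
template<> struct ExTraits<EX_ENGINE_B> { static const ex_dispatch_fn has_repeats; };
const ex_dispatch_fn ExTraits<EX_ENGINE_A>::has_repeats = ex_dispatch_false;
const ex_dispatch_fn ExTraits<EX_ENGINE_B>::has_repeats = ex_dispatch_true;

template<ExEngineType t>
struct ex_has_repeats_dispatch {
    // return the per-type answer function; the caller then invokes it
    static ex_dispatch_fn call(const void *) { return ExTraits<t>::has_repeats; }
};

static bool ex_has_repeats(ExEngineType type, const void *impl) {
    switch (type) { // stands in for the DISPATCH_BY_NFA_TYPE macro
    case EX_ENGINE_A: return ex_has_repeats_dispatch<EX_ENGINE_A>::call(impl)(impl);
    case EX_ENGINE_B: return ex_has_repeats_dispatch<EX_ENGINE_B>::call(impl)(impl);
    }
    assert(0);
    return false;
}

Keeping the per-engine answers in one traits table is what lets the new Sheng and Tamarama engines plug in with nothing more than an NFATraits specialisation and a dispatch case.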
@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#if defined(DUMP_SUPPORT)

#include <cstdio>
#include <string>

struct NFA;

@ -45,7 +46,7 @@ namespace ue2 {
* \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
* file pointed to by dotFile.
*/
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile);
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);

/** \brief Dump a textual representation of the NFA. */
void nfaDumpText(const struct NFA *fact, FILE *textFile);
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -40,6 +40,8 @@
|
||||
#include "limex.h"
|
||||
#include "mcclellandump.h"
|
||||
#include "mpv_dump.h"
|
||||
#include "shengdump.h"
|
||||
#include "tamarama_dump.h"
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error "no dump support"
|
||||
@ -57,41 +59,11 @@ namespace ue2 {
|
||||
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
|
||||
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
|
||||
switch (nfa->type) { \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
|
||||
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
|
||||
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
|
||||
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
|
||||
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
|
||||
@ -103,12 +75,15 @@ namespace ue2 {
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \
assert(0); \
}

void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) {
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile));
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile,
const std::string &base) {
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base));
}

void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {
@ -51,41 +51,11 @@ extern "C"
|
||||
// Common data structures for NFAs
|
||||
|
||||
enum NFAEngineType {
|
||||
LIMEX_NFA_32_1,
|
||||
LIMEX_NFA_32_2,
|
||||
LIMEX_NFA_32_3,
|
||||
LIMEX_NFA_32_4,
|
||||
LIMEX_NFA_32_5,
|
||||
LIMEX_NFA_32_6,
|
||||
LIMEX_NFA_32_7,
|
||||
LIMEX_NFA_128_1,
|
||||
LIMEX_NFA_128_2,
|
||||
LIMEX_NFA_128_3,
|
||||
LIMEX_NFA_128_4,
|
||||
LIMEX_NFA_128_5,
|
||||
LIMEX_NFA_128_6,
|
||||
LIMEX_NFA_128_7,
|
||||
LIMEX_NFA_256_1,
|
||||
LIMEX_NFA_256_2,
|
||||
LIMEX_NFA_256_3,
|
||||
LIMEX_NFA_256_4,
|
||||
LIMEX_NFA_256_5,
|
||||
LIMEX_NFA_256_6,
|
||||
LIMEX_NFA_256_7,
|
||||
LIMEX_NFA_384_1,
|
||||
LIMEX_NFA_384_2,
|
||||
LIMEX_NFA_384_3,
|
||||
LIMEX_NFA_384_4,
|
||||
LIMEX_NFA_384_5,
|
||||
LIMEX_NFA_384_6,
|
||||
LIMEX_NFA_384_7,
|
||||
LIMEX_NFA_512_1,
|
||||
LIMEX_NFA_512_2,
|
||||
LIMEX_NFA_512_3,
|
||||
LIMEX_NFA_512_4,
|
||||
LIMEX_NFA_512_5,
|
||||
LIMEX_NFA_512_6,
|
||||
LIMEX_NFA_512_7,
|
||||
LIMEX_NFA_32,
|
||||
LIMEX_NFA_128,
|
||||
LIMEX_NFA_256,
|
||||
LIMEX_NFA_384,
|
||||
LIMEX_NFA_512,
|
||||
MCCLELLAN_NFA_8, /**< magic pseudo nfa */
|
||||
MCCLELLAN_NFA_16, /**< magic pseudo nfa */
|
||||
GOUGH_NFA_8, /**< magic pseudo nfa */
|
||||
@ -97,6 +67,8 @@ enum NFAEngineType {
|
||||
LBR_NFA_Shuf, /**< magic pseudo nfa */
|
||||
LBR_NFA_Truf, /**< magic pseudo nfa */
|
||||
CASTLE_NFA_0, /**< magic pseudo nfa */
|
||||
SHENG_NFA_0, /**< magic pseudo nfa */
|
||||
TAMARAMA_NFA_0, /**< magic nfa container */
|
||||
/** \brief bogus NFA - not used */
|
||||
INVALID_NFA
|
||||
};
|
||||
@ -175,50 +147,27 @@ static really_inline int isGoughType(u8 t) {
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
}

/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA.
* */
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
static really_inline int isShengType(u8 t) {
return t == SHENG_NFA_0;
}

/**
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) {
return isMcClellanType(t) || isGoughType(t);
return isMcClellanType(t) || isGoughType(t) || isShengType(t);
}

/** \brief True if the given type (from NFA::type) is an NFA. */
static really_inline int isNfaType(u8 t) {
switch (t) {
case LIMEX_NFA_32_1:
|
||||
case LIMEX_NFA_32_2:
|
||||
case LIMEX_NFA_32_3:
|
||||
case LIMEX_NFA_32_4:
|
||||
case LIMEX_NFA_32_5:
|
||||
case LIMEX_NFA_32_6:
|
||||
case LIMEX_NFA_32_7:
|
||||
case LIMEX_NFA_128_1:
|
||||
case LIMEX_NFA_128_2:
|
||||
case LIMEX_NFA_128_3:
|
||||
case LIMEX_NFA_128_4:
|
||||
case LIMEX_NFA_128_5:
|
||||
case LIMEX_NFA_128_6:
|
||||
case LIMEX_NFA_128_7:
|
||||
case LIMEX_NFA_256_1:
|
||||
case LIMEX_NFA_256_2:
|
||||
case LIMEX_NFA_256_3:
|
||||
case LIMEX_NFA_256_4:
|
||||
case LIMEX_NFA_256_5:
|
||||
case LIMEX_NFA_256_6:
|
||||
case LIMEX_NFA_256_7:
|
||||
case LIMEX_NFA_384_1:
|
||||
case LIMEX_NFA_384_2:
|
||||
case LIMEX_NFA_384_3:
|
||||
case LIMEX_NFA_384_4:
|
||||
case LIMEX_NFA_384_5:
|
||||
case LIMEX_NFA_384_6:
|
||||
case LIMEX_NFA_384_7:
|
||||
case LIMEX_NFA_512_1:
|
||||
case LIMEX_NFA_512_2:
|
||||
case LIMEX_NFA_512_3:
|
||||
case LIMEX_NFA_512_4:
|
||||
case LIMEX_NFA_512_5:
|
||||
case LIMEX_NFA_512_6:
|
||||
case LIMEX_NFA_512_7:
|
||||
case LIMEX_NFA_32:
|
||||
case LIMEX_NFA_128:
|
||||
case LIMEX_NFA_256:
|
||||
case LIMEX_NFA_384:
|
||||
case LIMEX_NFA_512:
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
@ -233,6 +182,12 @@ int isLbrType(u8 t) {
t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
}

/** \brief True if the given type (from NFA::type) is a container engine. */
static really_inline
int isContainerType(u8 t) {
return t == TAMARAMA_NFA_0;
}

static really_inline
int isMultiTopType(u8 t) {
return !isDfaType(t) && !isLbrType(t);
@ -37,6 +37,8 @@

#include "ue2common.h"

#include <string>

namespace ue2 {

/** \brief Specify the use-case for an nfa engine. */
@ -47,6 +49,7 @@ enum nfa_kind {
NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
NFA_REV_PREFIX, //! reverse running prefixes (for som)
NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
};

/** \brief True if this kind of engine is triggered by a top event. */
@ -63,8 +66,10 @@ bool is_triggered(enum nfa_kind k) {
}

/**
* \brief True if this kind of engine generates callback events when it
* enters accept states.
* \brief True if this kind of engine actively checks for accept
* states either to halt matching or to raise a callback. Only engines
* generated with this property should call nfaQueueExec() or
* nfaQueueExecToMatch().
*/
inline
bool generates_callbacks(enum nfa_kind k) {
@ -73,6 +78,24 @@ bool generates_callbacks(enum nfa_kind k) {
case NFA_OUTFIX:
case NFA_OUTFIX_RAW:
case NFA_REV_PREFIX:
case NFA_EAGER_PREFIX:
return true;
default:
return false;
}
}

/**
* \brief True if this kind of engine has its state inspected to see if it is in
* an accept state. Engines generated with this property will commonly call
* nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
*/
inline
bool inspects_states_for_accepts(enum nfa_kind k) {
switch (k) {
case NFA_PREFIX:
case NFA_INFIX:
case NFA_EAGER_PREFIX:
return true;
default:
return false;
@ -94,6 +117,32 @@ bool has_managed_reports(enum nfa_kind k) {
}
}

#if defined(DEBUG) || defined(DUMP_SUPPORT)

inline
std::string to_string(nfa_kind k) {
switch (k) {
case NFA_PREFIX:
return "PREFIX";
case NFA_INFIX:
return "INFIX";
case NFA_SUFFIX:
return "SUFFIX";
case NFA_OUTFIX:
return "OUTFIX";
case NFA_REV_PREFIX:
return "REV_PREFIX";
case NFA_OUTFIX_RAW:
return "OUTFIX_RAW";
case NFA_EAGER_PREFIX:
return "EAGER_PREFIX";
}
assert(0);
return "?";
}

#endif

} // namespace ue2

#endif
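Together with NFA_EAGER_PREFIX, the reworked predicates spell out which runtime entry points each engine kind is meant to be driven through: kinds for which generates_callbacks() holds are run via nfaQueueExec()/nfaQueueExecToMatch(), while kinds for which inspects_states_for_accepts() holds are run via nfaQueueExecRose() and polled with nfaInAcceptState()/nfaInAnyAcceptState(); an eager prefix satisfies both. A hedged sketch of a caller branching on that split follows; the chooseScanPath() helper and the header path are assumptions, only the predicates and the entry-point names quoted from the comments above come from the source.

#include <cassert>
#include "nfa_kind.h" // assumed name of the header containing the predicates above

enum class ScanPath { CallbackDriven, StateInspecting };

// Hypothetical helper: decide how the runtime should drive an engine of kind k.
static ScanPath chooseScanPath(ue2::nfa_kind k) {
    if (ue2::generates_callbacks(k)) {
        // e.g. suffixes/outfixes: scanned with nfaQueueExec()/nfaQueueExecToMatch()
        return ScanPath::CallbackDriven;
    }
    // e.g. prefixes/infixes: scanned with nfaQueueExecRose() and polled via
    // nfaInAcceptState()/nfaInAnyAcceptState()
    assert(ue2::inspects_states_for_accepts(k));
    return ScanPath::StateInspecting;
}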
676
src/nfa/sheng.c
Normal file
@ -0,0 +1,676 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sheng.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "nfa_api.h"
|
||||
#include "nfa_api_queue.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/join.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
enum MatchMode {
|
||||
CALLBACK_OUTPUT,
|
||||
STOP_AT_MATCH,
|
||||
NO_MATCHES
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const struct sheng *get_sheng(const struct NFA *n) {
|
||||
return (const struct sheng *)getImplNfa(n);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) {
|
||||
u32 offset = sh->aux_offset - sizeof(struct NFA) +
|
||||
(id & SHENG_STATE_MASK) * sizeof(struct sstate_aux);
|
||||
DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
|
||||
id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA));
|
||||
return (const struct sstate_aux *)((const char *) sh + offset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const union AccelAux *get_accel(const struct sheng *sh, u8 id) {
|
||||
const struct sstate_aux *saux = get_aux(sh, id);
|
||||
DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
|
||||
const union AccelAux *aux = (const union AccelAux *)
|
||||
((const char *)sh + saux->accel - sizeof(struct NFA));
|
||||
return aux;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct report_list *get_rl(const struct sheng *sh,
|
||||
const struct sstate_aux *aux) {
|
||||
DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
|
||||
return (const struct report_list *)
|
||||
((const char *)sh + aux->accept - sizeof(struct NFA));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct report_list *get_eod_rl(const struct sheng *sh,
|
||||
const struct sstate_aux *aux) {
|
||||
DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept);
|
||||
return (const struct report_list *)
|
||||
((const char *)sh + aux->accept_eod - sizeof(struct NFA));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
|
||||
ReportID report) {
|
||||
assert(sh && aux);
|
||||
|
||||
const struct report_list *rl = get_rl(sh, aux);
|
||||
assert(ISALIGNED_N(rl, 4));
|
||||
|
||||
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||
|
||||
for (u32 i = 0; i < rl->count; i++) {
|
||||
if (rl->report[i] == report) {
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) {
|
||||
DEBUG_PRINTF("reporting %u\n", r);
|
||||
if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
const u8 state, u64a loc, u8 *const cached_accept_state,
|
||||
ReportID *const cached_accept_id, char eod) {
|
||||
DEBUG_PRINTF("reporting matches @ %llu\n", loc);
|
||||
|
||||
if (!eod && state == *cached_accept_state) {
|
||||
DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
|
||||
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
const struct sstate_aux *aux = get_aux(sh, state);
|
||||
const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux);
|
||||
assert(ISALIGNED(rl));
|
||||
|
||||
DEBUG_PRINTF("report list has %u entries\n", rl->count);
|
||||
u32 count = rl->count;
|
||||
|
||||
if (!eod && count == 1) {
|
||||
*cached_accept_state = state;
|
||||
*cached_accept_id = rl->report[0];
|
||||
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
|
||||
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < count; i++) {
|
||||
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
|
||||
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING; /* termination requested */
|
||||
}
|
||||
}
|
||||
return MO_CONTINUE_MATCHING; /* continue execution */
|
||||
}
|
||||
|
||||
/* include Sheng function definitions */
|
||||
#include "sheng_defs.h"
|
||||
|
||||
static really_inline
|
||||
char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
int rv;
|
||||
/* scan and report all matches */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
} else {
|
||||
if (has_accel) {
|
||||
rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
rv = sheng_co(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
|
||||
u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
/* just scan the buffer */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
sheng4_nmda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start, end,
|
||||
scanned);
|
||||
} else {
|
||||
sheng4_nmd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start, end,
|
||||
scanned);
|
||||
}
|
||||
sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, *scanned, end, scanned);
|
||||
} else {
|
||||
sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, start, end, scanned);
|
||||
sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
|
||||
single, offset, cur_buf, *scanned, end, scanned);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt,
|
||||
u64a offset, u8 *const cached_accept_state,
|
||||
ReportID *const cached_accept_id, const u8 *cur_buf,
|
||||
const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
|
||||
u8 single, const u8 **scanned, u8 *state) {
|
||||
DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
|
||||
(u64a)(end - start), offset);
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
|
||||
(s64a)(end - cur_buf));
|
||||
DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
|
||||
!!has_accel, !!single);
|
||||
int rv;
|
||||
/* scan until first match */
|
||||
if (can_die) {
|
||||
if (has_accel) {
|
||||
rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
|
||||
rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned,
|
||||
end, scanned);
|
||||
} else {
|
||||
if (has_accel) {
|
||||
rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
} else {
|
||||
rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, start,
|
||||
end, scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
|
||||
rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
|
||||
cached_accept_id, single, offset, cur_buf, *scanned, end,
|
||||
scanned);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
return MO_DEAD;
|
||||
}
|
||||
/* if we stopped before we expected, we found a match */
|
||||
if (rv == MO_MATCHES_PENDING) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
static never_inline
|
||||
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
|
||||
enum MatchMode mode) {
|
||||
u8 state = *(u8 *)q->state;
|
||||
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
|
||||
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
|
||||
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
|
||||
|
||||
u8 cached_accept_state = 0;
|
||||
ReportID cached_accept_id = 0;
|
||||
|
||||
DEBUG_PRINTF("starting Sheng execution in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
|
||||
if (q->report_current) {
|
||||
DEBUG_PRINTF("reporting current pending matches\n");
|
||||
assert(sh);
|
||||
|
||||
q->report_current = 0;
|
||||
|
||||
int rv;
|
||||
if (single) {
|
||||
rv = fireSingleReport(q->cb, q->context, sh->report,
|
||||
q_cur_offset(q));
|
||||
} else {
|
||||
rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
|
||||
&cached_accept_state, &cached_accept_id, 0);
|
||||
}
|
||||
if (rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
return MO_DEAD;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("proceeding with matching\n");
|
||||
}
|
||||
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
s64a start = q_cur_loc(q);
|
||||
|
||||
DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
|
||||
mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
|
||||
mode == NO_MATCHES ? "NO MATCHES" :
|
||||
mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
|
||||
|
||||
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||
q_cur_type(q) == MQE_START ? "START" :
|
||||
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||
|
||||
const u8* cur_buf;
|
||||
if (start < 0) {
|
||||
DEBUG_PRINTF("negative location, scanning history\n");
|
||||
DEBUG_PRINTF("min location: %zd\n", -q->hlength);
|
||||
cur_buf = q->history + q->hlength;
|
||||
} else {
|
||||
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||
DEBUG_PRINTF("max location: %lli\n", b_end);
|
||||
cur_buf = q->buffer;
|
||||
}
|
||||
|
||||
/* if our queue event is past our end */
|
||||
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||
DEBUG_PRINTF("current location past buffer end\n");
|
||||
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
q->items[q->cur].location = b_end;
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
q->cur++;
|
||||
|
||||
s64a cur_start = start;
|
||||
|
||||
while (1) {
|
||||
DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
|
||||
q_cur_type(q) == MQE_START ? "START" :
|
||||
q_cur_type(q) == MQE_TOP ? "TOP" :
|
||||
q_cur_type(q) == MQE_END ? "END" : "???");
|
||||
s64a end = q_cur_loc(q);
|
||||
if (mode != NO_MATCHES) {
|
||||
end = MIN(end, b_end);
|
||||
}
|
||||
assert(end <= (s64a) q->length);
|
||||
s64a cur_end = end;
|
||||
|
||||
/* we may cross the border between history and current buffer */
|
||||
if (cur_start < 0) {
|
||||
cur_end = MIN(0, cur_end);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
|
||||
|
||||
/* don't scan zero length buffer */
|
||||
if (cur_start != cur_end) {
|
||||
const u8 * scanned = cur_buf;
|
||||
char rv;
|
||||
|
||||
/* if we're in nomatch mode or if we're scanning history buffer */
|
||||
if (mode == NO_MATCHES ||
|
||||
(cur_start < 0 && mode == CALLBACK_OUTPUT)) {
|
||||
runShengNm(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id, cur_buf,
|
||||
cur_buf + cur_start, cur_buf + cur_end, can_die,
|
||||
has_accel, single, &scanned, &state);
|
||||
} else if (mode == CALLBACK_OUTPUT) {
|
||||
rv = runShengCb(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id,
|
||||
cur_buf, cur_buf + cur_start, cur_buf + cur_end,
|
||||
can_die, has_accel, single, &scanned, &state);
|
||||
if (rv == MO_DEAD) {
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return MO_DEAD;
|
||||
}
|
||||
} else if (mode == STOP_AT_MATCH) {
|
||||
rv = runShengSam(sh, q->cb, q->context, q->offset,
|
||||
&cached_accept_state, &cached_accept_id,
|
||||
cur_buf, cur_buf + cur_start,
|
||||
cur_buf + cur_end, can_die, has_accel, single,
|
||||
&scanned, &state);
|
||||
if (rv == MO_DEAD) {
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return rv;
|
||||
} else if (rv == MO_MATCHES_PENDING) {
|
||||
assert(q->cur);
|
||||
DEBUG_PRINTF("found a match, setting q location to %zd\n",
|
||||
scanned - cur_buf + 1);
|
||||
q->cur--;
|
||||
q->items[q->cur].type = MQE_START;
|
||||
q->items[q->cur].location =
|
||||
scanned - cur_buf + 1; /* due to exiting early */
|
||||
*(u8 *)q->state = state;
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return rv;
|
||||
}
|
||||
} else {
|
||||
assert(!"invalid scanning mode!");
|
||||
}
|
||||
assert(scanned == cur_buf + cur_end);
|
||||
|
||||
cur_start = cur_end;
|
||||
}
|
||||
|
||||
/* if our queue event is past our end */
|
||||
if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
|
||||
DEBUG_PRINTF("current location past buffer end\n");
|
||||
DEBUG_PRINTF("setting q location to %llu\n", b_end);
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
q->cur--;
|
||||
q->items[q->cur].type = MQE_START;
|
||||
q->items[q->cur].location = b_end;
|
||||
*(u8 *)q->state = state;
|
||||
return MO_ALIVE;
|
||||
}
|
||||
|
||||
/* crossing over into actual buffer */
|
||||
if (cur_start == 0) {
|
||||
DEBUG_PRINTF("positive location, scanning buffer\n");
|
||||
DEBUG_PRINTF("max offset: %lli\n", b_end);
|
||||
cur_buf = q->buffer;
|
||||
}
|
||||
|
||||
/* continue scanning the same buffer */
|
||||
if (end != cur_end) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (q_cur_type(q)) {
|
||||
case MQE_END:
|
||||
*(u8 *)q->state = state;
|
||||
q->cur++;
|
||||
DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
|
||||
if (can_die) {
|
||||
return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
|
||||
}
|
||||
return MO_ALIVE;
|
||||
case MQE_TOP:
|
||||
if (q->offset + cur_start == 0) {
|
||||
DEBUG_PRINTF("Anchored start, going to state %u\n",
|
||||
sh->anchored);
|
||||
state = sh->anchored;
|
||||
} else {
|
||||
u8 new_state = get_aux(sh, state)->top;
|
||||
DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
|
||||
new_state & SHENG_STATE_MASK);
|
||||
state = new_state;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(!"invalid queue event");
|
||||
break;
|
||||
}
|
||||
q->cur++;
|
||||
}
|
||||
}
|
||||
|
||||
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
|
||||
size_t length, NfaCallback cb, void *context) {
|
||||
DEBUG_PRINTF("smallwrite Sheng\n");
|
||||
assert(n->type == SHENG_NFA_0);
|
||||
const struct sheng *sh = getImplNfa(n);
|
||||
u8 state = sh->anchored;
|
||||
u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
|
||||
u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
|
||||
u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
|
||||
u8 cached_accept_state = 0;
|
||||
ReportID cached_accept_id = 0;
|
||||
|
||||
/* scan and report all matches */
|
||||
int rv;
|
||||
s64a end = length;
|
||||
const u8 *scanned;
|
||||
|
||||
rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
|
||||
&cached_accept_id, buffer, buffer, buffer + end, can_die,
|
||||
has_accel, single, &scanned, &state);
|
||||
if (rv == MO_DEAD) {
|
||||
DEBUG_PRINTF("exiting in state %u\n",
|
||||
state & SHENG_STATE_MASK);
|
||||
return MO_DEAD;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, state);
|
||||
|
||||
if (aux->accept_eod) {
|
||||
DEBUG_PRINTF("Reporting EOD matches\n");
|
||||
fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
|
||||
&cached_accept_id, 1);
|
||||
}
|
||||
|
||||
return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) {
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
char rv = runSheng(sh, q, end, CALLBACK_OUTPUT);
|
||||
return rv;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) {
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
char rv = runSheng(sh, q, end, STOP_AT_MATCH);
|
||||
return rv;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) {
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES);
|
||||
|
||||
if (rv && nfaExecSheng0_inAccept(n, report, q)) {
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q) {
|
||||
assert(n && q);
|
||||
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
u8 s = *(const u8 *)q->state;
|
||||
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
|
||||
if (!aux->accept) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return shengHasAccept(sh, aux, report);
|
||||
}
|
||||
|
||||
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) {
|
||||
assert(n && q);
|
||||
|
||||
const struct sheng *sh = get_sheng(n);
|
||||
u8 s = *(const u8 *)q->state;
|
||||
DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
return !!aux->accept;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
|
||||
UNUSED const char *streamState, u64a offset,
|
||||
NfaCallback cb, void *ctxt) {
|
||||
assert(nfa);
|
||||
|
||||
const struct sheng *sh = get_sheng(nfa);
|
||||
u8 s = *(const u8 *)state;
|
||||
DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
|
||||
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
|
||||
if (!aux->accept_eod) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1);
|
||||
}
|
||||
|
||||
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
|
||||
const struct sheng *sh = (const struct sheng *)getImplNfa(n);
|
||||
NfaCallback cb = q->cb;
|
||||
void *ctxt = q->context;
|
||||
u8 s = *(u8 *)q->state;
|
||||
const struct sstate_aux *aux = get_aux(sh, s);
|
||||
u64a offset = q_cur_offset(q);
|
||||
u8 cached_state_id = 0;
|
||||
ReportID cached_report_id = 0;
|
||||
assert(q_cur_type(q) == MQE_START);
|
||||
|
||||
if (aux->accept) {
|
||||
if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
|
||||
fireSingleReport(cb, ctxt, sh->report, offset);
|
||||
} else {
|
||||
fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
|
||||
&cached_report_id, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
|
||||
void *state, UNUSED u8 key) {
|
||||
const struct sheng *sh = get_sheng(nfa);
|
||||
u8 *s = (u8 *)state;
|
||||
*s = offset ? sh->floating: sh->anchored;
|
||||
return !(*s & SHENG_STATE_DEAD);
|
||||
}
|
||||
|
||||
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) {
|
||||
assert(nfa->scratchStateSize == 1);
|
||||
|
||||
/* starting in floating state */
|
||||
const struct sheng *sh = get_sheng(nfa);
|
||||
*(u8 *)q->state = sh->floating;
|
||||
DEBUG_PRINTF("starting in floating state\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa,
|
||||
const struct mq *q, UNUSED s64a loc) {
|
||||
void *dest = q->streamState;
|
||||
const void *src = q->state;
|
||||
assert(nfa->scratchStateSize == 1);
|
||||
assert(nfa->streamStateSize == 1);
|
||||
*(u8 *)dest = *(const u8 *)src;
|
||||
return 0;
|
||||
}
|
||||
|
||||
char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest,
|
||||
const void *src, UNUSED u64a offset,
|
||||
UNUSED u8 key) {
|
||||
assert(nfa->scratchStateSize == 1);
|
||||
assert(nfa->streamStateSize == 1);
|
||||
*(u8 *)dest = *(const u8 *)src;
|
||||
return 0;
|
||||
}
|
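One detail worth keeping in mind when reading runSheng() and its helpers above: the entire DFA state is a single byte in which the low bits pick the current state (hence the pervasive state & SHENG_STATE_MASK) and the remaining bits carry the dead/accept/accel flags that the scan loops test. The sketch below illustrates that packing with placeholder values; only the macro names appear in this file (their real definitions live in sheng_internal.h), so the bit widths and positions here are assumptions.

/* Illustration only: an assumed layout for the packed Sheng state byte. */
#define EX_STATE_MASK   0x1fU /* low bits: state id (width is an assumption) */
#define EX_FLAG_ACCEPT  0x20U /* placeholder for SHENG_STATE_ACCEPT */
#define EX_FLAG_ACCEL   0x40U /* placeholder for SHENG_STATE_ACCEL */
#define EX_FLAG_DEAD    0x80U /* placeholder for SHENG_STATE_DEAD */

static inline unsigned ex_state_id(unsigned char s) {
    return s & EX_STATE_MASK; /* mirrors "state & SHENG_STATE_MASK" above */
}

static inline int ex_is_interesting(unsigned char s) {
    /* a single AND answers "did anything special happen?", which is what the
     * 4-byte loops in sheng_defs.h/sheng_impl4.h below rely on */
    return (s & (EX_FLAG_ACCEPT | EX_FLAG_ACCEL | EX_FLAG_DEAD)) != 0;
}

Because the flags ride along in the same byte as the state id, the hot loop never has to consult auxiliary structures until one of the flag bits actually fires.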
61
src/nfa/sheng.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENG_H_
#define SHENG_H_

#include "callback.h"
#include "ue2common.h"

struct mq;
struct NFA;

#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL

char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q);
char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, u8 key);
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context);
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q);

char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);

#endif /* SHENG_H_ */
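For orientation, the block-mode entry point declared above takes a plain buffer and a callback; a usage sketch follows. The four-argument callback shape is inferred from the cb(0, loc, r, ctxt) calls in sheng.c and the MO_CONTINUE_MATCHING/MO_HALT_MATCHING return convention used there; the onMatch/scan_block names are made up, and obtaining a compiled struct NFA (with n->type == SHENG_NFA_0) from the build path is not shown.

#include <stdio.h>
#include "sheng.h"     // the declarations above
#include "ue2common.h" // u8, u64a, ReportID, MO_* constants (assumed location)

// Hypothetical match callback, shaped after the calls made in sheng.c.
static int onMatch(u64a from, u64a to, ReportID id, void *context) {
    (void)from;
    (void)context;
    printf("report %u at offset %llu\n", id, (unsigned long long)to);
    return MO_CONTINUE_MATCHING; // MO_HALT_MATCHING would stop the scan early
}

// Hypothetical wrapper: scan one self-contained block with an already-built
// Sheng engine 'n'.
static char scan_block(const struct NFA *n, const u8 *data, size_t len) {
    return nfaExecSheng0_B(n, 0 /* stream offset */, data, len, onMatch, NULL);
}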
353
src/nfa/sheng_defs.h
Normal file
@ -0,0 +1,353 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENG_DEFS_H
|
||||
#define SHENG_DEFS_H
|
||||
|
||||
/*
|
||||
* Utility functions used by various versions of Sheng engine
|
||||
*/
|
||||
static really_inline
|
||||
u8 isDeadState(const u8 a) {
|
||||
return a & SHENG_STATE_DEAD;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 isAcceptState(const u8 a) {
|
||||
return a & SHENG_STATE_ACCEPT;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 isAccelState(const u8 a) {
|
||||
return a & SHENG_STATE_ACCEL;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
|
||||
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
|
||||
}
|
||||
|
||||
/* these functions should be optimized out, used by NO_MATCHES mode */
|
||||
static really_inline
|
||||
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
|
||||
UNUSED const u8 d) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 dummyFunc(UNUSED const u8 a) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sheng function definitions for single byte loops
|
||||
*/
|
||||
/* callback output, can die */
|
||||
#define SHENG_IMPL sheng_cod
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* callback output, can't die */
|
||||
#define SHENG_IMPL sheng_co
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can die */
|
||||
#define SHENG_IMPL sheng_samd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can't die */
|
||||
#define SHENG_IMPL sheng_sam
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* no match, can die */
|
||||
#define SHENG_IMPL sheng_nmd
|
||||
#define DEAD_FUNC isDeadState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* no match, can't die */
|
||||
#define SHENG_IMPL sheng_nm
|
||||
#define DEAD_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef DEAD_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/*
|
||||
* Sheng function definitions for 4-byte loops
|
||||
*/
|
||||
/* callback output, can die, accelerated */
|
||||
#define SHENG_IMPL sheng4_coda
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC isDeadState
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* callback output, can die, not accelerated */
|
||||
#define SHENG_IMPL sheng4_cod
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC isDeadState
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* callback output, can't die, accelerated */
|
||||
#define SHENG_IMPL sheng4_coa
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* callback output, can't die, not accelerated */
|
||||
#define SHENG_IMPL sheng4_co
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can die, accelerated */
|
||||
#define SHENG_IMPL sheng4_samda
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC isDeadState
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can die, not accelerated */
|
||||
#define SHENG_IMPL sheng4_samd
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC isDeadState
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can't die, accelerated */
|
||||
#define SHENG_IMPL sheng4_sama
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC isAccelState
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* stop at match, can't die, not accelerated */
|
||||
#define SHENG_IMPL sheng4_sam
|
||||
#define INTERESTING_FUNC hasInterestingStates
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC isAcceptState
|
||||
#define STOP_AT_MATCH 1
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* no-match have interesting func as dummy, and die/accel checks are outer */
|
||||
|
||||
/* no match, can die, accelerated */
|
||||
#define SHENG_IMPL sheng4_nmda
|
||||
#define INTERESTING_FUNC dummyFunc4
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC isDeadState
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC isAccelState
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* no match, can die, not accelerated */
|
||||
#define SHENG_IMPL sheng4_nmd
|
||||
#define INTERESTING_FUNC dummyFunc4
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC isDeadState
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
/* there is no performance benefit in accelerating a no-match case that can't
|
||||
* die */
|
||||
|
||||
/* no match, can't die */
|
||||
#define SHENG_IMPL sheng4_nm
|
||||
#define INTERESTING_FUNC dummyFunc4
|
||||
#define INNER_DEAD_FUNC dummyFunc
|
||||
#define OUTER_DEAD_FUNC dummyFunc
|
||||
#define INNER_ACCEL_FUNC dummyFunc
|
||||
#define OUTER_ACCEL_FUNC dummyFunc
|
||||
#define ACCEPT_FUNC dummyFunc
|
||||
#define STOP_AT_MATCH 0
|
||||
#include "sheng_impl4.h"
|
||||
#undef SHENG_IMPL
|
||||
#undef INTERESTING_FUNC
|
||||
#undef INNER_DEAD_FUNC
|
||||
#undef OUTER_DEAD_FUNC
|
||||
#undef INNER_ACCEL_FUNC
|
||||
#undef OUTER_ACCEL_FUNC
|
||||
#undef ACCEPT_FUNC
|
||||
#undef STOP_AT_MATCH
|
||||
|
||||
#endif // SHENG_DEFS_H
|
97
src/nfa/sheng_impl.h
Normal file
97
src/nfa/sheng_impl.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* In order to use this macro, the following things need to be defined:
|
||||
*
|
||||
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||
* - DEAD_FUNC (name of the function checking for dead states)
|
||||
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||
*/
|
||||
|
||||
/* byte-by-byte version. we don't do byte-by-byte death checking as it's
|
||||
* pretty pointless to do it over a buffer that's at most 3 bytes long */
|
||||
static really_inline
|
||||
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||
const u8 *end, const u8 **scan_end) {
|
||||
DEBUG_PRINTF("Starting DFA execution in state %u\n",
|
||||
*state & SHENG_STATE_MASK);
|
||||
const u8 *cur_buf = start;
|
||||
if (DEAD_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Dead on arrival\n");
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(cur_buf != end)) {
|
||||
const u8 c = *cur_buf;
|
||||
const m128 shuffle_mask = masks[c];
|
||||
cur_state = pshufb(shuffle_mask, cur_state);
|
||||
const u8 tmp = movd(cur_state);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4,
|
||||
tmp & 0xF);
|
||||
|
||||
if (unlikely(ACCEPT_FUNC(tmp))) {
|
||||
DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK);
|
||||
u64a match_offset = base_offset + (cur_buf - buf) + 1;
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(u64a)(cur_buf - start));
|
||||
*state = tmp;
|
||||
*scan_end = cur_buf;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, tmp, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
cur_buf++;
|
||||
}
|
||||
*state = movd(cur_state);
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
284
src/nfa/sheng_impl4.h
Normal file
284
src/nfa/sheng_impl4.h
Normal file
@ -0,0 +1,284 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* In order to use this macro, the following things need to be defined:
|
||||
*
|
||||
* - SHENG_IMPL (name of the Sheng implementation function)
|
||||
* - INTERESTING_FUNC (name of the function checking for accept, accel or dead
|
||||
* states)
|
||||
* - INNER_DEAD_FUNC (name of the inner function checking for dead states)
|
||||
* - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
|
||||
* - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
|
||||
* - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
|
||||
* - ACCEPT_FUNC (name of the function checking for accept state)
|
||||
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
|
||||
*/
|
||||
|
||||
/* unrolled 4-byte-at-a-time version.
|
||||
*
|
||||
* we put innerDeadFunc inside interestingFunc() block so that we don't pay for
|
||||
* dead states checking. however, if interestingFunc is dummy, innerDeadFunc
|
||||
* gets lost with it, so we need an additional check outside the
|
||||
* interestingFunc() branch - it's normally dummy so we don't pay for it, but
|
||||
* when interestingFunc is dummy, outerDeadFunc should be set if we want to
|
||||
* check for dead states.
|
||||
*
|
||||
* also, deadFunc only checks the last known state, but since we can't ever get
|
||||
* out of the dead state and we don't really care where we died, it's not a
|
||||
* problem.
|
||||
*/
|
||||
static really_inline
|
||||
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
|
||||
u8 *const cached_accept_state, ReportID *const cached_accept_id,
|
||||
u8 single, u64a base_offset, const u8 *buf, const u8 *start,
|
||||
const u8 *end, const u8 **scan_end) {
|
||||
DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
|
||||
*state & SHENG_STATE_MASK);
|
||||
const u8 *cur_buf = start;
|
||||
const u8 *min_accel_dist = start;
|
||||
base_offset++;
|
||||
DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
|
||||
|
||||
if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Accel state reached @ 0\n");
|
||||
const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf, end);
|
||||
if (new_offset < cur_buf + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
|
||||
}
|
||||
if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
|
||||
DEBUG_PRINTF("Dead on arrival\n");
|
||||
*scan_end = end;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
m128 cur_state = set16x8(*state);
|
||||
const m128 *masks = s->shuffle_masks;
|
||||
|
||||
while (likely(end - cur_buf >= 4)) {
|
||||
const u8 *b1 = cur_buf;
|
||||
const u8 *b2 = cur_buf + 1;
|
||||
const u8 *b3 = cur_buf + 2;
|
||||
const u8 *b4 = cur_buf + 3;
|
||||
const u8 c1 = *b1;
|
||||
const u8 c2 = *b2;
|
||||
const u8 c3 = *b3;
|
||||
const u8 c4 = *b4;
|
||||
|
||||
const m128 shuffle_mask1 = masks[c1];
|
||||
cur_state = pshufb(shuffle_mask1, cur_state);
|
||||
const u8 a1 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask2 = masks[c2];
|
||||
cur_state = pshufb(shuffle_mask2, cur_state);
|
||||
const u8 a2 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask3 = masks[c3];
|
||||
cur_state = pshufb(shuffle_mask3, cur_state);
|
||||
const u8 a3 = movd(cur_state);
|
||||
|
||||
const m128 shuffle_mask4 = masks[c4];
|
||||
cur_state = pshufb(shuffle_mask4, cur_state);
|
||||
const u8 a4 = movd(cur_state);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
|
||||
|
||||
DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
|
||||
DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
|
||||
|
||||
if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
|
||||
if (ACCEPT_FUNC(a1)) {
|
||||
u64a match_offset = base_offset + b1 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a1 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b1 - start));
|
||||
*scan_end = b1;
|
||||
*state = a1;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a1, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a2)) {
|
||||
u64a match_offset = base_offset + b2 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a2 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b2 - start));
|
||||
*scan_end = b2;
|
||||
*state = a2;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a2, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a3)) {
|
||||
u64a match_offset = base_offset + b3 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a3 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b3 - start));
|
||||
*scan_end = b3;
|
||||
*state = a3;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a3, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ACCEPT_FUNC(a4)) {
|
||||
u64a match_offset = base_offset + b4 - buf;
|
||||
DEBUG_PRINTF("Accept state %u reached\n",
|
||||
a4 & SHENG_STATE_MASK);
|
||||
DEBUG_PRINTF("Match @ %llu\n", match_offset);
|
||||
if (STOP_AT_MATCH) {
|
||||
DEBUG_PRINTF("Stopping at match @ %lli\n",
|
||||
(s64a)(b4 - start));
|
||||
*scan_end = b4;
|
||||
*state = a4;
|
||||
return MO_MATCHES_PENDING;
|
||||
}
|
||||
if (single) {
|
||||
if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
} else {
|
||||
if (fireReports(s, cb, ctxt, a4, match_offset,
|
||||
cached_accept_state, cached_accept_id,
|
||||
0) == MO_HALT_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (INNER_DEAD_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
*scan_end = end;
|
||||
*state = a4;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
const union AccelAux *aaux =
|
||||
get_accel(s, a4 & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||
(u64a)(new_offset - cur_buf - 4));
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (OUTER_DEAD_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
|
||||
*scan_end = end;
|
||||
*state = a4;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
};
|
||||
if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
|
||||
DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
|
||||
const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
|
||||
const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
|
||||
if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
|
||||
min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
|
||||
} else {
|
||||
min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
|
||||
}
|
||||
DEBUG_PRINTF("Next accel chance: %llu\n",
|
||||
(u64a)(min_accel_dist - start));
|
||||
DEBUG_PRINTF("Accel scanned %llu bytes\n",
|
||||
(u64a)(new_offset - cur_buf - 4));
|
||||
cur_buf = new_offset;
|
||||
DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
|
||||
continue;
|
||||
};
|
||||
cur_buf += 4;
|
||||
}
|
||||
*state = movd(cur_state);
|
||||
*scan_end = cur_buf;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -26,44 +26,45 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief LimEx NFA: 512-bit SIMD runtime implementations.
|
||||
*/
|
||||
#ifndef SHENG_INTERNAL_H_
|
||||
#define SHENG_INTERNAL_H_
|
||||
|
||||
//#define DEBUG_INPUT
|
||||
//#define DEBUG_EXCEPTIONS
|
||||
|
||||
#include "limex.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "limex_internal.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
// Common code
|
||||
#include "limex_runtime.h"
|
||||
#define SHENG_STATE_ACCEPT 0x10
|
||||
#define SHENG_STATE_DEAD 0x20
|
||||
#define SHENG_STATE_ACCEL 0x40
|
||||
#define SHENG_STATE_MASK 0xF
|
||||
#define SHENG_STATE_FLAG_MASK 0x70
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#include "limex_exceptional.h"
|
||||
#define SHENG_FLAG_SINGLE_REPORT 0x1
|
||||
#define SHENG_FLAG_CAN_DIE 0x2
|
||||
#define SHENG_FLAG_HAS_ACCEL 0x4
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#include "limex_state_impl.h"
|
||||
struct report_list {
|
||||
u32 count;
|
||||
ReportID report[];
|
||||
};
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#define INLINE_ATTR really_inline
|
||||
#include "limex_common_impl.h"
|
||||
struct sstate_aux {
|
||||
u32 accept;
|
||||
u32 accept_eod;
|
||||
u32 accel;
|
||||
u32 top;
|
||||
};
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#define SHIFT 6
|
||||
#include "limex_runtime_impl.h"
|
||||
struct sheng {
|
||||
m128 shuffle_masks[256];
|
||||
u32 length;
|
||||
u32 aux_offset;
|
||||
u32 report_offset;
|
||||
u32 accel_offset;
|
||||
u8 n_states;
|
||||
u8 anchored;
|
||||
u8 floating;
|
||||
u8 flags;
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
#define SIZE 512
|
||||
#define STATE_T m512
|
||||
#define SHIFT 7
|
||||
#include "limex_runtime_impl.h"
|
||||
#endif /* SHENG_INTERNAL_H_ */
|
541
src/nfa/shengcompile.cpp
Normal file
541
src/nfa/shengcompile.cpp
Normal file
@ -0,0 +1,541 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "shengcompile.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "accelcompile.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/container.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::adaptors::map_keys;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
|
||||
|
||||
/** Maximum tolerated number of escape character from an accel state.
|
||||
* This is larger than nfa, as we don't have a budget and the nfa cheats on stop
|
||||
* characters for sets of states */
|
||||
#define ACCEL_DFA_MAX_STOP_CHAR 160
|
||||
|
||||
/** Maximum tolerated number of escape character from a sds accel state. Larger
|
||||
* than normal states as accelerating sds is important. Matches NFA value */
|
||||
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
|
||||
|
||||
struct dfa_info {
|
||||
accel_dfa_build_strat &strat;
|
||||
raw_dfa &raw;
|
||||
vector<dstate> &states;
|
||||
dstate &floating;
|
||||
dstate &anchored;
|
||||
bool can_die;
|
||||
|
||||
explicit dfa_info(accel_dfa_build_strat &s)
|
||||
: strat(s), raw(strat.get_raw()), states(raw.states),
|
||||
floating(states[raw.start_floating]),
|
||||
anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
|
||||
|
||||
// returns adjusted size
|
||||
size_t size() const {
|
||||
return can_die ? states.size() : states.size() - 1;
|
||||
}
|
||||
// expects adjusted index
|
||||
dstate &operator[](dstate_id_t idx) {
|
||||
return states[raw_id(idx)];
|
||||
}
|
||||
dstate &top(dstate_id_t idx) {
|
||||
if (isDead(idx)) {
|
||||
return floating;
|
||||
}
|
||||
return next(idx, TOP);
|
||||
}
|
||||
dstate &next(dstate_id_t idx, u16 chr) {
|
||||
auto &src = (*this)[idx];
|
||||
auto next_id = src.next[raw.alpha_remap[chr]];
|
||||
return states[next_id];
|
||||
}
|
||||
// get original idx from adjusted idx
|
||||
dstate_id_t raw_id(dstate_id_t idx) {
|
||||
assert(idx < size());
|
||||
// if DFA can't die, shift all indices left by 1
|
||||
return can_die ? idx : idx + 1;
|
||||
}
|
||||
bool isDead(dstate &state) {
|
||||
return raw_id(state.impl_id) == DEAD_STATE;
|
||||
}
|
||||
bool isDead(dstate_id_t idx) {
|
||||
return raw_id(idx) == DEAD_STATE;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool dfaCanDie(raw_dfa &rdfa) {
|
||||
for (unsigned chr = 0; chr < 256; chr++) {
|
||||
for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
|
||||
auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
|
||||
if (succ == DEAD_STATE) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
struct raw_report_list {
|
||||
flat_set<ReportID> reports;
|
||||
|
||||
raw_report_list(const flat_set<ReportID> &reports_in,
|
||||
const ReportManager &rm, bool do_remap) {
|
||||
if (do_remap) {
|
||||
for (auto &id : reports_in) {
|
||||
reports.insert(rm.getProgramOffset(id));
|
||||
}
|
||||
} else {
|
||||
reports = reports_in;
|
||||
}
|
||||
}
|
||||
|
||||
bool operator<(const raw_report_list &b) const {
|
||||
return reports < b.reports;
|
||||
}
|
||||
};
|
||||
|
||||
struct raw_report_info_impl : public raw_report_info {
|
||||
vector<raw_report_list> rl;
|
||||
u32 getReportListSize() const override;
|
||||
size_t size() const override;
|
||||
void fillReportLists(NFA *n, size_t base_offset,
|
||||
std::vector<u32> &ro /* out */) const override;
|
||||
};
|
||||
}
|
||||
|
||||
u32 raw_report_info_impl::getReportListSize() const {
|
||||
u32 rv = 0;
|
||||
|
||||
for (const auto &reps : rl) {
|
||||
rv += sizeof(report_list);
|
||||
rv += sizeof(ReportID) * reps.reports.size();
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
size_t raw_report_info_impl::size() const {
|
||||
return rl.size();
|
||||
}
|
||||
|
||||
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
|
||||
vector<u32> &ro) const {
|
||||
for (const auto &reps : rl) {
|
||||
ro.push_back(base_offset);
|
||||
|
||||
report_list *p = (report_list *)((char *)n + base_offset);
|
||||
|
||||
u32 i = 0;
|
||||
for (const ReportID report : reps.reports) {
|
||||
p->report[i++] = report;
|
||||
}
|
||||
p->count = verify_u32(reps.reports.size());
|
||||
|
||||
base_offset += sizeof(report_list);
|
||||
base_offset += sizeof(ReportID) * reps.reports.size();
|
||||
}
|
||||
}
|
||||
|
||||
unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
|
||||
vector<u32> &reports,
|
||||
vector<u32> &reports_eod,
|
||||
u8 *isSingleReport,
|
||||
ReportID *arbReport) const {
|
||||
DEBUG_PRINTF("gathering reports\n");
|
||||
|
||||
const bool remap_reports = has_managed_reports(rdfa.kind);
|
||||
|
||||
auto ri = ue2::make_unique<raw_report_info_impl>();
|
||||
map<raw_report_list, u32> rev;
|
||||
|
||||
for (const dstate &s : rdfa.states) {
|
||||
if (s.reports.empty()) {
|
||||
reports.push_back(MO_INVALID_IDX);
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_report_list rrl(s.reports, rm, remap_reports);
|
||||
DEBUG_PRINTF("non empty r\n");
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports.push_back(rev[rrl]);
|
||||
} else {
|
||||
DEBUG_PRINTF("adding to rl %zu\n", ri->size());
|
||||
rev[rrl] = ri->size();
|
||||
reports.push_back(ri->size());
|
||||
ri->rl.push_back(rrl);
|
||||
}
|
||||
}
|
||||
|
||||
for (const dstate &s : rdfa.states) {
|
||||
if (s.reports_eod.empty()) {
|
||||
reports_eod.push_back(MO_INVALID_IDX);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("non empty r eod\n");
|
||||
raw_report_list rrl(s.reports_eod, rm, remap_reports);
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports_eod.push_back(rev[rrl]);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
|
||||
rev[rrl] = ri->size();
|
||||
reports_eod.push_back(ri->size());
|
||||
ri->rl.push_back(rrl);
|
||||
}
|
||||
|
||||
assert(!ri->rl.empty()); /* all components should be able to generate
|
||||
reports */
|
||||
if (!ri->rl.empty()) {
|
||||
*arbReport = *ri->rl.begin()->reports.begin();
|
||||
} else {
|
||||
*arbReport = 0;
|
||||
}
|
||||
|
||||
/* if we have only a single report id generated from all accepts (not eod)
|
||||
* we can take some short cuts */
|
||||
set<ReportID> reps;
|
||||
|
||||
for (u32 rl_index : reports) {
|
||||
if (rl_index == MO_INVALID_IDX) {
|
||||
continue;
|
||||
}
|
||||
assert(rl_index < ri->size());
|
||||
insert(&reps, ri->rl[rl_index].reports);
|
||||
}
|
||||
|
||||
if (reps.size() == 1) {
|
||||
*isSingleReport = 1;
|
||||
*arbReport = *reps.begin();
|
||||
DEBUG_PRINTF("single -- %u\n", *arbReport);
|
||||
} else {
|
||||
*isSingleReport = 0;
|
||||
}
|
||||
|
||||
return move(ri);
|
||||
}
|
||||
|
||||
u32 sheng_build_strat::max_allowed_offset_accel() const {
|
||||
return ACCEL_DFA_MAX_OFFSET_DEPTH;
|
||||
}
|
||||
|
||||
u32 sheng_build_strat::max_stop_char() const {
|
||||
return ACCEL_DFA_MAX_STOP_CHAR;
|
||||
}
|
||||
|
||||
u32 sheng_build_strat::max_floating_stop_char() const {
|
||||
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
|
||||
}
|
||||
|
||||
size_t sheng_build_strat::accelSize() const {
|
||||
return sizeof(AccelAux);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static really_inline
|
||||
void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
|
||||
stringstream o;
|
||||
|
||||
for (unsigned i = 0; i < sz; i++) {
|
||||
o.width(2);
|
||||
o << (buf[i] & SHENG_STATE_MASK) << " ";
|
||||
}
|
||||
DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
for (dstate_id_t i : accel_escape_info | map_keys) {
|
||||
accel_states->insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
u8 getShengState(dstate &state, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
u8 s = state.impl_id;
|
||||
if (!state.reports.empty()) {
|
||||
s |= SHENG_STATE_ACCEPT;
|
||||
}
|
||||
if (info.isDead(state)) {
|
||||
s |= SHENG_STATE_DEAD;
|
||||
}
|
||||
if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
|
||||
s |= SHENG_STATE_ACCEL;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static
|
||||
void fillAccelAux(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
DEBUG_PRINTF("Filling accel aux structures\n");
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 offset = s->accel_offset;
|
||||
|
||||
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||
dstate_id_t state_id = info.raw_id(i);
|
||||
if (accelInfo.find(state_id) != accelInfo.end()) {
|
||||
s->flags |= SHENG_FLAG_HAS_ACCEL;
|
||||
AccelAux *aux = (AccelAux *)((char *)n + offset);
|
||||
info.strat.buildAccel(state_id, accelInfo[state_id], aux);
|
||||
sstate_aux *saux =
|
||||
(sstate_aux *)((char *)n + s->aux_offset) + state_id;
|
||||
saux->accel = offset;
|
||||
DEBUG_PRINTF("Accel offset: %u\n", offset);
|
||||
offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void populateBasicInfo(struct NFA *n, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
|
||||
u32 report_offset, u32 accel_offset, u32 total_size,
|
||||
u32 dfa_size) {
|
||||
n->length = total_size;
|
||||
n->scratchStateSize = 1;
|
||||
n->streamStateSize = 1;
|
||||
n->nPositions = info.size();
|
||||
n->type = SHENG_NFA_0;
|
||||
n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
|
||||
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
s->aux_offset = aux_offset;
|
||||
s->report_offset = report_offset;
|
||||
s->accel_offset = accel_offset;
|
||||
s->n_states = info.size();
|
||||
s->length = dfa_size;
|
||||
s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
|
||||
|
||||
s->anchored = getShengState(info.anchored, info, accelInfo);
|
||||
s->floating = getShengState(info.floating, info, accelInfo);
|
||||
}
|
||||
|
||||
static
|
||||
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 aux_base = s->aux_offset;
|
||||
|
||||
DEBUG_PRINTF("Filling tops for state %u\n", id);
|
||||
|
||||
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||
|
||||
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||
(char *)aux - (char *)n);
|
||||
|
||||
/* we could conceivably end up in an accept/dead state on a top event,
|
||||
* so mark top as accept/dead state if it indeed is.
|
||||
*/
|
||||
auto &top_state = info.top(id);
|
||||
|
||||
DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
|
||||
|
||||
aux->top = getShengState(top_state, info, accelInfo);
|
||||
}
|
||||
|
||||
static
|
||||
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
|
||||
vector<u32> &reports_eod, vector<u32> &report_offsets) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
u32 aux_base = s->aux_offset;
|
||||
auto raw_id = info.raw_id(id);
|
||||
|
||||
auto &state = info[id];
|
||||
|
||||
sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
|
||||
|
||||
DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
|
||||
DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
|
||||
(char *)aux - (char *)n);
|
||||
|
||||
aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]];
|
||||
aux->accept_eod =
|
||||
state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]];
|
||||
|
||||
DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
|
||||
DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
|
||||
}
|
||||
|
||||
static
|
||||
void fillSingleReport(NFA *n, ReportID r_id) {
|
||||
sheng *s = (sheng *)getMutableImplNfa(n);
|
||||
|
||||
DEBUG_PRINTF("Single report ID: %u\n", r_id);
|
||||
s->report = r_id;
|
||||
s->flags |= SHENG_FLAG_SINGLE_REPORT;
|
||||
}
|
||||
|
||||
static
|
||||
void createShuffleMasks(sheng *s, dfa_info &info,
|
||||
map<dstate_id_t, AccelScheme> &accelInfo) {
|
||||
for (u16 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16] = {0};
|
||||
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
auto &succ_state = info.next(idx, chr);
|
||||
|
||||
buf[idx] = getShengState(succ_state, info, accelInfo);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
dumpShuffleMask(chr, buf, sizeof(buf));
|
||||
#endif
|
||||
m128 mask = loadu128(buf);
|
||||
s->shuffle_masks[chr] = mask;
|
||||
}
|
||||
}
|
||||
|
||||
bool has_accel_sheng(const NFA *nfa) {
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
return s->flags & SHENG_FLAG_HAS_ACCEL;
|
||||
}
|
||||
|
||||
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
|
||||
const CompileContext &cc,
|
||||
const ReportManager &rm,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
if (!cc.grey.allowSheng) {
|
||||
DEBUG_PRINTF("Sheng is not allowed!\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
sheng_build_strat strat(raw, rm);
|
||||
dfa_info info(strat);
|
||||
|
||||
DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
|
||||
|
||||
DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
|
||||
raw.start_anchored, raw.start_floating);
|
||||
|
||||
DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
|
||||
info.can_die ? "can" : "cannot", info.size());
|
||||
if (info.size() > 16) {
|
||||
DEBUG_PRINTF("Too many states\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
|
||||
* mode with our semantics */
|
||||
raw.stripExtraEodReports();
|
||||
}
|
||||
auto accelInfo = strat.getAccelInfo(cc.grey);
|
||||
|
||||
// set impl_id of each dfa state
|
||||
for (dstate_id_t i = 0; i < info.size(); i++) {
|
||||
info[i].impl_id = i;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
|
||||
info.anchored.impl_id, info.floating.impl_id);
|
||||
|
||||
u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
|
||||
vector<u32> reports, eod_reports, report_offsets;
|
||||
u8 isSingle = 0;
|
||||
ReportID single_report = 0;
|
||||
|
||||
auto ri =
|
||||
strat.gatherReports(reports, eod_reports, &isSingle, &single_report);
|
||||
|
||||
u32 total_aux = sizeof(sstate_aux) * info.size();
|
||||
u32 total_accel = strat.accelSize() * accelInfo.size();
|
||||
u32 total_reports = ri->getReportListSize();
|
||||
|
||||
u32 reports_offset = nfa_size + total_aux;
|
||||
u32 accel_offset =
|
||||
ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
|
||||
u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);
|
||||
|
||||
DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
|
||||
nfa_size, total_aux, total_reports, total_accel, total_size);
|
||||
|
||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);
|
||||
|
||||
populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset,
|
||||
accel_offset, total_size, total_size - sizeof(NFA));
|
||||
|
||||
DEBUG_PRINTF("Setting up aux and report structures\n");
|
||||
|
||||
ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
|
||||
|
||||
for (dstate_id_t idx = 0; idx < info.size(); idx++) {
|
||||
fillTops(nfa.get(), info, idx, accelInfo);
|
||||
fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets);
|
||||
}
|
||||
if (isSingle) {
|
||||
fillSingleReport(nfa.get(), single_report);
|
||||
}
|
||||
|
||||
fillAccelAux(nfa.get(), info, accelInfo);
|
||||
|
||||
if (accel_states) {
|
||||
fillAccelOut(accelInfo, accel_states);
|
||||
}
|
||||
|
||||
createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo);
|
||||
|
||||
return nfa;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
80
src/nfa/shengcompile.h
Normal file
80
src/nfa/shengcompile.h
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SHENGCOMPILE_H_
|
||||
#define SHENGCOMPILE_H_
|
||||
|
||||
#include "accel_dfa_build_strat.h"
|
||||
#include "rdfa.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ReportManager;
|
||||
struct CompileContext;
|
||||
struct raw_dfa;
|
||||
|
||||
class sheng_build_strat : public accel_dfa_build_strat {
|
||||
public:
|
||||
sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
|
||||
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
|
||||
raw_dfa &get_raw() const override { return rdfa; }
|
||||
std::unique_ptr<raw_report_info> gatherReports(
|
||||
std::vector<u32> &reports /* out */,
|
||||
std::vector<u32> &reports_eod /* out */,
|
||||
u8 *isSingleReport /* out */,
|
||||
ReportID *arbReport /* out */) const override;
|
||||
size_t accelSize(void) const override;
|
||||
u32 max_allowed_offset_accel() const override;
|
||||
u32 max_stop_char() const override;
|
||||
u32 max_floating_stop_char() const override;
|
||||
|
||||
private:
|
||||
raw_dfa &rdfa;
|
||||
};
|
||||
|
||||
aligned_unique_ptr<NFA>
|
||||
shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm,
|
||||
std::set<dstate_id_t> *accel_states = nullptr);
|
||||
|
||||
struct sheng_escape_info {
|
||||
CharReach outs;
|
||||
CharReach outs2_single;
|
||||
flat_set<std::pair<u8, u8>> outs2;
|
||||
bool outs2_broken = false;
|
||||
};
|
||||
|
||||
bool has_accel_sheng(const NFA *nfa);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* SHENGCOMPILE_H_ */
|
265
src/nfa/shengdump.cpp
Normal file
265
src/nfa/shengdump.cpp
Normal file
@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "shengdump.h"
|
||||
|
||||
#include "accel_dump.h"
|
||||
#include "nfa_dump_internal.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "sheng_internal.h"
|
||||
#include "rdfa.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
|
||||
assert(n && isShengType(n->type));
|
||||
|
||||
const sheng *s = (const sheng *)getImplNfa(n);
|
||||
const sstate_aux *aux_base =
|
||||
(const sstate_aux *)((const char *)n + s->aux_offset);
|
||||
|
||||
const sstate_aux *aux = aux_base + i;
|
||||
|
||||
assert((const char *)aux < (const char *)s + s->length);
|
||||
|
||||
return aux;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpHeader(FILE *f, const sheng *s) {
|
||||
fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
|
||||
s->length);
|
||||
fprintf(f, "aux base offset: %u, reports base offset: %u, "
|
||||
"accel offset: %u\n",
|
||||
s->aux_offset, s->report_offset, s->accel_offset);
|
||||
fprintf(f, "anchored start state: %u, floating start state: %u\n",
|
||||
s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK);
|
||||
fprintf(f, "has accel: %u can die: %u single report: %u\n",
|
||||
!!(s->flags & SHENG_FLAG_HAS_ACCEL),
|
||||
!!(s->flags & SHENG_FLAG_CAN_DIE),
|
||||
!!(s->flags & SHENG_FLAG_SINGLE_REPORT));
|
||||
}
|
||||
|
||||
static
|
||||
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
|
||||
fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
|
||||
"accel offset: %u, top: %u\n",
|
||||
state, aux->accept, aux->accept_eod, aux->accel,
|
||||
aux->top & SHENG_STATE_MASK);
|
||||
}
|
||||
|
||||
static
|
||||
void dumpReports(FILE *f, const report_list *rl) {
|
||||
fprintf(f, "reports count: %u\n", rl->count);
|
||||
for (u32 i = 0; i < rl->count; i++) {
|
||||
fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpMasks(FILE *f, const sheng *s) {
|
||||
for (u32 chr = 0; chr < 256; chr++) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[chr];
|
||||
store128(buf, shuffle_mask);
|
||||
|
||||
fprintf(f, "%3u: ", chr);
|
||||
for (u32 pos = 0; pos < 16; pos++) {
|
||||
u8 c = buf[pos];
|
||||
if (c & SHENG_STATE_FLAG_MASK) {
|
||||
fprintf(f, "%2u* ", c & SHENG_STATE_MASK);
|
||||
} else {
|
||||
fprintf(f, "%2u ", c & SHENG_STATE_MASK);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) {
|
||||
assert(nfa->type == SHENG_NFA_0);
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
|
||||
fprintf(f, "sheng DFA\n");
|
||||
dumpHeader(f, s);
|
||||
|
||||
for (u32 state = 0; state < s->n_states; state++) {
|
||||
const sstate_aux *aux = get_aux(nfa, state);
|
||||
dumpAux(f, state, aux);
|
||||
if (aux->accept) {
|
||||
fprintf(f, "report list:\n");
|
||||
const report_list *rl =
|
||||
(const report_list *)((const char *)nfa + aux->accept);
|
||||
dumpReports(f, rl);
|
||||
}
|
||||
if (aux->accept_eod) {
|
||||
fprintf(f, "EOD report list:\n");
|
||||
const report_list *rl =
|
||||
(const report_list *)((const char *)nfa + aux->accept_eod);
|
||||
dumpReports(f, rl);
|
||||
}
|
||||
if (aux->accel) {
|
||||
fprintf(f, "accel:\n");
|
||||
const AccelAux *accel =
|
||||
(const AccelAux *)((const char *)nfa + aux->accel);
|
||||
dumpAccelInfo(f, *accel);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
|
||||
dumpMasks(f, s);
|
||||
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static
|
||||
void dumpDotPreambleDfa(FILE *f) {
|
||||
dumpDotPreamble(f);
|
||||
|
||||
// DFA specific additions.
|
||||
fprintf(f, "STARTF [style=invis];\n");
|
||||
fprintf(f, "STARTA [style=invis];\n");
|
||||
fprintf(f, "0 [style=invis];\n");
|
||||
}
|
||||
|
||||
static
|
||||
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
|
||||
const sstate_aux *aux = get_aux(n, i);
|
||||
|
||||
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||
"label = \"%u\" ]; \n",
|
||||
i, i);
|
||||
|
||||
if (aux->accept_eod) {
|
||||
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->accept) {
|
||||
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->top && (aux->top & SHENG_STATE_MASK) != i) {
|
||||
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||
aux->top & SHENG_STATE_MASK);
|
||||
}
|
||||
|
||||
if (i == (s->anchored & SHENG_STATE_MASK)) {
|
||||
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||
}
|
||||
|
||||
if (i == (s->floating & SHENG_STATE_MASK)) {
|
||||
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void describeEdge(FILE *f, const u16 *t, u16 i) {
|
||||
for (u16 s = 0; s < N_CHARS; s++) {
|
||||
if (!t[s]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
u16 ss;
|
||||
for (ss = 0; ss < s; ss++) {
|
||||
if (t[s] == t[ss]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ss != s) {
|
||||
continue;
|
||||
}
|
||||
|
||||
CharReach reach;
|
||||
for (ss = s; ss < 256; ss++) {
|
||||
if (t[s] == t[ss]) {
|
||||
reach.set(ss);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "%u -> %u [ label = \"", i, t[s]);
|
||||
|
||||
describeClass(f, reach, 5, CC_OUT_DOT);
|
||||
|
||||
fprintf(f, "\" ];\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
|
||||
assert(isShengType(n->type));
|
||||
const sheng *s = (const sheng *)getImplNfa(n);
|
||||
const sstate_aux *aux = get_aux(n, state);
|
||||
|
||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||
u8 buf[16];
|
||||
m128 shuffle_mask = s->shuffle_masks[i];
|
||||
|
||||
store128(buf, shuffle_mask);
|
||||
|
||||
t[i] = buf[state] & SHENG_STATE_MASK;
|
||||
}
|
||||
|
||||
t[TOP] = aux->top & SHENG_STATE_MASK;
|
||||
}
|
||||
|
||||
void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) {
|
||||
assert(nfa->type == SHENG_NFA_0);
|
||||
const sheng *s = (const sheng *)getImplNfa(nfa);
|
||||
|
||||
dumpDotPreambleDfa(f);
|
||||
|
||||
for (u16 i = 1; i < s->n_states; i++) {
|
||||
describeNode(nfa, s, i, f);
|
||||
|
||||
u16 t[ALPHABET_SIZE];
|
||||
|
||||
shengGetTransitions(nfa, i, t);
|
||||
|
||||
describeEdge(f, t, i);
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
@ -26,15 +26,24 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "simd_utils_ssse3.h"
|
||||
#ifndef SHENGDUMP_H_
|
||||
#define SHENGDUMP_H_
|
||||
|
||||
const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
|
||||
};
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file,
|
||||
const std::string &base);
|
||||
void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
#endif /* SHENGDUMP_H_ */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -40,8 +40,6 @@
|
||||
|
||||
#include "shufti_common.h"
|
||||
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/** \brief Naive byte-by-byte implementation. */
|
||||
static really_inline
|
||||
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
|
||||
@ -235,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
|
||||
|
||||
m128 c2_lo = pshufb(mask2_lo, chars_lo);
|
||||
m128 c2_hi = pshufb(mask2_hi, chars_hi);
|
||||
m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi)));
|
||||
m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
|
||||
@ -472,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
|
||||
|
||||
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
|
||||
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
|
||||
m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi)));
|
||||
m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -34,7 +34,6 @@
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/*
|
||||
* Common stuff for all versions of shufti (single, multi and multidouble)
|
||||
@ -94,7 +93,7 @@ DUMP_MSK(128)
|
||||
#endif
|
||||
|
||||
#define GET_LO_4(chars) and128(chars, low4bits)
|
||||
#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
|
||||
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
|
||||
|
||||
static really_inline
|
||||
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
|
||||
@ -120,7 +119,7 @@ DUMP_MSK(256)
|
||||
#endif
|
||||
|
||||
#define GET_LO_4(chars) and256(chars, low4bits)
|
||||
#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4)
|
||||
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
|
||||
|
||||
static really_inline
|
||||
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user