Merge branch develop into master

This commit is contained in:
Matthew Barr 2016-08-24 14:29:28 +10:00
commit bf99ad00eb
227 changed files with 15736 additions and 5764 deletions

View File

@ -2,6 +2,36 @@
This is a list of notable changes to Hyperscan, in reverse chronological order.
## [4.3.0] 2016-08-24
- Introduce a new analysis pass ("Violet") used for decomposition of patterns
into literals and smaller engines.
- Introduce a new container engine ("Tamarama") for infix and suffix engines
that can be proven to run exclusively of one another. This reduces stream
state for pattern sets with many such engines.
- Introduce a new shuffle-based DFA engine ("Sheng"). This improves scanning
performance for pattern sets where small engines are generated.
- Improve the analysis used to extract extra mask information from short
literals.
- Reduce compile time spent in equivalence class analysis.
- Build: frame pointers are now only omitted for 32-bit release builds.
- Build: Workaround for C++ issues reported on FreeBSD/libc++ platforms.
(github issue #27)
- Simplify the LimEx NFA with a unified "variable shift" model, which reduces
the number of different NFA code paths to one per model size.
- Allow some anchored prefixes that may squash the literal to which they are
attached to run eagerly. This improves scanning performance for some
patterns.
- Simplify and improve EOD ("end of data") matching, using the interpreter for
all operations.
- Elide unnecessary instructions in the Rose interpreter at compile time.
- Reduce the number of inlined instantiations of the Rose interpreter in order
to reduce instruction cache pressure.
- Small improvements to literal matcher acceleration.
- Parser: ignore `\E` metacharacters that are not preceded by `\Q`. This
conforms to PCRE's behaviour, rather than returning a compile error.
- Check for misaligned memory when allocating an error structure in Hyperscan's
compile path and return an appropriate error if detected.
## [4.2.0] 2016-05-31
- Introduce an interpreter for many complex actions to replace the use of
internal reports within the core of Hyperscan (the "Rose" engine). This

View File

@ -1,12 +1,18 @@
cmake_minimum_required (VERSION 2.8.11)
# don't use the built-in default configs
set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE)
project (Hyperscan C CXX)
set (HS_MAJOR_VERSION 4)
set (HS_MINOR_VERSION 2)
set (HS_MINOR_VERSION 3)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
# since we are doing this manually, we only have three types
set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo"
CACHE STRING "" FORCE)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag)
@ -24,7 +30,7 @@ find_package(PkgConfig QUIET)
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "Default build type 'Release with debug info'")
set(CMAKE_BUILD_TYPE "RELWITHDEBINFO")
set(CMAKE_BUILD_TYPE RELWITHDEBINFO CACHE STRING "" FORCE )
else()
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
message(STATUS "Build type ${CMAKE_BUILD_TYPE}")
@ -90,6 +96,18 @@ else()
message(FATAL_ERROR "No python interpreter found")
endif()
# allow for reproducible builds - python for portability
if (DEFINED ENV{SOURCE_DATE_EPOCH})
execute_process(
COMMAND "${PYTHON}" "${CMAKE_MODULE_PATH}/formatdate.py" "$ENV{SOURCE_DATE_EPOCH}"
OUTPUT_VARIABLE BUILD_DATE
OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
string (TIMESTAMP BUILD_DATE "%Y-%m-%d")
endif ()
message(STATUS "Build date: ${BUILD_DATE}")
if(${RAGEL} STREQUAL "RAGEL-NOTFOUND")
message(FATAL_ERROR "Ragel state machine compiler not found")
endif()
@ -121,13 +139,7 @@ endif()
CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF)
option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE)
if (DISABLE_ASSERTS)
if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
add_definitions(-DNDEBUG)
endif()
endif()
CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)
option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)
@ -139,18 +151,26 @@ if(MSVC OR MSVC_IDE)
if (MSVC_VERSION LESS 1700)
message(FATAL_ERROR "The project requires C++11 features.")
else()
# set base flags
set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3")
set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc")
set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od")
set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi")
if (WINDOWS_ICC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
else()
#TODO: don't hardcode arch
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
endif()
string(REGEX REPLACE "/RTC1" ""
CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" )
string(REGEX REPLACE "/RTC1" ""
CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" )
endif()
@ -172,16 +192,34 @@ else()
unset(_GXX_OUTPUT)
endif()
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor")
if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror")
if(OPTIMISE)
set(OPT_C_FLAG "-O3")
set(OPT_CXX_FLAG "-O2")
else()
set(OPT_C_FLAG "-O0")
set(OPT_CXX_FLAG "-O0")
endif(OPTIMISE)
# set up base flags for build types
set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}")
set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}")
set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}")
set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}")
if (DISABLE_ASSERTS)
# usually true for release builds, false for debug
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG")
endif()
# set compiler flags - more are tested and added later
set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
if (NOT CMAKE_C_FLAGS MATCHES .*march.*)
message(STATUS "Building for current host CPU")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native")
@ -199,15 +237,7 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
endif()
if(OPTIMISE)
set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}")
set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}")
else()
set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}")
set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}")
endif(OPTIMISE)
if(NOT RELEASE_BUILD)
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
@ -297,6 +327,11 @@ if (CXX_UNUSED_CONST_VAR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable")
endif()
# gcc 6 complains about type attributes that get ignored, like alignment
CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR)
if (CXX_IGNORED_ATTR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()
# note this for later
# g++ doesn't have this flag but clang does
@ -438,15 +473,14 @@ set (hs_exec_SRCS
src/nfa/limex_simd128.c
src/nfa/limex_simd256.c
src/nfa/limex_simd384.c
src/nfa/limex_simd512a.c
src/nfa/limex_simd512b.c
src/nfa/limex_simd512c.c
src/nfa/limex_simd512.c
src/nfa/limex.h
src/nfa/limex_common_impl.h
src/nfa/limex_context.h
src/nfa/limex_internal.h
src/nfa/limex_runtime.h
src/nfa/limex_runtime_impl.h
src/nfa/limex_shuffle.h
src/nfa/limex_state_impl.h
src/nfa/mpv.h
src/nfa/mpv.c
@ -477,9 +511,18 @@ set (hs_exec_SRCS
src/nfa/repeat.c
src/nfa/repeat.h
src/nfa/repeat_internal.h
src/nfa/sheng.c
src/nfa/sheng.h
src/nfa/sheng_defs.h
src/nfa/sheng_impl.h
src/nfa/sheng_impl4.h
src/nfa/sheng_internal.h
src/nfa/shufti_common.h
src/nfa/shufti.c
src/nfa/shufti.h
src/nfa/tamarama.c
src/nfa/tamarama.h
src/nfa/tamarama_internal.h
src/nfa/truffle_common.h
src/nfa/truffle.c
src/nfa/truffle.h
@ -495,7 +538,6 @@ set (hs_exec_SRCS
src/rose/block.c
src/rose/catchup.h
src/rose/catchup.c
src/rose/eod.c
src/rose/infix.h
src/rose/init.h
src/rose/init.c
@ -503,6 +545,7 @@ set (hs_exec_SRCS
src/rose/match.h
src/rose/match.c
src/rose/miracle.h
src/rose/program_runtime.c
src/rose/program_runtime.h
src/rose/runtime.h
src/rose/rose.h
@ -510,6 +553,7 @@ set (hs_exec_SRCS
src/rose/rose_program.h
src/rose/rose_types.h
src/rose/rose_common.h
src/rose/validate_mask.h
src/util/bitutils.h
src/util/exhaust.h
src/util/fatbit.h
@ -524,11 +568,8 @@ set (hs_exec_SRCS
src/util/pqueue.h
src/util/scatter.h
src/util/scatter_runtime.h
src/util/shuffle.h
src/util/shuffle_ssse3.h
src/util/simd_utils.h
src/util/simd_utils_ssse3.h
src/util/simd_utils_ssse3.c
src/util/simd_utils.c
src/util/state_compress.h
src/util/state_compress.c
src/util/unaligned.h
@ -597,11 +638,15 @@ SET (hs_SRCS
src/hwlm/noodle_build.h
src/hwlm/noodle_internal.h
src/nfa/accel.h
src/nfa/accel_dfa_build_strat.cpp
src/nfa/accel_dfa_build_strat.h
src/nfa/accelcompile.cpp
src/nfa/accelcompile.h
src/nfa/callback.h
src/nfa/castlecompile.cpp
src/nfa/castlecompile.h
src/nfa/dfa_build_strat.cpp
src/nfa/dfa_build_strat.h
src/nfa/dfa_min.cpp
src/nfa/dfa_min.h
src/nfa/goughcompile.cpp
@ -613,8 +658,6 @@ SET (hs_SRCS
src/nfa/mcclellan_internal.h
src/nfa/mcclellancompile.cpp
src/nfa/mcclellancompile.h
src/nfa/mcclellancompile_accel.cpp
src/nfa/mcclellancompile_accel.h
src/nfa/mcclellancompile_util.cpp
src/nfa/mcclellancompile_util.h
src/nfa/limex_compile.cpp
@ -639,8 +682,13 @@ SET (hs_SRCS
src/nfa/repeat_internal.h
src/nfa/repeatcompile.cpp
src/nfa/repeatcompile.h
src/nfa/sheng_internal.h
src/nfa/shengcompile.cpp
src/nfa/shengcompile.h
src/nfa/shufticompile.cpp
src/nfa/shufticompile.h
src/nfa/tamaramacompile.cpp
src/nfa/tamaramacompile.h
src/nfa/trufflecompile.cpp
src/nfa/trufflecompile.h
src/nfagraph/ng.cpp
@ -746,6 +794,8 @@ SET (hs_SRCS
src/nfagraph/ng_util.h
src/nfagraph/ng_vacuous.cpp
src/nfagraph/ng_vacuous.h
src/nfagraph/ng_violet.cpp
src/nfagraph/ng_violet.h
src/nfagraph/ng_width.cpp
src/nfagraph/ng_width.h
src/parser/AsciiComponentClass.cpp
@ -825,6 +875,10 @@ SET (hs_SRCS
src/rose/rose_build_compile.cpp
src/rose/rose_build_convert.cpp
src/rose/rose_build_convert.h
src/rose/rose_build_exclusive.cpp
src/rose/rose_build_exclusive.h
src/rose/rose_build_groups.cpp
src/rose/rose_build_groups.h
src/rose/rose_build_impl.h
src/rose/rose_build_infix.cpp
src/rose/rose_build_infix.h
@ -853,6 +907,8 @@ SET (hs_SRCS
src/util/charreach.cpp
src/util/charreach.h
src/util/charreach_util.h
src/util/clique.cpp
src/util/clique.h
src/util/compare.h
src/util/compile_context.cpp
src/util/compile_context.h
@ -878,7 +934,6 @@ SET (hs_SRCS
src/util/report_manager.cpp
src/util/report_manager.h
src/util/simd_utils.h
src/util/simd_utils_ssse3.h
src/util/target_info.cpp
src/util/target_info.h
src/util/ue2_containers.h
@ -916,6 +971,10 @@ set(hs_dump_SRCS
src/nfa/nfa_dump_dispatch.cpp
src/nfa/nfa_dump_internal.cpp
src/nfa/nfa_dump_internal.h
src/nfa/shengdump.cpp
src/nfa/shengdump.h
src/nfa/tamarama_dump.cpp
src/nfa/tamarama_dump.h
src/parser/dump.cpp
src/parser/dump.h
src/parser/position_dump.h
@ -941,7 +1000,7 @@ endif()
# choose which ones to build
set (LIB_VERSION ${HS_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION})
set (LIB_SOVERSION ${HS_MAJOR_VERSION})
add_library(hs_exec OBJECT ${hs_exec_SRCS})

18
cmake/formatdate.py Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
from __future__ import print_function
import os
import sys
import datetime
def usage():
print("Usage:", os.path.basename(sys.argv[0]), "<seconds from epoch>")
if len(sys.argv) != 2:
usage()
sys.exit(1)
ts = sys.argv[1]
build_date = datetime.datetime.utcfromtimestamp(int(ts))
print(build_date.strftime("%Y-%m-%d"))

View File

@ -77,7 +77,7 @@ static int eventHandler(unsigned int id, unsigned long long from,
* length with its length. Returns NULL on failure.
*/
static char *readInputData(const char *inputFN, unsigned int *length) {
FILE *f = fopen(inputFN, "r");
FILE *f = fopen(inputFN, "rb");
if (!f) {
fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN,
strerror(errno));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -52,7 +52,6 @@
#include "parser/shortcut_literal.h"
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "smallwrite/smallwrite_build.h"
#include "rose/rose_build.h"
#include "rose/rose_build_dump.h"
#include "som/slot_manager_dump.h"
@ -304,15 +303,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
return nullptr;
}
/* avoid building a smwr if just a pure floating case. */
if (!roseIsPureLiteral(rose.get())) {
u32 qual = roseQuality(rose.get());
auto smwr = ng.smwr->build(qual);
if (smwr) {
rose = roseAddSmallWrite(rose.get(), smwr.get());
}
}
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
dumpReportManager(ng.rm, ng.cc.grey);
dumpSomSlotManager(ng.ssm, ng.cc.grey);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -42,6 +42,7 @@ using std::string;
static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error.";
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
extern const hs_compile_error_t hs_enomem = {
const_cast<char *>(failureNoMemory), 0
@ -49,6 +50,9 @@ extern const hs_compile_error_t hs_enomem = {
extern const hs_compile_error_t hs_einternal = {
const_cast<char *>(failureInternal), 0
};
extern const hs_compile_error_t hs_badalloc = {
const_cast<char *>(failureBadAlloc), 0
};
namespace ue2 {
@ -56,8 +60,18 @@ hs_compile_error_t *generateCompileError(const string &err, int expression) {
hs_compile_error_t *ret =
(struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
if (ret) {
hs_error_t e = hs_check_alloc(ret);
if (e != HS_SUCCESS) {
hs_misc_free(ret);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
char *msg = (char *)hs_misc_alloc(err.size() + 1);
if (msg) {
e = hs_check_alloc(msg);
if (e != HS_SUCCESS) {
hs_misc_free(msg);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
memcpy(msg, err.c_str(), err.size() + 1);
ret->message = msg;
} else {
@ -83,7 +97,8 @@ void freeCompileError(hs_compile_error_t *error) {
if (!error) {
return;
}
if (error == &hs_enomem || error == &hs_einternal) {
if (error == &hs_enomem || error == &hs_einternal ||
error == &hs_badalloc) {
// These are not allocated.
return;
}

View File

@ -458,33 +458,16 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
}
*info = NULL;
if (!bytes || length < sizeof(struct hs_database)) {
return HS_INVALID;
// Decode and check the header
hs_database_t header;
hs_error_t ret = db_decode_header(&bytes, length, &header);
if (ret != HS_SUCCESS) {
return ret;
}
const u32 *buf = (const u32 *)bytes;
u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode));
u32 magic = unaligned_load_u32(buf++);
if (magic != HS_DB_MAGIC) {
return HS_INVALID;
}
u32 version = unaligned_load_u32(buf++);
buf++; /* length */
platform_t plat;
plat = unaligned_load_u64a(buf);
buf += 2;
buf++; /* crc */
buf++; /* reserved 0 */
buf++; /* reserved 1 */
const char *t_raw = (const char *)buf;
u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));
return print_database_string(info, version, plat, mode);
return print_database_string(info, header.version, header.platform, mode);
}
HS_PUBLIC_API

View File

@ -36,7 +36,6 @@
#include "teddy.h"
#include "teddy_internal.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/** \brief number of bytes processed in each iteration */
#define ITER_BYTES 16
@ -132,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft,
u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1);
tmp &= fdr->domainMask;
s = *((const m128 *)ft + tmp);
s = shiftRight8Bits(s);
s = rshiftbyte_m128(s, 1);
} else {
s = fdr->start;
}
@ -186,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st14 = *(const m128 *)(ft + v14*8);
m128 st15 = *(const m128 *)(ft + v15*8);
st1 = byteShiftLeft128(st1, 1);
st2 = byteShiftLeft128(st2, 2);
st3 = byteShiftLeft128(st3, 3);
st4 = byteShiftLeft128(st4, 4);
st5 = byteShiftLeft128(st5, 5);
st6 = byteShiftLeft128(st6, 6);
st7 = byteShiftLeft128(st7, 7);
st9 = byteShiftLeft128(st9, 1);
st10 = byteShiftLeft128(st10, 2);
st11 = byteShiftLeft128(st11, 3);
st12 = byteShiftLeft128(st12, 4);
st13 = byteShiftLeft128(st13, 5);
st14 = byteShiftLeft128(st14, 6);
st15 = byteShiftLeft128(st15, 7);
st1 = lshiftbyte_m128(st1, 1);
st2 = lshiftbyte_m128(st2, 2);
st3 = lshiftbyte_m128(st3, 3);
st4 = lshiftbyte_m128(st4, 4);
st5 = lshiftbyte_m128(st5, 5);
st6 = lshiftbyte_m128(st6, 6);
st7 = lshiftbyte_m128(st7, 7);
st9 = lshiftbyte_m128(st9, 1);
st10 = lshiftbyte_m128(st10, 2);
st11 = lshiftbyte_m128(st11, 3);
st12 = lshiftbyte_m128(st12, 4);
st13 = lshiftbyte_m128(st13, 5);
st14 = lshiftbyte_m128(st14, 6);
st15 = lshiftbyte_m128(st15, 7);
*s = or128(*s, st0);
*s = or128(*s, st1);
@ -210,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st6);
*s = or128(*s, st7);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
@ -222,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st14);
*s = or128(*s, st15);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}
@ -253,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st12 = *(const m128 *)(ft + v12*8);
m128 st14 = *(const m128 *)(ft + v14*8);
st2 = byteShiftLeft128(st2, 2);
st4 = byteShiftLeft128(st4, 4);
st6 = byteShiftLeft128(st6, 6);
st10 = byteShiftLeft128(st10, 2);
st12 = byteShiftLeft128(st12, 4);
st14 = byteShiftLeft128(st14, 6);
st2 = lshiftbyte_m128(st2, 2);
st4 = lshiftbyte_m128(st4, 4);
st6 = lshiftbyte_m128(st6, 6);
st10 = lshiftbyte_m128(st10, 2);
st12 = lshiftbyte_m128(st12, 4);
st14 = lshiftbyte_m128(st14, 6);
*s = or128(*s, st0);
*s = or128(*s, st2);
*s = or128(*s, st4);
*s = or128(*s, st6);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
@ -273,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
*s = or128(*s, st12);
*s = or128(*s, st14);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}
@ -296,27 +295,26 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr,
m128 st8 = *(const m128 *)(ft + v8*8);
m128 st12 = *(const m128 *)(ft + v12*8);
st4 = byteShiftLeft128(st4, 4);
st12 = byteShiftLeft128(st12, 4);
st4 = lshiftbyte_m128(st4, 4);
st12 = lshiftbyte_m128(st12, 4);
*s = or128(*s, st0);
*s = or128(*s, st4);
*conf0 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf0 ^= ~0ULL;
*s = or128(*s, st8);
*s = or128(*s, st12);
*conf8 = movq(*s);
*s = byteShiftRight128(*s, 8);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
}
static really_inline
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
const u32 *confBase, const struct FDR_Runtime_Args *a,
const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id,
struct zone *z) {
const u8 *ptr, u32 *last_match_id, struct zone *z) {
const u8 bucket = 8;
const u8 pullback = 1;
@ -352,13 +350,13 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal,
continue;
}
*last_match_id = id;
*controlVal = a->cb(ptr_main + byte - a->buf,
ptr_main + byte - a->buf, id, a->ctxt);
*control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
id, a->ctxt);
continue;
}
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a));
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback,
control, last_match_id, confVal);
confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control,
last_match_id, confVal);
} while (unlikely(!!*conf));
}
@ -681,9 +679,9 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
itPtr += ITER_BYTES) { \
if (unlikely(itPtr > tryFloodDetect)) { \
tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
&floodBackoff, &controlVal, \
&floodBackoff, &control, \
ITER_BYTES); \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
return HWLM_TERMINATED; \
} \
} \
@ -692,11 +690,11 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
u64a conf8; \
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \
ft, &conf0, &conf8, &s); \
do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr, \
control, &last_match_id, zz); \
do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr, \
control, &last_match_id, zz); \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \
&last_match_id, zz); \
do_confirm_fdr(&conf8, 8, &control, confBase, a, itPtr, \
&last_match_id, zz); \
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
return HWLM_TERMINATED; \
} \
} /* end for loop */ \
@ -704,9 +702,8 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
static never_inline
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
u32 floodBackoff = FLOOD_BACKOFF_START;
u32 last_match_id = INVALID_MATCH_ID;
u64a domain_mask_adjusted = fdr->domainMask << 1;
@ -771,7 +768,10 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
#define ONLY_AVX2(func) NULL
#endif
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
static const FDRFUNCTYPE funcs[] = {
fdr_engine_exec,
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast),
@ -814,7 +814,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
start,
cb,
ctxt,
&groups,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
0
};
@ -822,7 +821,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
return HWLM_SUCCESS;
} else {
assert(funcs[fdr->engineID]);
return funcs[fdr->engineID](fdr, &a);
return funcs[fdr->engineID](fdr, &a, groups);
}
}
@ -840,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
start,
cb,
ctxt,
&groups,
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
/* we are guaranteed to always have 16 initialised bytes at the end of
* the history buffer (they may be garbage). */
@ -853,7 +851,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
ret = HWLM_SUCCESS;
} else {
assert(funcs[fdr->engineID]);
ret = funcs[fdr->engineID](fdr, &a);
ret = funcs[fdr->engineID](fdr, &a, groups);
}
fdrPackState(fdr, &a, stream_state);

View File

@ -81,7 +81,7 @@ private:
void dumpMasks(const u8 *defaultMask);
#endif
void setupTab();
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
aligned_unique_ptr<FDR> setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link);
void createInitialState(FDR *fdr);
public:
@ -90,7 +90,7 @@ public:
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
make_small(make_small_in) {}
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
};
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
@ -124,10 +124,8 @@ void FDRCompiler::createInitialState(FDR *fdr) {
// Find the minimum length for the literals in this bucket.
const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
u32 min_len = ~0U;
for (vector<LiteralIndex>::const_iterator it = bucket_lits.begin(),
ite = bucket_lits.end();
it != ite; ++it) {
min_len = min(min_len, verify_u32(lits[*it].s.length()));
for (const LiteralIndex &lit_idx : bucket_lits) {
min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
}
DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
@ -141,13 +139,12 @@ void FDRCompiler::createInitialState(FDR *fdr) {
}
}
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
aligned_unique_ptr<FDR>
FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
size_t tabSize = eng.getTabSizeBytes();
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp =
setupFullMultiConfs(lits, eng, bucketToLits, make_small);
auto floodControlTmp = setupFDRFloodControl(lits, eng);
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
assert(ISALIGNED_16(tabSize));
assert(ISALIGNED_16(confirmTmp.second));
@ -175,14 +172,12 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
copy(tab.begin(), tab.end(), ptr);
ptr += tabSize;
memcpy(ptr, confirmTmp.first, confirmTmp.second);
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
/* we are allowing domains 9 to 15 only */
assert(eng.bits > 8 && eng.bits < 16);
@ -193,8 +188,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
if (link.first) {
fdr->link = verify_u32(ptr - fdr_base);
memcpy(ptr, link.first, link.second);
aligned_free(link.first);
memcpy(ptr, link.first.get(), link.second);
} else {
fdr->link = 0;
}
@ -217,13 +211,11 @@ struct LitOrder {
if (len1 != len2) {
return len1 < len2;
} else {
string::const_reverse_iterator it1, it2;
tie(it1, it2) =
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
if (it1 == i1s.rend()) {
auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
if (p.first == i1s.rend()) {
return false;
}
return *it1 < *it2;
return *p.first < *p.second;
}
}
@ -266,9 +258,8 @@ void FDRCompiler::assignStringsToBuckets() {
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
#ifdef DEBUG_ASSIGNMENT
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
i != e; ++i) {
printf("l<%d>:%d ", i->first, i->second);
for (const auto &m : lenCounts) {
printf("l<%u>:%u ", m.first, m.second);
}
printf("\n");
#endif
@ -324,12 +315,12 @@ void FDRCompiler::assignStringsToBuckets() {
for (u32 k = j; k < nChunks; ++k) {
cnt += count[k];
}
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
t[j][0] = {getScoreUtil(length[j], cnt), 0};
}
for (u32 i = 1; i < nb; i++) {
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
u32 cnt = count[j];
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
SCORE score = getScoreUtil(length[j], cnt);
@ -338,12 +329,12 @@ void FDRCompiler::assignStringsToBuckets() {
}
score += t[k][i-1].first;
if (score < best.first) {
best = make_pair(score, k);
best = {score, k};
}
}
t[j][i] = best;
}
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
}
#ifdef DEBUG_ASSIGNMENT
@ -405,8 +396,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
distance = 4;
}
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
i != e; ++i) {
for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
if (e - i > 5) {
__builtin_prefetch(&lits[*(i + 5)]);
}
@ -460,31 +450,25 @@ void FDRCompiler::setupTab() {
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
}
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
const vector<LiteralIndex> &vl = bucketToLits[b];
SuffixPositionInString pLimit = eng.getBucketWidth(b);
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
u32 bit = eng.getSchemeBit(b, pos);
M2SET m2;
map<u32, ue2::unordered_set<u32>> m2;
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
if (done) {
clearbit(&defaultMask[0], bit);
continue;
}
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
++i) {
u32 dc = i->first;
const ue2::unordered_set<u32> &mskSet = i->second;
for (const auto &elem : m2) {
u32 dc = elem.first;
const ue2::unordered_set<u32> &mskSet = elem.second;
u32 v = ~dc;
do {
u32 b2 = v & dc;
for (ue2::unordered_set<u32>::const_iterator
i2 = mskSet.begin(),
e2 = mskSet.end();
i2 != e2; ++i2) {
u32 val = (*i2 & ~dc) | b2;
for (const u32 &mskVal : mskSet) {
u32 val = (mskVal & ~dc) | b2;
clearbit(tabIndexToMask(val), bit);
}
v = (v + (dc & -dc)) | ~dc;
@ -502,7 +486,8 @@ void FDRCompiler::setupTab() {
#endif
}
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
aligned_unique_ptr<FDR>
FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
assignStringsToBuckets();
setupTab();
return setupFDR(link);
@ -515,16 +500,15 @@ aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) {
pair<u8 *, size_t> link(nullptr, 0);
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
if (stream_control) {
link = fdrBuildTableStreaming(lits, stream_control);
link = fdrBuildTableStreaming(lits, *stream_control);
}
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
if (grey.fdrAllowTeddy) {
aligned_unique_ptr<FDR> fdr
= teddyBuildTableHinted(lits, make_small, hint, target, link);
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link);
if (fdr) {
DEBUG_PRINTF("build with teddy succeeded\n");
return fdr;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,6 +31,7 @@
#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
#include "util/alloc.h"
#include <map>
#include <utility>
@ -44,7 +45,6 @@ namespace ue2 {
// a pile of decorative typedefs
// good for documentation purposes more than anything else
typedef u32 LiteralIndex;
typedef u32 ConfirmIndex;
typedef u32 SuffixPositionInString; // zero is last byte, counting back
// into the string
typedef u32 BucketIndex;
@ -56,25 +56,22 @@ class EngineDescription;
class FDREngineDescription;
struct hwlmStreamingControl;
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool make_small);
std::pair<u8 *, size_t> setupFullMultiConfs(
std::pair<aligned_unique_ptr<u8>, size_t> setupFullMultiConfs(
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small);
// all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our
// state; if we don't have a flood that's long enough we won't be in the
// right state yet to allow blindly advancing
std::pair<u8 *, size_t>
std::pair<aligned_unique_ptr<u8>, size_t>
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng);
std::pair<u8 *, size_t>
std::pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control);
hwlmStreamingControl &stream_control);
static constexpr u32 HINT_INVALID = 0xffffffff;

View File

@ -45,9 +45,10 @@ using namespace std;
namespace ue2 {
typedef u8 ConfSplitType;
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
using ConfSplitType = u8;
using BucketSplitPair = pair<BucketIndex, ConfSplitType>;
using BC2CONF = map<BucketSplitPair,
pair<aligned_unique_ptr<FDRConfirm>, size_t>>;
// return the number of bytes beyond a length threshold in all strings in lits
static
@ -149,9 +150,9 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
//#define FDR_CONFIRM_DUMP 1
static
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool applyOneCharOpt, bool make_small, bool make_confirm) {
static pair<aligned_unique_ptr<FDRConfirm>, size_t>
getFDRConfirm(const vector<hwlmLiteral> &lits, bool applyOneCharOpt,
bool make_small, bool make_confirm) {
vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk);
@ -220,55 +221,61 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
#ifdef FDR_CONFIRM_DUMP
// print out the literals reversed - makes it easier to line up analyses
// that are end-offset based
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
e = res2lits.end(); i != e; ++i) {
u32 hash = i->first;
vector<LiteralIndex> & vlidx = i->second;
if (vlidx.size() > 1) {
printf("%x -> %zu literals\n", hash, vlidx.size());
u32 min_len = lits[vlidx.front()].s.size();
vector<set<u8> > vsl; // contains the set of chars at each location
// reversed from the end
vsl.resize(1024);
u32 total_string_size = 0;
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
total_string_size += lits[litIdx].s.size();
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
}
min_len = MIN(min_len, lits[litIdx].s.size());
for (const auto &m : res2lits) {
const u32 &hash = m.first;
const vector<LiteralIndex> &vlidx = m.second;
if (vlidx.size() <= 1) {
continue;
}
printf("%x -> %zu literals\n", hash, vlidx.size());
size_t min_len = lits[vlidx.front()].s.size();
vector<set<u8>> vsl; // contains the set of chars at each location
// reversed from the end
for (const auto &litIdx : vlidx) {
const auto &lit = lits[litIdx];
if (lit.s.size() > vsl.size()) {
vsl.resize(lit.s.size());
}
printf("common ");
for (u32 j = 0; j < min_len; j++) {
if (vsl[j].size() == 1) {
printf("%02x", (u32)*vsl[j].begin());
} else {
for (size_t j = lit.s.size(); j != 0; j--) {
vsl[lit.s.size() - j].insert(lit.s[j - 1]);
}
min_len = min(min_len, lit.s.size());
}
printf("common ");
for (size_t j = 0; j < min_len; j++) {
if (vsl[j].size() == 1) {
printf("%02x", *vsl[j].begin());
} else {
printf("__");
}
}
printf("\n");
for (const auto &litIdx : vlidx) {
const auto &lit = lits[litIdx];
printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
for (size_t j = lit.s.size(); j != 0; j--) {
size_t dist_from_end = lit.s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
} else {
printf("%02x", lit.s[j - 1]);
}
}
printf("\n");
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
u32 dist_from_end = lits[litIdx].s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
} else {
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
}
}
printf("\n");
}
u32 total_compares = 0;
for (u32 j = 0; j < 1024; j++) { // naughty
total_compares += vsl[j].size();
}
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
}
size_t total_compares = 0;
for (const auto &v : vsl) {
total_compares += v.size();
}
size_t total_string_size = 0;
for (const auto &litIdx : vlidx) {
const auto &lit = lits[litIdx];
total_string_size += lit.s.size();
}
printf("Total compare load: %zu Total string size: %zu\n\n",
total_compares, total_string_size);
}
#endif
@ -281,7 +288,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
sizeof(LitInfo) * lits.size() + totalLitSize;
size = ROUNDUP_N(size, alignof(FDRConfirm));
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
auto fdrc = aligned_zmalloc_unique<FDRConfirm>(size);
assert(fdrc); // otherwise would have thrown std::bad_alloc
fdrc->andmsk = andmsk;
@ -295,7 +302,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
fdrc->groups = gm;
// After the FDRConfirm, we have the lit index array.
u8 *fdrc_base = (u8 *)fdrc;
u8 *fdrc_base = (u8 *)fdrc.get();
u8 *ptr = fdrc_base + sizeof(*fdrc);
ptr = ROUNDUP_PTR(ptr, alignof(u32));
u32 *bitsToLitIndex = (u32 *)ptr;
@ -307,14 +314,12 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
// Walk the map by hash value assigning indexes and laying out the
// elements (and their associated string confirm material) in memory.
for (std::map<u32, vector<LiteralIndex> >::const_iterator
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
const u32 hash = i->first;
const vector<LiteralIndex> &vlidx = i->second;
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
for (const auto &m : res2lits) {
const u32 hash = m.first;
const vector<LiteralIndex> &vlidx = m.second;
bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
LiteralIndex litIdx = *i;
// Write LitInfo header.
u8 *oldPtr = ptr;
@ -333,7 +338,7 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
}
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (i2 + 1 == e2) {
if (next(i) == e) {
finalLI.next = 0x0;
} else {
// our next field represents an adjustment on top of
@ -348,14 +353,13 @@ size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
assert((size_t)(ptr - fdrc_base) <= size);
}
*fdrc_p = fdrc;
// Return actual used size, not worst-case size. Must be rounded up to
// FDRConfirm alignment so that the caller can lay out a sequence of these.
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
alignof(FDRConfirm));
assert(actual_size <= size);
return actual_size;
return {move(fdrc), actual_size};
}
static
@ -377,12 +381,9 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
u32 totalConfirmSize = 0;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
if (!bucketToLits[b].empty()) {
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
for (vector<LiteralIndex>::const_iterator
i = bucketToLits[b].begin(),
e = bucketToLits[b].end();
i != e; ++i) {
hwlmLiteral lit = lits[*i]; // copy
vector<vector<hwlmLiteral>> vl(eng.getConfirmTopLevelSplit());
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
hwlmLiteral lit = lits[lit_idx]; // copy
// c is last char of this literal
u8 c = *(lit.s.rbegin());
@ -424,26 +425,27 @@ u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
}
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
if (!vl[c].empty()) {
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
FDRConfirm *fdrc;
size_t size = getFDRConfirm(vl[c], &fdrc,
eng.typicallyHoldsOneCharLits(),
make_small, makeConfirm);
BucketSplitPair p = make_pair(b, c);
bc2Conf[p] = make_pair(fdrc, size);
totalConfirmSize += size;
if (vl[c].empty()) {
continue;
}
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
auto key = make_pair(b, c);
auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(),
make_small, makeConfirm);
totalConfirmSize += fc.second;
assert(bc2Conf.find(key) == end(bc2Conf));
bc2Conf.emplace(key, move(fc));
}
}
}
return totalConfirmSize;
}
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
bool make_small) {
pair<aligned_unique_ptr<u8>, size_t>
setupFullMultiConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
BC2CONF bc2Conf;
u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
make_small);
@ -453,26 +455,24 @@ pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
auto buf = aligned_zmalloc_unique<u8>(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc
u32 *confBase = (u32 *)buf;
u8 *ptr = buf + totalConfSwitchSize;
u32 *confBase = (u32 *)buf.get();
u8 *ptr = buf.get() + totalConfSwitchSize;
for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e;
++i) {
const pair<FDRConfirm *, size_t> &p = i->second;
for (const auto &m : bc2Conf) {
const BucketIndex &b = m.first.first;
const u8 &c = m.first.second;
const pair<aligned_unique_ptr<FDRConfirm>, size_t> &p = m.second;
// confirm offset is relative to the base of this structure, now
u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
memcpy(ptr, p.first, p.second);
u32 confirm_offset = verify_u32(ptr - buf.get());
memcpy(ptr, p.first.get(), p.second);
ptr += p.second;
aligned_free(p.first);
BucketIndex b = i->first.first;
u8 c = i->first.second;
u32 idx = c * nBuckets + b;
confBase[idx] = confirm_offset;
}
return make_pair(buf, totalSize);
return {move(buf), totalSize};
}
} // namespace ue2

View File

@ -105,7 +105,6 @@ struct FDR_Runtime_Args {
size_t start_offset;
HWLMCallback cb;
void *ctxt;
hwlm_group_t *groups;
const u8 *firstFloodDetect;
const u64a histBytes;
};

View File

@ -94,14 +94,13 @@ static
bool setupLongLits(const vector<hwlmLiteral> &lits,
vector<hwlmLiteral> &long_lits, size_t max_len) {
long_lits.reserve(lits.size());
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
it != lits.end(); ++it) {
if (it->s.length() > max_len) {
hwlmLiteral tmp = *it; // copy
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
for (const auto &lit : lits) {
if (lit.s.length() > max_len) {
hwlmLiteral tmp = lit; // copy
tmp.s.pop_back();
tmp.id = 0; // recalc later
tmp.groups = 0; // filled in later by hash bucket(s)
long_lits.push_back(tmp);
long_lits.push_back(move(tmp));
}
}
@ -112,15 +111,12 @@ bool setupLongLits(const vector<hwlmLiteral> &lits,
// sort long_literals by caseful/caseless and in lexicographical order,
// remove duplicates
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
vector<hwlmLiteral>::iterator new_end =
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
long_lits.erase(new_end, long_lits.end());
// fill in ids; not currently used
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
i->id = i - long_lits.begin();
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
i->id = distance(long_lits.begin(), i);
}
return true;
}
@ -143,23 +139,19 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
hashedPositions[m] = 0;
}
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
if (i->nocase) {
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
break;
}
}
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
MODES m = i->nocase ? CASELESS : CASEFUL;
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
for (const auto &lit : long_lits) {
Modes m = lit.nocase ? CASELESS : CASEFUL;
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
hashedPositions[m]++;
}
positions[m] += i->s.size();
positions[m] += lit.s.size();
}
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
@ -170,7 +162,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
#ifdef DEBUG_COMPILE
printf("analyzeLits:\n");
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
for (Modes m = CASEFUL; m < MAX_MODES; m++) {
printf("mode %s boundary %d positions %d hashedPositions %d "
"hashEntries %d\n",
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
@ -181,7 +173,7 @@ void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
}
static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
}
@ -203,24 +195,21 @@ struct OffsetIDFromEndOrder {
static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
FDRSHashEntry *tab, size_t numEntries, MODES m,
FDRSHashEntry *tab, size_t numEntries, Modes mode,
map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
map<u32, u64a> bucketToBitfield;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
const hwlmLiteral &l = *i;
if ((m == CASELESS) != i->nocase) {
for (const auto &lit : long_lits) {
if ((mode == CASELESS) != lit.nocase) {
continue;
}
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
u32 h = hashLit(l, j, max_len, m);
for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
u32 h = hashLit(lit, j, max_len, mode);
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
@ -231,11 +220,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
e = bucketToBitfield.end();
i != e; ++i) {
u32 bucket = i->first;
u64a contents = i->second;
for (const auto &m : bucketToBitfield) {
const u32 &bucket = m.first;
const u64a &contents = m.second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
@ -243,12 +230,9 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (map<u32, deque<pair<u32, u32> > >::iterator
i = bucketToLitOffPairs.begin(),
e = bucketToLitOffPairs.end();
i != e; ++i) {
u32 bucket = i->first;
deque<pair<u32, u32> > &d = i->second;
for (auto &m : bucketToLitOffPairs) {
u32 bucket = m.first;
deque<pair<u32, u32>> &d = m.second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
@ -299,31 +283,30 @@ void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
vector<hwlmLiteral>::const_iterator it, ite;
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
rv = max(rv, it->msk.size());
for (const auto &lit : lits) {
rv = max(rv, lit.msk.size());
}
return rv;
}
pair<u8 *, size_t>
pair<aligned_unique_ptr<u8>, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control) {
hwlmStreamingControl &stream_control) {
// refuse to compile if we are forced to have smaller than minimum
// history required for long-literal support, full stop
// otherwise, choose the maximum of the preferred history quantity
// (currently a fairly extravagant 32) or the already used history
// quantity - subject to the limitation of stream_control->history_max
// quantity - subject to the limitation of stream_control.history_max
const size_t MIN_HISTORY_REQUIRED = 32;
if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
throw std::logic_error("Cannot set history to minimum history required");
}
size_t max_len =
MIN(stream_control->history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
MIN(stream_control.history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
assert(max_len >= MIN_HISTORY_REQUIRED);
size_t max_mask_len = maxMaskLen(lits);
@ -334,10 +317,10 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
// we want enough history to manage the longest literal and the longest
// mask.
stream_control->literal_history_required =
stream_control.literal_history_required =
max(maxLen(lits), max_mask_len) - 1;
stream_control->literal_stream_state_required = 0;
return make_pair(nullptr, size_t{0});
stream_control.literal_stream_state_required = 0;
return {nullptr, size_t{0}};
}
// Ensure that we have enough room for the longest mask.
@ -381,11 +364,11 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
// then fill it in
u8 * ptr = secondaryTable;
u8 * ptr = secondaryTable.get();
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
// fill in header
header->pseudoEngineID = (u32)0xffffffff;
@ -407,11 +390,9 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
ptr += litTabSize;
map<u32, u32> litToOffsetVal;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
u32 entry = verify_u32(i - long_lits.begin());
u32 offset = verify_u32(ptr - secondaryTable);
u32 offset = verify_u32(ptr - secondaryTable.get());
// point the table entry to the string location
litTabPtr[entry].offset = offset;
@ -425,20 +406,20 @@ fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
}
// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
// fill hash tables
ptr = secondaryTable + htOffset[CASEFUL];
ptr = secondaryTable.get() + htOffset[CASEFUL];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
(MODES)m, litToOffsetVal);
(Modes)m, litToOffsetVal);
ptr += htSize[m];
}
// tell the world what we did
stream_control->literal_history_required = max_len;
stream_control->literal_stream_state_required = tot_state_bytes;
return make_pair(secondaryTable, tabSize);
stream_control.literal_history_required = max_len;
stream_control.literal_stream_state_required = tot_state_bytes;
return {move(secondaryTable), tabSize};
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -41,11 +41,11 @@
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)
typedef enum {
enum Modes {
CASEFUL = 0,
CASELESS = 1,
MAX_MODES = 2
} MODES;
};
// We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings
@ -91,12 +91,12 @@ struct FDRSHashEntry {
};
static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return m == CASEFUL ? 0 : h->boundary[m-1];
}
static really_inline
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
return h->boundary[m];
}
@ -107,17 +107,17 @@ const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
}
static really_inline
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
}
static really_inline
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v - getBaseOffsetOfLits(h, m) + 1;
}
static really_inline
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
return v + getBaseOffsetOfLits(h, m) - 1;
}
@ -127,7 +127,7 @@ u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
}
static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
assert(len >= 32);

View File

@ -143,7 +143,7 @@ u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
// binary search for the literal index that contains the current state
static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
u32 stateValue, MODES m) {
u32 stateValue, enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 lo = get_start_lit_idx(streamingTable, m);
u32 hi = get_end_lit_idx(streamingTable, m);
@ -175,7 +175,7 @@ void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSLiteral * litTab,
const u32 *state_table,
const MODES m) {
const enum Modes m) {
if (!state_table[m]) {
return;
}
@ -213,8 +213,9 @@ void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
}
static really_inline
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
const struct FDR_Runtime_Args *a, u32 hashState,
enum Modes m) {
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
u32 idx = findLitTabEntry(streamingTable, hashState, m);
size_t found_offset = litTab[idx].offset;
@ -279,7 +280,7 @@ void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
static really_inline
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
u32 h, const MODES m) {
u32 h, const enum Modes m) {
u32 nbits = streamingTable->hashNBits[m];
if (!nbits) {
return NULL;
@ -303,7 +304,7 @@ const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
static really_inline
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
const struct FDRSTableHeader *streamingTable,
const struct FDRSHashEntry *ent, const MODES m) {
const struct FDRSHashEntry *ent, const enum Modes m) {
assert(ent);
assert(streamingTable->hashNBits[m]);

View File

@ -69,7 +69,7 @@ static
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
FDRFlood &fl = tmpFlood[c];
fl.suffix = MAX(fl.suffix, suffix + 1);
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix);
}
static
@ -90,8 +90,9 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
}
}
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng) {
pair<aligned_unique_ptr<u8>, size_t>
setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng) {
vector<FDRFlood> tmpFlood(N_CHARS);
u32 default_suffix = eng.getDefaultFloodSuffixLength();
@ -124,8 +125,9 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
for (u32 i = 0; i < iEnd; i++) {
if (i < litSize) {
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
i, c, lit.s[litSize - i - 1]);
DEBUG_PRINTF("non-flood char in literal[%u]: "
"0x%02x != 0x%02x\n",
i, c, lit.s[litSize - i - 1]);
upSuffix = MIN(upSuffix, i);
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
break;
@ -195,11 +197,12 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
auto buf = aligned_zmalloc_unique<u8>(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc
u32 *floodHeader = (u32 *)buf;
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);
u32 *floodHeader = (u32 *)buf.get();
FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
u32 currentFloodIndex = 0;
for (const auto &m : flood2chars) {
@ -215,7 +218,7 @@ pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
floodHeaderSize, floodStructSize, totalSize);
return make_pair((u8 *)buf, totalSize);
return {move(buf), totalSize};
}
} // namespace ue2

View File

@ -36,7 +36,6 @@
#include "teddy_internal.h"
#include "teddy_runtime_common.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -80,15 +79,15 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
do { \
if (unlikely(isnonzero128(var))) { \
u64a lo = movq(var); \
u64a hi = movq(byteShiftRight128(var, 8)); \
u64a hi = movq(rshiftbyte_m128(var, 8)); \
if (unlikely(lo)) { \
conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(hi)) { \
conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} \
@ -98,27 +97,27 @@ do { \
do { \
if (unlikely(isnonzero128(var))) { \
u32 part1 = movd(var); \
u32 part2 = movd(byteShiftRight128(var, 4)); \
u32 part3 = movd(byteShiftRight128(var, 8)); \
u32 part4 = movd(byteShiftRight128(var, 12)); \
u32 part2 = movd(rshiftbyte_m128(var, 4)); \
u32 part3 = movd(rshiftbyte_m128(var, 8)); \
u32 part4 = movd(rshiftbyte_m128(var, 12)); \
if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} \
@ -126,36 +125,34 @@ do { \
#endif
static really_inline
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) {
m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
return and128(and128(pshufb(maskBase[0*2], lo),
pshufb(maskBase[0*2+1], hi)), p_mask);
m128 hi = and128(rshift64_m128(val, 4), mask);
return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi));
}
static really_inline
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask,
m128 val) {
m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 r = prep_conf_teddy_m1(maskBase, p_mask, val);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m1(maskBase, val);
m128 res_1 = and128(pshufb(maskBase[1*2], lo),
pshufb(maskBase[1*2+1], hi));
m128 res_shifted_1 = palignr(res_1, *old_1, 16-1);
*old_1 = res_1;
return and128(and128(r, p_mask), res_shifted_1);
return and128(r, res_shifted_1);
}
static really_inline
m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 p_mask, m128 val) {
m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m2(maskBase, old_1, val);
m128 res_2 = and128(pshufb(maskBase[2*2], lo),
pshufb(maskBase[2*2+1], hi));
@ -166,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
static really_inline
m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
m128 *old_3, m128 p_mask, m128 val) {
m128 *old_3, m128 val) {
m128 mask = set16x8(0xf);
m128 lo = and128(val, mask);
m128 hi = and128(rshift2x64(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val);
m128 hi = and128(rshift64_m128(val, 4), mask);
m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val);
m128 res_3 = and128(pshufb(maskBase[3*2], lo),
pshufb(maskBase[3*2+1], hi));
@ -180,11 +177,10 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
}
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -203,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16;
}
@ -217,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
}
@ -227,19 +224,19 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -258,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -272,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr));
m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16));
m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -282,19 +280,19 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m1(maskBase, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -314,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr));
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -329,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr));
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr + 16));
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -341,19 +337,19 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -373,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr));
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -388,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr));
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(),
load128(ptr + 16));
m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -400,19 +394,19 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -434,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -450,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr + 16));
load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -461,20 +456,19 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -496,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -512,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr));
load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones128(), load128(ptr + 16));
load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -523,20 +518,19 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
m128 p_mask;
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -559,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr));
&res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -575,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr));
&res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr + 16));
&res_old_3, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -587,19 +582,19 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -622,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr));
&res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -638,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr));
&res_old_3, load128(ptr));
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones128(), load128(ptr + 16));
&res_old_3, load128(ptr + 16));
CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -650,9 +646,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and128(r_0, p_mask);
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}

View File

@ -33,64 +33,85 @@
#ifndef TEDDY_H_
#define TEDDY_H_
#include "hwlm/hwlm.h" // for hwlm_group_t
struct FDR; // forward declaration from fdr_internal.h
struct FDR_Runtime_Args;
hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
#if defined(__AVX2__)
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a);
hwlm_error_t
fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a,
hwlm_group_t control);
#endif /* __AVX2__ */

View File

@ -36,7 +36,6 @@
#include "teddy_internal.h"
#include "teddy_runtime_common.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#if defined(__AVX2__)
@ -122,22 +121,22 @@ do { \
u64a part4 = extract64from256(r, 1); \
if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} \
@ -159,41 +158,41 @@ do { \
u32 part8 = extract32from256(r, 3); \
if (unlikely(part1)) { \
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part2)) { \
conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
} \
if (unlikely(part3)) { \
conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part4)) { \
conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part5)) { \
conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part6)) { \
conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part7)) { \
conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
if (unlikely(part8)) { \
conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \
control, &last_match); \
&control, &last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} \
@ -205,11 +204,11 @@ do { \
if (unlikely(isnonzero256(var))) { \
u32 arrCnt = 0; \
m128 lo = cast256to128(var); \
m128 hi = cast256to128(swap128in256(var)); \
m128 hi = movdq_hi(var); \
bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \
bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \
for (u32 i = 0; i < arrCnt; i++) { \
conf_fn(bitArr[i], confBase, reason, a, ptr, control, \
conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \
&last_match); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
@ -372,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
64 * (offset);
*arrCnt += 1;
}
u64a part_1 = movq(byteShiftRight128(var, 8));
u64a part_1 = movq(rshiftbyte_m128(var, 8));
while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
64 * (offset + 1);
@ -385,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
32 * (offset * 2);
*arrCnt += 1;
}
u32 part_1 = movd(byteShiftRight128(var, 4));
u32 part_1 = movd(rshiftbyte_m128(var, 4));
while (unlikely(part_1)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) +
32 * (offset * 2 + 1);
*arrCnt += 1;
}
u32 part_2 = movd(byteShiftRight128(var, 8));
u32 part_2 = movd(rshiftbyte_m128(var, 8));
while (unlikely(part_2)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) +
32 * (offset * 2 + 2);
*arrCnt += 1;
}
u32 part_3 = movd(byteShiftRight128(var, 12));
u32 part_3 = movd(rshiftbyte_m128(var, 12));
while (unlikely(part_3)) {
bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) +
32 * (offset * 2 + 3);
@ -408,36 +407,35 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) {
}
static really_inline
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) {
m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
return and256(and256(vpshufb(maskBase[0*2], lo),
vpshufb(maskBase[0*2+1], hi)), p_mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
return and256(vpshufb(maskBase[0*2], lo),
vpshufb(maskBase[0*2+1], hi));
}
static really_inline
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask,
m256 val) {
m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m1(maskBase, val);
m256 res_1 = and256(vpshufb(maskBase[1*2], lo),
vpshufb(maskBase[1*2+1], hi));
m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1);
*old_1 = res_1;
return and256(and256(r, p_mask), res_shifted_1);
return and256(r, res_shifted_1);
}
static really_inline
m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 p_mask, m256 val) {
m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val);
m256 res_2 = and256(vpshufb(maskBase[2*2], lo),
vpshufb(maskBase[2*2+1], hi));
@ -448,11 +446,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2,
static really_inline
m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
m256 *old_3, m256 p_mask, m256 val) {
m256 *old_3, m256 val) {
m256 mask = set32x8(0xf);
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val);
m256 hi = and256(rshift64_m256(val, 4), mask);
m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val);
m256 res_3 = and256(vpshufb(maskBase[3*2], lo),
vpshufb(maskBase[3*2+1], hi));
@ -462,12 +460,10 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2,
}
static really_inline
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi,
m256 p_mask) {
m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) {
m256 lo = and256(val, mask);
m256 hi = and256(rshift4x64(val, 4), mask);
m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
return and256(res, p_mask);
m256 hi = and256(rshift64_m256(val, 4), mask);
return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi));
}
static really_inline
@ -482,11 +478,10 @@ const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) {
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -505,13 +500,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
ptr += 16;
}
@ -519,10 +515,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy);
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
load2x128(ptr + 16));
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy);
}
@ -530,19 +525,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -561,13 +556,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -575,10 +571,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(),
load2x128(ptr + 16));
m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -586,19 +581,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 1);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -618,14 +613,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -633,10 +628,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -645,19 +639,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -677,25 +671,24 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
ptr += 16;
}
for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) {
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
load2x128(ptr));
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(),
m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1,
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -704,19 +697,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 2);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0);
m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -738,14 +731,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -754,10 +748,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -766,19 +760,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -800,14 +794,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -816,10 +811,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr));
load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
ones256(), load2x128(ptr + 16));
load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -828,19 +823,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 3);
m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2,
p_mask, val_0);
val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -863,15 +858,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr));
&res_old_3, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
ptr += 16;
}
@ -880,12 +875,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr));
&res_old_3, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy);
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr + 16));
&res_old_3, load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy);
}
@ -894,19 +887,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -929,15 +922,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
if (ptr + 16 < buf_end) {
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr));
&res_old_3, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
ptr += 16;
}
@ -946,12 +939,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
__builtin_prefetch(ptr + (iterBytes*4));
CHECK_FLOOD;
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr));
&res_old_3, load2x128(ptr));
CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy);
m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, ones256(),
load2x128(ptr + 16));
&res_old_3, load2x128(ptr + 16));
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy);
}
@ -960,19 +951,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end,
a->buf_history, a->len_history, 4);
m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2,
&res_old_3, p_mask, val_0);
&res_old_3, val_0);
r_0 = and256(r_0, p_mask);
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -996,16 +987,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
p_mask);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
ptr += 32;
}
if (ptr + 32 < buf_end) {
m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
ones256());
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
ptr += 32;
}
@ -1015,13 +1005,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
CHECK_FLOOD;
m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
ones256());
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
m256 val_1 = load256(ptr + 32);
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
ones256());
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy);
}
@ -1029,20 +1017,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
p_mask);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
const struct FDR_Runtime_Args *a) {
const struct FDR_Runtime_Args *a,
hwlm_group_t control) {
const u8 *buf_end = a->buf + a->len;
const u8 *ptr = a->buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t *control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 *tryFloodDetect = a->firstFloodDetect;
u32 last_match = (u32)-1;
@ -1066,16 +1053,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
p_mask);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
ptr += 32;
}
if (ptr + 32 < buf_end) {
m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
ones256());
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
ptr += 32;
}
@ -1085,13 +1071,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
CHECK_FLOOD;
m256 val_0 = load256(ptr + 0);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
ones256());
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
m256 val_1 = load256(ptr + 32);
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi,
ones256());
m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi);
CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy);
}
@ -1099,11 +1083,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr,
m256 p_mask;
m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset,
buf_end, a->buf_history, a->len_history);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi,
p_mask);
m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi);
res_0 = and256(res_0, p_mask);
CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy);
}
*a->groups = controlVal;
return HWLM_SUCCESS;
}

View File

@ -74,12 +74,11 @@ public:
const TeddyEngineDescription &eng_in, bool make_small_in)
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
};
class TeddySet {
const vector<hwlmLiteral> &lits;
u32 len;
// nibbleSets is a series of bitfields over 16 predicates
// that represent the whether shufti nibble set
@ -89,8 +88,7 @@ class TeddySet {
vector<u16> nibbleSets;
set<u32> litIds;
public:
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
const set<u32> & getLits() const { return litIds; }
size_t litCount() const { return litIds.size(); }
@ -106,8 +104,8 @@ public:
}
printf("\nnlits: %zu\nLit ids: ", litCount());
printf("Prob: %llu\n", probability());
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
printf("%u ", *i);
for (const auto &id : litIds) {
printf("%u ", id);
}
printf("\n");
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
@ -118,15 +116,15 @@ public:
return nibbleSets == ts.nibbleSets;
}
void addLiteral(u32 lit_id) {
const string &s = lits[lit_id].s;
void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
const string &s = lit.s;
for (u32 i = 0; i < len; i++) {
if (i < s.size()) {
u8 c = s[s.size() - i - 1];
u8 c_hi = (c >> 4) & 0xf;
u8 c_lo = c & 0xf;
nibbleSets[i*2] = 1 << c_lo;
if (lits[lit_id].nocase && ourisalpha(c)) {
if (lit.nocase && ourisalpha(c)) {
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
} else {
nibbleSets[i*2+1] = 1 << c_hi;
@ -185,28 +183,26 @@ bool TeddyCompiler::pack(map<BucketIndex,
set<TeddySet> sts;
for (u32 i = 0; i < lits.size(); i++) {
TeddySet ts(lits, eng.numMasks);
ts.addLiteral(i);
TeddySet ts(eng.numMasks);
ts.addLiteral(i, lits[i]);
sts.insert(ts);
}
while (1) {
#ifdef TEDDY_DEBUG
printf("Size %zu\n", sts.size());
for (set<TeddySet>::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
printf("\n"); i1->dump();
for (const TeddySet &ts : sts) {
printf("\n"); ts.dump();
}
printf("\n===============================================\n");
#endif
set<TeddySet>::iterator m1 = sts.end(), m2 = sts.end();
auto m1 = sts.end(), m2 = sts.end();
u64a best = 0xffffffffffffffffULL;
for (set<TeddySet>::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
set<TeddySet>::iterator i2 = i1;
++i2;
for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
const TeddySet &s1 = *i1;
for (set<TeddySet>::iterator e2 = sts.end(); i2 != e2; ++i2) {
for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
const TeddySet &s2 = *i2;
// be more conservative if we don't absolutely need to
@ -216,7 +212,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
continue;
}
TeddySet tmpSet(lits, eng.numMasks);
TeddySet tmpSet(eng.numMasks);
tmpSet.merge(s1);
tmpSet.merge(s2);
u64a newScore = tmpSet.heuristic();
@ -246,7 +242,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
}
// do the merge
TeddySet nts(lits, eng.numMasks);
TeddySet nts(eng.numMasks);
nts.merge(*m1);
nts.merge(*m2);
#ifdef TEDDY_DEBUG
@ -263,25 +259,23 @@ bool TeddyCompiler::pack(map<BucketIndex,
sts.erase(m2);
sts.insert(nts);
}
u32 cnt = 0;
if (sts.size() > eng.getNumBuckets()) {
return false;
}
for (set<TeddySet>::const_iterator i = sts.begin(), e = sts.end(); i != e;
++i) {
for (set<u32>::const_iterator i2 = i->getLits().begin(),
e2 = i->getLits().end();
i2 != e2; ++i2) {
bucketToLits[cnt].push_back(*i2);
}
cnt++;
u32 bucket_id = 0;
for (const TeddySet &ts : sts) {
const auto &ts_lits = ts.getLits();
auto &bucket_lits = bucketToLits[bucket_id];
bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
bucket_id++;
}
return true;
}
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
aligned_unique_ptr<FDR>
TeddyCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return nullptr;
@ -314,9 +308,8 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
auto floodControlTmp = setupFDRFloodControl(lits, eng);
auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +
@ -334,38 +327,29 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
teddy->maxStringLen = verify_u32(maxLen(lits));
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.first, confirmTmp.second);
memcpy(ptr, confirmTmp.first.get(), confirmTmp.second);
ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second);
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
if (link.first) {
teddy->link = verify_u32(ptr - teddy_base);
memcpy(ptr, link.first, link.second);
aligned_free(link.first);
memcpy(ptr, link.first.get(), link.second);
} else {
teddy->link = 0;
}
u8 *baseMsk = teddy_base + sizeof(Teddy);
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator
i = bucketToLits.begin(),
e = bucketToLits.end();
i != e; ++i) {
const u32 bucket_id = i->first;
const vector<LiteralIndex> &ids = i->second;
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;
const vector<LiteralIndex> &ids = b2l.second;
const u8 bmsk = 1U << (bucket_id % 8);
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(),
e2 = ids.end();
i2 != e2; ++i2) {
LiteralIndex lit_id = *i2;
const hwlmLiteral & l = lits[lit_id];
for (const LiteralIndex &lit_id : ids) {
const hwlmLiteral &l = lits[lit_id];
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
const u32 sz = verify_u32(l.s.size());
@ -439,10 +423,10 @@ aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
} // namespace
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
bool make_small, u32 hint,
const target_t &target,
pair<u8 *, size_t> link) {
aligned_unique_ptr<FDR>
teddyBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target,
pair<aligned_unique_ptr<u8>, size_t> &link) {
unique_ptr<TeddyEngineDescription> des;
if (hint == HINT_INVALID) {
des = chooseTeddyEngine(target, lits);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -49,7 +49,7 @@ struct hwlmLiteral;
ue2::aligned_unique_ptr<FDR>
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target,
std::pair<u8 *, size_t> link);
std::pair<aligned_unique_ptr<u8>, size_t> &link);
} // namespace ue2

View File

@ -51,8 +51,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
#define CHECK_HWLM_TERMINATE_MATCHING \
do { \
if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \
*a->groups = controlVal; \
if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \
return HWLM_TERMINATED; \
} \
} while (0);
@ -61,8 +60,7 @@ do { \
do { \
if (unlikely(ptr > tryFloodDetect)) { \
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \
&floodBackoff, &controlVal, \
iterBytes); \
&floodBackoff, &control, iterBytes); \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} while (0);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,7 +34,7 @@
#include <string>
#include <vector>
#define DEFAULT_MAX_HISTORY 60
#define DEFAULT_MAX_HISTORY 110
using namespace std;
@ -50,8 +50,11 @@ Grey::Grey(void) :
allowLitHaig(true),
allowLbr(true),
allowMcClellan(true),
allowSheng(true),
allowPuff(true),
allowLiteral(true),
allowRose(true),
allowViolet(true),
allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true),
allowAnchoredAcyclic(true),
@ -60,6 +63,13 @@ Grey::Grey(void) :
allowDecoratedLiteral(true),
allowNoodle(true),
fdrAllowTeddy(true),
violetAvoidSuffixes(true),
violetAvoidWeakInfixes(true),
violetDoubleCut(true),
violetExtractStrongLiterals(true),
violetLiteralChains(true),
violetDoubleCutLiteralLen(3),
violetEarlyCleanLiteralLen(6),
puffImproveHead(true),
castleExclusive(true),
mergeSEP(true), /* short exhaustible passthroughs */
@ -81,7 +91,6 @@ Grey::Grey(void) :
allowZombies(true),
floodAsPuffette(false),
nfaForceSize(0),
nfaForceShifts(0),
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
minHistoryAvailable(0), /* debugging only */
maxAnchoredRegion(63), /* for rose's atable to run over */
@ -119,6 +128,7 @@ Grey::Grey(void) :
equivalenceEnable(true),
allowSmallWrite(true), // McClellan dfas for small patterns
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
smallWriteLargestBuffer(70), // largest buffer that can be
// considered a small write
@ -126,6 +136,10 @@ Grey::Grey(void) :
// are given to rose &co
smallWriteLargestBufferBad(35),
limitSmallWriteOutfixSize(1048576), // 1 MB
smallWriteMaxPatterns(10000),
smallWriteMaxLiterals(10000),
allowTamarama(true), // Tamarama engine
tamaChunkSize(100),
dumpFlags(0),
limitPatternCount(8000000), // 8M patterns
limitPatternLength(16000), // 16K bytes
@ -202,8 +216,11 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowLitHaig);
G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowRose);
G_UPDATE(allowViolet);
G_UPDATE(allowExtendedNFA);
G_UPDATE(allowLimExNFA);
G_UPDATE(allowAnchoredAcyclic);
@ -212,6 +229,13 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowDecoratedLiteral);
G_UPDATE(allowNoodle);
G_UPDATE(fdrAllowTeddy);
G_UPDATE(violetAvoidSuffixes);
G_UPDATE(violetAvoidWeakInfixes);
G_UPDATE(violetDoubleCut);
G_UPDATE(violetExtractStrongLiterals);
G_UPDATE(violetLiteralChains);
G_UPDATE(violetDoubleCutLiteralLen);
G_UPDATE(violetEarlyCleanLiteralLen);
G_UPDATE(puffImproveHead);
G_UPDATE(castleExclusive);
G_UPDATE(mergeSEP);
@ -232,7 +256,6 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(allowZombies);
G_UPDATE(floodAsPuffette);
G_UPDATE(nfaForceSize);
G_UPDATE(nfaForceShifts);
G_UPDATE(highlanderSquash);
G_UPDATE(maxHistoryAvailable);
G_UPDATE(minHistoryAvailable);
@ -270,9 +293,14 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(miracleHistoryBonus);
G_UPDATE(equivalenceEnable);
G_UPDATE(allowSmallWrite);
G_UPDATE(allowSmallWriteSheng);
G_UPDATE(smallWriteLargestBuffer);
G_UPDATE(smallWriteLargestBufferBad);
G_UPDATE(limitSmallWriteOutfixSize);
G_UPDATE(smallWriteMaxPatterns);
G_UPDATE(smallWriteMaxLiterals);
G_UPDATE(allowTamarama);
G_UPDATE(tamaChunkSize);
G_UPDATE(limitPatternCount);
G_UPDATE(limitPatternLength);
G_UPDATE(limitGraphVertices);
@ -309,7 +337,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false;
g->allowMcClellan = false;
g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;
@ -325,7 +355,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false;
g->allowMcClellan = true;
g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;
@ -341,7 +373,9 @@ void applyGreyOverrides(Grey *g, const string &s) {
g->allowLitHaig = false;
g->allowMcClellan = true;
g->allowPuff = false;
g->allowLiteral = false;
g->allowRose = false;
g->allowViolet = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -50,8 +50,11 @@ struct Grey {
bool allowLitHaig;
bool allowLbr;
bool allowMcClellan;
bool allowSheng;
bool allowPuff;
bool allowLiteral;
bool allowRose;
bool allowViolet;
bool allowExtendedNFA;
bool allowLimExNFA;
bool allowAnchoredAcyclic;
@ -62,6 +65,14 @@ struct Grey {
bool allowNoodle;
bool fdrAllowTeddy;
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
bool violetAvoidWeakInfixes;
bool violetDoubleCut;
bool violetExtractStrongLiterals;
bool violetLiteralChains;
u32 violetDoubleCutLiteralLen;
u32 violetEarlyCleanLiteralLen;
bool puffImproveHead;
bool castleExclusive; // enable castle mutual exclusion analysis
@ -88,7 +99,6 @@ struct Grey {
bool floodAsPuffette;
u32 nfaForceSize;
u32 nfaForceShifts;
u32 maxHistoryAvailable;
u32 minHistoryAvailable;
@ -140,9 +150,16 @@ struct Grey {
// SmallWrite engine
bool allowSmallWrite;
bool allowSmallWriteSheng;
u32 smallWriteLargestBuffer; // largest buffer that can be small write
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
// Tamarama engine
bool allowTamarama;
u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama
enum DumpFlags {
DUMP_NONE = 0,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -219,7 +219,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
: get_current_target();
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, somPrecision);
NG ng(cc, elements, somPrecision);
try {
for (unsigned int i = 0; i < elements; i++) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -98,6 +98,12 @@ extern "C"
* The library was unable to allocate temporary storage used during
* compilation time.
*
* - *Allocator returned misaligned memory*
*
* The memory allocator (either malloc() or the allocator set with @ref
* hs_set_allocator()) did not correctly return memory suitably aligned
* for the largest representable data type on this platform.
*
* - *Internal error*
*
* An unexpected error occurred: if this error is reported, please contact

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -37,6 +37,7 @@
#include "fdr/fdr.h"
#include "nfa/accel.h"
#include "nfa/shufti.h"
#include "nfa/truffle.h"
#include "nfa/vermicelli.h"
#include <string.h>
@ -64,8 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
case ACCEL_SHUFTI:
DEBUG_PRINTF("single shufti\n");
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
case ACCEL_TRUFFLE:
DEBUG_PRINTF("truffle\n");
return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
default:
/* no acceleration, fall through and return current ptr */
DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
assert(aux->accel_type == ACCEL_NONE);
return ptr;
}
}

View File

@ -35,9 +35,11 @@
#include "hwlm_internal.h"
#include "noodle_engine.h"
#include "noodle_build.h"
#include "scratch.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "nfa/shufticompile.h"
#include "nfa/trufflecompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
@ -62,6 +64,28 @@ namespace ue2 {
static const unsigned int MAX_ACCEL_OFFSET = 16;
static const unsigned int MAX_SHUFTI_WIDTH = 240;
static
size_t mask_overhang(const hwlmLiteral &lit) {
    /* Number of mask positions that extend before the start of the literal
     * string. Leading zero mask bytes impose no constraint, so they are
     * discounted before comparing against the literal length. */
    assert(lit.msk.size() <= HWLM_MASKLEN);
    assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);

    size_t effective = lit.msk.size();
    size_t idx = 0;
    while (idx < lit.msk.size() && !lit.msk[idx]) {
        effective--;
        idx++;
    }

    const size_t lit_len = lit.s.length();
    if (lit_len >= effective) {
        return 0; /* mask fits entirely within the literal */
    }

    /* only short literals should be able to have a mask which overhangs */
    assert(lit_len < MAX_ACCEL_OFFSET);
    return effective - lit_len;
}
static
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front();
@ -167,7 +191,8 @@ bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
}
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
break;
}
}
@ -288,8 +313,8 @@ bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
}
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
break;
assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
}
}
}
@ -346,6 +371,25 @@ void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
}
}
static
bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
u32 max_offset) {
for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
unsigned char c = lit.s[i];
if (lit.nocase) {
if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
return true;
}
} else {
if (cr.test(c)) {
return true;
}
}
}
return false;
}
static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
hwlm_group_t expected_groups, AccelAux *aux) {
@ -363,29 +407,45 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
return;
}
/* look for shufti/truffle */
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) {
continue;
}
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
unsigned char c = lit.s[i];
u32 overhang = mask_overhang(lit);
for (u32 i = 0; i < overhang; i++) {
/* this offset overhangs the start of the real literal; look at the
* msk/cmp */
for (u32 j = 0; j < N_CHARS; j++) {
if ((j & lit.msk[i]) == lit.cmp[i]) {
reach[i].set(j);
}
}
}
for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
CharReach &reach_i = reach[i];
u32 i_effective = i - overhang;
if (litGuardedByCharReach(reach_i, lit, i_effective)) {
continue;
}
unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
: lit.s.back();
if (lit.nocase) {
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
reach[i].set(mytoupper(c));
reach[i].set(mytolower(c));
reach_i.set(mytoupper(c));
reach_i.set(mytolower(c));
} else {
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
reach[i].set(c);
reach_i.set(c);
}
}
}
u32 min_count = ~0U;
u32 min_offset = ~0U;
for (u32 i = 0; i < min_len; i++) {
for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
size_t count = reach[i].count();
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
describeClass(reach[i]).c_str(), count);
@ -394,10 +454,9 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
min_offset = i;
}
}
assert(min_offset <= min_len);
if (min_count > MAX_SHUFTI_WIDTH) {
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
return;
}
@ -410,7 +469,11 @@ void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
return;
}
DEBUG_PRINTF("fail\n");
truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2);
DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->truffle.accel_type = ACCEL_TRUFFLE;
aux->truffle.offset = verify_u8(min_offset);
}
static
@ -466,6 +529,10 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
stream_control->history_max);
return false;
}
if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) {
assert(0);
return false;
}
}
if (!lits.front().msk.empty()) {

View File

@ -37,7 +37,6 @@
#include "util/compare.h"
#include "util/masked_move.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include <ctype.h>
#include <stdbool.h>

View File

@ -115,7 +115,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
v = and128(v, caseMask);
}
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
// mask out where we can't match
u32 mask = (0xFFFF >> (16 - l));
@ -142,7 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
v = and128(v, caseMask);
}
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
// mask out where we can't match
u32 buf_off = start - offset;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -26,18 +26,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mcclellancompile_accel.h"
#include "mcclellancompile_util.h"
#include "accel_dfa_build_strat.h"
#include "accel.h"
#include "grey.h"
#include "nfagraph/ng_limex_accel.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/verify_types.h"
#include <vector>
#include <sstream>
#include <vector>
#define PATHS_LIMIT 500
@ -46,14 +48,13 @@ using namespace std;
namespace ue2 {
namespace {
struct path {
vector<CharReach> reach;
dstate_id_t dest = DEAD_STATE;
explicit path(dstate_id_t base) : dest(base) {}
explicit path(dstate_id_t base) : dest(base) {
}
};
};
}
static UNUSED
string describeClasses(const vector<CharReach> &v) {
@ -85,8 +86,8 @@ bool is_useful_path(const vector<path> &good, const path &p) {
goto next;
}
}
DEBUG_PRINTF("better: [%s] -> %u\n",
describeClasses(g.reach).c_str(), g.dest);
DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
g.dest);
return false;
next:;
@ -106,8 +107,7 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
static
void extend(const raw_dfa &rdfa, const path &p,
map<u32, vector<path> > &all,
vector<path> &out) {
map<u32, vector<path>> &all, vector<path> &out) {
dstate s = rdfa.states[p.dest];
if (!p.reach.empty() && p.reach.back().none()) {
@ -147,17 +147,17 @@ void extend(const raw_dfa &rdfa, const path &p,
}
DEBUG_PRINTF("----good: [%s] -> %u\n",
describeClasses(pp.reach).c_str(), pp.dest);
describeClasses(pp.reach).c_str(), pp.dest);
all[e.first].push_back(pp);
out.push_back(pp);
}
}
static
vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
u32 len) {
vector<path> paths{ path(base) };
map<u32, vector<path> > all;
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
dstate_id_t base, u32 len) {
vector<path> paths{path(base)};
map<u32, vector<path>> all;
all[base].push_back(path(base));
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
vector<path> next_gen;
@ -170,7 +170,7 @@ vector<vector<CharReach> > generate_paths(const raw_dfa &rdfa, dstate_id_t base,
dump_paths(paths);
vector<vector<CharReach> > rv;
vector<vector<CharReach>> rv;
for (auto &p : paths) {
rv.push_back(move(p.reach));
}
@ -181,16 +181,58 @@ static
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
u32 max_allowed_accel_offset) {
DEBUG_PRINTF("looking for accel for %hu\n", base);
vector<vector<CharReach> > paths = generate_paths(rdfa, base,
max_allowed_accel_offset + 1);
vector<vector<CharReach>> paths =
generate_paths(rdfa, base, max_allowed_accel_offset + 1);
AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
return as;
}
static UNUSED
bool better(const AccelScheme &a, const AccelScheme &b) {
    /* Ordering between acceleration schemes: a double-byte scheme beats a
     * single-byte one; among single-byte schemes, narrower reach wins. */
    const bool a_double = !a.double_byte.empty();
    const bool b_double = !b.double_byte.empty();

    if (a_double && !b_double) {
        return true;
    }
    if (b_double) {
        return false;
    }

    return a.cr.count() < b.cr.count();
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
    /* Invert the char -> symbol alphabet remapping: produce one CharReach per
     * symbol describing which input bytes map to it. */
    vector<CharReach> symbol_reach(rdfa.alpha_size - 1); /* TOP not required */

    for (u32 c = 0; c < N_CHARS; c++) {
        symbol_reach.at(rdfa.alpha_remap[c]).set(c);
    }

    return symbol_reach;
}
static
bool double_byte_ok(const AccelScheme &info) {
    /* A double-byte scheme is worth using only when it actually has literal
     * byte pairs, and its unconditional reach component is both small
     * (<= 2 chars) and strictly narrower than the pair set.
     *
     * Note: the original expression tested !info.double_byte.empty() twice;
     * the redundant second test has been removed. */
    return !info.double_byte.empty() &&
           info.double_cr.count() < info.double_byte.size() &&
           info.double_cr.count() <= 2;
}
static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
    /* True if state s has a transition back to itself on any symbol other
     * than TOP. */
    const u16 top_remap = raw.alpha_remap[TOP];
    const auto &next = raw.states[s].next;

    for (u32 sym = 0; sym < next.size(); sym++) {
        if (sym == top_remap) {
            continue; /* TOP transitions do not count as self loops */
        }
        if (next[sym] == s) {
            return true;
        }
    }

    return false;
}
static
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
const CharReach &escape) {
const CharReach &escape) {
set<u16> rv;
CharReach nonexit = ~escape;
for (auto i = nonexit.find_first(); i != CharReach::npos;
@ -201,9 +243,58 @@ vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
return vector<u16>(rv.begin(), rv.end());
}
/* Find the "start dot star" (SDS) state of the DFA, or a proxy for it: the
 * floating start state if one exists, otherwise the nearest reachable state
 * with a (non-TOP) self loop. Returns DEAD_STATE if no suitable state can be
 * found. The SDS state is where a scan is expected to spend most of its time,
 * so it receives special treatment during acceleration analysis. */
static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
    if (raw.start_floating != DEAD_STATE) {
        DEBUG_PRINTF("has floating start\n");
        return raw.start_floating;
    }

    DEBUG_PRINTF("looking for SDS proxy\n");

    dstate_id_t s = raw.start_anchored;

    if (has_self_loop(s, raw)) {
        return s;
    }

    u16 top_remap = raw.alpha_remap[TOP];

    /* walk outward from the anchored start, tracking visited states so the
     * search terminates even on cyclic graphs */
    ue2::unordered_set<dstate_id_t> seen;
    while (true) {
        seen.insert(s);
        DEBUG_PRINTF("basis %hu\n", s);

        /* check if we are connected to a state with a self loop */
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t t = raw.states[s].next[i];
            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
                return t;
            }
        }

        /* find a neighbour to use as a basis for looking for the sds proxy */
        dstate_id_t t = DEAD_STATE;
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t tt = raw.states[s].next[i];
            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
                t = tt;
                break;
            }
        }

        if (t == DEAD_STATE) {
            /* we were unable to find a state to use as a SDS proxy */
            return DEAD_STATE;
        }

        s = t;
    }
}
static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
const AccelScheme &ei) {
const AccelScheme &ei) {
DEBUG_PRINTF("looking for region around %hu\n", base);
set<dstate_id_t> region = {base};
@ -236,98 +327,10 @@ set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
return region;
}
static
bool better(const AccelScheme &a, const AccelScheme &b) {
if (!a.double_byte.empty() && b.double_byte.empty()) {
return true;
}
if (!b.double_byte.empty()) {
return false;
}
return a.cr.count() < b.cr.count();
}
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
for (u32 i = 0; i < N_CHARS; i++) {
rv.at(rdfa.alpha_remap[i]).set(i);
}
return rv;
}
map<dstate_id_t, AccelScheme> populateAccelerationInfo(const raw_dfa &rdfa,
const dfa_build_strat &strat,
const Grey &grey) {
map<dstate_id_t, AccelScheme> rv;
if (!grey.accelerateDFA) {
return rv;
}
dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
DEBUG_PRINTF("sds %hu\n", sds_proxy);
for (size_t i = 0; i < rdfa.states.size(); i++) {
if (i == DEAD_STATE) {
continue;
}
/* Note on report acceleration states: While we can't accelerate while we
* are spamming out callbacks, the QR code paths don't raise reports
* during scanning so they can accelerate report states. */
if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
continue;
}
size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
: ACCEL_DFA_MAX_STOP_CHAR;
DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);
AccelScheme ei = strat.find_escape_strings(i);
if (ei.cr.count() > single_limit) {
DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
ei.cr.count());
continue;
}
DEBUG_PRINTF("state %zu should be accelerable %zu\n",
i, ei.cr.count());
rv[i] = ei;
}
/* provide accleration states to states in the region of sds */
if (contains(rv, sds_proxy)) {
AccelScheme sds_ei = rv[sds_proxy];
sds_ei.double_byte.clear(); /* region based on single byte scheme
* may differ from double byte */
DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
sds_ei.cr.count());
auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
for (auto s : sds_region) {
if (!contains(rv, s) || better(sds_ei, rv[s])) {
rv[s] = sds_ei;
}
}
}
return rv;
}
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}
AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx,
u32 max_allowed_accel_offset) {
AccelScheme
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
AccelScheme rv;
const raw_dfa &rdfa = get_raw();
rv.cr.clear();
rv.offset = 0;
const dstate &raw = rdfa.states[this_idx];
@ -354,7 +357,7 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
DEBUG_PRINTF("leads to report\n");
outs2_broken = true; /* cannot accelerate over reports */
outs2_broken = true; /* cannot accelerate over reports */
continue;
}
succs[next_id] |= cr_i;
@ -402,14 +405,12 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
DEBUG_PRINTF("broken %d\n", outs2_broken);
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind)
&& this_idx == rdfa.start_floating
&& this_idx != DEAD_STATE) {
if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
auto offset = look_for_offset_accel(rdfa, this_idx,
max_allowed_accel_offset);
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(),
rv.cr.count());
auto offset =
look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
DEBUG_PRINTF("using offset accel\n");
rv = offset;
@ -419,4 +420,172 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx
return rv;
}
/* Populate the AccelAux structure for an accelerable state from its
 * AccelScheme. Tries acceleration schemes in decreasing order of quality:
 * double vermicelli (exact, caseless, masked), double shufti, then the
 * single-byte schemes (red tape for dead ends, vermicelli, caseless
 * vermicelli, shufti, and finally truffle as the catch-all). Each branch
 * returns as soon as a scheme is selected. */
void
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
                                  const AccelScheme &info,
                                  void *accel_out) {
    AccelAux *accel = (AccelAux *)accel_out;

    DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
                 info.double_offset);
    accel->generic.offset = verify_u8(info.offset);

    /* single byte pair with no additional reach: plain double vermicelli */
    if (double_byte_ok(info) && info.double_cr.none() &&
        info.double_byte.size() == 1) {
        accel->accel_type = ACCEL_DVERM;
        accel->dverm.c1 = info.double_byte.begin()->first;
        accel->dverm.c2 = info.double_byte.begin()->second;
        accel->dverm.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
        return;
    }

    /* 2 or 4 pairs may collapse to a single caseless pair, or to a masked
     * pair */
    if (double_byte_ok(info) && info.double_cr.none() &&
        (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
        bool ok = true;

        assert(!info.double_byte.empty());
        u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
        u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

        /* caseless only works if every pair is a case variant of the first */
        for (const pair<u8, u8> &p : info.double_byte) {
            if ((p.first & CASE_CLEAR) != firstC ||
                (p.second & CASE_CLEAR) != secondC) {
                ok = false;
                break;
            }
        }

        if (ok) {
            accel->accel_type = ACCEL_DVERM_NOCASE;
            accel->dverm.c1 = firstC;
            accel->dverm.c2 = secondC;
            accel->dverm.offset = verify_u8(info.double_offset);
            DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
            return;
        }

        /* otherwise, try to cover the pairs with a pair of byte masks */
        u8 m1;
        u8 m2;
        if (buildDvermMask(info.double_byte, &m1, &m2)) {
            accel->accel_type = ACCEL_DVERM_MASKED;
            accel->dverm.offset = verify_u8(info.double_offset);
            accel->dverm.c1 = info.double_byte.begin()->first & m1;
            accel->dverm.c2 = info.double_byte.begin()->second & m2;
            accel->dverm.m1 = m1;
            accel->dverm.m2 = m2;
            DEBUG_PRINTF(
                "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
                accel->dverm.c1, accel->dverm.c2);
            return;
        }
    }

    /* general double-byte case: double shufti, if masks can be built */
    if (double_byte_ok(info) &&
        shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
                               &accel->dshufti.lo1, &accel->dshufti.hi1,
                               &accel->dshufti.lo2, &accel->dshufti.hi2)) {
        accel->accel_type = ACCEL_DSHUFTI;
        accel->dshufti.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
        return;
    }

    /* empty reach: no character escapes this state */
    if (info.cr.none()) {
        accel->accel_type = ACCEL_RED_TAPE;
        DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
                     " from which there is no escape\n",
                     this_idx);
        return;
    }

    /* exactly one stop character: vermicelli */
    if (info.cr.count() == 1) {
        accel->accel_type = ACCEL_VERM;
        accel->verm.c = info.cr.find_first();
        DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
        return;
    }

    /* two characters differing only in case: caseless vermicelli */
    if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
        accel->accel_type = ACCEL_VERM_NOCASE;
        accel->verm.c = info.cr.find_first() & CASE_CLEAR;
        DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
        return;
    }

    /* too many stop chars for acceleration to pay off */
    if (info.cr.count() > max_floating_stop_char()) {
        accel->accel_type = ACCEL_NONE;
        DEBUG_PRINTF("state %hu is too broad\n", this_idx);
        return;
    }

    accel->accel_type = ACCEL_SHUFTI;
    if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) {
        DEBUG_PRINTF("state %hu is shufti\n", this_idx);
        return;
    }

    /* shufti mask construction failed; truffle can represent any reach */
    assert(!info.cr.none());
    accel->accel_type = ACCEL_TRUFFLE;
    truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
    DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}
/* Determine which DFA states should be accelerated and return a map from
 * state id to the chosen AccelScheme. Returns an empty map when DFA
 * acceleration is disabled in the grey box. States near the SDS proxy may
 * inherit its (single-byte) scheme if that is an improvement. */
map<dstate_id_t, AccelScheme>
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
    map<dstate_id_t, AccelScheme> rv;
    raw_dfa &rdfa = get_raw();
    if (!grey.accelerateDFA) {
        return rv;
    }

    /* the SDS state (or its proxy) dominates scan time, so it is allowed a
     * wider stop-character budget below */
    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
    DEBUG_PRINTF("sds %hu\n", sds_proxy);

    for (size_t i = 0; i < rdfa.states.size(); i++) {
        if (i == DEAD_STATE) {
            continue;
        }

        /* Note on report acceleration states: While we can't accelerate while
         * we are spamming out callbacks, the QR code paths don't raise
         * reports during scanning so they can accelerate report states. */
        if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
            continue;
        }

        size_t single_limit =
            i == sds_proxy ? max_floating_stop_char() : max_stop_char();
        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);

        AccelScheme ei = find_escape_strings(i);
        if (ei.cr.count() > single_limit) {
            DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
                         ei.cr.count());
            continue;
        }

        DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());

        rv[i] = ei;
    }

    /* provide accleration states to states in the region of sds */
    if (contains(rv, sds_proxy)) {
        AccelScheme sds_ei = rv[sds_proxy];
        sds_ei.double_byte.clear(); /* region based on single byte scheme
                                     * may differ from double byte */
        DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
                     sds_ei.cr.count());
        auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
        for (auto s : sds_region) {
            if (!contains(rv, s) || better(sds_ei, rv[s])) {
                rv[s] = sds_ei;
            }
        }
    }

    return rv;
}
};

60
src/nfa/accel_dfa_build_strat.h Executable file
View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ACCEL_DFA_BUILD_STRAT_H
#define ACCEL_DFA_BUILD_STRAT_H
#include "rdfa.h"
#include "dfa_build_strat.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
#include <map>
namespace ue2 {
class ReportManager;
struct Grey;
/* Build strategy for DFAs that support state acceleration. Extends
 * dfa_build_strat with hooks for finding per-state escape strings and
 * constructing AccelAux structures; limits on acceleration (offsets, stop
 * character budgets) are supplied by concrete subclasses. */
class accel_dfa_build_strat : public dfa_build_strat {
public:
    explicit accel_dfa_build_strat(const ReportManager &rm_in)
        : dfa_build_strat(rm_in) {}
    /* Find the escape (stop) character scheme for the given state. */
    virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const;
    /* Size in bytes of one acceleration structure for this implementation. */
    virtual size_t accelSize(void) const = 0;
    /* Maximum lookahead offset permitted for offset-based acceleration. */
    virtual u32 max_allowed_offset_accel() const = 0;
    /* Maximum number of stop characters for an ordinary state. */
    virtual u32 max_stop_char() const = 0;
    /* Maximum number of stop characters for the floating (SDS) state. */
    virtual u32 max_floating_stop_char() const = 0;
    /* Fill in the AccelAux at accel_out for the given state and scheme. */
    virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
                            void *accel_out);
    /* Map each accelerable state to its chosen acceleration scheme. */
    virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey);
};
} // namespace ue2
#endif // ACCEL_DFA_BUILD_STRAT_H

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -37,30 +37,26 @@
/** \brief The type for an NFA callback.
*
* This is a function that takes as arguments the current offset where the
* match occurs, the id of the match and the context pointer that was passed
* into the NFA API function that executed the NFA.
* This is a function that takes as arguments the current start and end offsets
* where the match occurs, the id of the match and the context pointer that was
* passed into the NFA API function that executed the NFA.
*
* The offset where the match occurs will be the offset after the character
* that caused the match. Thus, if we have a buffer containing 'abc', then a
* pattern that matches an empty string will have an offset of 0, a pattern
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc'
* will have an offset of 3, which will be a value that is 'beyond' the size of
* the buffer. That is, if we have n characters in the buffer, there are n+1
* different potential offsets for matches.
* The start offset is the "start of match" (SOM) offset for the match. It is
* only provided by engines that natively support SOM tracking (e.g. Gough).
*
* The end offset will be the offset after the character that caused the match.
* Thus, if we have a buffer containing 'abc', then a pattern that matches an
* empty string will have an offset of 0, a pattern that matches 'a' will have
* an offset of 1, and a pattern that matches 'abc' will have an offset of 3,
* which will be a value that is 'beyond' the size of the buffer. That is, if
* we have n characters in the buffer, there are n+1 different potential
* offsets for matches.
*
* This function should return an int - currently the possible return values
* are 0, which means 'stop running the engine' or non-zero, which means
* 'continue matching'.
*/
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context);
/** \brief The type for an NFA callback which also tracks start of match.
*
* see \ref NfaCallback
*/
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
void *context);
typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);
/**
* standard \ref NfaCallback return value indicating that engine execution

View File

@ -98,7 +98,7 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q,
if (match == REPEAT_MATCH) {
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
subIdx, sub->report);
if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) {
if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state,
i = mmbit_iterate(matching, c->numRepeats, i)) {
const struct SubCastle *sub = getSubCastle(c, i);
DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) {
if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
DEBUG_PRINTF("caller told us to halt\n");
return MO_HALT_MATCHING;
}
@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
return castleInAccept(c, q, report, q_cur_offset(q));
}
/* Return 1 if any active subcastle is currently in an accept state at the
 * queue's current offset, 0 otherwise. Checks the exclusive groups (at most
 * one active repeat per group, indexed via the group iterator) and, unless
 * the castle is purely exclusive, the shared active-repeat multibit. */
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
    assert(n && q);
    assert(n->type == CASTLE_NFA_0);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    const u64a offset = q_cur_offset(q);
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (c->exclusive) {
        /* one active subcastle per exclusive group; the group iterator
         * multibit tells us which groups have an active member */
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            /* the active subcastle index is stored packed per group */
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            const struct SubCastle *sub = getSubCastle(c, activeIdx);
            if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
                return 1;
            }
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        /* non-exclusive repeats are tracked in a plain multibit of active
         * subcastles */
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            const struct SubCastle *sub = getSubCastle(c, i);
            if (subCastleInAccept(c, q, sub->report, offset, i)) {
                return 1;
            }
        }
    }

    return 0;
}
char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
assert(n && q);
assert(n->type == CASTLE_NFA_0);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -48,7 +48,8 @@
namespace ue2 {
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) {
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *,
UNUSED const std::string &base) {
// No GraphViz output for Castles.
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,12 +32,14 @@
#if defined(DUMP_SUPPORT)
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
} // namespace ue2

40
src/nfa/dfa_build_strat.cpp Executable file
View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dfa_build_strat.h"
namespace ue2 {
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
raw_report_info::~raw_report_info() {}
dfa_build_strat::~dfa_build_strat() {}
raw_dfa::~raw_dfa() {}
} // namespace ue2

68
src/nfa/dfa_build_strat.h Normal file
View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DFA_BUILD_STRAT_H
#define DFA_BUILD_STRAT_H

#include "rdfa.h"
#include "ue2common.h"

#include <memory>
#include <vector>

struct NFA;

namespace ue2 {

class ReportManager;

/**
 * Abstract container for the report (match id) lists gathered during a DFA
 * build. Concrete subclasses own the list storage; the destructor is defined
 * out-of-line to anchor the vtable.
 */
struct raw_report_info {
    virtual ~raw_report_info();
    virtual u32 getReportListSize() const = 0; /* in bytes */
    virtual size_t size() const = 0; /* number of lists */
    /**
     * Writes the report lists into the built NFA at base_offset, recording
     * each list's offset in \a ro.
     */
    virtual void fillReportLists(NFA *n, size_t base_offset,
                                 std::vector<u32> &ro /* out */) const = 0;
};

/**
 * Base strategy class used when compiling a raw_dfa into an NFA engine.
 * Subclasses provide access to the dfa being built and the report-gathering
 * behaviour; the ReportManager reference is held for their use.
 */
class dfa_build_strat {
public:
    explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
    virtual ~dfa_build_strat();
    /** Returns the raw_dfa being compiled. */
    virtual raw_dfa &get_raw() const = 0;
    /**
     * Collects report lists for normal and EOD accepts.
     * NOTE(review): the exact semantics of \a isSingleReport and
     * \a arbReport are defined by the concrete implementations — confirm
     * there before relying on them.
     */
    virtual std::unique_ptr<raw_report_info> gatherReports(
                                  std::vector<u32> &reports /* out */,
                                  std::vector<u32> &reports_eod /* out */,
                                  u8 *isSingleReport /* out */,
                                  ReportID *arbReport /* out */) const = 0;
protected:
    const ReportManager &rm; // report manager for this compile
};

} // namespace ue2

#endif // DFA_BUILD_STRAT_H

View File

@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
}
static really_inline
char doReports(SomNfaCallback cb, void *ctxt, const struct mcclellan *m,
char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m,
const struct gough_som_info *som, u16 s, u64a loc,
char eod, u16 * const cached_accept_state,
u32 * const cached_accept_id, u32 * const cached_accept_som) {
@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
static really_inline
char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
u16 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **c_final,
NfaCallback cb, void *ctxt, const u8 **c_final,
enum MatchMode mode) {
assert(ISALIGNED_N(state, 2));
@ -461,7 +461,7 @@ with_accel:
static really_inline
char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
u8 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **c_final,
NfaCallback cb, void *ctxt, const u8 **c_final,
enum MatchMode mode) {
u8 s = *state;
const u8 *c = buf, *c_end = buf + len;
@ -595,7 +595,7 @@ with_accel:
static never_inline
char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
u8 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **final_point,
NfaCallback cb, void *ctxt, const u8 **final_point,
enum MatchMode mode) {
return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
mode);
@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
static never_inline
char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
u16 *state, const u8 *buf, size_t len, u64a offAdj,
SomNfaCallback cb, void *ctxt, const u8 **final_point,
NfaCallback cb, void *ctxt, const u8 **final_point,
enum MatchMode mode) {
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
mode);
@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) {
static really_inline
char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, SomNfaCallback cb, void *context,
const u8 *hend, NfaCallback cb, void *context,
struct mq *q, s64a end, enum MatchMode mode) {
DEBUG_PRINTF("enter\n");
struct gough_som_info *som = getSomInfo(q->state);
@ -755,7 +755,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
static really_inline
char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, SomNfaCallback cb, void *context,
const u8 *hend, NfaCallback cb, void *context,
struct mq *q, s64a end, enum MatchMode mode) {
struct gough_som_info *som = getSomInfo(q->state);
assert(n->type == GOUGH_NFA_16);
@ -887,7 +887,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength;
@ -899,7 +899,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength;
@ -911,7 +911,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength;
@ -923,7 +923,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength;
@ -935,7 +935,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_8);
const u8 *hend = q->history + q->hlength;
@ -952,7 +952,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == GOUGH_NFA_16);
const u8 *hend = q->history + q->hlength;
@ -994,7 +994,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *ctxt = q->context;
u8 s = *(u8 *)q->state;
u64a offset = q_cur_offset(q);
@ -1016,7 +1016,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
SomNfaCallback cb = q->som_cb;
NfaCallback cb = q->cb;
void *ctxt = q->context;
u16 s = *(u16 *)q->state;
const struct mstate_aux *aux = get_aux(m, s);
@ -1048,10 +1048,18 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
return nfaExecMcClellan16_inAccept(n, report, q);
}
// Gough engines reuse McClellan's accept-state layout, so the inAnyAccept
// queries simply forward to the corresponding McClellan implementations.
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
    return nfaExecMcClellan8_inAnyAccept(n, q);
}

char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
    return nfaExecMcClellan16_inAnyAccept(n, q);
}
static
char goughCheckEOD(const struct NFA *nfa, u16 s,
const struct gough_som_info *som,
u64a offset, SomNfaCallback cb, void *ctxt) {
u64a offset, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mstate_aux *aux = get_aux(m, s);
@ -1062,21 +1070,19 @@ char goughCheckEOD(const struct NFA *nfa, u16 s,
}
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
UNUSED NfaCallback callback,
SomNfaCallback som_callback, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback,
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback,
context);
}
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
UNUSED NfaCallback callback,
SomNfaCallback som_callback, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
assert(ISALIGNED_N(state, 8));
const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback,
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback,
context);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,13 +39,13 @@ struct mq;
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);

View File

@ -79,9 +79,9 @@ namespace {
class gough_build_strat : public mcclellan_build_strat {
public:
gough_build_strat(
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm,
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
const map<dstate_id_t, gough_accel_state_info> &accel_info)
: mcclellan_build_strat(r, rm), rdfa(r), gg(g),
: mcclellan_build_strat(r, rm_in), rdfa(r), gg(g),
accel_gough_info(accel_info) {}
unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
vector<u32> &reports_eod /* out */,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f,
fprintf(f, "\n");
}
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == GOUGH_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
dumpTextReverse(nfa, f);
}
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == GOUGH_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -33,12 +33,16 @@
#include "ue2common.h"
#include <string>
struct NFA;
namespace ue2 {
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file);
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file);
void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file,
const std::string &base);
void nfaExecGough8_dumpText(const NFA *nfa, FILE *file);
void nfaExecGough16_dumpText(const NFA *nfa, FILE *file);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -293,7 +293,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
}
DEBUG_PRINTF("firing match at %llu\n", i);
if (cb(i, l->report, ctx) == MO_HALT_MATCHING) {
if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
const struct lbr_common *l = getImplNfa(nfa);
u64a offset = q_cur_offset(q);
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
q->cb(offset, l->report, q->context);
q->cb(0, offset, l->report, q->context);
return 0;
}
@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
return lbrInAccept(l, lstate, q->streamState, offset, report);
}
// Reports whether the LBR is currently in any accept state. An LBR engine
// carries a single report id (l->report), so this simply delegates to the
// engine's _inAccept query for that report.
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
    assert(nfa && q);
    assert(isLbrType(nfa->type));
    DEBUG_PRINTF("entry\n");

    const struct lbr_common *l = getImplNfa(nfa);
    return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
}
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
struct mq *q) {
assert(nfa && q);
@ -206,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
if (q->report_current) {
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
int rv = q->cb(q_cur_offset(q), l->report, q->context);
int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
q->report_current = 0;
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -49,23 +49,28 @@
namespace ue2 {
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) {
void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f,
UNUSED const std::string &base) {
// No impl
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,16 +32,22 @@
#ifdef DUMP_SUPPORT
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file);

View File

@ -30,6 +30,7 @@
#define LIMEX_H
#ifdef __cplusplus
#include <string>
extern "C"
{
#endif
@ -40,7 +41,8 @@ extern "C"
#define GENERATE_NFA_DUMP_DECL(gf_name) \
} /* extern "C" */ \
namespace ue2 { \
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \
void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \
const std::string &base); \
void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \
} /* namespace ue2 */ \
extern "C" {
@ -52,14 +54,14 @@ extern "C"
#define GENERATE_NFA_DECL(gf_name) \
char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
const char *streamState, u64a offset, \
NfaCallback callback, SomNfaCallback som_cb, \
void *context); \
NfaCallback callback, void *context); \
char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
char gf_name##_inAccept(const struct NFA *n, ReportID report, \
struct mq *q); \
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
void *state, u8 key); \
@ -74,41 +76,11 @@ extern "C"
struct mq *q, s64a loc); \
GENERATE_NFA_DUMP_DECL(gf_name)
GENERATE_NFA_DECL(nfaExecLimEx32_1)
GENERATE_NFA_DECL(nfaExecLimEx32_2)
GENERATE_NFA_DECL(nfaExecLimEx32_3)
GENERATE_NFA_DECL(nfaExecLimEx32_4)
GENERATE_NFA_DECL(nfaExecLimEx32_5)
GENERATE_NFA_DECL(nfaExecLimEx32_6)
GENERATE_NFA_DECL(nfaExecLimEx32_7)
GENERATE_NFA_DECL(nfaExecLimEx128_1)
GENERATE_NFA_DECL(nfaExecLimEx128_2)
GENERATE_NFA_DECL(nfaExecLimEx128_3)
GENERATE_NFA_DECL(nfaExecLimEx128_4)
GENERATE_NFA_DECL(nfaExecLimEx128_5)
GENERATE_NFA_DECL(nfaExecLimEx128_6)
GENERATE_NFA_DECL(nfaExecLimEx128_7)
GENERATE_NFA_DECL(nfaExecLimEx256_1)
GENERATE_NFA_DECL(nfaExecLimEx256_2)
GENERATE_NFA_DECL(nfaExecLimEx256_3)
GENERATE_NFA_DECL(nfaExecLimEx256_4)
GENERATE_NFA_DECL(nfaExecLimEx256_5)
GENERATE_NFA_DECL(nfaExecLimEx256_6)
GENERATE_NFA_DECL(nfaExecLimEx256_7)
GENERATE_NFA_DECL(nfaExecLimEx384_1)
GENERATE_NFA_DECL(nfaExecLimEx384_2)
GENERATE_NFA_DECL(nfaExecLimEx384_3)
GENERATE_NFA_DECL(nfaExecLimEx384_4)
GENERATE_NFA_DECL(nfaExecLimEx384_5)
GENERATE_NFA_DECL(nfaExecLimEx384_6)
GENERATE_NFA_DECL(nfaExecLimEx384_7)
GENERATE_NFA_DECL(nfaExecLimEx512_1)
GENERATE_NFA_DECL(nfaExecLimEx512_2)
GENERATE_NFA_DECL(nfaExecLimEx512_3)
GENERATE_NFA_DECL(nfaExecLimEx512_4)
GENERATE_NFA_DECL(nfaExecLimEx512_5)
GENERATE_NFA_DECL(nfaExecLimEx512_6)
GENERATE_NFA_DECL(nfaExecLimEx512_7)
GENERATE_NFA_DECL(nfaExecLimEx32)
GENERATE_NFA_DECL(nfaExecLimEx128)
GENERATE_NFA_DECL(nfaExecLimEx256)
GENERATE_NFA_DECL(nfaExecLimEx384)
GENERATE_NFA_DECL(nfaExecLimEx512)
#undef GENERATE_NFA_DECL
#undef GENERATE_NFA_DUMP_DECL

View File

@ -35,6 +35,7 @@
#include "accel.h"
#include "limex_internal.h"
#include "limex_limits.h"
#include "limex_shuffle.h"
#include "nfa_internal.h"
#include "shufti.h"
#include "truffle.h"
@ -44,10 +45,7 @@
#include "ue2common.h"
#include "vermicelli.h"
#include "util/bitutils.h"
#include "util/shuffle.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include "util/shuffle_ssse3.h"
static really_inline
size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
@ -80,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
size_t end) {
u32 idx = shuffleDynamic32(s, accel);
u32 idx = packedExtract32(s, accel);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
@ -92,7 +90,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
m128 accelPerm = limex->accelPermute;
m128 accelComp = limex->accelCompare;
idx = shufflePshufb128(s, accelPerm, accelComp);
idx = packedExtract128(s, accelPerm, accelComp);
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
@ -105,17 +103,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
m256 accelPerm = limex->accelPermute;
m256 accelComp = limex->accelCompare;
#if !defined(__AVX2__)
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
#else
// TODO: learn you some avx2 shuffles for great good
u32 idx1 = shufflePshufb128(movdq_lo(s), movdq_lo(accelPerm),
movdq_lo(accelComp));
u32 idx2 = shufflePshufb128(movdq_hi(s), movdq_hi(accelPerm),
movdq_hi(accelComp));
#endif
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
#else
idx = packedExtract256(s, accelPerm, accelComp);
#endif
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}
@ -127,9 +121,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
m384 accelPerm = limex->accelPermute;
m384 accelComp = limex->accelCompare;
u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = shufflePshufb128(s.mid, accelPerm.mid, accelComp.mid);
u32 idx3 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi);
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid);
u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
idx = idx1 | idx2 | idx3;
return accelScanWrapper(accelTable, aux, input, idx, i, end);
@ -144,21 +138,17 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
m512 accelPerm = limex->accelPermute;
m512 accelComp = limex->accelCompare;
#if !defined(__AVX2__)
u32 idx1 = shufflePshufb128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
u32 idx2 = shufflePshufb128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
u32 idx3 = shufflePshufb128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
u32 idx4 = shufflePshufb128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
#else
u32 idx1 = shufflePshufb128(movdq_lo(s.lo), movdq_lo(accelPerm.lo),
movdq_lo(accelComp.lo));
u32 idx2 = shufflePshufb128(movdq_hi(s.lo), movdq_hi(accelPerm.lo),
movdq_hi(accelComp.lo));
u32 idx3 = shufflePshufb128(movdq_lo(s.hi), movdq_lo(accelPerm.hi),
movdq_lo(accelComp.hi));
u32 idx4 = shufflePshufb128(movdq_hi(s.hi), movdq_hi(accelPerm.hi),
movdq_hi(accelComp.hi));
#endif
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
idx = idx1 | idx2 | idx3 | idx4;
#else
u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
#endif
return accelScanWrapper(accelTable, aux, input, idx, i, end);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,6 +40,7 @@
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
@ -118,7 +119,7 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(offset, a->externalId, context);
int rv = callback(0, offset, a->externalId, context);
if (unlikely(rv == MO_HALT_MATCHING)) {
return 1;
}
@ -149,7 +150,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s,
if (TESTBIT_STATE(s, a->state)) {
DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n",
a->state, a->externalId, offset);
int rv = callback(offset, a->externalId, context);
int rv = callback(0, offset, a->externalId, context);
if (unlikely(rv == MO_HALT_MATCHING)) {
return 1;
}
@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
return 0;
}
// Returns non-zero if any accept state is currently switched on in the given
// LimEx state, after discounting accepts that are squashed by bounded-repeat
// (untug) handling at this offset.
static really_inline
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
                          union RepeatControl *repeat_ctrl, char *repeat_state,
                          u64a offset) {
    assert(limex);

    const STATE_T acceptMask = LOAD_STATE(&limex->accept);
    STATE_T accstate = AND_STATE(state, acceptMask);

    // Are we in an accept state?
    if (ISZERO_STATE(accstate)) {
        DEBUG_PRINTF("no accept states are on\n");
        return 0;
    }

    // Remove accept states that bounded-repeat bookkeeping squashes at this
    // offset; anything left is a genuine accept.
    SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);

    return ISNONZERO_STATE(accstate);
}
#undef TESTEOD_FN
#undef TESTEOD_REV_FN
#undef REPORTCURRENT_FN
#undef EXPIRE_ESTATE_FN
#undef LIMEX_INACCEPT_FN
#undef LIMEX_INANYACCEPT_FN
#undef INITIAL_FN
#undef TOP_FN
#undef TOPN_FN

View File

@ -167,12 +167,10 @@ struct build_info {
limex_accel_info accel;
};
#define LAST_LIMEX_NFA LIMEX_NFA_512
// Constants for scoring mechanism
#define LAST_LIMEX_NFA LIMEX_NFA_512_7
const int LIMEX_INITIAL_SCORE = 2000;
const int SHIFT_COST = 20; // limex: cost per shift mask
const int SHIFT_COST = 10; // limex: cost per shift mask
const int EXCEPTION_COST = 4; // limex: per exception
template<NFAEngineType t> struct NFATraits { };
@ -261,6 +259,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) {
}
}
/**
 * Returns true iff every byte of the given mask object is zero.
 *
 * Inspects the raw object representation, so it works for any of the
 * trivially-copyable mask types used by the limex compiler. The parameter is
 * taken by const reference (the original non-const reference prevented use
 * on const masks and wrongly implied mutation).
 */
template<class Mask>
bool isMaskZero(const Mask &m) {
    const unsigned char *m8 = (const unsigned char *)&m;
    for (size_t i = 0; i < sizeof(m); i++) {
        if (m8[i]) {
            return false;
        }
    }
    return true;
}
// Sets an entire byte in a mask to the given value
template<class Mask>
void maskSetByte(Mask &m, const unsigned int idx, const char val) {
@ -336,7 +345,7 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
}
struct AccelBuild {
AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0),
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0),
ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {}
NFAVertex v;
u32 state;
@ -999,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
// Suffixes and outfixes can mask out leaf states, which should all be
// accepts. Right now we can only do this when there is nothing in initDs,
// as we switch that on unconditionally in the expand call.
if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
if (!inspects_states_for_accepts(h)
&& !hasInitDsStates(h, args.state_ids)) {
NFAStateSet nonleaf(args.num_states);
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
@ -1162,12 +1172,13 @@ u32 getReportListIndex(const flat_set<ReportID> &reports,
}
static
void buildExceptionMap(const build_info &args,
const ue2::unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32> > &exceptionMap,
vector<ReportID> &exceptionReports) {
u32 buildExceptionMap(const build_info &args,
const ue2::unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32> > &exceptionMap,
vector<ReportID> &exceptionReports) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
u32 exceptionCount = 0;
ue2::unordered_map<NFAVertex, u32> pos_trigger;
ue2::unordered_map<NFAVertex, u32> tug_trigger;
@ -1297,10 +1308,13 @@ void buildExceptionMap(const build_info &args,
assert(e.succ_states.size() == num_states);
assert(e.squash_states.size() == num_states);
exceptionMap[e].push_back(i);
exceptionCount++;
}
}
DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size());
DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount,
exceptionMap.size());
return exceptionCount;
}
static
@ -1315,6 +1329,92 @@ u32 depth_to_u32(const depth &d) {
return d_val;
}
static
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
const build_info &args, u32 maxShift) {
NFAVertex from = source(e, h);
NFAVertex to = target(e, h);
u32 f = args.state_ids.at(from);
u32 t = args.state_ids.at(to);
if (!isLimitedTransition(f, t, maxShift)) {
return true;
}
// All transitions out of a tug trigger are exceptional.
if (contains(args.tugs, from)) {
return true;
}
return false;
}
static
u32 findMaxVarShift(const build_info &args, u32 nShifts) {
    const NGHolder &h = args.h;

    /* Collect the distinct shift distances used by all limited
     * (non-exceptional) transitions into a bitmask: bit d set means some
     * limited transition moves a state forward by d. */
    u32 distances = 0;
    for (const auto &e : edges_range(h)) {
        u32 from = args.state_ids.at(source(e, h));
        u32 to = args.state_ids.at(target(e, h));
        if (from == NO_STATE || to == NO_STATE) {
            continue;
        }
        if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
            distances |= (1UL << (to - from));
        }
    }

    /* The max variable shift is the nShifts'th smallest distance present, or
     * the largest available one if fewer than nShifts distances exist. */
    u32 best = 0;
    u32 taken = 0;
    while (distances != 0 && taken < nShifts) {
        best = findAndClearLSB_32(&distances);
        taken++;
    }
    return best;
}
static
int getLimexScore(const build_info &args, u32 nShifts) {
const NGHolder &h = args.h;
u32 maxVarShift = nShifts;
int score = 0;
score += SHIFT_COST * nShifts;
maxVarShift = findMaxVarShift(args, nShifts);
NFAStateSet exceptionalStates(args.num_states);
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args, maxVarShift)) {
exceptionalStates.set(from);
}
}
score += EXCEPTION_COST * exceptionalStates.count();
return score;
}
// Evaluates every candidate shift count (1..MAX_SHIFT_COUNT) against the
// LimEx cost model and picks the cheapest scheme (the lowest score).
// Returns the number of shifts for the winning scheme, optionally writing its
// score to *bestScoreRet; returns zero if no scheme beat the initial bound.
static
u32 findBestNumOfVarShifts(const build_info &args,
                           int *bestScoreRet = nullptr) {
    u32 best = 0;
    int bestScore = INT_MAX;
    for (u32 n = 1; n <= MAX_SHIFT_COUNT; n++) {
        const int candidate = getLimexScore(args, n);
        if (candidate < bestScore) {
            bestScore = candidate;
            best = n;
        }
    }
    if (bestScoreRet) {
        *bestScoreRet = bestScore;
    }
    return best;
}
template<NFAEngineType dtype>
struct Factory {
// typedefs for readability, for types derived from traits
@ -1322,25 +1422,6 @@ struct Factory {
typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
static
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const ue2::unordered_set<NFAVertex> &tugs) {
NFAVertex from = source(e, h);
NFAVertex to = target(e, h);
u32 f = state_ids.at(from);
u32 t = state_ids.at(to);
if (!isLimitedTransition(f, t, NFATraits<dtype>::maxShift)) {
return true;
}
// All transitions out of a tug trigger are exceptional.
if (contains(tugs, from)) {
return true;
}
return false;
}
static
void allocState(NFA *nfa, u32 repeatscratchStateSize,
u32 repeatStreamState) {
@ -1504,6 +1585,9 @@ struct Factory {
static
void writeShiftMasks(const build_info &args, implNFA_t *limex) {
const NGHolder &h = args.h;
u32 maxShift = findMaxVarShift(args, limex->shiftCount);
u32 shiftMask = 0;
int shiftMaskIdx = 0;
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
@ -1515,15 +1599,32 @@ struct Factory {
// We check for exceptional transitions here, as we don't want tug
// trigger transitions emitted as limited transitions (even if they
// could be in this model).
if (!isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
maskSetBit(limex->shift[to - from], from);
if (!isExceptionalTransition(h, e, args, maxShift)) {
u32 shift = to - from;
if ((shiftMask & (1UL << shift)) == 0UL) {
shiftMask |= (1UL << shift);
limex->shiftAmount[shiftMaskIdx++] = (u8)shift;
}
assert(limex->shiftCount <= MAX_SHIFT_COUNT);
for (u32 i = 0; i < limex->shiftCount; i++) {
if (limex->shiftAmount[i] == (u8)shift) {
maskSetBit(limex->shift[i], from);
break;
}
}
}
}
if (maxShift && limex->shiftCount > 1) {
for (u32 i = 0; i < limex->shiftCount; i++) {
assert(!isMaskZero(limex->shift[i]));
}
}
}
static
void findExceptionalTransitions(const build_info &args,
ue2::unordered_set<NFAEdge> &exceptional) {
ue2::unordered_set<NFAEdge> &exceptional,
u32 maxShift) {
const NGHolder &h = args.h;
for (const auto &e : edges_range(h)) {
@ -1533,7 +1634,7 @@ struct Factory {
continue;
}
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
if (isExceptionalTransition(h, e, args, maxShift)) {
exceptional.insert(e);
}
}
@ -1545,19 +1646,25 @@ struct Factory {
implNFA_t *limex, const u32 exceptionsOffset) {
DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
// to make testing easier, we pre-set the exceptionMap to all invalid
// values
memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap));
exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
assert(ISALIGNED(etable));
u32 ecount = 0;
map<u32, ExceptionProto> exception_by_state;
for (const auto &m : exceptionMap) {
const ExceptionProto &proto = m.first;
const vector<u32> &states = m.second;
DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount,
states.size());
for (u32 i : states) {
assert(!contains(exception_by_state, i));
exception_by_state.emplace(i, proto);
}
}
u32 ecount = 0;
for (const auto &m : exception_by_state) {
const ExceptionProto &proto = m.second;
u32 state_id = m.first;
DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount,
state_id);
// Write the exception entry.
exception_t &e = etable[ecount];
@ -1571,13 +1678,10 @@ struct Factory {
: repeatOffsets[proto.repeat_index];
e.repeatOffset = repeat_offset;
// for each state that can switch it on
for (auto state_id : states) {
// set this bit in the exception mask
maskSetBit(limex->exceptionMask, state_id);
// set this index in the exception map
limex->exceptionMap[state_id] = ecount;
}
// for the state that can switch it on
// set this bit in the exception mask
maskSetBit(limex->exceptionMask, state_id);
ecount++;
}
@ -1778,16 +1882,17 @@ struct Factory {
}
ue2::unordered_set<NFAEdge> exceptional;
findExceptionalTransitions(args, exceptional);
u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
findExceptionalTransitions(args, exceptional, maxShift);
map<ExceptionProto, vector<u32> > exceptionMap;
vector<ReportID> exceptionReports;
buildExceptionMap(args, exceptional, exceptionMap, exceptionReports);
u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap,
exceptionReports);
if (exceptionMap.size() > ~0U) {
DEBUG_PRINTF("too many exceptions!\n");
return nullptr;
}
assert(exceptionCount <= args.num_states);
// Build reach table and character mapping.
vector<NFAStateSet> reach;
@ -1842,7 +1947,7 @@ struct Factory {
offset = ROUNDUP_CL(offset);
const u32 exceptionsOffset = offset;
offset += sizeof(exception_t) * exceptionMap.size();
offset += sizeof(exception_t) * exceptionCount;
const u32 exceptionReportsOffset = offset;
offset += sizeof(ReportID) * exceptionReports.size();
@ -1874,6 +1979,7 @@ struct Factory {
writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
limex, acceptsOffset, acceptsEodOffset, squashOffset);
limex->shiftCount = shiftCount;
writeShiftMasks(args, limex);
// Determine the state required for our state vector.
@ -1907,8 +2013,6 @@ struct Factory {
}
static int score(const build_info &args) {
const NGHolder &h = args.h;
// LimEx NFAs are available in sizes from 32 to 512-bit.
size_t num_states = args.num_states;
@ -1928,45 +2032,17 @@ struct Factory {
sz = args.cc.grey.nfaForceSize;
}
if (args.cc.grey.nfaForceShifts &&
NFATraits<dtype>::maxShift != args.cc.grey.nfaForceShifts) {
return -1;
}
if (sz != NFATraits<dtype>::maxStates) {
return -1; // fail, size not appropriate
}
// We are of the right size, calculate a score based on the number
// of exceptions and the number of shifts used by this LimEx.
int score = LIMEX_INITIAL_SCORE;
if (NFATraits<dtype>::maxShift != 0) {
score -= SHIFT_COST / 2; // first shift mask is cheap
score -= SHIFT_COST * (NFATraits<dtype>::maxShift - 1);
int score;
u32 shiftCount = findBestNumOfVarShifts(args, &score);
if (shiftCount == 0) {
return -1;
}
NFAStateSet exceptionalStates(num_states); // outbound exc trans
for (const auto &e : edges_range(h)) {
u32 from = args.state_ids.at(source(e, h));
u32 to = args.state_ids.at(target(e, h));
if (from == NO_STATE || to == NO_STATE) {
continue;
}
if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) {
exceptionalStates.set(from);
}
}
DEBUG_PRINTF("%zu exceptional states\n", exceptionalStates.count());
score -= EXCEPTION_COST * exceptionalStates.count();
/* ensure that we always report a valid score if have the right number
* of states */
if (score < 0) {
score = 0;
}
return score;
}
};
@ -1985,50 +2061,19 @@ struct scoreNfa {
}
};
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
typedef NFAException##mlt_size exception_t; \
static const size_t maxStates = mlt_size; \
static const u32 maxShift = mlt_shift; \
}; \
#define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
typedef NFAException##mlt_size exception_t; \
static const size_t maxStates = mlt_size; \
};
MAKE_LIMEX_TRAITS(32, 1)
MAKE_LIMEX_TRAITS(32, 2)
MAKE_LIMEX_TRAITS(32, 3)
MAKE_LIMEX_TRAITS(32, 4)
MAKE_LIMEX_TRAITS(32, 5)
MAKE_LIMEX_TRAITS(32, 6)
MAKE_LIMEX_TRAITS(32, 7)
MAKE_LIMEX_TRAITS(128, 1)
MAKE_LIMEX_TRAITS(128, 2)
MAKE_LIMEX_TRAITS(128, 3)
MAKE_LIMEX_TRAITS(128, 4)
MAKE_LIMEX_TRAITS(128, 5)
MAKE_LIMEX_TRAITS(128, 6)
MAKE_LIMEX_TRAITS(128, 7)
MAKE_LIMEX_TRAITS(256, 1)
MAKE_LIMEX_TRAITS(256, 2)
MAKE_LIMEX_TRAITS(256, 3)
MAKE_LIMEX_TRAITS(256, 4)
MAKE_LIMEX_TRAITS(256, 5)
MAKE_LIMEX_TRAITS(256, 6)
MAKE_LIMEX_TRAITS(256, 7)
MAKE_LIMEX_TRAITS(384, 1)
MAKE_LIMEX_TRAITS(384, 2)
MAKE_LIMEX_TRAITS(384, 3)
MAKE_LIMEX_TRAITS(384, 4)
MAKE_LIMEX_TRAITS(384, 5)
MAKE_LIMEX_TRAITS(384, 6)
MAKE_LIMEX_TRAITS(384, 7)
MAKE_LIMEX_TRAITS(512, 1)
MAKE_LIMEX_TRAITS(512, 2)
MAKE_LIMEX_TRAITS(512, 3)
MAKE_LIMEX_TRAITS(512, 4)
MAKE_LIMEX_TRAITS(512, 5)
MAKE_LIMEX_TRAITS(512, 6)
MAKE_LIMEX_TRAITS(512, 7)
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)
} // namespace
@ -2133,20 +2178,18 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
// Acceleration analysis.
fillAccelInfo(arg);
typedef pair<int, NFAEngineType> EngineScore;
vector<EngineScore> scores;
vector<pair<int, NFAEngineType>> scores;
if (hint != INVALID_NFA) {
// The caller has told us what to (attempt to) build.
scores.push_back(make_pair(0, (NFAEngineType)hint));
scores.emplace_back(0, (NFAEngineType)hint);
} else {
for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
NFAEngineType ntype = (NFAEngineType)i;
int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
if (score >= 0) {
DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
scores.push_back(make_pair(score, ntype));
scores.emplace_back(score, ntype);
}
}
}
@ -2156,22 +2199,22 @@ aligned_unique_ptr<NFA> generate(NGHolder &h,
return nullptr;
}
sort(scores.begin(), scores.end(), greater<EngineScore>());
// Sort acceptable models in priority order, lowest score first.
sort(scores.begin(), scores.end());
aligned_unique_ptr<NFA> nfa;
for (auto i = scores.begin(); !nfa && i != scores.end(); ++i) {
assert(i->first >= 0);
nfa = DISPATCH_BY_LIMEX_TYPE(i->second, generateNfa, arg);
for (const auto &elem : scores) {
assert(elem.first >= 0);
NFAEngineType limex_model = elem.second;
auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg);
if (nfa) {
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name(limex_model));
return nfa;
}
}
if (!nfa) {
DEBUG_PRINTF("NFA build failed.\n");
return nullptr;
}
DEBUG_PRINTF("successful build with NFA engine: %s\n",
nfa_type_name((NFAEngineType)nfa->type));
return nfa;
DEBUG_PRINTF("NFA build failed.\n");
return nullptr;
}
u32 countAccelStates(NGHolder &h,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -80,6 +80,23 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) {
fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str());
}
template<typename mask_t>
static
u32 rank_in_mask(mask_t mask, u32 bit) {
    /* Returns the number of set bits in 'mask' strictly below position
     * 'bit'. The bit at 'bit' itself must be set. Works a u32 chunk at a
     * time; memcpy is used for well-defined type punning of the mask. */
    assert(bit < 8 * sizeof(mask));

    u32 chunks[sizeof(mask) / sizeof(u32)];
    memcpy(chunks, &mask, sizeof(mask));

    const u32 chunk_idx = bit / 32;
    const u32 local_bit = bit % 32;

    u32 rank = 0;
    for (u32 i = 0; i < chunk_idx; i++) {
        rank += popcount32(chunks[i]);
    }

    assert(chunks[chunk_idx] & (1U << local_bit));
    rank += popcount32(chunks[chunk_idx] & ((1U << local_bit) - 1));
    return rank;
}
template <typename limex_type>
static
void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) {
@ -244,6 +261,16 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) {
}
}
template<typename limex_type>
static
void dumpLimexShifts(const limex_type *limex, FILE *f) {
u32 size = limex_traits<limex_type>::size;
fprintf(f, "Shift Masks:\n");
for(u32 i = 0; i < limex->shiftCount; i++) {
fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i],
dumpMask((const u8 *)&limex->shift[i], size).c_str());
}
}
template<typename limex_type>
static
void dumpLimexText(const limex_type *limex, FILE *f) {
@ -270,6 +297,9 @@ void dumpLimexText(const limex_type *limex, FILE *f) {
topMask += size / 8;
}
// Dump shift masks
dumpLimexShifts(limex, f);
dumpSquash(limex, f);
dumpLimexReachMap(limex->reachMap, f);
@ -325,7 +355,7 @@ struct limex_labeller : public nfa_labeller {
return;
}
u32 ex_index = limex->exceptionMap[state];
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
const typename limex_traits<limex_type>::exception_type *e
= &exceptions[ex_index];
@ -396,7 +426,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
const typename limex_traits<limex_type>::exception_type *exceptions
= getExceptionTable(limex);
u32 ex_index = limex->exceptionMap[state];
u32 ex_index = rank_in_mask(limex->exceptionMask, state);
const typename limex_traits<limex_type>::exception_type *e
= &exceptions[ex_index];
@ -420,78 +450,45 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) {
template<typename limex_type>
static
void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
for (u32 j = 0; j < MAX_MAX_SHIFT; j++) {
for (u32 j = 0; j < limex->shiftCount; j++) {
const u32 shift_amount = limex->shiftAmount[j];
if (testbit((const u8 *)&limex->shift[j],
limex_traits<limex_type>::size, state)) {
fprintf(f, "%u -> %u;\n", state, state + j);
fprintf(f, "%u -> %u;\n", state, state + shift_amount);
}
}
}
#define DUMP_TEXT_FN(ddf_u, ddf_n, ddf_s) \
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpText(const NFA *nfa, FILE *f) { \
#define DUMP_TEXT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \
dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \
}
#define DUMP_DOT_FN(ddf_u, ddf_n, ddf_s) \
void nfaExecLimEx##ddf_n##_##ddf_s##_dumpDot(const NFA *nfa, FILE *f) { \
#define DUMP_DOT_FN(ddf_n) \
void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \
UNUSED const string &base) { \
const LimExNFA##ddf_n *limex = \
(const LimExNFA##ddf_n *)getImplNfa(nfa); \
\
dumpDotPreamble(f); \
u32 state_count = nfa->nPositions; \
u32 state_count = nfa->nPositions; \
dumpVertexDotInfo(limex, state_count, f, \
limex_labeller<LimExNFA##ddf_n>(limex)); \
for (u32 i = 0; i < state_count; i++) { \
dumpLimDotInfo(limex, i, f); \
dumpExDotInfo(limex, i, f); \
} \
\
dumpDotTrailer(f); \
}
#define LIMEX_DUMP_FNS(ntype, size, shifts) \
DUMP_TEXT_FN(ntype, size, shifts) \
DUMP_DOT_FN(ntype, size, shifts)
#define LIMEX_DUMP_FNS(size) \
DUMP_TEXT_FN(size) \
DUMP_DOT_FN(size)
LIMEX_DUMP_FNS(u32, 32, 1)
LIMEX_DUMP_FNS(u32, 32, 2)
LIMEX_DUMP_FNS(u32, 32, 3)
LIMEX_DUMP_FNS(u32, 32, 4)
LIMEX_DUMP_FNS(u32, 32, 5)
LIMEX_DUMP_FNS(u32, 32, 6)
LIMEX_DUMP_FNS(u32, 32, 7)
LIMEX_DUMP_FNS(m128, 128, 1)
LIMEX_DUMP_FNS(m128, 128, 2)
LIMEX_DUMP_FNS(m128, 128, 3)
LIMEX_DUMP_FNS(m128, 128, 4)
LIMEX_DUMP_FNS(m128, 128, 5)
LIMEX_DUMP_FNS(m128, 128, 6)
LIMEX_DUMP_FNS(m128, 128, 7)
LIMEX_DUMP_FNS(m256, 256, 1)
LIMEX_DUMP_FNS(m256, 256, 2)
LIMEX_DUMP_FNS(m256, 256, 3)
LIMEX_DUMP_FNS(m256, 256, 4)
LIMEX_DUMP_FNS(m256, 256, 5)
LIMEX_DUMP_FNS(m256, 256, 6)
LIMEX_DUMP_FNS(m256, 256, 7)
LIMEX_DUMP_FNS(m384, 384, 1)
LIMEX_DUMP_FNS(m384, 384, 2)
LIMEX_DUMP_FNS(m384, 384, 3)
LIMEX_DUMP_FNS(m384, 384, 4)
LIMEX_DUMP_FNS(m384, 384, 5)
LIMEX_DUMP_FNS(m384, 384, 6)
LIMEX_DUMP_FNS(m384, 384, 7)
LIMEX_DUMP_FNS(m512, 512, 1)
LIMEX_DUMP_FNS(m512, 512, 2)
LIMEX_DUMP_FNS(m512, 512, 3)
LIMEX_DUMP_FNS(m512, 512, 4)
LIMEX_DUMP_FNS(m512, 512, 5)
LIMEX_DUMP_FNS(m512, 512, 6)
LIMEX_DUMP_FNS(m512, 512, 7)
LIMEX_DUMP_FNS(32)
LIMEX_DUMP_FNS(128)
LIMEX_DUMP_FNS(256)
LIMEX_DUMP_FNS(384)
LIMEX_DUMP_FNS(512)
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -79,9 +79,13 @@
#ifdef ARCH_64_BIT
#define CHUNK_T u64a
#define FIND_AND_CLEAR_FN findAndClearLSB_64
#define POPCOUNT_FN popcount64
#define RANK_IN_MASK_FN rank_in_mask64
#else
#define CHUNK_T u32
#define FIND_AND_CLEAR_FN findAndClearLSB_32
#define POPCOUNT_FN popcount32
#define RANK_IN_MASK_FN rank_in_mask32
#endif
/** \brief Process a single exception. Returns 1 if exception handling should
@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
#ifndef RUN_EXCEPTION_FN_ONLY
/** \brief Process all of the exceptions associated with the states in the \a estate. */
/** \brief Process all of the exceptions associated with the states in the \a
* estate. */
static really_inline
int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex,
const u32 *exceptionMap, const EXCEPTION_T *exceptions,
const ReportID *exReports,
u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx,
char in_rev, char flags) {
assert(diffmask > 0); // guaranteed by caller macro
if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) {
@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
// A copy of the estate as an array of GPR-sized chunks.
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
#ifdef ESTATE_ON_STACK
memcpy(chunks, &estate, sizeof(STATE_T));
#else
memcpy(chunks, estatep, sizeof(STATE_T));
#endif
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
struct proto_cache new_cache = {0, NULL};
enum CacheResult cacheable = CACHE_RESULT;
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}
do {
u32 t = findAndClearLSB_32(&diffmask);
#ifdef ARCH_64_BIT
@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ,
assert(t < ARRAY_LENGTH(chunks));
CHUNK_T word = chunks[t];
assert(word != 0);
u32 base = t * sizeof(CHUNK_T) * 8;
do {
u32 bit = FIND_AND_CLEAR_FN(&word) + base;
u32 idx = exceptionMap[bit];
u32 bit = FIND_AND_CLEAR_FN(&word);
u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
u32 idx = local_index + base_index[t];
const EXCEPTION_T *e = &exceptions[idx];
if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -68,6 +68,9 @@
The value of NFA.stateSize gives the total state size in bytes (the sum of
all the above).
The number of shifts should always be greater than or equal to 1.
A shift count of 0 means that no appropriate NFA engine was found.
*/
#ifndef LIMEX_INTERNAL_H
@ -77,7 +80,8 @@
#include "repeat_internal.h"
// Constants
#define MAX_MAX_SHIFT 8 /**< largest maxshift used by a LimEx NFA */
#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */
#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
@ -95,24 +99,6 @@ enum LimExSquash {
LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
};
struct LimExNFABase {
u8 reachMap[N_CHARS];
u32 reachSize;
u32 accelCount;
u32 accelTableOffset;
u32 accelAuxCount;
u32 accelAuxOffset;
u32 acceptCount;
u32 acceptOffset;
u32 acceptEodCount;
u32 acceptEodOffset;
u32 exceptionCount;
u32 exceptionOffset;
u32 exReportOffset;
u32 repeatCount;
u32 repeatOffset;
};
/* uniform looking types for the macros */
typedef u8 u_8;
typedef u16 u_16;
@ -133,7 +119,7 @@ struct NFAException##size { \
u8 trigger; /**< from enum LimExTrigger */ \
}; \
\
struct LimExNFA##size { /* MUST align with LimExNFABase */ \
struct LimExNFA##size { \
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
u32 reachSize; /**< number of reach masks */ \
u32 accelCount; /**< number of entries in accel table */ \
@ -149,7 +135,6 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
u32 exReportOffset; /* rel. to start of LimExNFA */ \
u32 repeatCount; \
u32 repeatOffset; \
u32 exceptionMap[size]; \
u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
u32 squashCount; \
u32 topCount; \
@ -168,8 +153,10 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \
u_##size compressMask; /**< switch off before compress */ \
u_##size exceptionMask; \
u_##size repeatCyclicMask; \
u_##size shift[MAX_MAX_SHIFT]; \
u_##size zombieMask; /**< zombie if in any of the set states */ \
u_##size shift[MAX_SHIFT_COUNT]; \
u32 shiftCount; /**< number of shift masks used */ \
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
};
CREATE_NFA_LIMEX(32)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -74,7 +74,6 @@
static really_inline
int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
const struct LimExNFA32 *limex,
const u32 *exceptionMap,
const struct NFAException32 *exceptions,
const ReportID *exReports, u64a offset,
struct NFAContext32 *ctx, char in_rev, char flags) {
@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
do {
u32 bit = findAndClearLSB_32(&estate);
u32 idx = exceptionMap[bit];
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
const struct NFAException32 *e = &exceptions[idx];
if (!runException32(e, s, succ, &local_succ, limex, exReports, offset,
ctx, &new_cache, &cacheable, in_rev, flags)) {
@ -132,35 +131,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
#define SIZE 32
#define STATE_T u32
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 32
#define STATE_T u32
#define SHIFT 7
#include "limex_runtime_impl.h"

View File

@ -73,34 +73,35 @@ struct proto_cache {
};
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \
(JOIN(shift_, nels_type)( \
(JOIN(lshift_, nels_type)( \
JOIN(and_, nels_type)(s, \
JOIN(load_, nels_type)(&limex->shift[nels_i])), \
nels_i))
limex->shiftAmount[nels_i]))
// Calculate the (limited model) successors for a given max shift. Assumes
// LimExNFAxxx ptr in 'l', current state in 's' and successors in 'succ'.
// Calculate the (limited model) successors for a number of variable shifts.
// Assumes current state in 's' and successors in 'succ'.
#define NFA_EXEC_GET_LIM_SUCC(gls_type, gls_shift) \
#define NFA_EXEC_GET_LIM_SUCC(gls_type) \
do { \
succ = \
JOIN(and_, gls_type)(s, JOIN(load_, gls_type)(&limex->shift[0])); \
switch (gls_shift) { \
case 7: \
succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \
switch (limex->shiftCount) { \
case 8: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \
case 6: \
case 7: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \
case 5: \
case 6: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \
case 4: \
case 5: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \
case 3: \
case 4: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \
case 2: \
case 3: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \
case 1: \
case 2: \
succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \
case 1: \
case 0: \
; \
} \
@ -129,7 +130,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback,
for (; *reports != MO_INVALID_IDX; ++reports) {
DEBUG_PRINTF("firing report for id %u at offset %llu\n",
*reports, offset);
int rv = callback(offset, *reports, context);
int rv = callback(0, offset, *reports, context);
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}

View File

@ -37,11 +37,11 @@
* Version 2.0: now with X-Macros, so you get line numbers in your debugger.
*/
#if !defined(SIZE) || !defined(STATE_T) || !defined(SHIFT)
# error Must define SIZE and STATE_T and SHIFT in includer.
#if !defined(SIZE) || !defined(STATE_T)
# error Must define SIZE and STATE_T in includer.
#endif
#define LIMEX_API_ROOT JOIN(JOIN(JOIN(nfaExecLimEx, SIZE), _), SHIFT)
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
@ -73,6 +73,7 @@
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
#define ZERO_STATE JOIN(zero_, STATE_T)
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
#define ISZERO_STATE JOIN(isZero_, STATE_T)
@ -104,8 +105,8 @@
// continue, 1 if an accept was fired and the user instructed us to halt.
static really_inline
char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
const ReportID *exReports, const u32 *exceptionMap,
STATE_T s, const STATE_T emask, size_t i, u64a offset,
const ReportID *exReports, STATE_T s,
const STATE_T emask, size_t i, u64a offset,
STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
const char flags, const char in_rev,
const char first_match) {
@ -132,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
int rv = JOIN(processExceptional, SIZE)(
pass_state, pass_estate, diffmask, succ, limex, exceptionMap,
exceptions, exReports, callback_offset, ctx, in_rev, localflags);
pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports,
callback_offset, ctx, in_rev, localflags);
if (rv == PE_RV_HALT) {
return 1; // Halt matching.
}
@ -175,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
(const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
const u32 *exceptionMap = limex->exceptionMap;
STATE_T s = LOAD_STATE(&ctx->s);
/* assert(ISALIGNED_16(exceptions)); */
@ -201,11 +201,11 @@ without_accel:
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
flags, 0, first_match)) {
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
first_match)) {
return MO_HALT_MATCHING;
}
@ -252,11 +252,11 @@ with_accel:
u8 c = input[i];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
flags, 0, first_match)) {
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK,
i, offset, &succ, final_loc, ctx, flags, 0,
first_match)) {
return MO_HALT_MATCHING;
}
@ -300,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
#endif
const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
const ReportID *exReports = getExReports(limex);
const u32 *exceptionMap = limex->exceptionMap;
STATE_T s = LOAD_STATE(&ctx->s);
/* assert(ISALIGNED_16(exceptions)); */
@ -318,9 +317,9 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
u8 c = input[i-1];
STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT);
NFA_EXEC_GET_LIM_SUCC(STATE_T);
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s,
if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s,
EXCEPTION_MASK, i, offset, &succ, final_loc, ctx,
flags, 1, 0)) {
return MO_HALT_MATCHING;
@ -349,36 +348,57 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
}
static really_inline
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
u64a offset) {
if (!limex->repeatCount) {
return;
}
// Note: we compress all repeats, as they may have *just* had their
// cyclic states switched off a moment ago. TODO: is this required
STATE_T s = LOAD_STATE(src);
if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
const union RepeatControl *ctrl =
getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
char *state_base = (char *)dest + limex->stateSize;
for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&s, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
const struct RepeatInfo *repeat = getRepeatInfo(info);
if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset,
offset) == REPEAT_STALE) {
DEBUG_PRINTF("is stale, clearing state\n");
CLEARBIT_STATE(&s, info->cyclicState);
continue;
}
DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
offset);
}
STORE_STATE(src, s);
}
char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
const struct mq *q,
s64a loc) {
const struct mq *q, s64a loc) {
void *dest = q->streamState;
const void *src = q->state;
void *src = q->state;
u8 key = queue_prev_byte(q, loc);
const IMPL_NFA_T *limex = getImplNfa(n);
COMPRESS_FN(limex, dest, src, key);
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
COMPRESS_FN(limex, dest, src, key);
return 0;
}
@ -389,15 +409,29 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
return;
}
// Note: we expand all repeats, as they may have *just* had their
// cyclic states switched off a moment ago. TODO: is this required?
// Note: state has already been expanded into 'dest'.
const STATE_T cyclics =
AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask));
if (ISZERO_STATE(cyclics)) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
union RepeatControl *ctrl =
getRepeatControlBase((char *)dest, sizeof(STATE_T));
const char *state_base = (const char *)src + limex->stateSize;
for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
if (!TESTBIT_STATE(&cyclics, info->cyclicState)) {
DEBUG_PRINTF("is dead\n");
continue;
}
DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
const struct RepeatInfo *repeat = getRepeatInfo(info);
repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
&ctrl[i]);
@ -650,7 +684,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
ep = MIN(ep, end_abs);
assert(ep >= sp);
assert(sp >= offset); // We no longer do history buffer scans here.
if (sp < offset) {
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
assert(offset - sp <= q->hlength);
u64a local_ep = MIN(offset, ep);
u64a final_look = 0;
/* we are starting inside the history buffer */
if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
local_ep - sp, &ctx, sp,
&final_look) == MO_HALT_MATCHING) {
DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
"offset:%llu\n", final_look, sp, end_abs, offset);
assert(q->cur);
q->cur--;
q->items[q->cur].type = MQE_START;
q->items[q->cur].location = sp + final_look - offset;
STORE_STATE(q->state, LOAD_STATE(&ctx.s));
return MO_MATCHES_PENDING;
}
sp = local_ep;
}
if (sp >= ep) {
goto scan_done;
@ -789,10 +843,8 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
}
char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
const char *streamState, u64a offset,
NfaCallback callback,
UNUSED SomNfaCallback som_callback,
void *context) {
const char *streamState, u64a offset,
NfaCallback callback, void *context) {
assert(n && state);
const IMPL_NFA_T *limex = getImplNfa(n);
@ -868,6 +920,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
offset, report);
}
/** \brief Report whether the LimEx NFA is currently in any accepting state.
 *
 * Pulls the implementation NFA out of \p nfa, locates the bounded-repeat
 * control blocks inside the queue's full state and the packed repeat state
 * that follows the NFA state in the stream state, then delegates to the
 * size-specialised limexInAnyAccept helper.
 */
char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
    assert(nfa && q);
    assert(q->state && q->streamState);
    const IMPL_NFA_T *limex = getImplNfa(nfa);
    // Repeat control structures live in q->state just after the NFA state.
    union RepeatControl *repeat_ctrl =
        getRepeatControlBase(q->state, sizeof(STATE_T));
    // Packed repeat stream state follows the compressed NFA state.
    char *repeat_state = q->streamState + limex->stateSize;
    STATE_T state = LOAD_STATE(q->state);
    // Offset one past the last queued location; presumably the point at
    // which accept status is being queried -- TODO(review): confirm.
    u64a offset = q->offset + q_last_loc(q) + 1;
    return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
                                        offset);
}
enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
const struct NFA *nfa,
struct mq *q,
@ -920,6 +987,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
#undef ANDNOT_STATE
#undef OR_STATE
#undef TESTBIT_STATE
#undef CLEARBIT_STATE
#undef ZERO_STATE
#undef ISNONZERO_STATE
#undef ISZERO_STATE
@ -935,5 +1003,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
// Parameters.
#undef SIZE
#undef STATE_T
#undef SHIFT
#undef LIMEX_API_ROOT

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,20 +34,19 @@
* be faster and actually correct if these assumptions don't hold true.
*/
#ifndef SHUFFLE_H
#define SHUFFLE_H
#ifndef LIMEX_SHUFFLE_H
#define LIMEX_SHUFFLE_H
#include "config.h"
#include "bitutils.h"
#include "simd_utils.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__))
#define HAVE_PEXT
#endif
static really_inline
u32 shuffleDynamic32(u32 x, u32 mask) {
u32 packedExtract32(u32 x, u32 mask) {
#if defined(HAVE_PEXT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u32(x, mask);
@ -67,7 +66,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) {
}
static really_inline
u32 shuffleDynamic64(u64a x, u64a mask) {
u32 packedExtract64(u64a x, u64a mask) {
#if defined(HAVE_PEXT) && defined(ARCH_64_BIT)
// Intel BMI2 can do this operation in one instruction.
return _pext_u64(x, mask);
@ -88,4 +87,24 @@ u32 shuffleDynamic64(u64a x, u64a mask) {
#undef HAVE_PEXT
#endif // SHUFFLE_H
/** \brief Extract a packed 16-bit result from a 128-bit vector.
 *
 * Gathers the bytes selected by \p permute, then reports (as set bits) the
 * byte positions where AND-ing with \p compare discarded information.
 */
static really_inline
u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
    // Move the bytes of interest into position.
    const m128 gathered = pshufb(s, permute);
    // A byte is "clean" iff masking it changes nothing; invert the equality
    // bitmask so set bits flag the bytes that lost data under the mask.
    const u16 bits =
        (u16)~movemask128(eq128(and128(gathered, compare), gathered));
    return (u32)bits;
}
#if defined(__AVX2__)
/** \brief AVX2 variant of packedExtract: fold a 256-bit vector down to a
 * single 16-bit result (returned widened to u32). */
static really_inline
u32 packedExtract256(m256 s, const m256 permute, const m256 compare) {
    // NOTE: vpshufb only shuffles within each 128-bit lane, so the permute
    // control must already be laid out per-lane (a deliberate cheat).
    const m256 gathered = vpshufb(s, permute);
    const m256 masked = and256(gathered, compare);
    // Set bits mark byte positions where masking discarded information.
    const u32 lanewise = ~movemask256(eq256(masked, gathered));
    // OR the two per-lane 16-bit halves back together into one result.
    return (u32)((lanewise >> 16) | (lanewise & 0xffffU));
}
#endif // AVX2
#endif // LIMEX_SHUFFLE_H

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -61,37 +61,6 @@
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 128
#define STATE_T m128
#define SHIFT 7
#define SIZE 128
#define STATE_T m128
#include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -58,37 +58,6 @@
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 256
#define STATE_T m256
#define SHIFT 7
#define SIZE 256
#define STATE_T m256
#include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -58,37 +58,6 @@
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 1
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 2
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 3
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 5
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 6
#include "limex_runtime_impl.h"
#define SIZE 384
#define STATE_T m384
#define SHIFT 7
#define SIZE 384
#define STATE_T m384
#include "limex_runtime_impl.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -58,12 +58,6 @@
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
#define SIZE 512
#define STATE_T m512
#define SHIFT 4
#include "limex_runtime_impl.h"
#define SIZE 512
#define STATE_T m512
#define SHIFT 5
#define SIZE 512
#define STATE_T m512
#include "limex_runtime_impl.h"

View File

@ -42,13 +42,13 @@
static really_inline
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
u16 s, u64a loc, char eod, u16 * const cached_accept_state,
u32 * const cached_accept_id) {
u16 s, u64a loc, char eod, u16 *const cached_accept_state,
u32 *const cached_accept_id) {
DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
(u16)(s & STATE_MASK), loc, eod);
if (!eod && s == *cached_accept_state) {
if (cb(loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
*cached_accept_id = rl->report[0];
DEBUG_PRINTF("reporting %u\n", rl->report[0]);
if (cb(loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
for (u32 i = 0; i < count; i++) {
DEBUG_PRINTF("reporting %u\n", rl->report[i]);
if (cb(loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
}
@ -146,7 +146,7 @@ without_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -186,7 +186,7 @@ with_accel:
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING; /* termination requested */
}
} else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
@ -328,7 +328,7 @@ without_accel:
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -360,7 +360,7 @@ with_accel:
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
@ -475,7 +475,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -632,7 +632,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
int rv;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(q_cur_offset(q), m->arb_report, context);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -836,7 +836,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
if (s >= m->accept_limit_8) {
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt);
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
}
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
NfaCallback cb = q->cb;
void *ctxt = q->context;
u16 s = *(u16 *)q->state;
@ -864,7 +864,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
if (aux->accept) {
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(offset, m->arb_report, ctxt);
cb(0, offset, m->arb_report, ctxt);
} else {
u32 cached_accept_id = 0;
u16 cached_accept_state = 0;
@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
if (s < m->accept_limit_8) {
@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
return mcclellanHasAccept(m, get_aux(m, s), report);
}
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
return s >= m->accept_limit_8;
}
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
assert(n && q);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);
return mcclellanHasAccept(m, get_aux(m, s), report);
}
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
const struct mcclellan *m = getImplNfa(n);
u16 s = *(u16 *)q->state;
DEBUG_PRINTF("checking accepts for %hu\n", s);
return !!get_aux(m, s)->accept;
}
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
u64a offset = q->offset;
const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;
return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;
return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;
char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
NfaCallback cb = q->cb;
void *context = q->context;
assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
const struct mcclellan *m = getImplNfa(n);
const u8 *hend = q->history + q->hlength;
char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u8 s = offset ? m->start_floating : m->start_anchored;
if (s) {
*(u8 *)state = s;
@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void *state, UNUSED u8 key) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u16 s = offset ? m->start_floating : m->start_anchored;
if (s) {
unaligned_store_u16(state, s);
@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u8 s = top ? m->start_anchored : *(u8 *)state;
@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
const u8 *buf, char top, size_t start_off,
size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
const struct mcclellan *m = getImplNfa(nfa);
u16 s = top ? m->start_anchored : unaligned_load_u16(state);
@ -1053,17 +1073,15 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
}
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
context);
}
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState,
u64a offset, NfaCallback callback,
UNUSED SomNfaCallback som_cb, void *context) {
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
assert(ISALIGNED_N(state, 2));
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
context);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -39,14 +39,14 @@ struct NFA;
char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);

View File

@ -32,7 +32,6 @@
#include "accelcompile.h"
#include "grey.h"
#include "mcclellan_internal.h"
#include "mcclellancompile_accel.h"
#include "mcclellancompile_util.h"
#include "nfa_internal.h"
#include "shufticompile.h"
@ -65,6 +64,17 @@
using namespace std;
using boost::adaptors::map_keys;
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
/** Maximum tolerated number of escape character from an accel state.
* This is larger than nfa, as we don't have a budget and the nfa cheats on stop
* characters for sets of states */
#define ACCEL_DFA_MAX_STOP_CHAR 160
/** Maximum tolerated number of escape character from a sds accel state. Larger
* than normal states as accelerating sds is important. Matches NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
namespace ue2 {
namespace /* anon */ {
@ -75,7 +85,7 @@ struct dstate_extra {
};
struct dfa_info {
dfa_build_strat &strat;
accel_dfa_build_strat &strat;
raw_dfa &raw;
vector<dstate> &states;
vector<dstate_extra> extra;
@ -85,7 +95,7 @@ struct dfa_info {
u8 getAlphaShift() const;
explicit dfa_info(dfa_build_strat &s)
explicit dfa_info(accel_dfa_build_strat &s)
: strat(s),
raw(s.get_raw()),
states(raw.states),
@ -128,13 +138,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
return aux;
}
static
bool double_byte_ok(const AccelScheme &info) {
return !info.double_byte.empty()
&& info.double_cr.count() < info.double_byte.size()
&& info.double_cr.count() <= 2 && !info.double_byte.empty();
}
static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
assert((size_t)succ_table % 2 == 0);
@ -190,120 +193,12 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx)
const {
return find_mcclellan_escape_info(rdfa, this_idx,
max_allowed_offset_accel());
u32 mcclellan_build_strat::max_stop_char() const {
return ACCEL_DFA_MAX_STOP_CHAR;
}
/** builds acceleration schemes for states */
void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
const AccelScheme &info,
void *accel_out) {
AccelAux *accel = (AccelAux *)accel_out;
DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
info.double_offset);
accel->generic.offset = verify_u8(info.offset);
if (double_byte_ok(info) && info.double_cr.none()
&& info.double_byte.size() == 1) {
accel->accel_type = ACCEL_DVERM;
accel->dverm.c1 = info.double_byte.begin()->first;
accel->dverm.c2 = info.double_byte.begin()->second;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
return;
}
if (double_byte_ok(info) && info.double_cr.none()
&& (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
bool ok = true;
assert(!info.double_byte.empty());
u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;
for (const pair<u8, u8> &p : info.double_byte) {
if ((p.first & CASE_CLEAR) != firstC
|| (p.second & CASE_CLEAR) != secondC) {
ok = false;
break;
}
}
if (ok) {
accel->accel_type = ACCEL_DVERM_NOCASE;
accel->dverm.c1 = firstC;
accel->dverm.c2 = secondC;
accel->dverm.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
return;
}
u8 m1;
u8 m2;
if (buildDvermMask(info.double_byte, &m1, &m2)) {
accel->accel_type = ACCEL_DVERM_MASKED;
accel->dverm.offset = verify_u8(info.double_offset);
accel->dverm.c1 = info.double_byte.begin()->first & m1;
accel->dverm.c2 = info.double_byte.begin()->second & m2;
accel->dverm.m1 = m1;
accel->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
accel->dverm.c1, accel->dverm.c2);
return;
}
}
if (double_byte_ok(info)
&& shuftiBuildDoubleMasks(info.double_cr, info.double_byte,
&accel->dshufti.lo1, &accel->dshufti.hi1,
&accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.double_offset);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}
if (info.cr.none()) {
accel->accel_type = ACCEL_RED_TAPE;
DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
" from which there is no escape\n", this_idx);
return;
}
if (info.cr.count() == 1) {
accel->accel_type = ACCEL_VERM;
accel->verm.c = info.cr.find_first();
DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
return;
}
if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
accel->accel_type = ACCEL_VERM_NOCASE;
accel->verm.c = info.cr.find_first() & CASE_CLEAR;
DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
return;
}
if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) {
accel->accel_type = ACCEL_NONE;
DEBUG_PRINTF("state %hu is too broad\n", this_idx);
return;
}
accel->accel_type = ACCEL_SHUFTI;
if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo,
&accel->shufti.hi)) {
DEBUG_PRINTF("state %hu is shufti\n", this_idx);
return;
}
assert(!info.cr.none());
accel->accel_type = ACCEL_TRUFFLE;
truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2);
DEBUG_PRINTF("state %hu is truffle\n", this_idx);
u32 mcclellan_build_strat::max_floating_stop_char() const {
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
static
@ -343,15 +238,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
}
}
raw_dfa::~raw_dfa() {
}
raw_report_info::raw_report_info() {
}
raw_report_info::~raw_report_info() {
}
namespace {
struct raw_report_list {
@ -592,7 +478,7 @@ aligned_unique_ptr<NFA> mcclellanCompile16(dfa_info &info,
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
= info.strat.getAccelInfo(cc.grey);
size_t tran_size = (1 << info.getAlphaShift())
* sizeof(u16) * count_real_states;
@ -811,7 +697,7 @@ aligned_unique_ptr<NFA> mcclellanCompile8(dfa_info &info,
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= populateAccelerationInfo(info.raw, info.strat, cc.grey);
= info.strat.getAccelInfo(cc.grey);
size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
size_t aux_size = sizeof(mstate_aux) * info.size();
@ -1053,7 +939,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
return false;
}
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
aligned_unique_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
set<dstate_id_t> *accel_states) {
u16 total_daddy = 0;
@ -1123,12 +1009,9 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) {
return out.count();
}
bool has_accel_dfa(const NFA *nfa) {
bool has_accel_mcclellan(const NFA *nfa) {
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
return m->has_accel;
}
dfa_build_strat::~dfa_build_strat() {
}
} // namespace ue2

View File

@ -29,6 +29,7 @@
#ifndef MCCLELLANCOMPILE_H
#define MCCLELLANCOMPILE_H
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/accel_scheme.h"
@ -47,48 +48,20 @@ namespace ue2 {
class ReportManager;
struct CompileContext;
struct raw_report_info {
raw_report_info();
virtual ~raw_report_info();
virtual u32 getReportListSize() const = 0; /* in bytes */
virtual size_t size() const = 0; /* number of lists */
virtual void fillReportLists(NFA *n, size_t base_offset,
std::vector<u32> &ro /* out */) const = 0;
};
class dfa_build_strat {
public:
explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {}
virtual ~dfa_build_strat();
virtual raw_dfa &get_raw() const = 0;
virtual std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const = 0;
virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0;
virtual size_t accelSize(void) const = 0;
virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) = 0;
protected:
const ReportManager &rm;
};
class mcclellan_build_strat : public dfa_build_strat {
class mcclellan_build_strat : public accel_dfa_build_strat {
public:
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
: dfa_build_strat(rm_in), rdfa(rdfa_in) {}
: accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
raw_dfa &get_raw() const override { return rdfa; }
std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override;
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
size_t accelSize(void) const override;
void buildAccel(dstate_id_t this_idx,const AccelScheme &info,
void *accel_out) override;
virtual u32 max_allowed_offset_accel() const;
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;
private:
raw_dfa &rdfa;
@ -103,7 +76,7 @@ mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
/* used internally by mcclellan/haig/gough compile process */
ue2::aligned_unique_ptr<NFA>
mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat,
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc,
std::set<dstate_id_t> *accel_states = nullptr);
@ -114,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw);
std::set<ReportID> all_reports(const raw_dfa &rdfa);
bool has_accel_dfa(const NFA *nfa);
bool has_accel_mcclellan(const NFA *nfa);
} // namespace ue2

View File

@ -337,62 +337,35 @@ size_t hash_dfa(const raw_dfa &rdfa) {
}
static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
u16 top_remap = raw.alpha_remap[TOP];
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
if (i != top_remap && raw.states[s].next[i] == s) {
bool can_die_early(const raw_dfa &raw, dstate_id_t s,
map<dstate_id_t, u32> &visited, u32 age_limit) {
if (contains(visited, s) && visited[s] >= age_limit) {
/* we have already visited (or are in the process of visiting) here with
* a looser limit. */
return false;
}
visited[s] = age_limit;
if (s == DEAD_STATE) {
return true;
}
if (age_limit == 0) {
return false;
}
for (const auto &next : raw.states[s].next) {
if (can_die_early(raw, next, visited, age_limit - 1)) {
return true;
}
}
return false;
}
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
if (raw.start_floating != DEAD_STATE) {
DEBUG_PRINTF("has floating start\n");
return raw.start_floating;
}
DEBUG_PRINTF("looking for SDS proxy\n");
dstate_id_t s = raw.start_anchored;
if (has_self_loop(s, raw)) {
return s;
}
u16 top_remap = raw.alpha_remap[TOP];
ue2::unordered_set<dstate_id_t> seen;
while (true) {
seen.insert(s);
DEBUG_PRINTF("basis %hu\n", s);
/* check if we are connected to a state with a self loop */
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t t = raw.states[s].next[i];
if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
return t;
}
}
/* find a neighbour to use as a basis for looking for the sds proxy */
dstate_id_t t = DEAD_STATE;
for (u32 i = 0; i < raw.states[s].next.size(); i++) {
dstate_id_t tt = raw.states[s].next[i];
if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
t = tt;
break;
}
}
if (t == DEAD_STATE) {
/* we were unable to find a state to use as a SDS proxy */
return DEAD_STATE;
}
s = t;
}
bool can_die_early(const raw_dfa &raw, u32 age_limit) {
map<dstate_id_t, u32> visited;
return can_die_early(raw, raw.start_anchored, visited, age_limit);
}
} // namespace ue2

View File

@ -55,7 +55,7 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa);
/** \brief Compute a simple hash of this raw_dfa, including its reports. */
size_t hash_dfa(const raw_dfa &rdfa);
dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
bool can_die_early(const raw_dfa &raw, u32 age_limit);
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) {
fprintf(f, "0 [style=invis];\n");
}
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) {
fprintf(f, "}\n");
}
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) {
void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f,
UNUSED const string &base) {
assert(nfa->type == MCCLELLAN_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "rdfa.h"
#include <cstdio>
#include <string>
struct mcclellan;
struct mstate_aux;
@ -42,8 +43,10 @@ union AccelAux;
namespace ue2 {
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file);
void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file);

View File

@ -131,7 +131,8 @@ char processReports(const struct mpv *m, u8 *reporters,
rl_count++;
}
if (cb(report_offset, curr->report, ctxt) == MO_HALT_MATCHING) {
if (cb(0, report_offset, curr->report, ctxt) ==
MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING;
}
@ -180,7 +181,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters,
for (size_t i = 2; i <= length; i++) {
for (u32 j = 0; j < rl_count; j++) {
if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
DEBUG_PRINTF("bailing\n");
return MO_HALT_MATCHING;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,7 +36,6 @@ struct NFA;
char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
#define nfaExecMpv0_testEOD NFA_API_NO_IMPL
#define nfaExecMpv0_inAccept NFA_API_NO_IMPL
#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
#define nfaExecMpv0_QR NFA_API_NO_IMPL
#define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL

View File

@ -48,7 +48,8 @@
namespace ue2 {
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) {
void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file,
UNUSED const std::string &base) {
}
static really_inline

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,12 +32,14 @@
#if defined(DUMP_SUPPORT)
#include <cstdio>
#include <string>
struct NFA;
namespace ue2 {
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file);
void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file);
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -347,9 +347,9 @@ void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) {
}
}
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off,
unsigned max_len) :
cr(ref_cr), offset(off), max_len(max_len) {
MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr,
u32 off, unsigned max_length)
: cr(ref_cr), offset(off), max_len(max_length) {
int accel_num = (int) MultibyteAccelInfo::MAT_MAX;
accels.resize(accel_num);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
static really_inline
const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -31,7 +31,6 @@
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/* Normal SSSE3 shufti */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -32,7 +32,6 @@
#include "multitruffle.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
#include "multiaccel_common.h"

View File

@ -120,6 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
*/
char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExec() and just dispatches directly to the nfa implementations. It is
* intended to be used by the Tamarama engine.
*/
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
/** Return value indicating that the engine is dead. */
#define MO_DEAD 0
/** Return value indicating that the engine is alive. */
#define MO_ALIVE 1
@ -155,6 +165,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
*/
char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExecToMatch() and just dispatches directly to the nfa
* implementations. It is intended to be used by the Tamarama engine.
*/
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);
/**
* Report matches at the current queue location.
*
@ -175,10 +192,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
*/
char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
/**
* Returns non-zero if the NFA is in any accept state regardless of report
* ID.
*/
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
/**
* Process the queued commands on the given NFA up to end or the first match.
*
* Note: This version is meant for rose prefix NFAs:
* Note: This version is meant for rose prefix/infix NFAs:
* - never uses a callback
* - loading of state at a point in history is not special cased
*
@ -187,9 +210,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
* end with some variant of end. The location field of the events must
* be monotonically increasing. If not all the data was processed during
* the call, the queue is updated to reflect the remaining work.
* @param report we are interested in, if set at the end of the scan returns
* @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
* be passed in.
* @param report we are interested in. If the given report will be raised at
* the end location, the function returns @ref MO_MATCHES_PENDING. If no
* match information is desired, MO_INVALID_IDX should be passed in.
* @return @ref MO_ALIVE if the nfa is still active with no matches pending,
* and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
* alive
@ -205,6 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
* Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
* to hbuf (main buffer and history buffer).
*
* Note: provides the match location as the "end" offset when the callback is
* called.
*
* @param nfa engine to run
* @param offset base offset of buf
* @param buf main buffer
@ -229,7 +255,6 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
* (including br region)
* @param offset the offset to return (via the callback) with each match
* @param callback the callback to call for each match raised
* @param som_cb the callback to call for each match raised (Haig)
* @param context context pointer passed to each callback
*
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
@ -237,8 +262,7 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
*/
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
NfaCallback callback, void *context);
/**
* Indicates if an engine is a zombie.

View File

@ -42,6 +42,8 @@
#include "limex.h"
#include "mcclellan.h"
#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \
case dc_ltype##_NFA_##dc_subtype: \
@ -52,41 +54,11 @@
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
@ -98,21 +70,22 @@
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \
assert(0); \
}
char nfaCheckFinalState(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context) {
NfaCallback callback, void *context) {
assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
// Caller should avoid calling us if we can never produce matches.
assert(nfaAcceptsEod(nfa));
DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
som_cb, context));
context));
return 0;
}
@ -135,6 +108,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
return 0;
}
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec_i(nfa, q, end);
}
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec2_i(nfa, q, end);
}
static really_inline
char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
@ -258,7 +239,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
assert(q);
assert(end >= 0);
assert(q->context);
assert(q->state);
assert(q->cur < q->end);
assert(q->end <= MAX_MQE_LEN);
@ -315,6 +295,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
return 0;
}
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
return 0;
}
char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
DEBUG_PRINTF("nfa=%p\n", nfa);
#ifdef DEBUG

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -91,12 +91,12 @@ struct mq {
* history buffer; (logically) immediately before the
* main buffer */
size_t hlength; /**< length of the history buffer */
struct hs_scratch *scratch; /**< global scratch space */
char report_current; /**<
* report_current matches at starting offset through
* callback. If true, the queue must be located at a
* point where MO_MATCHES_PENDING was returned */
NfaCallback cb; /**< callback to trigger on matches */
SomNfaCallback som_cb; /**< callback with som info; used by haig */
void *context; /**< context to pass along with a callback */
struct mq_item items[MAX_MQE_LEN]; /**< queue items */
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@
#include "limex_internal.h"
#include "mcclellancompile.h"
#include "shengcompile.h"
#include "nfa_internal.h"
#include "repeat_internal.h"
#include "ue2common.h"
@ -78,7 +79,7 @@ struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
decltype(arg), (NFAEngineType)0>::doOp(i, arg)
}
typedef bool (*has_accel_fn)(const NFA *nfa);
typedef bool (*nfa_dispatch_fn)(const NFA *nfa);
template<typename T>
static
@ -87,8 +88,37 @@ bool has_accel_limex(const NFA *nfa) {
return limex->accelCount;
}
template<typename T>
static
bool has_accel_generic(const NFA *) {
bool has_repeats_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
return limex->repeatCount;
}
template<typename T>
static
bool has_repeats_other_than_firsts_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
const char *ptr = (const char *)limex;
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}
return false;
}
static
bool dispatch_false(const NFA *) {
return false;
}
@ -140,72 +170,53 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER};
#define DO_IF_DUMP_SUPPORT(a)
#endif
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
#define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
static UNUSED const char *name; \
static const NFACategory category = NFA_LIMEX; \
typedef LimExNFA##mlt_size implNFA_t; \
typedef u_##mlt_size tableRow_t; \
static const has_accel_fn has_accel; \
static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
static const u32 stateAlign = \
MAX(alignof(tableRow_t), alignof(RepeatControl)); \
static const bool fast = mlt_size <= 64; \
}; \
const has_accel_fn NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::has_accel \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
= has_accel_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \
= has_repeats_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn \
NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \
= has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \
DO_IF_DUMP_SUPPORT( \
const char *NFATraits<LIMEX_NFA_##mlt_size##_##mlt_shift>::name \
= "LimEx (0-"#mlt_shift") "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size##_##mlt_shift> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size##_##mlt_shift>((const NFA *)ptr); \
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *ptr) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)ptr); \
} \
};)
MAKE_LIMEX_TRAITS(32, 1)
MAKE_LIMEX_TRAITS(32, 2)
MAKE_LIMEX_TRAITS(32, 3)
MAKE_LIMEX_TRAITS(32, 4)
MAKE_LIMEX_TRAITS(32, 5)
MAKE_LIMEX_TRAITS(32, 6)
MAKE_LIMEX_TRAITS(32, 7)
MAKE_LIMEX_TRAITS(128, 1)
MAKE_LIMEX_TRAITS(128, 2)
MAKE_LIMEX_TRAITS(128, 3)
MAKE_LIMEX_TRAITS(128, 4)
MAKE_LIMEX_TRAITS(128, 5)
MAKE_LIMEX_TRAITS(128, 6)
MAKE_LIMEX_TRAITS(128, 7)
MAKE_LIMEX_TRAITS(256, 1)
MAKE_LIMEX_TRAITS(256, 2)
MAKE_LIMEX_TRAITS(256, 3)
MAKE_LIMEX_TRAITS(256, 4)
MAKE_LIMEX_TRAITS(256, 5)
MAKE_LIMEX_TRAITS(256, 6)
MAKE_LIMEX_TRAITS(256, 7)
MAKE_LIMEX_TRAITS(384, 1)
MAKE_LIMEX_TRAITS(384, 2)
MAKE_LIMEX_TRAITS(384, 3)
MAKE_LIMEX_TRAITS(384, 4)
MAKE_LIMEX_TRAITS(384, 5)
MAKE_LIMEX_TRAITS(384, 6)
MAKE_LIMEX_TRAITS(384, 7)
MAKE_LIMEX_TRAITS(512, 1)
MAKE_LIMEX_TRAITS(512, 2)
MAKE_LIMEX_TRAITS(512, 3)
MAKE_LIMEX_TRAITS(512, 4)
MAKE_LIMEX_TRAITS(512, 5)
MAKE_LIMEX_TRAITS(512, 6)
MAKE_LIMEX_TRAITS(512, 7)
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)
template<> struct NFATraits<MCCLELLAN_NFA_8> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
#endif
@ -215,9 +226,13 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 2;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
#endif
@ -227,9 +242,13 @@ template<> struct NFATraits<GOUGH_NFA_8> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
#endif
@ -239,9 +258,13 @@ template<> struct NFATraits<GOUGH_NFA_16> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_dfa;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
#endif
@ -251,9 +274,13 @@ template<> struct NFATraits<MPV_NFA_0> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<MPV_NFA_0>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<MPV_NFA_0>::name = "Mega-Puff-Vac";
#endif
@ -263,9 +290,13 @@ template<> struct NFATraits<CASTLE_NFA_0> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<CASTLE_NFA_0>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<CASTLE_NFA_0>::name = "Castle";
#endif
@ -275,9 +306,13 @@ template<> struct NFATraits<LBR_NFA_Dot> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Dot>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Dot>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Dot>::name = "Lim Bounded Repeat (D)";
#endif
@ -287,9 +322,13 @@ template<> struct NFATraits<LBR_NFA_Verm> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Verm>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Verm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Verm>::name = "Lim Bounded Repeat (V)";
#endif
@ -299,9 +338,13 @@ template<> struct NFATraits<LBR_NFA_NVerm> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_NVerm>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVerm>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_NVerm>::name = "Lim Bounded Repeat (NV)";
#endif
@ -311,9 +354,13 @@ template<> struct NFATraits<LBR_NFA_Shuf> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Shuf>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Shuf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Shuf>::name = "Lim Bounded Repeat (S)";
#endif
@ -323,13 +370,49 @@ template<> struct NFATraits<LBR_NFA_Truf> {
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 8;
static const bool fast = true;
static const has_accel_fn has_accel;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const has_accel_fn NFATraits<LBR_NFA_Truf>::has_accel = has_accel_generic;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_Truf>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_Truf>::name = "Lim Bounded Repeat (M)";
#endif
template<> struct NFATraits<SHENG_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 1;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_accel = has_accel_sheng;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<SHENG_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<SHENG_NFA_0>::name = "Sheng";
#endif
template<> struct NFATraits<TAMARAMA_NFA_0> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
static const u32 stateAlign = 32;
static const bool fast = true;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
};
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<TAMARAMA_NFA_0>::has_repeats_other_than_firsts = dispatch_false;
#if defined(DUMP_SUPPORT)
const char *NFATraits<TAMARAMA_NFA_0>::name = "Tamarama";
#endif
} // namespace
#if defined(DUMP_SUPPORT)
@ -380,42 +463,39 @@ struct is_limex {
};
}
namespace {
template<NFAEngineType t>
struct has_repeats_other_than_firsts_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats_other_than_firsts;
}
};
}
bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
return false;
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
has_repeats_other_than_firsts_dispatch,
&nfa)(&nfa);
}
namespace {
template<NFAEngineType t>
struct has_repeats_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats;
}
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
const char *ptr = (const char *)limex;
const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset);
for (u32 i = 0; i < limex->repeatCount; i++) {
u32 offset = repeatOffset[i];
const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset);
const RepeatInfo *repeat =
(const RepeatInfo *)((const char *)info + sizeof(*info));
if (repeat->type != REPEAT_FIRST) {
return true;
}
}
return false;
};
}
bool has_bounded_repeats(const NFA &nfa) {
if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) {
return false;
}
const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa);
return limex->repeatCount;
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch,
&nfa)(&nfa);
}
namespace {
template<NFAEngineType t>
struct has_accel_dispatch {
static has_accel_fn call(const void *) {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_accel;
}
};
@ -423,8 +503,7 @@ struct has_accel_dispatch {
bool has_accel(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
&nfa)
(&nfa);
&nfa)(&nfa);
}
bool requires_decompress_key(const NFA &nfa) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,6 +36,7 @@
#if defined(DUMP_SUPPORT)
#include <cstdio>
#include <string>
struct NFA;
@ -45,7 +46,7 @@ namespace ue2 {
* \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the
* file pointed to by dotFile.
*/
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile);
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base);
/** \brief Dump a textual representation of the NFA. */
void nfaDumpText(const struct NFA *fact, FILE *textFile);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,6 +40,8 @@
#include "limex.h"
#include "mcclellandump.h"
#include "mpv_dump.h"
#include "shengdump.h"
#include "tamarama_dump.h"
#ifndef DUMP_SUPPORT
#error "no dump support"
@ -57,41 +59,11 @@ namespace ue2 {
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \
DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \
DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \
DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \
@ -103,12 +75,15 @@ namespace ue2 {
DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \
DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \
DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \
DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \
DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \
default: \
assert(0); \
}
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) {
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile));
void nfaDumpDot(const struct NFA *nfa, FILE *dotFile,
const std::string &base) {
DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base));
}
void nfaDumpText(const struct NFA *nfa, FILE *txtFile) {

View File

@ -51,41 +51,11 @@ extern "C"
// Common data structures for NFAs
enum NFAEngineType {
LIMEX_NFA_32_1,
LIMEX_NFA_32_2,
LIMEX_NFA_32_3,
LIMEX_NFA_32_4,
LIMEX_NFA_32_5,
LIMEX_NFA_32_6,
LIMEX_NFA_32_7,
LIMEX_NFA_128_1,
LIMEX_NFA_128_2,
LIMEX_NFA_128_3,
LIMEX_NFA_128_4,
LIMEX_NFA_128_5,
LIMEX_NFA_128_6,
LIMEX_NFA_128_7,
LIMEX_NFA_256_1,
LIMEX_NFA_256_2,
LIMEX_NFA_256_3,
LIMEX_NFA_256_4,
LIMEX_NFA_256_5,
LIMEX_NFA_256_6,
LIMEX_NFA_256_7,
LIMEX_NFA_384_1,
LIMEX_NFA_384_2,
LIMEX_NFA_384_3,
LIMEX_NFA_384_4,
LIMEX_NFA_384_5,
LIMEX_NFA_384_6,
LIMEX_NFA_384_7,
LIMEX_NFA_512_1,
LIMEX_NFA_512_2,
LIMEX_NFA_512_3,
LIMEX_NFA_512_4,
LIMEX_NFA_512_5,
LIMEX_NFA_512_6,
LIMEX_NFA_512_7,
LIMEX_NFA_32,
LIMEX_NFA_128,
LIMEX_NFA_256,
LIMEX_NFA_384,
LIMEX_NFA_512,
MCCLELLAN_NFA_8, /**< magic pseudo nfa */
MCCLELLAN_NFA_16, /**< magic pseudo nfa */
GOUGH_NFA_8, /**< magic pseudo nfa */
@ -97,6 +67,8 @@ enum NFAEngineType {
LBR_NFA_Shuf, /**< magic pseudo nfa */
LBR_NFA_Truf, /**< magic pseudo nfa */
CASTLE_NFA_0, /**< magic pseudo nfa */
SHENG_NFA_0, /**< magic pseudo nfa */
TAMARAMA_NFA_0, /**< magic nfa container */
/** \brief bogus NFA - not used */
INVALID_NFA
};
@ -175,50 +147,27 @@ static really_inline int isGoughType(u8 t) {
return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
}
/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA.
* */
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
static really_inline int isShengType(u8 t) {
return t == SHENG_NFA_0;
}
/**
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
* Sheng DFA.
*/
static really_inline int isDfaType(u8 t) {
    /* The diff render left both the old and new return statements in place,
     * making the Sheng-aware line unreachable; keep only the new one. */
    return isMcClellanType(t) || isGoughType(t) || isShengType(t);
}
/** \brief True if the given type (from NFA::type) is an NFA. */
static really_inline int isNfaType(u8 t) {
switch (t) {
case LIMEX_NFA_32_1:
case LIMEX_NFA_32_2:
case LIMEX_NFA_32_3:
case LIMEX_NFA_32_4:
case LIMEX_NFA_32_5:
case LIMEX_NFA_32_6:
case LIMEX_NFA_32_7:
case LIMEX_NFA_128_1:
case LIMEX_NFA_128_2:
case LIMEX_NFA_128_3:
case LIMEX_NFA_128_4:
case LIMEX_NFA_128_5:
case LIMEX_NFA_128_6:
case LIMEX_NFA_128_7:
case LIMEX_NFA_256_1:
case LIMEX_NFA_256_2:
case LIMEX_NFA_256_3:
case LIMEX_NFA_256_4:
case LIMEX_NFA_256_5:
case LIMEX_NFA_256_6:
case LIMEX_NFA_256_7:
case LIMEX_NFA_384_1:
case LIMEX_NFA_384_2:
case LIMEX_NFA_384_3:
case LIMEX_NFA_384_4:
case LIMEX_NFA_384_5:
case LIMEX_NFA_384_6:
case LIMEX_NFA_384_7:
case LIMEX_NFA_512_1:
case LIMEX_NFA_512_2:
case LIMEX_NFA_512_3:
case LIMEX_NFA_512_4:
case LIMEX_NFA_512_5:
case LIMEX_NFA_512_6:
case LIMEX_NFA_512_7:
case LIMEX_NFA_32:
case LIMEX_NFA_128:
case LIMEX_NFA_256:
case LIMEX_NFA_384:
case LIMEX_NFA_512:
return 1;
default:
break;
@ -233,6 +182,12 @@ int isLbrType(u8 t) {
t == LBR_NFA_Shuf || t == LBR_NFA_Truf;
}
/** \brief True if the given type (from NFA::type) is a container engine. */
static really_inline
int isContainerType(u8 t) {
return t == TAMARAMA_NFA_0;
}
static really_inline
int isMultiTopType(u8 t) {
return !isDfaType(t) && !isLbrType(t);

View File

@ -37,6 +37,8 @@
#include "ue2common.h"
#include <string>
namespace ue2 {
/** \brief Specify the use-case for an nfa engine. */
@ -47,6 +49,7 @@ enum nfa_kind {
NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
NFA_REV_PREFIX, //! reverse running prefixes (for som)
NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
};
/** \brief True if this kind of engine is triggered by a top event. */
@ -63,8 +66,10 @@ bool is_triggered(enum nfa_kind k) {
}
/**
* \brief True if this kind of engine generates callback events when it
* enters accept states.
* \brief True if this kind of engine generates actively checks for accept
* states either to halt matching or to raise a callback. Only these engines
* generated with this property should call nfaQueueExec() or
* nfaQueueExecToMatch().
*/
inline
bool generates_callbacks(enum nfa_kind k) {
@ -73,6 +78,24 @@ bool generates_callbacks(enum nfa_kind k) {
case NFA_OUTFIX:
case NFA_OUTFIX_RAW:
case NFA_REV_PREFIX:
case NFA_EAGER_PREFIX:
return true;
default:
return false;
}
}
/**
* \brief True if this kind of engine has its state inspected to see if it is in
* an accept state. Engines generated with this property will commonly call
* nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
*/
inline
bool inspects_states_for_accepts(enum nfa_kind k) {
switch (k) {
case NFA_PREFIX:
case NFA_INFIX:
case NFA_EAGER_PREFIX:
return true;
default:
return false;
@ -94,6 +117,32 @@ bool has_managed_reports(enum nfa_kind k) {
}
}
#if defined(DEBUG) || defined(DUMP_SUPPORT)
inline
std::string to_string(nfa_kind k) {
    // Human-readable name for an nfa_kind; compiled only for debug/dump
    // builds. The switch deliberately has no default so the compiler warns
    // when a new enumerator is added without a name here.
    switch (k) {
    case NFA_PREFIX:
        return "PREFIX";
    case NFA_INFIX:
        return "INFIX";
    case NFA_SUFFIX:
        return "SUFFIX";
    case NFA_OUTFIX:
        return "OUTFIX";
    case NFA_OUTFIX_RAW:
        return "OUTFIX_RAW";
    case NFA_REV_PREFIX:
        return "REV_PREFIX";
    case NFA_EAGER_PREFIX:
        return "EAGER_PREFIX";
    }
    assert(0); // unreachable when the switch covers every enumerator
    return "?";
}
#endif
} // namespace ue2
#endif

676
src/nfa/sheng.c Normal file
View File

@ -0,0 +1,676 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "sheng.h"
#include "accel.h"
#include "sheng_internal.h"
#include "nfa_api.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/join.h"
#include "util/simd_utils.h"
/* Scanning modes for the Sheng runtime: report all matches via callback,
 * stop scanning at the first match, or scan without reporting at all. */
enum MatchMode {
    CALLBACK_OUTPUT,
    STOP_AT_MATCH,
    NO_MATCHES
};
static really_inline
const struct sheng *get_sheng(const struct NFA *n) {
    /* The sheng structure is stored in the NFA's implementation area. */
    const void *impl = getImplNfa(n);
    return (const struct sheng *)impl;
}
static really_inline
const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) {
    /* Offsets stored in the sheng header are relative to the enclosing NFA
     * header; rebase against the sheng struct by subtracting its size. */
    u32 offset = sh->aux_offset - sizeof(struct NFA) +
                 (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux);
    DEBUG_PRINTF("Getting aux for state %u at offset %llu\n",
                 id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA));
    const char *base = (const char *)sh;
    return (const struct sstate_aux *)(base + offset);
}
static really_inline
const union AccelAux *get_accel(const struct sheng *sh, u8 id) {
    /* Acceleration info is located via the state's aux entry; the stored
     * offset is relative to the enclosing NFA header. */
    const struct sstate_aux *saux = get_aux(sh, id);
    DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel);
    const char *base = (const char *)sh;
    return (const union AccelAux *)(base + saux->accel - sizeof(struct NFA));
}
static really_inline
const struct report_list *get_rl(const struct sheng *sh,
                                 const struct sstate_aux *aux) {
    /* aux->accept is an offset relative to the enclosing NFA header. */
    DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept);
    const char *base = (const char *)sh;
    return (const struct report_list *)(base + aux->accept -
                                        sizeof(struct NFA));
}
static really_inline
const struct report_list *get_eod_rl(const struct sheng *sh,
                                     const struct sstate_aux *aux) {
    /* aux->accept_eod is an offset relative to the enclosing NFA header.
     * Fixed: the debug message previously printed aux->accept, not the
     * aux->accept_eod offset actually used below. */
    DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept_eod);
    return (const struct report_list *)
            ((const char *)sh + aux->accept_eod - sizeof(struct NFA));
}
static really_inline
char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux,
                    ReportID report) {
    /* Returns 1 iff `report` appears in this state's report list. */
    assert(sh && aux);

    const struct report_list *rl = get_rl(sh, aux);
    assert(ISALIGNED_N(rl, 4));
    DEBUG_PRINTF("report list has %u entries\n", rl->count);

    u32 idx;
    for (idx = 0; idx < rl->count; idx++) {
        if (rl->report[idx] == report) {
            DEBUG_PRINTF("reporting %u\n", rl->report[idx]);
            return 1;
        }
    }
    return 0;
}
static really_inline
char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) {
    /* Raise a single report through the user callback, propagating a halt
     * request from the callback back to the caller. */
    DEBUG_PRINTF("reporting %u\n", r);
    int cb_rv = cb(0, loc, r, ctxt);
    if (cb_rv == MO_HALT_MATCHING) {
        return MO_HALT_MATCHING; /* termination requested */
    }
    return MO_CONTINUE_MATCHING; /* continue execution */
}
/*
 * Fire all reports attached to `state` at location `loc`.
 *
 * A one-entry accept cache (cached_accept_state/cached_accept_id) avoids
 * re-walking the report list when the same accept state fires repeatedly.
 * The cache pointers may be NULL only when eod is nonzero: every cache
 * access below is guarded by !eod (testEOD passes NULL, NULL, 1).
 * Returns MO_HALT_MATCHING if the callback requested termination,
 * MO_CONTINUE_MATCHING otherwise.
 */
static really_inline
char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt,
                 const u8 state, u64a loc, u8 *const cached_accept_state,
                 ReportID *const cached_accept_id, char eod) {
    DEBUG_PRINTF("reporting matches @ %llu\n", loc);
    /* Fast path: same accept state as last time, fire the cached report. */
    if (!eod && state == *cached_accept_state) {
        DEBUG_PRINTF("reporting %u\n", *cached_accept_id);
        if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }
        return MO_CONTINUE_MATCHING; /* continue execution */
    }
    const struct sstate_aux *aux = get_aux(sh, state);
    /* EOD accepts use a separate report list. */
    const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux);
    assert(ISALIGNED(rl));
    DEBUG_PRINTF("report list has %u entries\n", rl->count);
    u32 count = rl->count;
    /* Single-report states are worth caching for subsequent calls. */
    if (!eod && count == 1) {
        *cached_accept_state = state;
        *cached_accept_id = rl->report[0];
        DEBUG_PRINTF("reporting %u\n", rl->report[0]);
        if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }
        return MO_CONTINUE_MATCHING; /* continue execution */
    }
    /* General case: fire every report in the list. */
    for (u32 i = 0; i < count; i++) {
        DEBUG_PRINTF("reporting %u\n", rl->report[i]);
        if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }
    }
    return MO_CONTINUE_MATCHING; /* continue execution */
}
/* include Sheng function definitions */
#include "sheng_defs.h"
/*
 * Scan [start, end) in CALLBACK_OUTPUT mode: run the 4-byte-at-a-time kernel
 * first (accelerated/can-die variants chosen by flags), then finish the tail
 * with the 1-byte kernel from *scanned. Reports all matches via cb.
 * Returns MO_DEAD if the callback halted matching, otherwise MO_ALIVE.
 * On return, *scanned points one past the last byte consumed and *state
 * holds the current DFA state byte.
 */
static really_inline
char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
                u8 *const cached_accept_state, ReportID *const cached_accept_id,
                const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
                u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
    DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n",
                 (u64a)(end - start), offset);
    DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
                 (s64a)(end - cur_buf));
    DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
                 !!has_accel, !!single);
    int rv;
    /* scan and report all matches */
    if (can_die) {
        if (has_accel) {
            rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state,
                             cached_accept_id, single, offset, cur_buf, start,
                             end, scanned);
        } else {
            rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state,
                            cached_accept_id, single, offset, cur_buf, start,
                            end, scanned);
        }
        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
        /* 1-byte kernel picks up from where the 4-byte kernel stopped. */
        rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state,
                       cached_accept_id, single, offset, cur_buf, *scanned, end,
                       scanned);
    } else {
        if (has_accel) {
            rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state,
                            cached_accept_id, single, offset, cur_buf, start,
                            end, scanned);
        } else {
            rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state,
                           cached_accept_id, single, offset, cur_buf, start,
                           end, scanned);
        }
        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
        /* 1-byte kernel picks up from where the 4-byte kernel stopped. */
        rv = sheng_co(state, cb, ctxt, sh, cached_accept_state,
                      cached_accept_id, single, offset, cur_buf, *scanned, end,
                      scanned);
    }
    if (rv == MO_HALT_MATCHING) {
        return MO_DEAD;
    }
    return MO_ALIVE;
}
/*
 * Scan [start, end) in NO_MATCHES mode: advance the DFA state without
 * raising any reports (used for history/catch-up scanning). No return
 * value; *scanned and *state are updated in place.
 */
static really_inline
void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset,
                u8 *const cached_accept_state, ReportID *const cached_accept_id,
                const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die,
                u8 has_accel, u8 single, const u8 **scanned, u8 *state) {
    DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n",
                 (u64a)(end - start), offset);
    DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
                 (s64a)(end - cur_buf));
    DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
                 !!has_accel, !!single);
    /* just scan the buffer */
    if (can_die) {
        if (has_accel) {
            sheng4_nmda(state, cb, ctxt, sh, cached_accept_state,
                        cached_accept_id, single, offset, cur_buf, start, end,
                        scanned);
        } else {
            sheng4_nmd(state, cb, ctxt, sh, cached_accept_state,
                       cached_accept_id, single, offset, cur_buf, start, end,
                       scanned);
        }
        /* 1-byte kernel finishes the tail left by the 4-byte kernel. */
        sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
                  single, offset, cur_buf, *scanned, end, scanned);
    } else {
        sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
                  single, offset, cur_buf, start, end, scanned);
        sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id,
                 single, offset, cur_buf, *scanned, end, scanned);
    }
}
/*
 * Scan [start, end) in STOP_AT_MATCH mode: run until the first match.
 * Returns MO_DEAD if the callback halted matching, MO_MATCHES_PENDING if a
 * match was found (scan stopped early; *scanned marks the stop position),
 * or MO_ALIVE if the whole range was consumed without a match.
 */
static really_inline
char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt,
                 u64a offset, u8 *const cached_accept_state,
                 ReportID *const cached_accept_id, const u8 *cur_buf,
                 const u8 *start, const u8 *end, u8 can_die, u8 has_accel,
                 u8 single, const u8 **scanned, u8 *state) {
    DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n",
                 (u64a)(end - start), offset);
    DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf),
                 (s64a)(end - cur_buf));
    DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die,
                 !!has_accel, !!single);
    int rv;
    /* scan until first match */
    if (can_die) {
        if (has_accel) {
            rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state,
                              cached_accept_id, single, offset, cur_buf, start,
                              end, scanned);
        } else {
            rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state,
                             cached_accept_id, single, offset, cur_buf, start,
                             end, scanned);
        }
        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
        /* if we stopped before we expected, we found a match */
        if (rv == MO_MATCHES_PENDING) {
            return MO_MATCHES_PENDING;
        }
        /* 1-byte kernel finishes the tail left by the 4-byte kernel. */
        rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state,
                        cached_accept_id, single, offset, cur_buf, *scanned,
                        end, scanned);
    } else {
        if (has_accel) {
            rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state,
                             cached_accept_id, single, offset, cur_buf, start,
                             end, scanned);
        } else {
            rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
                            cached_accept_id, single, offset, cur_buf, start,
                            end, scanned);
        }
        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
        /* if we stopped before we expected, we found a match */
        if (rv == MO_MATCHES_PENDING) {
            return MO_MATCHES_PENDING;
        }
        rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
                       cached_accept_id, single, offset, cur_buf, *scanned, end,
                       scanned);
    }
    if (rv == MO_HALT_MATCHING) {
        return MO_DEAD;
    }
    /* if we stopped before we expected, we found a match */
    if (rv == MO_MATCHES_PENDING) {
        return MO_MATCHES_PENDING;
    }
    return MO_ALIVE;
}
/*
 * Main Sheng queue-execution loop: processes queue events up to b_end in the
 * given MatchMode, scanning history and/or buffer between events.
 * Returns MO_ALIVE, MO_DEAD, or (STOP_AT_MATCH only) MO_MATCHES_PENDING.
 */
static never_inline
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
              enum MatchMode mode) {
    u8 state = *(u8 *)q->state;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;
    DEBUG_PRINTF("starting Sheng execution in state %u\n",
                 state & SHENG_STATE_MASK);
    /* Flush any matches left pending by a previous STOP_AT_MATCH run. */
    if (q->report_current) {
        DEBUG_PRINTF("reporting current pending matches\n");
        assert(sh);
        q->report_current = 0;
        int rv;
        if (single) {
            rv = fireSingleReport(q->cb, q->context, sh->report,
                                  q_cur_offset(q));
        } else {
            rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
                             &cached_accept_state, &cached_accept_id, 0);
        }
        if (rv == MO_HALT_MATCHING) {
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            return MO_DEAD;
        }
        DEBUG_PRINTF("proceeding with matching\n");
    }
    assert(q_cur_type(q) == MQE_START);
    s64a start = q_cur_loc(q);
    DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
                 mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
                 mode == NO_MATCHES ? "NO MATCHES" :
                 mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");
    DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                 q_cur_type(q) == MQE_START ? "START" :
                 q_cur_type(q) == MQE_TOP ? "TOP" :
                 q_cur_type(q) == MQE_END ? "END" : "???");
    /* Negative locations index into the history buffer (which ends at its
     * last byte, hence the +hlength base); non-negative into the buffer. */
    const u8* cur_buf;
    if (start < 0) {
        DEBUG_PRINTF("negative location, scanning history\n");
        DEBUG_PRINTF("min location: %zd\n", -q->hlength);
        cur_buf = q->history + q->hlength;
    } else {
        DEBUG_PRINTF("positive location, scanning buffer\n");
        DEBUG_PRINTF("max location: %lli\n", b_end);
        cur_buf = q->buffer;
    }
    /* if we our queue event is past our end */
    if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
        DEBUG_PRINTF("current location past buffer end\n");
        DEBUG_PRINTF("setting q location to %llu\n", b_end);
        DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
        q->items[q->cur].location = b_end;
        return MO_ALIVE;
    }
    q->cur++;
    s64a cur_start = start;
    while (1) {
        DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                     q_cur_type(q) == MQE_START ? "START" :
                     q_cur_type(q) == MQE_TOP ? "TOP" :
                     q_cur_type(q) == MQE_END ? "END" : "???");
        s64a end = q_cur_loc(q);
        if (mode != NO_MATCHES) {
            end = MIN(end, b_end);
        }
        assert(end <= (s64a) q->length);
        s64a cur_end = end;
        /* we may cross the border between history and current buffer */
        if (cur_start < 0) {
            cur_end = MIN(0, cur_end);
        }
        DEBUG_PRINTF("start: %lli end: %lli\n", start, end);
        /* don't scan zero length buffer */
        if (cur_start != cur_end) {
            const u8 * scanned = cur_buf;
            char rv;
            /* if we're in nomatch mode or if we're scanning history buffer */
            if (mode == NO_MATCHES ||
                (cur_start < 0 && mode == CALLBACK_OUTPUT)) {
                runShengNm(sh, q->cb, q->context, q->offset,
                           &cached_accept_state, &cached_accept_id, cur_buf,
                           cur_buf + cur_start, cur_buf + cur_end, can_die,
                           has_accel, single, &scanned, &state);
            } else if (mode == CALLBACK_OUTPUT) {
                rv = runShengCb(sh, q->cb, q->context, q->offset,
                                &cached_accept_state, &cached_accept_id,
                                cur_buf, cur_buf + cur_start, cur_buf + cur_end,
                                can_die, has_accel, single, &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return MO_DEAD;
                }
            } else if (mode == STOP_AT_MATCH) {
                rv = runShengSam(sh, q->cb, q->context, q->offset,
                                 &cached_accept_state, &cached_accept_id,
                                 cur_buf, cur_buf + cur_start,
                                 cur_buf + cur_end, can_die, has_accel, single,
                                 &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                } else if (rv == MO_MATCHES_PENDING) {
                    assert(q->cur);
                    DEBUG_PRINTF("found a match, setting q location to %zd\n",
                                 scanned - cur_buf + 1);
                    /* Rewind the queue and plant a START event just past the
                     * match position so matching can resume from there. */
                    q->cur--;
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location =
                            scanned - cur_buf + 1; /* due to exiting early */
                    *(u8 *)q->state = state;
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                }
            } else {
                assert(!"invalid scanning mode!");
            }
            assert(scanned == cur_buf + cur_end);
            cur_start = cur_end;
        }
        /* if we our queue event is past our end */
        if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
            DEBUG_PRINTF("current location past buffer end\n");
            DEBUG_PRINTF("setting q location to %llu\n", b_end);
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = b_end;
            *(u8 *)q->state = state;
            return MO_ALIVE;
        }
        /* crossing over into actual buffer */
        if (cur_start == 0) {
            DEBUG_PRINTF("positive location, scanning buffer\n");
            DEBUG_PRINTF("max offset: %lli\n", b_end);
            cur_buf = q->buffer;
        }
        /* continue scanning the same buffer */
        if (end != cur_end) {
            continue;
        }
        /* The current event's span is fully consumed; act on the event. */
        switch (q_cur_type(q)) {
        case MQE_END:
            *(u8 *)q->state = state;
            q->cur++;
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            if (can_die) {
                return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
            }
            return MO_ALIVE;
        case MQE_TOP:
            /* A top at absolute offset 0 restarts in the anchored state;
             * otherwise take the state's top transition. */
            if (q->offset + cur_start == 0) {
                DEBUG_PRINTF("Anchored start, going to state %u\n",
                             sh->anchored);
                state = sh->anchored;
            } else {
                u8 new_state = get_aux(sh, state)->top;
                DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
                             new_state & SHENG_STATE_MASK);
                state = new_state;
            }
            break;
        default:
            assert(!"invalid queue event");
            break;
        }
        q->cur++;
    }
}
/*
 * Block-mode (small write) entry point: scan a complete buffer from the
 * anchored start state, reporting all matches, then fire any EOD reports.
 * Returns MO_DEAD if matching was halted or the final state is dead,
 * MO_ALIVE otherwise.
 */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context) {
    DEBUG_PRINTF("smallwrite Sheng\n");
    assert(n->type == SHENG_NFA_0);
    const struct sheng *sh = getImplNfa(n);
    u8 state = sh->anchored;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;
    /* scan and report all matches */
    int rv;
    s64a end = length;
    const u8 *scanned;
    rv = runShengCb(sh, cb, context, offset, &cached_accept_state,
                    &cached_accept_id, buffer, buffer, buffer + end, can_die,
                    has_accel, single, &scanned, &state);
    if (rv == MO_DEAD) {
        DEBUG_PRINTF("exiting in state %u\n",
                     state & SHENG_STATE_MASK);
        return MO_DEAD;
    }
    DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK);
    /* Fire EOD reports for the final state, if it has any. */
    const struct sstate_aux *aux = get_aux(sh, state);
    if (aux->accept_eod) {
        DEBUG_PRINTF("Reporting EOD matches\n");
        fireReports(sh, cb, context, state, end + offset, &cached_accept_state,
                    &cached_accept_id, 1);
    }
    return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE;
}
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) {
    /* Queue execution in CALLBACK_OUTPUT mode: report every match. */
    return runSheng(get_sheng(n), q, end, CALLBACK_OUTPUT);
}
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) {
    /* Queue execution in STOP_AT_MATCH mode: halt at the first match. */
    return runSheng(get_sheng(n), q, end, STOP_AT_MATCH);
}
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) {
    assert(q_cur_type(q) == MQE_START);

    /* Run without raising callbacks; if still alive and the final state
     * accepts `report`, flag the match as pending instead. */
    const struct sheng *sh = get_sheng(n);
    char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES);

    if (rv && nfaExecSheng0_inAccept(n, report, q)) {
        return MO_MATCHES_PENDING;
    }
    return rv;
}
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report,
                            struct mq *q) {
    assert(n && q);

    const struct sheng *sh = get_sheng(n);
    u8 cur = *(const u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %u\n", (u8)(cur & SHENG_STATE_MASK));

    /* States without a report list offset cannot accept anything. */
    const struct sstate_aux *aux = get_aux(sh, cur);
    if (!aux->accept) {
        return 0;
    }
    return shengHasAccept(sh, aux, report);
}
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) {
    assert(n && q);

    const struct sheng *sh = get_sheng(n);
    u8 cur = *(const u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %u\n", (u8)(cur & SHENG_STATE_MASK));

    /* Any state carrying a report list offset is an accept state. */
    const struct sstate_aux *aux = get_aux(sh, cur);
    return aux->accept ? 1 : 0;
}
/*
 * Fire EOD reports for the current state, if any. The cached-accept
 * pointers passed to fireReports are NULL; that is safe because the eod
 * flag is set and fireReports only dereferences the cache when !eod.
 */
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
                           UNUSED const char *streamState, u64a offset,
                           NfaCallback cb, void *ctxt) {
    assert(nfa);
    const struct sheng *sh = get_sheng(nfa);
    u8 s = *(const u8 *)state;
    DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK));
    const struct sstate_aux *aux = get_aux(sh, s);
    /* No EOD report list for this state: nothing to do. */
    if (!aux->accept_eod) {
        return MO_CONTINUE_MATCHING;
    }
    return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1);
}
/*
 * Report all matches pending in the current state at the current queue
 * offset. Always returns 0.
 */
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct sheng *sh = (const struct sheng *)getImplNfa(n);
    NfaCallback cb = q->cb;
    void *ctxt = q->context;
    u8 s = *(u8 *)q->state;
    const struct sstate_aux *aux = get_aux(sh, s);
    u64a offset = q_cur_offset(q);
    /* Throwaway cache: this function fires reports only once. */
    u8 cached_state_id = 0;
    ReportID cached_report_id = 0;
    assert(q_cur_type(q) == MQE_START);
    if (aux->accept) {
        if (sh->flags & SHENG_FLAG_SINGLE_REPORT) {
            fireSingleReport(cb, ctxt, sh->report, offset);
        } else {
            /* NOTE(review): eod=1 here makes fireReports use the EOD report
             * list (via get_eod_rl) rather than the normal one — confirm
             * this is intended for reportCurrent. */
            fireReports(sh, cb, ctxt, s, offset, &cached_state_id,
                        &cached_report_id, 1);
        }
    }
    return 0;
}
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, UNUSED u8 key) {
    /* Anchored start state at offset 0, floating start state elsewhere. */
    const struct sheng *sh = get_sheng(nfa);
    u8 *s = (u8 *)state;
    if (offset) {
        *s = sh->floating;
    } else {
        *s = sh->anchored;
    }
    /* Alive iff the chosen start state is not dead. */
    return !(*s & SHENG_STATE_DEAD);
}
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) {
assert(nfa->scratchStateSize == 1);
/* starting in floating state */
const struct sheng *sh = get_sheng(nfa);
*(u8 *)q->state = sh->floating;
DEBUG_PRINTF("starting in floating state\n");
return 0;
}
char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa,
                                      const struct mq *q, UNUSED s64a loc) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);
    /* Scratch and stream state are both one byte: a plain copy suffices. */
    const u8 *src = (const u8 *)q->state;
    u8 *dest = (u8 *)q->streamState;
    *dest = *src;
    return 0;
}
char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest,
                               const void *src, UNUSED u64a offset,
                               UNUSED u8 key) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);
    /* Stream state is a single byte; expansion is a plain copy. */
    const u8 *in = (const u8 *)src;
    u8 *out = (u8 *)dest;
    *out = *in;
    return 0;
}

61
src/nfa/sheng.h Normal file
View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENG_H_
#define SHENG_H_
#include "callback.h"
#include "ue2common.h"
struct mq;
struct NFA;
/* Sheng has no reverse block-mode scan and no zombie support. */
#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL
/* Queue execution: report all matches up to end. */
char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end);
/* Queue execution: stop at the first match. */
char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end);
/* Queue execution without reporting; flags matches for `report` pending. */
char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report);
/* Accept-state queries against the queue's current state. */
char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q);
/* State lifecycle: init, compress to stream state, expand back. */
char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q);
char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q,
                                      s64a loc);
char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest,
                               const void *src, u64a offset, u8 key);
char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset,
                                       void *state, u8 key);
/* Fire EOD reports for the given state. */
char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state,
                           const char *streamState, u64a offset,
                           NfaCallback callback, void *context);
/* Report matches pending in the current state. */
char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q);
/* Block-mode scan of a complete buffer. */
char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer,
                     size_t length, NfaCallback cb, void *context);
#endif /* SHENG_H_ */

353
src/nfa/sheng_defs.h Normal file
View File

@ -0,0 +1,353 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENG_DEFS_H
#define SHENG_DEFS_H
/*
* Utility functions used by various versions of Sheng engine
*/
/* Returns non-zero if the state byte carries the "dead" flag. */
static really_inline
u8 isDeadState(const u8 a) {
    return a & SHENG_STATE_DEAD;
}
/* Returns non-zero if the state byte carries the "accept" flag. */
static really_inline
u8 isAcceptState(const u8 a) {
    return a & SHENG_STATE_ACCEPT;
}
/* Returns non-zero if the state byte carries the "accelerable" flag. */
static really_inline
u8 isAccelState(const u8 a) {
    return a & SHENG_STATE_ACCEL;
}
/* Returns non-zero if any of the four state bytes has a flag bit set
 * (accept/dead/accel) and therefore needs per-byte handling. */
static really_inline
u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) {
    return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
}
/* these functions should be optimized out, used by NO_MATCHES mode */
/* Always-false four-state predicate: substituted for hasInterestingStates
 * in specializations that do not report matches. */
static really_inline
u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c,
              UNUSED const u8 d) {
    return 0;
}
/* Always-false single-state predicate: substituted for dead/accept/accel
 * checks that a given specialization does not need. */
static really_inline
u8 dummyFunc(UNUSED const u8 a) {
    return 0;
}
/*
 * Sheng function definitions for single byte loops.
 *
 * Each define/include/undef group below instantiates sheng_impl.h once.
 * Suffix convention (per the comments on each group): "co" = callback
 * output, "sam" = stop at match, "nm" = no match reporting, trailing
 * "d" = the DFA can reach the dead state.
 */

/* callback output, can die */
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* callback output, can't die */
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can die */
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can't die */
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* no match, can die */
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* no match, can't die */
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl.h"
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
/*
 * Sheng function definitions for 4-byte loops.
 *
 * Each group instantiates sheng_impl4.h once. Same suffix convention as
 * the single-byte loops above, plus a trailing "a" = accelerated (the
 * variant probes accel states). For match-reporting variants the dead and
 * accel checks live inside the INTERESTING branch (INNER_*); for no-match
 * variants INTERESTING_FUNC is a dummy, so those checks move outside
 * (OUTER_*) — see the commentary in sheng_impl4.h.
 */

/* callback output, can die, accelerated */
#define SHENG_IMPL sheng4_coda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* callback output, can die, not accelerated */
#define SHENG_IMPL sheng4_cod
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* callback output, can't die, accelerated */
#define SHENG_IMPL sheng4_coa
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* callback output, can't die, not accelerated */
#define SHENG_IMPL sheng4_co
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can die, accelerated */
#define SHENG_IMPL sheng4_samda
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can die, not accelerated */
#define SHENG_IMPL sheng4_samd
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC isDeadState
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can't die, accelerated */
#define SHENG_IMPL sheng4_sama
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* stop at match, can't die, not accelerated */
#define SHENG_IMPL sheng4_sam
#define INTERESTING_FUNC hasInterestingStates
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
#define STOP_AT_MATCH 1
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* no-match have interesting func as dummy, and die/accel checks are outer */

/* no match, can die, accelerated */
#define SHENG_IMPL sheng4_nmda
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* no match, can die, not accelerated */
#define SHENG_IMPL sheng4_nmd
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC isDeadState
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH

/* there is no performance benefit in accelerating a no-match case that can't
 * die */

/* no match, can't die */
#define SHENG_IMPL sheng4_nm
#define INTERESTING_FUNC dummyFunc4
#define INNER_DEAD_FUNC dummyFunc
#define OUTER_DEAD_FUNC dummyFunc
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
#define STOP_AT_MATCH 0
#include "sheng_impl4.h"
#undef SHENG_IMPL
#undef INTERESTING_FUNC
#undef INNER_DEAD_FUNC
#undef OUTER_DEAD_FUNC
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
#undef STOP_AT_MATCH
#endif // SHENG_DEFS_H

97
src/nfa/sheng_impl.h Normal file
View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* In order to use this macro, the following things need to be defined:
*
* - SHENG_IMPL (name of the Sheng implementation function)
* - DEAD_FUNC (name of the function checking for dead states)
* - ACCEPT_FUNC (name of the function checking for accept state)
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
*/
/* byte-by-byte version. we don't do byte-by-byte death checking as it's
* pretty pointless to do it over a buffer that's at most 3 bytes long */
/* Template body: each instantiation binds SHENG_IMPL / DEAD_FUNC /
 * ACCEPT_FUNC / STOP_AT_MATCH before including this header. Advances the
 * state machine one input byte at a time over [start, end). On return,
 * *state holds the current state byte and *scan_end the next byte to scan. */
static really_inline
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
                u8 *const cached_accept_state, ReportID *const cached_accept_id,
                u8 single, u64a base_offset, const u8 *buf, const u8 *start,
                const u8 *end, const u8 **scan_end) {
    DEBUG_PRINTF("Starting DFA execution in state %u\n",
                 *state & SHENG_STATE_MASK);
    const u8 *cur_buf = start;
    if (DEAD_FUNC(*state)) {
        /* Dead state is absorbing: nothing more can match in this scan. */
        DEBUG_PRINTF("Dead on arrival\n");
        *scan_end = end;
        return MO_CONTINUE_MATCHING;
    }
    DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
    /* Broadcast the state byte into all lanes; each input byte selects one
     * of the 256 shuffle masks, and pshufb performs the transition lookup. */
    m128 cur_state = set16x8(*state);
    const m128 *masks = s->shuffle_masks;
    while (likely(cur_buf != end)) {
        const u8 c = *cur_buf;
        const m128 shuffle_mask = masks[c];
        cur_state = pshufb(shuffle_mask, cur_state);
        const u8 tmp = movd(cur_state); /* extract new state byte (lane 0) */
        DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
        DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4,
                     tmp & 0xF);
        if (unlikely(ACCEPT_FUNC(tmp))) {
            DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK);
            u64a match_offset = base_offset + (cur_buf - buf) + 1;
            DEBUG_PRINTF("Match @ %llu\n", match_offset);
            if (STOP_AT_MATCH) {
                /* Hand control back to the caller at the match point. */
                DEBUG_PRINTF("Stopping at match @ %lli\n",
                             (u64a)(cur_buf - start));
                *state = tmp;
                *scan_end = cur_buf;
                return MO_MATCHES_PENDING;
            }
            if (single) {
                /* Single-report engine: fire the engine-wide report id. */
                if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
                    MO_HALT_MATCHING) {
                    return MO_HALT_MATCHING;
                }
            } else {
                if (fireReports(s, cb, ctxt, tmp, match_offset,
                                cached_accept_state, cached_accept_id,
                                0) == MO_HALT_MATCHING) {
                    return MO_HALT_MATCHING;
                }
            }
        }
        cur_buf++;
    }
    *state = movd(cur_state);
    *scan_end = cur_buf;
    return MO_CONTINUE_MATCHING;
}

284
src/nfa/sheng_impl4.h Normal file
View File

@ -0,0 +1,284 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* In order to use this macro, the following things need to be defined:
*
* - SHENG_IMPL (name of the Sheng implementation function)
* - INTERESTING_FUNC (name of the function checking for accept, accel or dead
* states)
* - INNER_DEAD_FUNC (name of the inner function checking for dead states)
* - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
* - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
* - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
* - ACCEPT_FUNC (name of the function checking for accept state)
* - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
*/
/* unrolled 4-byte-at-a-time version.
*
* we put innerDeadFunc inside interestingFunc() block so that we don't pay for
* dead states checking. however, if interestingFunc is dummy, innerDeadFunc
* gets lost with it, so we need an additional check outside the
* interestingFunc() branch - it's normally dummy so we don't pay for it, but
* when interestingFunc is dummy, outerDeadFunc should be set if we want to
* check for dead states.
*
* also, deadFunc only checks the last known state, but since we can't ever get
* out of the dead state and we don't really care where we died, it's not a
* problem.
*/
/* Template body for the unrolled variant: bindings for SHENG_IMPL,
 * INTERESTING_FUNC, INNER/OUTER_DEAD_FUNC, INNER/OUTER_ACCEL_FUNC,
 * ACCEPT_FUNC and STOP_AT_MATCH are provided by the including file.
 * Processes four input bytes per iteration; flag handling only runs when
 * INTERESTING_FUNC fires on any of the four resulting state bytes. */
static really_inline
char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
                u8 *const cached_accept_state, ReportID *const cached_accept_id,
                u8 single, u64a base_offset, const u8 *buf, const u8 *start,
                const u8 *end, const u8 **scan_end) {
    DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
                 *state & SHENG_STATE_MASK);
    const u8 *cur_buf = start;
    /* Buffer position before which we will not re-attempt acceleration;
     * pushed forward by a penalty after each accel attempt. */
    const u8 *min_accel_dist = start;
    base_offset++;
    DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
    if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
        /* Entry state is accelerable: skim ahead before the main loop. */
        DEBUG_PRINTF("Accel state reached @ 0\n");
        const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
        const u8 *new_offset = run_accel(aaux, cur_buf, end);
        if (new_offset < cur_buf + BAD_ACCEL_DIST) {
            /* Short skip: apply the larger penalty before retrying accel. */
            min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
        } else {
            min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
        }
        DEBUG_PRINTF("Next accel chance: %llu\n",
                     (u64a)(min_accel_dist - start));
        DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
        cur_buf = new_offset;
        DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
    }
    if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
        DEBUG_PRINTF("Dead on arrival\n");
        *scan_end = end;
        return MO_CONTINUE_MATCHING;
    }
    m128 cur_state = set16x8(*state);
    const m128 *masks = s->shuffle_masks;
    /* Main loop: consume four bytes per iteration; remaining <4 bytes are
     * left for the caller (loop exits with cur_buf in *scan_end). */
    while (likely(end - cur_buf >= 4)) {
        const u8 *b1 = cur_buf;
        const u8 *b2 = cur_buf + 1;
        const u8 *b3 = cur_buf + 2;
        const u8 *b4 = cur_buf + 3;
        const u8 c1 = *b1;
        const u8 c2 = *b2;
        const u8 c3 = *b3;
        const u8 c4 = *b4;
        /* Four chained pshufb transitions; a1..a4 capture the state byte
         * after each step so matches can be attributed to exact offsets. */
        const m128 shuffle_mask1 = masks[c1];
        cur_state = pshufb(shuffle_mask1, cur_state);
        const u8 a1 = movd(cur_state);
        const m128 shuffle_mask2 = masks[c2];
        cur_state = pshufb(shuffle_mask2, cur_state);
        const u8 a2 = movd(cur_state);
        const m128 shuffle_mask3 = masks[c3];
        cur_state = pshufb(shuffle_mask3, cur_state);
        const u8 a3 = movd(cur_state);
        const m128 shuffle_mask4 = masks[c4];
        cur_state = pshufb(shuffle_mask4, cur_state);
        const u8 a4 = movd(cur_state);
        DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
        DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
        DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
        DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
        DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
        DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
        DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
        DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
        if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
            /* Check each of the four positions for an accept, in order. */
            if (ACCEPT_FUNC(a1)) {
                u64a match_offset = base_offset + b1 - buf;
                DEBUG_PRINTF("Accept state %u reached\n",
                             a1 & SHENG_STATE_MASK);
                DEBUG_PRINTF("Match @ %llu\n", match_offset);
                if (STOP_AT_MATCH) {
                    DEBUG_PRINTF("Stopping at match @ %lli\n",
                                 (s64a)(b1 - start));
                    *scan_end = b1;
                    *state = a1;
                    return MO_MATCHES_PENDING;
                }
                if (single) {
                    if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
                        MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                } else {
                    if (fireReports(s, cb, ctxt, a1, match_offset,
                                    cached_accept_state, cached_accept_id,
                                    0) == MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                }
            }
            if (ACCEPT_FUNC(a2)) {
                u64a match_offset = base_offset + b2 - buf;
                DEBUG_PRINTF("Accept state %u reached\n",
                             a2 & SHENG_STATE_MASK);
                DEBUG_PRINTF("Match @ %llu\n", match_offset);
                if (STOP_AT_MATCH) {
                    DEBUG_PRINTF("Stopping at match @ %lli\n",
                                 (s64a)(b2 - start));
                    *scan_end = b2;
                    *state = a2;
                    return MO_MATCHES_PENDING;
                }
                if (single) {
                    if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
                        MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                } else {
                    if (fireReports(s, cb, ctxt, a2, match_offset,
                                    cached_accept_state, cached_accept_id,
                                    0) == MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                }
            }
            if (ACCEPT_FUNC(a3)) {
                u64a match_offset = base_offset + b3 - buf;
                DEBUG_PRINTF("Accept state %u reached\n",
                             a3 & SHENG_STATE_MASK);
                DEBUG_PRINTF("Match @ %llu\n", match_offset);
                if (STOP_AT_MATCH) {
                    DEBUG_PRINTF("Stopping at match @ %lli\n",
                                 (s64a)(b3 - start));
                    *scan_end = b3;
                    *state = a3;
                    return MO_MATCHES_PENDING;
                }
                if (single) {
                    if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
                        MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                } else {
                    if (fireReports(s, cb, ctxt, a3, match_offset,
                                    cached_accept_state, cached_accept_id,
                                    0) == MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                }
            }
            if (ACCEPT_FUNC(a4)) {
                u64a match_offset = base_offset + b4 - buf;
                DEBUG_PRINTF("Accept state %u reached\n",
                             a4 & SHENG_STATE_MASK);
                DEBUG_PRINTF("Match @ %llu\n", match_offset);
                if (STOP_AT_MATCH) {
                    DEBUG_PRINTF("Stopping at match @ %lli\n",
                                 (s64a)(b4 - start));
                    *scan_end = b4;
                    *state = a4;
                    return MO_MATCHES_PENDING;
                }
                if (single) {
                    if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
                        MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                } else {
                    if (fireReports(s, cb, ctxt, a4, match_offset,
                                    cached_accept_state, cached_accept_id,
                                    0) == MO_HALT_MATCHING) {
                        return MO_HALT_MATCHING;
                    }
                }
            }
            /* Dead state is absorbing, so checking only the last state byte
             * (a4) suffices — see the commentary at the top of this file. */
            if (INNER_DEAD_FUNC(a4)) {
                DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
                *scan_end = end;
                *state = a4;
                return MO_CONTINUE_MATCHING;
            }
            if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
                DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
                const union AccelAux *aaux =
                    get_accel(s, a4 & SHENG_STATE_MASK);
                const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
                if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
                    min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
                } else {
                    min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
                }
                DEBUG_PRINTF("Next accel chance: %llu\n",
                             (u64a)(min_accel_dist - start));
                DEBUG_PRINTF("Accel scanned %llu bytes\n",
                             (u64a)(new_offset - cur_buf - 4));
                cur_buf = new_offset;
                DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
                continue;
            }
        }
        /* Outer checks: only live in variants where INTERESTING_FUNC is a
         * dummy (no-match modes), otherwise these compile to nothing. */
        if (OUTER_DEAD_FUNC(a4)) {
            DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
            *scan_end = end;
            *state = a4;
            return MO_CONTINUE_MATCHING;
        };
        if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
            DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
            const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
            const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
            if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
                min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
            } else {
                min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
            }
            DEBUG_PRINTF("Next accel chance: %llu\n",
                         (u64a)(min_accel_dist - start));
            DEBUG_PRINTF("Accel scanned %llu bytes\n",
                         (u64a)(new_offset - cur_buf - 4));
            cur_buf = new_offset;
            DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
            continue;
        };
        cur_buf += 4;
    }
    *state = movd(cur_state);
    *scan_end = cur_buf;
    return MO_CONTINUE_MATCHING;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -26,44 +26,45 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief LimEx NFA: 512-bit SIMD runtime implementations.
*/
#ifndef SHENG_INTERNAL_H_
#define SHENG_INTERNAL_H_
//#define DEBUG_INPUT
//#define DEBUG_EXCEPTIONS
#include "limex.h"
#include "accel.h"
#include "limex_internal.h"
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/simd_utils.h"
// Common code
#include "limex_runtime.h"
/* State byte layout: low nibble (SHENG_STATE_MASK) holds the state id,
 * the 0x70 bits (SHENG_STATE_FLAG_MASK) are the accept/dead/accel flags. */
#define SHENG_STATE_ACCEPT 0x10
#define SHENG_STATE_DEAD 0x20
#define SHENG_STATE_ACCEL 0x40
#define SHENG_STATE_MASK 0xF
#define SHENG_STATE_FLAG_MASK 0x70
#define SIZE 512
#define STATE_T m512
#include "limex_exceptional.h"
/* Engine-wide flags stored in struct sheng's flags field. */
#define SHENG_FLAG_SINGLE_REPORT 0x1
#define SHENG_FLAG_CAN_DIE 0x2
#define SHENG_FLAG_HAS_ACCEL 0x4
#define SIZE 512
#define STATE_T m512
#include "limex_state_impl.h"
/* Variable-length list of report ids, laid out inline in the bytecode. */
struct report_list {
    u32 count;          /* number of entries in report[] */
    ReportID report[];  /* flexible array of report ids */
};
#define SIZE 512
#define STATE_T m512
#define INLINE_ATTR really_inline
#include "limex_common_impl.h"
/* Per-state auxiliary data; fields are byte offsets into the engine blob
 * (fillAccelAux in shengcompile.cpp writes the accel field this way). */
struct sstate_aux {
    u32 accept;
    u32 accept_eod;
    u32 accel;
    u32 top;
};
#define SIZE 512
#define STATE_T m512
#define SHIFT 6
#include "limex_runtime_impl.h"
/* Sheng engine layout. One 16-byte shuffle mask per input byte value
 * drives the state transition via pshufb. */
struct sheng {
    m128 shuffle_masks[256];
    u32 length;         /* total size of the engine */
    u32 aux_offset;     /* offset of the sstate_aux array */
    u32 report_offset;
    u32 accel_offset;   /* offset of the AccelAux structures */
    u8 n_states;
    u8 anchored;
    u8 floating;
    u8 flags;           /* bitmask of SHENG_FLAG_* values */
    ReportID report;    /* engine-wide report id for single-report engines */
};
#define SIZE 512
#define STATE_T m512
#define SHIFT 7
#include "limex_runtime_impl.h"
#endif /* SHENG_INTERNAL_H_ */

541
src/nfa/shengcompile.cpp Normal file
View File

@ -0,0 +1,541 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "shengcompile.h"
#include "accel.h"
#include "accelcompile.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/unaligned.h"
#include "grey.h"
#include "nfa_internal.h"
#include "sheng_internal.h"
#include "ue2common.h"
#include "util/compile_context.h"
#include "util/make_unique.h"
#include "util/verify_types.h"
#include "util/simd_utils.h"
#include <map>
#include <vector>
#include <sstream>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using boost::adaptors::map_keys;
namespace ue2 {
/* Maximum offset depth considered when building accel schemes for states
 * (presumably bounds the lookahead; confirm against accel_dfa_build_strat). */
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4

/** Maximum tolerated number of escape character from an accel state.
 * This is larger than nfa, as we don't have a budget and the nfa cheats on stop
 * characters for sets of states */
#define ACCEL_DFA_MAX_STOP_CHAR 160

/** Maximum tolerated number of escape character from a sds accel state. Larger
 * than normal states as accelerating sds is important. Matches NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
/* View over a raw_dfa for Sheng compilation. If the DFA can never reach the
 * dead state, the dead state entry is elided and all indices are shifted:
 * "adjusted" ids used by callers map to "raw" ids via raw_id(). */
struct dfa_info {
    accel_dfa_build_strat &strat;
    raw_dfa &raw;
    vector<dstate> &states;
    dstate &floating;   /* state reached from the floating start */
    dstate &anchored;   /* state reached from the anchored start */
    bool can_die;       /* true if any transition leads to DEAD_STATE */
    explicit dfa_info(accel_dfa_build_strat &s)
        : strat(s), raw(strat.get_raw()), states(raw.states),
          floating(states[raw.start_floating]),
          anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
    // returns adjusted size
    size_t size() const {
        return can_die ? states.size() : states.size() - 1;
    }
    // expects adjusted index
    dstate &operator[](dstate_id_t idx) {
        return states[raw_id(idx)];
    }
    /* Successor for a TOP event; a dead source falls back to floating. */
    dstate &top(dstate_id_t idx) {
        if (isDead(idx)) {
            return floating;
        }
        return next(idx, TOP);
    }
    /* Successor of adjusted state idx on input character chr. */
    dstate &next(dstate_id_t idx, u16 chr) {
        auto &src = (*this)[idx];
        auto next_id = src.next[raw.alpha_remap[chr]];
        return states[next_id];
    }
    // get original idx from adjusted idx
    dstate_id_t raw_id(dstate_id_t idx) {
        assert(idx < size());
        // if DFA can't die, shift all indices left by 1
        return can_die ? idx : idx + 1;
    }
    bool isDead(dstate &state) {
        return raw_id(state.impl_id) == DEAD_STATE;
    }
    bool isDead(dstate_id_t idx) {
        return raw_id(idx) == DEAD_STATE;
    }

private:
    /* True if any (state, char) transition in the raw DFA hits DEAD_STATE. */
    static bool dfaCanDie(raw_dfa &rdfa) {
        for (unsigned chr = 0; chr < 256; chr++) {
            for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
                auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
                if (succ == DEAD_STATE) {
                    return true;
                }
            }
        }
        return false;
    }
};
namespace {

/* A set of report ids, optionally remapped through the ReportManager to
 * program offsets. Ordered so it can be used as a map key for dedup. */
struct raw_report_list {
    flat_set<ReportID> reports;

    raw_report_list(const flat_set<ReportID> &reports_in,
                    const ReportManager &rm, bool do_remap) {
        if (do_remap) {
            for (auto &id : reports_in) {
                reports.insert(rm.getProgramOffset(id));
            }
        } else {
            reports = reports_in;
        }
    }

    bool operator<(const raw_report_list &b) const {
        return reports < b.reports;
    }
};

/* Concrete raw_report_info: an ordered collection of deduplicated report
 * lists, serializable into the engine bytecode via fillReportLists(). */
struct raw_report_info_impl : public raw_report_info {
    vector<raw_report_list> rl;
    u32 getReportListSize() const override;
    size_t size() const override;
    void fillReportLists(NFA *n, size_t base_offset,
                         std::vector<u32> &ro /* out */) const override;
};
}
/* Total number of bytes needed to serialize every report list: one
 * report_list header plus one ReportID per entry, for each list. */
u32 raw_report_info_impl::getReportListSize() const {
    u32 total = 0;
    for (const auto &reps : rl) {
        total += sizeof(report_list) + sizeof(ReportID) * reps.reports.size();
    }
    return total;
}
/* Number of distinct report lists collected. */
size_t raw_report_info_impl::size() const {
    return rl.size();
}
/* Serialize each report list into the engine blob at base_offset, appending
 * the offset of each list to ro so states can reference their lists. */
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
                                           vector<u32> &ro) const {
    for (const auto &reps : rl) {
        ro.push_back(base_offset);
        /* Write the report_list structure in place inside the NFA blob. */
        report_list *p = (report_list *)((char *)n + base_offset);
        u32 i = 0;
        for (const ReportID report : reps.reports) {
            p->report[i++] = report;
        }
        p->count = verify_u32(reps.reports.size());
        /* Advance past the header and the flexible report array. */
        base_offset += sizeof(report_list);
        base_offset += sizeof(ReportID) * reps.reports.size();
    }
}
/* Collect and deduplicate the report lists of every DFA state.
 *
 * For each state, appends into `reports` (and `reports_eod` for EOD
 * accepts) the index of that state's report list within the returned
 * raw_report_info, or MO_INVALID_IDX if the state has no reports.
 * Sets *isSingleReport to 1 and *arbReport to the single id when all
 * non-EOD accepts fire the same report; otherwise *arbReport is an
 * arbitrary representative. */
unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
                                                  vector<u32> &reports,
                                                  vector<u32> &reports_eod,
                                                  u8 *isSingleReport,
                                                  ReportID *arbReport) const {
    DEBUG_PRINTF("gathering reports\n");
    const bool remap_reports = has_managed_reports(rdfa.kind);
    auto ri = ue2::make_unique<raw_report_info_impl>();
    /* rev maps a report list to its index in ri->rl, for deduplication. */
    map<raw_report_list, u32> rev;
    for (const dstate &s : rdfa.states) {
        if (s.reports.empty()) {
            reports.push_back(MO_INVALID_IDX);
            continue;
        }
        raw_report_list rrl(s.reports, rm, remap_reports);
        DEBUG_PRINTF("non empty r\n");
        if (rev.find(rrl) != rev.end()) {
            reports.push_back(rev[rrl]);
        } else {
            DEBUG_PRINTF("adding to rl %zu\n", ri->size());
            rev[rrl] = ri->size();
            reports.push_back(ri->size());
            ri->rl.push_back(rrl);
        }
    }
    /* Second pass: EOD report lists share the same dedup map. */
    for (const dstate &s : rdfa.states) {
        if (s.reports_eod.empty()) {
            reports_eod.push_back(MO_INVALID_IDX);
            continue;
        }
        DEBUG_PRINTF("non empty r eod\n");
        raw_report_list rrl(s.reports_eod, rm, remap_reports);
        if (rev.find(rrl) != rev.end()) {
            reports_eod.push_back(rev[rrl]);
            continue;
        }
        DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
        rev[rrl] = ri->size();
        reports_eod.push_back(ri->size());
        ri->rl.push_back(rrl);
    }
    assert(!ri->rl.empty()); /* all components should be able to generate
                                reports */
    if (!ri->rl.empty()) {
        *arbReport = *ri->rl.begin()->reports.begin();
    } else {
        *arbReport = 0;
    }
    /* if we have only a single report id generated from all accepts (not eod)
     * we can take some short cuts */
    set<ReportID> reps;
    for (u32 rl_index : reports) {
        if (rl_index == MO_INVALID_IDX) {
            continue;
        }
        assert(rl_index < ri->size());
        insert(&reps, ri->rl[rl_index].reports);
    }
    if (reps.size() == 1) {
        *isSingleReport = 1;
        *arbReport = *reps.begin();
        DEBUG_PRINTF("single -- %u\n", *arbReport);
    } else {
        *isSingleReport = 0;
    }
    return move(ri);
}
/* Acceleration tuning parameters for the Sheng build strategy; values are
 * the ACCEL_DFA_* constants defined at the top of this file. */
u32 sheng_build_strat::max_allowed_offset_accel() const {
    return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
u32 sheng_build_strat::max_stop_char() const {
    return ACCEL_DFA_MAX_STOP_CHAR;
}
u32 sheng_build_strat::max_floating_stop_char() const {
    return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
/* Size reserved per accelerable state for its acceleration structure. */
size_t sheng_build_strat::accelSize() const {
    return sizeof(AccelAux);
}
#ifdef DEBUG
/** \brief Debug helper: log one row of the shuffle mask table.
 *
 * Prints the state bits (masked with SHENG_STATE_MASK) of each of the \a sz
 * bytes in \a buf, labelled with the character value \a chr.
 */
static really_inline
void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
    stringstream ss;
    unsigned idx = 0;
    while (idx < sz) {
        ss.width(2); // pad each state id to two columns
        ss << (buf[idx] & SHENG_STATE_MASK) << " ";
        idx++;
    }
    DEBUG_PRINTF("chr %3u: %s\n", chr, ss.str().c_str());
}
#endif
static
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
set<dstate_id_t> *accel_states) {
for (dstate_id_t i : accel_escape_info | map_keys) {
accel_states->insert(i);
}
}
/** \brief Encode a dstate as a sheng state byte: its impl id plus
 * accept/dead/accel flag bits. */
static
u8 getShengState(dstate &state, dfa_info &info,
                 map<dstate_id_t, AccelScheme> &accelInfo) {
    u8 enc = state.impl_id;
    if (!state.reports.empty()) {
        enc |= SHENG_STATE_ACCEPT; // state has (non-EOD) reports
    }
    if (info.isDead(state)) {
        enc |= SHENG_STATE_DEAD;
    }
    if (accelInfo.count(info.raw_id(state.impl_id))) {
        enc |= SHENG_STATE_ACCEL; // an accel scheme exists for this state
    }
    return enc;
}
/** \brief Build an AccelAux record for every accelerable state and record
 * its offset in that state's aux structure. */
static
void fillAccelAux(struct NFA *n, dfa_info &info,
                  map<dstate_id_t, AccelScheme> &accelInfo) {
    DEBUG_PRINTF("Filling accel aux structures\n");
    sheng *s = (sheng *)getMutableImplNfa(n);
    u32 offset = s->accel_offset;
    for (dstate_id_t i = 0; i < info.size(); i++) {
        dstate_id_t state_id = info.raw_id(i);
        auto it = accelInfo.find(state_id);
        if (it == accelInfo.end()) {
            continue; // state is not accelerable
        }
        s->flags |= SHENG_FLAG_HAS_ACCEL;
        AccelAux *aux = (AccelAux *)((char *)n + offset);
        info.strat.buildAccel(state_id, it->second, aux);
        sstate_aux *saux =
            (sstate_aux *)((char *)n + s->aux_offset) + state_id;
        saux->accel = offset; // aux record points at its accel scheme
        DEBUG_PRINTF("Accel offset: %u\n", offset);
        offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
    }
}
/** \brief Fill in the NFA header and sheng header fields (sizes, layout
 * offsets, start states and flags). */
static
void populateBasicInfo(struct NFA *n, dfa_info &info,
                       map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
                       u32 report_offset, u32 accel_offset, u32 total_size,
                       u32 dfa_size) {
    n->length = total_size;
    n->scratchStateSize = 1; // sheng stores its state in one byte
    n->streamStateSize = 1;
    n->nPositions = info.size();
    n->type = SHENG_NFA_0;
    if (info.raw.hasEodReports()) {
        n->flags |= NFA_ACCEPTS_EOD;
    }

    sheng *s = (sheng *)getMutableImplNfa(n);
    s->aux_offset = aux_offset;
    s->report_offset = report_offset;
    s->accel_offset = accel_offset;
    s->n_states = info.size();
    s->length = dfa_size;
    if (info.can_die) {
        s->flags |= SHENG_FLAG_CAN_DIE;
    }

    s->anchored = getShengState(info.anchored, info, accelInfo);
    s->floating = getShengState(info.floating, info, accelInfo);
}
/** \brief Fill in the TOP transition byte in the given state's aux
 * structure. */
static
void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
              map<dstate_id_t, AccelScheme> &accelInfo) {
    sheng *s = (sheng *)getMutableImplNfa(n);
    DEBUG_PRINTF("Filling tops for state %u\n", id);

    sstate_aux *aux = (sstate_aux *)((char *)n + s->aux_offset) + id;
    DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
                 (char *)aux - (char *)n);

    /* we could conceivably end up in an accept/dead state on a top event,
     * so mark top as accept/dead state if it indeed is.
     */
    auto &top_state = info.top(id);
    DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
    aux->top = getShengState(top_state, info, accelInfo);
}
/** \brief Fill in the accept and accept_eod report-list offsets in the
 * given state's aux structure. */
static
void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
             vector<u32> &reports_eod, vector<u32> &report_offsets) {
    sheng *s = (sheng *)getMutableImplNfa(n);
    auto raw_id = info.raw_id(id);
    auto &state = info[id];
    sstate_aux *aux = (sstate_aux *)((char *)n + s->aux_offset) + id;

    DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
    DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
                 (char *)aux - (char *)n);

    if (state.reports.empty()) {
        aux->accept = 0; // zero offset == no reports
    } else {
        aux->accept = report_offsets[reports[raw_id]];
    }
    if (state.reports_eod.empty()) {
        aux->accept_eod = 0;
    } else {
        aux->accept_eod = report_offsets[reports_eod[raw_id]];
    }

    DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
    DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
}
/** \brief Record the engine-wide single report ID and set the
 * corresponding flag. */
static
void fillSingleReport(NFA *n, ReportID r_id) {
    sheng *s = (sheng *)getMutableImplNfa(n);
    DEBUG_PRINTF("Single report ID: %u\n", r_id);
    s->flags |= SHENG_FLAG_SINGLE_REPORT;
    s->report = r_id;
}
/** \brief Build the 256 per-character shuffle masks from the DFA's
 * transition function.
 *
 * Byte \a idx of the mask for character \a chr holds the encoded successor
 * of state \a idx on \a chr (impl id plus accept/dead/accel flag bits).
 */
static
void createShuffleMasks(sheng *s, dfa_info &info,
                        map<dstate_id_t, AccelScheme> &accelInfo) {
    for (u16 chr = 0; chr < 256; chr++) {
        u8 buf[16] = {0};
        for (dstate_id_t idx = 0; idx < info.size(); idx++) {
            buf[idx] = getShengState(info.next(idx, chr), info, accelInfo);
        }
#ifdef DEBUG
        dumpShuffleMask(chr, buf, sizeof(buf));
#endif
        s->shuffle_masks[chr] = loadu128(buf);
    }
}
/** \brief True if the given sheng engine uses acceleration. */
bool has_accel_sheng(const NFA *nfa) {
    const sheng *s = (const sheng *)getImplNfa(nfa);
    return (s->flags & SHENG_FLAG_HAS_ACCEL) != 0;
}
/** \brief Attempt to compile the given DFA into a sheng (shuffle-based DFA)
 * engine.
 *
 * Returns nullptr if sheng is disallowed by the grey box, or if the DFA has
 * more than 16 states (a sheng state must fit in one lane of a 128-bit
 * shuffle mask). If \a accel_states is non-null it is filled with the ids
 * of accelerable states.
 */
aligned_unique_ptr<NFA> shengCompile(raw_dfa &raw,
                                     const CompileContext &cc,
                                     const ReportManager &rm,
                                     set<dstate_id_t> *accel_states) {
    if (!cc.grey.allowSheng) {
        DEBUG_PRINTF("Sheng is not allowed!\n");
        return nullptr;
    }

    sheng_build_strat strat(raw, rm);
    dfa_info info(strat);

    DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
    DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
                 raw.start_anchored, raw.start_floating);
    DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
                 info.can_die ? "can" : "cannot", info.size());

    // sheng state ids index 16-byte shuffle mask lanes, so at most 16 states
    if (info.size() > 16) {
        DEBUG_PRINTF("Too many states\n");
        return nullptr;
    }

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }
    auto accelInfo = strat.getAccelInfo(cc.grey);

    // set impl_id of each dfa state
    for (dstate_id_t i = 0; i < info.size(); i++) {
        info[i].impl_id = i;
    }

    DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
                 info.anchored.impl_id, info.floating.impl_id);

    /* engine layout: [NFA header + sheng] [aux table] [report lists]
     * [accel aux records], with the whole engine rounded up to 64 bytes. */
    u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
    vector<u32> reports, eod_reports, report_offsets;
    u8 isSingle = 0;
    ReportID single_report = 0;

    auto ri =
        strat.gatherReports(reports, eod_reports, &isSingle, &single_report);

    u32 total_aux = sizeof(sstate_aux) * info.size();
    u32 total_accel = strat.accelSize() * accelInfo.size();
    u32 total_reports = ri->getReportListSize();

    u32 reports_offset = nfa_size + total_aux;
    u32 accel_offset =
        ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
    u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);

    DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
                 nfa_size, total_aux, total_reports, total_accel, total_size);

    aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(total_size);

    populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset,
                      accel_offset, total_size, total_size - sizeof(NFA));

    DEBUG_PRINTF("Setting up aux and report structures\n");

    ri->fillReportLists(nfa.get(), reports_offset, report_offsets);

    // per-state aux: top transitions and report list offsets
    for (dstate_id_t idx = 0; idx < info.size(); idx++) {
        fillTops(nfa.get(), info, idx, accelInfo);
        fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets);
    }
    if (isSingle) {
        fillSingleReport(nfa.get(), single_report);
    }

    fillAccelAux(nfa.get(), info, accelInfo);

    if (accel_states) {
        fillAccelOut(accelInfo, accel_states);
    }

    createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo);

    return nfa;
}
} // namespace ue2

80
src/nfa/shengcompile.h Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SHENGCOMPILE_H_
#define SHENGCOMPILE_H_
#include "accel_dfa_build_strat.h"
#include "rdfa.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
struct NFA;
namespace ue2 {
class ReportManager;
struct CompileContext;
struct raw_dfa;
/** \brief Build strategy for the sheng (shuffle-based small DFA) engine. */
class sheng_build_strat : public accel_dfa_build_strat {
public:
    sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
        : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
    /** \brief Access the raw DFA being compiled. */
    raw_dfa &get_raw() const override { return rdfa; }
    /** \brief Gather per-state report lists; writes report list indices for
     * each state into \a reports and \a reports_eod, and fills the
     * single-report flag and arbitrary report id outputs. */
    std::unique_ptr<raw_report_info> gatherReports(
        std::vector<u32> &reports /* out */,
        std::vector<u32> &reports_eod /* out */,
        u8 *isSingleReport /* out */,
        ReportID *arbReport /* out */) const override;
    /** \brief Size in bytes of one acceleration aux record. */
    size_t accelSize(void) const override;
    u32 max_allowed_offset_accel() const override;
    u32 max_stop_char() const override;
    u32 max_floating_stop_char() const override;
private:
    raw_dfa &rdfa; // the DFA being compiled (not owned)
};
/** \brief Compile \a raw into a sheng engine; returns nullptr on failure.
 * If \a accel_states is provided it is filled with accelerable state ids. */
aligned_unique_ptr<NFA>
shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm,
             std::set<dstate_id_t> *accel_states = nullptr);
/** \brief Escape-character information used when building acceleration for
 * sheng engines.
 *
 * NOTE(review): member semantics are not visible from this header —
 * presumably \a outs holds single-byte escape characters and \a outs2
 * double-byte escape pairs; confirm against the accel analysis code. */
struct sheng_escape_info {
    CharReach outs;
    CharReach outs2_single;
    flat_set<std::pair<u8, u8>> outs2;
    bool outs2_broken = false;
};
bool has_accel_sheng(const NFA *nfa);
} // namespace ue2
#endif /* SHENGCOMPILE_H_ */

265
src/nfa/shengdump.cpp Normal file
View File

@ -0,0 +1,265 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "shengdump.h"
#include "accel_dump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "sheng_internal.h"
#include "rdfa.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/simd_utils.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
static
const sstate_aux *get_aux(const NFA *n, dstate_id_t i) {
assert(n && isShengType(n->type));
const sheng *s = (const sheng *)getImplNfa(n);
const sstate_aux *aux_base =
(const sstate_aux *)((const char *)n + s->aux_offset);
const sstate_aux *aux = aux_base + i;
assert((const char *)aux < (const char *)s + s->length);
return aux;
}
/** \brief Print the sheng header: sizes, layout offsets, start states and
 * flags. */
static
void dumpHeader(FILE *f, const sheng *s) {
    fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states,
            s->length);
    fprintf(f, "aux base offset: %u, reports base offset: %u, "
            "accel offset: %u\n",
            s->aux_offset, s->report_offset, s->accel_offset);
    fprintf(f, "anchored start state: %u, floating start state: %u\n",
            s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK);
    fprintf(f, "has accel: %u can die: %u single report: %u\n",
            !!(s->flags & SHENG_FLAG_HAS_ACCEL),
            !!(s->flags & SHENG_FLAG_CAN_DIE),
            !!(s->flags & SHENG_FLAG_SINGLE_REPORT));
}
/** \brief Print one state's aux record: report offsets, accel offset and
 * TOP transition. */
static
void dumpAux(FILE *f, u32 state, const sstate_aux *aux) {
    fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, "
            "accel offset: %u, top: %u\n",
            state, aux->accept, aux->accept_eod, aux->accel,
            aux->top & SHENG_STATE_MASK);
}
static
void dumpReports(FILE *f, const report_list *rl) {
fprintf(f, "reports count: %u\n", rl->count);
for (u32 i = 0; i < rl->count; i++) {
fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]);
}
}
static
void dumpMasks(FILE *f, const sheng *s) {
for (u32 chr = 0; chr < 256; chr++) {
u8 buf[16];
m128 shuffle_mask = s->shuffle_masks[chr];
store128(buf, shuffle_mask);
fprintf(f, "%3u: ", chr);
for (u32 pos = 0; pos < 16; pos++) {
u8 c = buf[pos];
if (c & SHENG_STATE_FLAG_MASK) {
fprintf(f, "%2u* ", c & SHENG_STATE_MASK);
} else {
fprintf(f, "%2u ", c & SHENG_STATE_MASK);
}
}
fprintf(f, "\n");
}
}
/** \brief Dump a text description of a sheng engine: header, per-state aux
 * records with their report lists and accel info, then the shuffle masks. */
void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) {
    assert(nfa->type == SHENG_NFA_0);
    const sheng *s = (const sheng *)getImplNfa(nfa);

    fprintf(f, "sheng DFA\n");
    dumpHeader(f, s);

    for (u32 state = 0; state < s->n_states; state++) {
        const sstate_aux *aux = get_aux(nfa, state);
        dumpAux(f, state, aux);
        if (aux->accept) {
            fprintf(f, "report list:\n");
            // aux offsets are relative to the start of the NFA structure
            const report_list *rl =
                (const report_list *)((const char *)nfa + aux->accept);
            dumpReports(f, rl);
        }
        if (aux->accept_eod) {
            fprintf(f, "EOD report list:\n");
            const report_list *rl =
                (const report_list *)((const char *)nfa + aux->accept_eod);
            dumpReports(f, rl);
        }
        if (aux->accel) {
            fprintf(f, "accel:\n");
            const AccelAux *accel =
                (const AccelAux *)((const char *)nfa + aux->accel);
            dumpAccelInfo(f, *accel);
        }
    }

    fprintf(f, "\n");
    dumpMasks(f, s);
    fprintf(f, "\n");
}
/** \brief Emit the common dot preamble plus DFA-specific invisible nodes
 * (start markers and state 0). */
static
void dumpDotPreambleDfa(FILE *f) {
    dumpDotPreamble(f);

    // DFA specific additions.
    fprintf(f, "STARTF [style=invis];\n");
    fprintf(f, "STARTA [style=invis];\n");
    fprintf(f, "0 [style=invis];\n");
}
/** \brief Emit dot-graph attributes for one state: accept markings, its TOP
 * transition and start-state edges. */
static
void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) {
    const sstate_aux *aux = get_aux(n, i);

    fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
            "label = \"%u\" ]; \n",
            i, i);

    // EOD-accept states are coloured, plain accept states double-circled
    if (aux->accept_eod) {
        fprintf(f, "%u [ color = darkorchid ];\n", i);
    }

    if (aux->accept) {
        fprintf(f, "%u [ shape = doublecircle ];\n", i);
    }

    // draw the TOP transition unless it is a self-edge
    if (aux->top && (aux->top & SHENG_STATE_MASK) != i) {
        fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
                aux->top & SHENG_STATE_MASK);
    }

    if (i == (s->anchored & SHENG_STATE_MASK)) {
        fprintf(f, "STARTA -> %u [color = blue ]\n", i);
    }

    if (i == (s->floating & SHENG_STATE_MASK)) {
        fprintf(f, "STARTF -> %u [color = red ]\n", i);
    }
}
/** \brief Emit dot-graph edges out of state \a i, one per distinct
 * successor, labelled with the character class taking that edge.
 *
 * \param t transition table row for state i, indexed by character. */
static
void describeEdge(FILE *f, const u16 *t, u16 i) {
    for (u16 s = 0; s < N_CHARS; s++) {
        if (!t[s]) {
            continue;
        }

        // skip characters whose successor was already drawn for an earlier
        // character in this row
        u16 ss;
        for (ss = 0; ss < s; ss++) {
            if (t[s] == t[ss]) {
                break;
            }
        }

        if (ss != s) {
            continue;
        }

        // collect every character that leads to the same successor
        CharReach reach;
        for (ss = s; ss < 256; ss++) {
            if (t[s] == t[ss]) {
                reach.set(ss);
            }
        }

        fprintf(f, "%u -> %u [ label = \"", i, t[s]);

        describeClass(f, reach, 5, CC_OUT_DOT);

        fprintf(f, "\" ];\n");
    }
}
static
void shengGetTransitions(const NFA *n, u16 state, u16 *t) {
assert(isShengType(n->type));
const sheng *s = (const sheng *)getImplNfa(n);
const sstate_aux *aux = get_aux(n, state);
for (unsigned i = 0; i < N_CHARS; i++) {
u8 buf[16];
m128 shuffle_mask = s->shuffle_masks[i];
store128(buf, shuffle_mask);
t[i] = buf[state] & SHENG_STATE_MASK;
}
t[TOP] = aux->top & SHENG_STATE_MASK;
}
/** \brief Dump the sheng engine as a Graphviz dot graph. */
void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) {
    assert(nfa->type == SHENG_NFA_0);
    const sheng *s = (const sheng *)getImplNfa(nfa);

    dumpDotPreambleDfa(f);

    // state 0 is skipped (rendered invisible in the preamble)
    for (u16 i = 1; i < s->n_states; i++) {
        describeNode(nfa, s, i, f);

        u16 t[ALPHABET_SIZE];

        shengGetTransitions(nfa, i, t);

        describeEdge(f, t, i);
    }

    fprintf(f, "}\n");
}
} // namespace ue2

View File

@ -26,15 +26,24 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "simd_utils_ssse3.h"
#ifndef SHENGDUMP_H_
#define SHENGDUMP_H_
const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = {
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
#ifdef DUMP_SUPPORT
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
#include <cstdio>
#include <string>
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
};
struct NFA;
namespace ue2 {
void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file,
const std::string &base);
void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif /* SHENGDUMP_H_ */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,8 +40,6 @@
#include "shufti_common.h"
#include "util/simd_utils_ssse3.h"
/** \brief Naive byte-by-byte implementation. */
static really_inline
const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
@ -235,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
m128 c2_lo = pshufb(mask2_lo, chars_lo);
m128 c2_hi = pshufb(mask2_hi, chars_hi);
m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi)));
m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
@ -472,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
m256 c2_lo = vpshufb(mask2_lo, chars_lo);
m256 c2_hi = vpshufb(mask2_hi, chars_hi);
m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi)));
m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
#ifdef DEBUG
DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,7 +34,6 @@
#include "util/bitutils.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"
#include "util/simd_utils_ssse3.h"
/*
* Common stuff for all versions of shufti (single, multi and multidouble)
@ -94,7 +93,7 @@ DUMP_MSK(128)
#endif
#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
@ -120,7 +119,7 @@ DUMP_MSK(256)
#endif
#define GET_LO_4(chars) and256(chars, low4bits)
#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4)
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,

Some files were not shown because too many files have changed in this diff Show More