diff --git a/CHANGELOG.md b/CHANGELOG.md
index 19a37b8e..7dc0fd79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,36 @@
 
 This is a list of notable changes to Hyperscan, in reverse chronological order.
 
+## [4.3.0] 2016-08-24
+- Introduce a new analysis pass ("Violet") used for decomposition of patterns
+  into literals and smaller engines.
+- Introduce a new container engine ("Tamarama") for infix and suffix engines
+  that can be proven to run exclusively of one another. This reduces stream
+  state for pattern sets with many such engines.
+- Introduce a new shuffle-based DFA engine ("Sheng"). This improves scanning
+  performance for pattern sets where small engines are generated.
+- Improve the analysis used to extract extra mask information from short
+  literals.
+- Reduced compile time spent in equivalence class analysis.
+- Build: frame pointers are now only omitted for 32-bit release builds.
+- Build: Workaround for C++ issues reported on FreeBSD/libc++ platforms.
+  (github issue #27)
+- Simplify the LimEx NFA with a unified "variable shift" model, which reduces
+  the number of different NFA code paths to one per model size.
+- Allow some anchored prefixes that may squash the literal to which they are
+  attached to run eagerly. This improves scanning performance for some
+  patterns.
+- Simplify and improve EOD ("end of data") matching, using the interpreter for
+  all operations.
+- Elide unnecessary instructions in the Rose interpreter at compile time.
+- Reduce the number of inlined instantiations of the Rose interpreter in order
+  to reduce instruction cache pressure.
+- Small improvements to literal matcher acceleration.
+- Parser: ignore `\E` metacharacters that are not preceded by `\Q`. This
+  conforms to PCRE's behaviour, rather than returning a compile error.
+- Check for misaligned memory when allocating an error structure in Hyperscan's
+  compile path and return an appropriate error if detected.
+
 ## [4.2.0] 2016-05-31
 - Introduce an interpreter for many complex actions to replace the use of
   internal reports within the core of Hyperscan (the "Rose" engine).
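The last new 4.3.0 entry above (the misaligned-allocation check) corresponds to the `hs_check_alloc()` guard added to `src/compiler/error.cpp` further down in this patch. A minimal sketch of the situation it guards against, assuming the public `hs_set_allocator()`/`hs_compile()` API; the deliberately misaligned allocator below is illustrative only and not part of the patch:

```c
#include <stdio.h>
#include <stdlib.h>
#include <hs.h>

/* Illustrative only: a custom allocator that returns misaligned pointers. */
static void *bad_alloc(size_t len) {
    char *p = malloc(len + 1);
    return p ? p + 1 : NULL;   /* deliberately break alignment */
}

static void bad_free(void *p) {
    if (p) {
        free((char *)p - 1);
    }
}

int main(void) {
    hs_set_allocator(bad_alloc, bad_free);

    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    /* With this release the compile path can detect the misaligned
     * allocation and report it through the compile error, rather than
     * proceeding with a bad pointer. */
    if (hs_compile("foo.*bar", 0, HS_MODE_BLOCK, NULL, &db, &err)
            != HS_SUCCESS) {
        if (err && err->message) {
            printf("compile failed: %s\n", err->message);
        }
        hs_free_compile_error(err);
        return 1;
    }
    hs_free_database(db);
    return 0;
}
```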
This diff --git a/CMakeLists.txt b/CMakeLists.txt index c824b6a6..abbfe53b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,18 @@ cmake_minimum_required (VERSION 2.8.11) + +# don't use the built-in default configs +set (CMAKE_NOT_USING_CONFIG_FLAGS TRUE) + project (Hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 2) +set (HS_MINOR_VERSION 3) set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) -string (TIMESTAMP BUILD_DATE "%Y-%m-%d") +# since we are doing this manually, we only have three types +set (CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo" + CACHE STRING "" FORCE) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) @@ -24,7 +30,7 @@ find_package(PkgConfig QUIET) if (NOT CMAKE_BUILD_TYPE) message(STATUS "Default build type 'Release with debug info'") - set(CMAKE_BUILD_TYPE "RELWITHDEBINFO") + set(CMAKE_BUILD_TYPE RELWITHDEBINFO CACHE STRING "" FORCE ) else() string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE) message(STATUS "Build type ${CMAKE_BUILD_TYPE}") @@ -90,6 +96,18 @@ else() message(FATAL_ERROR "No python interpreter found") endif() +# allow for reproducible builds - python for portability +if (DEFINED ENV{SOURCE_DATE_EPOCH}) + execute_process( + COMMAND "${PYTHON}" "${CMAKE_MODULE_PATH}/formatdate.py" "$ENV{SOURCE_DATE_EPOCH}" + OUTPUT_VARIABLE BUILD_DATE + OUTPUT_STRIP_TRAILING_WHITESPACE) +else () + string (TIMESTAMP BUILD_DATE "%Y-%m-%d") +endif () +message(STATUS "Build date: ${BUILD_DATE}") + + if(${RAGEL} STREQUAL "RAGEL-NOTFOUND") message(FATAL_ERROR "Ragel state machine compiler not found") endif() @@ -121,13 +139,7 @@ endif() CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in release builds" ON "NOT RELEASE_BUILD" OFF) -option(DISABLE_ASSERTS "Disable assert(); enabled in debug builds, disabled in release builds" FALSE) - -if (DISABLE_ASSERTS) - if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") - add_definitions(-DNDEBUG) - endif() -endif() +CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) @@ -139,18 +151,26 @@ if(MSVC OR MSVC_IDE) if (MSVC_VERSION LESS 1700) message(FATAL_ERROR "The project requires C++11 features.") else() + # set base flags + set(CMAKE_C_FLAGS "/DWIN32 /D_WINDOWS /W3") + set(CMAKE_C_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") + set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") + + set(CMAKE_CXX_FLAGS "/DWIN32 /D_WINDOWS /W3 /GR /EHsc") + set(CMAKE_CXX_FLAGS_DEBUG "/D_DEBUG /MDd /Zi /Od") + set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /Oi") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /MD /O2 /Ob2 /Oi") + if (WINDOWS_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /O3 /wd4267 /Qdiag-disable:remark") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /O2 /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() #TODO: don't hardcode arch - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /O2 /wd4267") - 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /O2 /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX /wd4267") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX /wd4244 /wd4267 /wd4800 /wd2586 /wd1170 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") endif() - string(REGEX REPLACE "/RTC1" "" - CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" ) - string(REGEX REPLACE "/RTC1" "" - CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}" ) + endif() @@ -172,16 +192,34 @@ else() unset(_GXX_OUTPUT) endif() - # set compiler flags - more are tested and added later - set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual") - set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wno-shadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor") - if (NOT RELEASE_BUILD) - # -Werror is most useful during development, don't potentially break - # release builds - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror") - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Werror") + if(OPTIMISE) + set(OPT_C_FLAG "-O3") + set(OPT_CXX_FLAG "-O2") + else() + set(OPT_C_FLAG "-O0") + set(OPT_CXX_FLAG "-O0") + endif(OPTIMISE) + + # set up base flags for build types + set(CMAKE_C_FLAGS_DEBUG "-g ${OPT_C_FLAG} -Werror") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-g ${OPT_C_FLAG}") + set(CMAKE_C_FLAGS_RELEASE "${OPT_C_FLAG}") + + set(CMAKE_CXX_FLAGS_DEBUG "-g ${OPT_CXX_FLAG} -Werror") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g ${OPT_CXX_FLAG}") + set(CMAKE_CXX_FLAGS_RELEASE "${OPT_CXX_FLAG}") + + if (DISABLE_ASSERTS) + # usually true for release builds, false for debug + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNDEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG") endif() + + # set compiler flags - more are tested and added later + set(EXTRA_C_FLAGS "-std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing") + set(EXTRA_CXX_FLAGS "-std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing") + if (NOT CMAKE_C_FLAGS MATCHES .*march.*) message(STATUS "Building for current host CPU") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -march=native -mtune=native") @@ -199,15 +237,7 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized") endif() - if(OPTIMISE) - set(EXTRA_C_FLAGS "-O3 ${EXTRA_C_FLAGS}") - set(EXTRA_CXX_FLAGS "-O2 ${EXTRA_CXX_FLAGS}") - else() - set(EXTRA_C_FLAGS "-O0 ${EXTRA_C_FLAGS}") - set(EXTRA_CXX_FLAGS "-O0 ${EXTRA_CXX_FLAGS}") - endif(OPTIMISE) - - if(NOT RELEASE_BUILD) + if (NOT(ARCH_IA32 AND RELEASE_BUILD)) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fno-omit-frame-pointer") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") endif() @@ -297,6 +327,11 @@ if (CXX_UNUSED_CONST_VAR) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-const-variable") endif() +# gcc 6 complains about type attributes that get ignored, like alignment +CHECK_CXX_COMPILER_FLAG("-Wignored-attributes" CXX_IGNORED_ATTR) +if (CXX_IGNORED_ATTR) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes") +endif() # note this for later # g++ doesn't have this flag but clang does @@ -438,15 +473,14 @@ set (hs_exec_SRCS src/nfa/limex_simd128.c src/nfa/limex_simd256.c src/nfa/limex_simd384.c - src/nfa/limex_simd512a.c - src/nfa/limex_simd512b.c - src/nfa/limex_simd512c.c + src/nfa/limex_simd512.c src/nfa/limex.h src/nfa/limex_common_impl.h src/nfa/limex_context.h src/nfa/limex_internal.h 
src/nfa/limex_runtime.h src/nfa/limex_runtime_impl.h + src/nfa/limex_shuffle.h src/nfa/limex_state_impl.h src/nfa/mpv.h src/nfa/mpv.c @@ -477,9 +511,18 @@ set (hs_exec_SRCS src/nfa/repeat.c src/nfa/repeat.h src/nfa/repeat_internal.h + src/nfa/sheng.c + src/nfa/sheng.h + src/nfa/sheng_defs.h + src/nfa/sheng_impl.h + src/nfa/sheng_impl4.h + src/nfa/sheng_internal.h src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h + src/nfa/tamarama.c + src/nfa/tamarama.h + src/nfa/tamarama_internal.h src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h @@ -495,7 +538,6 @@ set (hs_exec_SRCS src/rose/block.c src/rose/catchup.h src/rose/catchup.c - src/rose/eod.c src/rose/infix.h src/rose/init.h src/rose/init.c @@ -503,6 +545,7 @@ set (hs_exec_SRCS src/rose/match.h src/rose/match.c src/rose/miracle.h + src/rose/program_runtime.c src/rose/program_runtime.h src/rose/runtime.h src/rose/rose.h @@ -510,6 +553,7 @@ set (hs_exec_SRCS src/rose/rose_program.h src/rose/rose_types.h src/rose/rose_common.h + src/rose/validate_mask.h src/util/bitutils.h src/util/exhaust.h src/util/fatbit.h @@ -524,11 +568,8 @@ set (hs_exec_SRCS src/util/pqueue.h src/util/scatter.h src/util/scatter_runtime.h - src/util/shuffle.h - src/util/shuffle_ssse3.h src/util/simd_utils.h - src/util/simd_utils_ssse3.h - src/util/simd_utils_ssse3.c + src/util/simd_utils.c src/util/state_compress.h src/util/state_compress.c src/util/unaligned.h @@ -597,11 +638,15 @@ SET (hs_SRCS src/hwlm/noodle_build.h src/hwlm/noodle_internal.h src/nfa/accel.h + src/nfa/accel_dfa_build_strat.cpp + src/nfa/accel_dfa_build_strat.h src/nfa/accelcompile.cpp src/nfa/accelcompile.h src/nfa/callback.h src/nfa/castlecompile.cpp src/nfa/castlecompile.h + src/nfa/dfa_build_strat.cpp + src/nfa/dfa_build_strat.h src/nfa/dfa_min.cpp src/nfa/dfa_min.h src/nfa/goughcompile.cpp @@ -613,8 +658,6 @@ SET (hs_SRCS src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp src/nfa/mcclellancompile.h - src/nfa/mcclellancompile_accel.cpp - src/nfa/mcclellancompile_accel.h src/nfa/mcclellancompile_util.cpp src/nfa/mcclellancompile_util.h src/nfa/limex_compile.cpp @@ -639,8 +682,13 @@ SET (hs_SRCS src/nfa/repeat_internal.h src/nfa/repeatcompile.cpp src/nfa/repeatcompile.h + src/nfa/sheng_internal.h + src/nfa/shengcompile.cpp + src/nfa/shengcompile.h src/nfa/shufticompile.cpp src/nfa/shufticompile.h + src/nfa/tamaramacompile.cpp + src/nfa/tamaramacompile.h src/nfa/trufflecompile.cpp src/nfa/trufflecompile.h src/nfagraph/ng.cpp @@ -746,6 +794,8 @@ SET (hs_SRCS src/nfagraph/ng_util.h src/nfagraph/ng_vacuous.cpp src/nfagraph/ng_vacuous.h + src/nfagraph/ng_violet.cpp + src/nfagraph/ng_violet.h src/nfagraph/ng_width.cpp src/nfagraph/ng_width.h src/parser/AsciiComponentClass.cpp @@ -825,6 +875,10 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_exclusive.cpp + src/rose/rose_build_exclusive.h + src/rose/rose_build_groups.cpp + src/rose/rose_build_groups.h src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h @@ -853,6 +907,8 @@ SET (hs_SRCS src/util/charreach.cpp src/util/charreach.h src/util/charreach_util.h + src/util/clique.cpp + src/util/clique.h src/util/compare.h src/util/compile_context.cpp src/util/compile_context.h @@ -878,7 +934,6 @@ SET (hs_SRCS src/util/report_manager.cpp src/util/report_manager.h src/util/simd_utils.h - src/util/simd_utils_ssse3.h src/util/target_info.cpp src/util/target_info.h src/util/ue2_containers.h @@ -916,6 +971,10 @@ set(hs_dump_SRCS 
src/nfa/nfa_dump_dispatch.cpp src/nfa/nfa_dump_internal.cpp src/nfa/nfa_dump_internal.h + src/nfa/shengdump.cpp + src/nfa/shengdump.h + src/nfa/tamarama_dump.cpp + src/nfa/tamarama_dump.h src/parser/dump.cpp src/parser/dump.h src/parser/position_dump.h @@ -941,7 +1000,7 @@ endif() # choose which ones to build set (LIB_VERSION ${HS_VERSION}) -set (LIB_SOVERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}) +set (LIB_SOVERSION ${HS_MAJOR_VERSION}) add_library(hs_exec OBJECT ${hs_exec_SRCS}) diff --git a/cmake/formatdate.py b/cmake/formatdate.py new file mode 100755 index 00000000..1b9c62d2 --- /dev/null +++ b/cmake/formatdate.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +from __future__ import print_function +import os +import sys +import datetime + +def usage(): + print("Usage:", os.path.basename(sys.argv[0]), "") + +if len(sys.argv) != 2: + usage() + sys.exit(1) + +ts = sys.argv[1] + +build_date = datetime.datetime.utcfromtimestamp(int(ts)) + +print(build_date.strftime("%Y-%m-%d")) diff --git a/examples/simplegrep.c b/examples/simplegrep.c index 9e392a8f..d6bd4b39 100644 --- a/examples/simplegrep.c +++ b/examples/simplegrep.c @@ -77,7 +77,7 @@ static int eventHandler(unsigned int id, unsigned long long from, * length with its length. Returns NULL on failure. */ static char *readInputData(const char *inputFN, unsigned int *length) { - FILE *f = fopen(inputFN, "r"); + FILE *f = fopen(inputFN, "rb"); if (!f) { fprintf(stderr, "ERROR: unable to open file \"%s\": %s\n", inputFN, strerror(errno)); diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index ce5f8723..d56aff88 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,7 +52,6 @@ #include "parser/shortcut_literal.h" #include "parser/unsupported.h" #include "parser/utf8_validate.h" -#include "smallwrite/smallwrite_build.h" #include "rose/rose_build.h" #include "rose/rose_build_dump.h" #include "som/slot_manager_dump.h" @@ -304,15 +303,6 @@ aligned_unique_ptr generateRoseEngine(NG &ng) { return nullptr; } - /* avoid building a smwr if just a pure floating case. 
*/ - if (!roseIsPureLiteral(rose.get())) { - u32 qual = roseQuality(rose.get()); - auto smwr = ng.smwr->build(qual); - if (smwr) { - rose = roseAddSmallWrite(rose.get(), smwr.get()); - } - } - dumpRose(*ng.rose, rose.get(), ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey); dumpSomSlotManager(ng.ssm, ng.cc.grey); diff --git a/src/compiler/error.cpp b/src/compiler/error.cpp index e806b7a0..07db9819 100644 --- a/src/compiler/error.cpp +++ b/src/compiler/error.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,7 @@ using std::string; static const char failureNoMemory[] = "Unable to allocate memory."; static const char failureInternal[] = "Internal error."; +static const char failureBadAlloc[] = "Allocator returned misaligned memory."; extern const hs_compile_error_t hs_enomem = { const_cast(failureNoMemory), 0 @@ -49,6 +50,9 @@ extern const hs_compile_error_t hs_enomem = { extern const hs_compile_error_t hs_einternal = { const_cast(failureInternal), 0 }; +extern const hs_compile_error_t hs_badalloc = { + const_cast(failureBadAlloc), 0 +}; namespace ue2 { @@ -56,8 +60,18 @@ hs_compile_error_t *generateCompileError(const string &err, int expression) { hs_compile_error_t *ret = (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t)); if (ret) { + hs_error_t e = hs_check_alloc(ret); + if (e != HS_SUCCESS) { + hs_misc_free(ret); + return const_cast(&hs_badalloc); + } char *msg = (char *)hs_misc_alloc(err.size() + 1); if (msg) { + e = hs_check_alloc(msg); + if (e != HS_SUCCESS) { + hs_misc_free(msg); + return const_cast(&hs_badalloc); + } memcpy(msg, err.c_str(), err.size() + 1); ret->message = msg; } else { @@ -83,7 +97,8 @@ void freeCompileError(hs_compile_error_t *error) { if (!error) { return; } - if (error == &hs_enomem || error == &hs_einternal) { + if (error == &hs_enomem || error == &hs_einternal || + error == &hs_badalloc) { // These are not allocated. 
return; } diff --git a/src/database.c b/src/database.c index 635a3b66..a4e10c22 100644 --- a/src/database.c +++ b/src/database.c @@ -458,33 +458,16 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length, } *info = NULL; - if (!bytes || length < sizeof(struct hs_database)) { - return HS_INVALID; + // Decode and check the header + hs_database_t header; + hs_error_t ret = db_decode_header(&bytes, length, &header); + if (ret != HS_SUCCESS) { + return ret; } - const u32 *buf = (const u32 *)bytes; + u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode)); - u32 magic = unaligned_load_u32(buf++); - if (magic != HS_DB_MAGIC) { - return HS_INVALID; - } - - u32 version = unaligned_load_u32(buf++); - - buf++; /* length */ - - platform_t plat; - plat = unaligned_load_u64a(buf); - buf += 2; - - buf++; /* crc */ - buf++; /* reserved 0 */ - buf++; /* reserved 1 */ - - const char *t_raw = (const char *)buf; - u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode)); - - return print_database_string(info, version, plat, mode); + return print_database_string(info, header.version, header.platform, mode); } HS_PUBLIC_API diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index ff69853e..4230c2b1 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -36,7 +36,6 @@ #include "teddy.h" #include "teddy_internal.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" /** \brief number of bytes processed in each iteration */ #define ITER_BYTES 16 @@ -132,7 +131,7 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft, u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1); tmp &= fdr->domainMask; s = *((const m128 *)ft + tmp); - s = shiftRight8Bits(s); + s = rshiftbyte_m128(s, 1); } else { s = fdr->start; } @@ -186,20 +185,20 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, m128 st14 = *(const m128 *)(ft + v14*8); m128 st15 = *(const m128 *)(ft + v15*8); - st1 = byteShiftLeft128(st1, 1); - st2 = byteShiftLeft128(st2, 2); - st3 = byteShiftLeft128(st3, 3); - st4 = byteShiftLeft128(st4, 4); - st5 = byteShiftLeft128(st5, 5); - st6 = byteShiftLeft128(st6, 6); - st7 = byteShiftLeft128(st7, 7); - st9 = byteShiftLeft128(st9, 1); - st10 = byteShiftLeft128(st10, 2); - st11 = byteShiftLeft128(st11, 3); - st12 = byteShiftLeft128(st12, 4); - st13 = byteShiftLeft128(st13, 5); - st14 = byteShiftLeft128(st14, 6); - st15 = byteShiftLeft128(st15, 7); + st1 = lshiftbyte_m128(st1, 1); + st2 = lshiftbyte_m128(st2, 2); + st3 = lshiftbyte_m128(st3, 3); + st4 = lshiftbyte_m128(st4, 4); + st5 = lshiftbyte_m128(st5, 5); + st6 = lshiftbyte_m128(st6, 6); + st7 = lshiftbyte_m128(st7, 7); + st9 = lshiftbyte_m128(st9, 1); + st10 = lshiftbyte_m128(st10, 2); + st11 = lshiftbyte_m128(st11, 3); + st12 = lshiftbyte_m128(st12, 4); + st13 = lshiftbyte_m128(st13, 5); + st14 = lshiftbyte_m128(st14, 6); + st15 = lshiftbyte_m128(st15, 7); *s = or128(*s, st0); *s = or128(*s, st1); @@ -210,7 +209,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, *s = or128(*s, st6); *s = or128(*s, st7); *conf0 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; *s = or128(*s, st8); @@ -222,7 +221,7 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, *s = or128(*s, st14); *s = or128(*s, st15); *conf8 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } @@ -253,19 +252,19 @@ void get_conf_stride_2(const u8 *itPtr, const 
u8 *start_ptr, const u8 *end_ptr, m128 st12 = *(const m128 *)(ft + v12*8); m128 st14 = *(const m128 *)(ft + v14*8); - st2 = byteShiftLeft128(st2, 2); - st4 = byteShiftLeft128(st4, 4); - st6 = byteShiftLeft128(st6, 6); - st10 = byteShiftLeft128(st10, 2); - st12 = byteShiftLeft128(st12, 4); - st14 = byteShiftLeft128(st14, 6); + st2 = lshiftbyte_m128(st2, 2); + st4 = lshiftbyte_m128(st4, 4); + st6 = lshiftbyte_m128(st6, 6); + st10 = lshiftbyte_m128(st10, 2); + st12 = lshiftbyte_m128(st12, 4); + st14 = lshiftbyte_m128(st14, 6); *s = or128(*s, st0); *s = or128(*s, st2); *s = or128(*s, st4); *s = or128(*s, st6); *conf0 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; *s = or128(*s, st8); @@ -273,7 +272,7 @@ void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, *s = or128(*s, st12); *s = or128(*s, st14); *conf8 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } @@ -296,27 +295,26 @@ void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, m128 st8 = *(const m128 *)(ft + v8*8); m128 st12 = *(const m128 *)(ft + v12*8); - st4 = byteShiftLeft128(st4, 4); - st12 = byteShiftLeft128(st12, 4); + st4 = lshiftbyte_m128(st4, 4); + st12 = lshiftbyte_m128(st12, 4); *s = or128(*s, st0); *s = or128(*s, st4); *conf0 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; *s = or128(*s, st8); *s = or128(*s, st12); *conf8 = movq(*s); - *s = byteShiftRight128(*s, 8); + *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } static really_inline -void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal, +void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, const u32 *confBase, const struct FDR_Runtime_Args *a, - const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match_id, - struct zone *z) { + const u8 *ptr, u32 *last_match_id, struct zone *z) { const u8 bucket = 8; const u8 pullback = 1; @@ -352,13 +350,13 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *controlVal, continue; } *last_match_id = id; - *controlVal = a->cb(ptr_main + byte - a->buf, - ptr_main + byte - a->buf, id, a->ctxt); + *control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf, + id, a->ctxt); continue; } u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); - confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, - control, last_match_id, confVal); + confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control, + last_match_id, confVal); } while (unlikely(!!*conf)); } @@ -681,9 +679,9 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, itPtr += ITER_BYTES) { \ if (unlikely(itPtr > tryFloodDetect)) { \ tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\ - &floodBackoff, &controlVal, \ + &floodBackoff, &control, \ ITER_BYTES); \ - if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ + if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \ return HWLM_TERMINATED; \ } \ } \ @@ -692,11 +690,11 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, u64a conf8; \ get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \ ft, &conf0, &conf8, &s); \ - do_confirm_fdr(&conf0, 0, &controlVal, confBase, a, itPtr, \ - control, &last_match_id, zz); \ - do_confirm_fdr(&conf8, 8, &controlVal, confBase, a, itPtr, \ - control, &last_match_id, zz); \ - if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ + do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \ + &last_match_id, zz); \ + 
do_confirm_fdr(&conf8, 8, &control, confBase, a, itPtr, \ + &last_match_id, zz); \ + if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \ return HWLM_TERMINATED; \ } \ } /* end for loop */ \ @@ -704,9 +702,8 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, static never_inline hwlm_error_t fdr_engine_exec(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { u32 floodBackoff = FLOOD_BACKOFF_START; u32 last_match_id = INVALID_MATCH_ID; u64a domain_mask_adjusted = fdr->domainMask << 1; @@ -771,7 +768,10 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, #define ONLY_AVX2(func) NULL #endif -typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a); +typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); + static const FDRFUNCTYPE funcs[] = { fdr_engine_exec, ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast), @@ -814,7 +814,6 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, start, cb, ctxt, - &groups, nextFloodDetect(buf, len, FLOOD_BACKOFF_START), 0 }; @@ -822,7 +821,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, return HWLM_SUCCESS; } else { assert(funcs[fdr->engineID]); - return funcs[fdr->engineID](fdr, &a); + return funcs[fdr->engineID](fdr, &a, groups); } } @@ -840,7 +839,6 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, start, cb, ctxt, - &groups, nextFloodDetect(buf, len, FLOOD_BACKOFF_START), /* we are guaranteed to always have 16 initialised bytes at the end of * the history buffer (they may be garbage). */ @@ -853,7 +851,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, ret = HWLM_SUCCESS; } else { assert(funcs[fdr->engineID]); - ret = funcs[fdr->engineID](fdr, &a); + ret = funcs[fdr->engineID](fdr, &a, groups); } fdrPackState(fdr, &a, stream_state); diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 0c4ef35d..89a0ff72 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -81,7 +81,7 @@ private: void dumpMasks(const u8 *defaultMask); #endif void setupTab(); - aligned_unique_ptr setupFDR(pair link); + aligned_unique_ptr setupFDR(pair, size_t> &link); void createInitialState(FDR *fdr); public: @@ -90,7 +90,7 @@ public: : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in), make_small(make_small_in) {} - aligned_unique_ptr build(pair link); + aligned_unique_ptr build(pair, size_t> &link); }; u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) { @@ -124,10 +124,8 @@ void FDRCompiler::createInitialState(FDR *fdr) { // Find the minimum length for the literals in this bucket. 
const vector &bucket_lits = bucketToLits[b]; u32 min_len = ~0U; - for (vector::const_iterator it = bucket_lits.begin(), - ite = bucket_lits.end(); - it != ite; ++it) { - min_len = min(min_len, verify_u32(lits[*it].s.length())); + for (const LiteralIndex &lit_idx : bucket_lits) { + min_len = min(min_len, verify_u32(lits[lit_idx].s.length())); } DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len); @@ -141,13 +139,12 @@ void FDRCompiler::createInitialState(FDR *fdr) { } } -aligned_unique_ptr FDRCompiler::setupFDR(pair link) { +aligned_unique_ptr +FDRCompiler::setupFDR(pair, size_t> &link) { size_t tabSize = eng.getTabSizeBytes(); - pair floodControlTmp = setupFDRFloodControl(lits, eng); - - pair confirmTmp = - setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto floodControlTmp = setupFDRFloodControl(lits, eng); + auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); assert(ISALIGNED_16(confirmTmp.second)); @@ -175,14 +172,12 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) { copy(tab.begin(), tab.end(), ptr); ptr += tabSize; - memcpy(ptr, confirmTmp.first, confirmTmp.second); + memcpy(ptr, confirmTmp.first.get(), confirmTmp.second); ptr += confirmTmp.second; - aligned_free(confirmTmp.first); fdr->floodOffset = verify_u32(ptr - fdr_base); - memcpy(ptr, floodControlTmp.first, floodControlTmp.second); + memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second); ptr += floodControlTmp.second; - aligned_free(floodControlTmp.first); /* we are allowing domains 9 to 15 only */ assert(eng.bits > 8 && eng.bits < 16); @@ -193,8 +188,7 @@ aligned_unique_ptr FDRCompiler::setupFDR(pair link) { if (link.first) { fdr->link = verify_u32(ptr - fdr_base); - memcpy(ptr, link.first, link.second); - aligned_free(link.first); + memcpy(ptr, link.first.get(), link.second); } else { fdr->link = 0; } @@ -217,13 +211,11 @@ struct LitOrder { if (len1 != len2) { return len1 < len2; } else { - string::const_reverse_iterator it1, it2; - tie(it1, it2) = - std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin()); - if (it1 == i1s.rend()) { + auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin()); + if (p.first == i1s.rend()) { return false; } - return *it1 < *it2; + return *p.first < *p.second; } } @@ -266,9 +258,8 @@ void FDRCompiler::assignStringsToBuckets() { stable_sort(vli.begin(), vli.end(), LitOrder(lits)); #ifdef DEBUG_ASSIGNMENT - for (map::iterator i = lenCounts.begin(), e = lenCounts.end(); - i != e; ++i) { - printf("l<%d>:%d ", i->first, i->second); + for (const auto &m : lenCounts) { + printf("l<%u>:%u ", m.first, m.second); } printf("\n"); #endif @@ -324,12 +315,12 @@ void FDRCompiler::assignStringsToBuckets() { for (u32 k = j; k < nChunks; ++k) { cnt += count[k]; } - t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0); + t[j][0] = {getScoreUtil(length[j], cnt), 0}; } for (u32 i = 1; i < nb; i++) { for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row - SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0); + SCORE_INDEX_PAIR best = {MAX_SCORE, 0}; u32 cnt = count[j]; for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) { SCORE score = getScoreUtil(length[j], cnt); @@ -338,12 +329,12 @@ void FDRCompiler::assignStringsToBuckets() { } score += t[k][i-1].first; if (score < best.first) { - best = make_pair(score, k); + best = {score, k}; } } t[j][i] = best; } - t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration + t[nChunks - 1][i] = {0,0}; // fill in empty final row for next 
iteration } #ifdef DEBUG_ASSIGNMENT @@ -405,8 +396,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng, distance = 4; } - for (vector::const_iterator i = vl.begin(), e = vl.end(); - i != e; ++i) { + for (auto i = vl.begin(), e = vl.end(); i != e; ++i) { if (e - i > 5) { __builtin_prefetch(&lits[*(i + 5)]); } @@ -460,31 +450,25 @@ void FDRCompiler::setupTab() { memcpy(tabIndexToMask(i), &defaultMask[0], mask_size); } - typedef std::map > M2SET; - for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { const vector &vl = bucketToLits[b]; SuffixPositionInString pLimit = eng.getBucketWidth(b); for (SuffixPositionInString pos = 0; pos < pLimit; pos++) { u32 bit = eng.getSchemeBit(b, pos); - M2SET m2; + map> m2; bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2); if (done) { clearbit(&defaultMask[0], bit); continue; } - for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e; - ++i) { - u32 dc = i->first; - const ue2::unordered_set &mskSet = i->second; + for (const auto &elem : m2) { + u32 dc = elem.first; + const ue2::unordered_set &mskSet = elem.second; u32 v = ~dc; do { u32 b2 = v & dc; - for (ue2::unordered_set::const_iterator - i2 = mskSet.begin(), - e2 = mskSet.end(); - i2 != e2; ++i2) { - u32 val = (*i2 & ~dc) | b2; + for (const u32 &mskVal : mskSet) { + u32 val = (mskVal & ~dc) | b2; clearbit(tabIndexToMask(val), bit); } v = (v + (dc & -dc)) | ~dc; @@ -502,7 +486,8 @@ void FDRCompiler::setupTab() { #endif } -aligned_unique_ptr FDRCompiler::build(pair link) { +aligned_unique_ptr +FDRCompiler::build(pair, size_t> &link) { assignStringsToBuckets(); setupTab(); return setupFDR(link); @@ -515,16 +500,15 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, bool make_small, const target_t &target, const Grey &grey, u32 hint, hwlmStreamingControl *stream_control) { - pair link(nullptr, 0); + pair, size_t> link(nullptr, 0); if (stream_control) { - link = fdrBuildTableStreaming(lits, stream_control); + link = fdrBuildTableStreaming(lits, *stream_control); } DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { - aligned_unique_ptr fdr - = teddyBuildTableHinted(lits, make_small, hint, target, link); + auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link); if (fdr) { DEBUG_PRINTF("build with teddy succeeded\n"); return fdr; diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index d98bb518..48e2ed6f 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ #include "ue2common.h" #include "hwlm/hwlm_literal.h" +#include "util/alloc.h" #include #include @@ -44,7 +45,6 @@ namespace ue2 { // a pile of decorative typedefs // good for documentation purposes more than anything else typedef u32 LiteralIndex; -typedef u32 ConfirmIndex; typedef u32 SuffixPositionInString; // zero is last byte, counting back // into the string typedef u32 BucketIndex; @@ -56,25 +56,22 @@ class EngineDescription; class FDREngineDescription; struct hwlmStreamingControl; -size_t getFDRConfirm(const std::vector &lits, FDRConfirm **fdrc_p, - bool make_small); - -std::pair setupFullMultiConfs( +std::pair, size_t> setupFullMultiConfs( const std::vector &lits, const EngineDescription &eng, - std::map > &bucketToLits, + std::map> &bucketToLits, bool make_small); // all suffixes include an implicit max_bucket_width suffix to ensure that // we always read a full-scale flood "behind" us in terms of what's in our // state; if we don't have a flood that's long enough we won't be in the // right state yet to allow blindly advancing -std::pair +std::pair, size_t> setupFDRFloodControl(const std::vector &lits, const EngineDescription &eng); -std::pair +std::pair, size_t> fdrBuildTableStreaming(const std::vector &lits, - hwlmStreamingControl *stream_control); + hwlmStreamingControl &stream_control); static constexpr u32 HINT_INVALID = 0xffffffff; diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 08946a5f..23437fe2 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -45,9 +45,10 @@ using namespace std; namespace ue2 { -typedef u8 ConfSplitType; -typedef pair BucketSplitPair; -typedef map > BC2CONF; +using ConfSplitType = u8; +using BucketSplitPair = pair; +using BC2CONF = map, size_t>>; // return the number of bytes beyond a length threshold in all strings in lits static @@ -149,9 +150,9 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, //#define FDR_CONFIRM_DUMP 1 -static -size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, - bool applyOneCharOpt, bool make_small, bool make_confirm) { +static pair, size_t> +getFDRConfirm(const vector &lits, bool applyOneCharOpt, + bool make_small, bool make_confirm) { vector tmpLitInfo(lits.size()); CONF_TYPE andmsk; fillLitInfo(lits, tmpLitInfo, andmsk); @@ -220,55 +221,61 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, #ifdef FDR_CONFIRM_DUMP // print out the literals reversed - makes it easier to line up analyses // that are end-offset based - for (map >::iterator i = res2lits.begin(), - e = res2lits.end(); i != e; ++i) { - u32 hash = i->first; - vector & vlidx = i->second; - if (vlidx.size() > 1) { - printf("%x -> %zu literals\n", hash, vlidx.size()); - u32 min_len = lits[vlidx.front()].s.size(); - vector > vsl; // 
contains the set of chars at each location - // reversed from the end - vsl.resize(1024); - u32 total_string_size = 0; - for (vector::iterator i2 = vlidx.begin(), - e2 = vlidx.end(); i2 != e2; ++i2) { - LiteralIndex litIdx = *i2; - total_string_size += lits[litIdx].s.size(); - for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) { - vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]); - } - min_len = MIN(min_len, lits[litIdx].s.size()); + for (const auto &m : res2lits) { + const u32 &hash = m.first; + const vector &vlidx = m.second; + if (vlidx.size() <= 1) { + continue; + } + printf("%x -> %zu literals\n", hash, vlidx.size()); + size_t min_len = lits[vlidx.front()].s.size(); + + vector> vsl; // contains the set of chars at each location + // reversed from the end + + for (const auto &litIdx : vlidx) { + const auto &lit = lits[litIdx]; + if (lit.s.size() > vsl.size()) { + vsl.resize(lit.s.size()); } - printf("common "); - for (u32 j = 0; j < min_len; j++) { - if (vsl[j].size() == 1) { - printf("%02x", (u32)*vsl[j].begin()); - } else { + for (size_t j = lit.s.size(); j != 0; j--) { + vsl[lit.s.size() - j].insert(lit.s[j - 1]); + } + min_len = min(min_len, lit.s.size()); + } + printf("common "); + for (size_t j = 0; j < min_len; j++) { + if (vsl[j].size() == 1) { + printf("%02x", *vsl[j].begin()); + } else { + printf("__"); + } + } + printf("\n"); + for (const auto &litIdx : vlidx) { + const auto &lit = lits[litIdx]; + printf("%8x %c", lit.id, lit.nocase ? '!' : ' '); + for (size_t j = lit.s.size(); j != 0; j--) { + size_t dist_from_end = lit.s.size() - j; + if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) { printf("__"); + } else { + printf("%02x", lit.s[j - 1]); } } printf("\n"); - for (vector::iterator i2 = vlidx.begin(), - e2 = vlidx.end(); i2 != e2; ++i2) { - LiteralIndex litIdx = *i2; - printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' '); - for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) { - u32 dist_from_end = lits[litIdx].s.size() - j; - if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) { - printf("__"); - } else { - printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]); - } - } - printf("\n"); - } - u32 total_compares = 0; - for (u32 j = 0; j < 1024; j++) { // naughty - total_compares += vsl[j].size(); - } - printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size); } + size_t total_compares = 0; + for (const auto &v : vsl) { + total_compares += v.size(); + } + size_t total_string_size = 0; + for (const auto &litIdx : vlidx) { + const auto &lit = lits[litIdx]; + total_string_size += lit.s.size(); + } + printf("Total compare load: %zu Total string size: %zu\n\n", + total_compares, total_string_size); } #endif @@ -281,7 +288,7 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, sizeof(LitInfo) * lits.size() + totalLitSize; size = ROUNDUP_N(size, alignof(FDRConfirm)); - FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size); + auto fdrc = aligned_zmalloc_unique(size); assert(fdrc); // otherwise would have thrown std::bad_alloc fdrc->andmsk = andmsk; @@ -295,7 +302,7 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, fdrc->groups = gm; // After the FDRConfirm, we have the lit index array. 
- u8 *fdrc_base = (u8 *)fdrc; + u8 *fdrc_base = (u8 *)fdrc.get(); u8 *ptr = fdrc_base + sizeof(*fdrc); ptr = ROUNDUP_PTR(ptr, alignof(u32)); u32 *bitsToLitIndex = (u32 *)ptr; @@ -307,14 +314,12 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, // Walk the map by hash value assigning indexes and laying out the // elements (and their associated string confirm material) in memory. - for (std::map >::const_iterator - i = res2lits.begin(), e = res2lits.end(); i != e; ++i) { - const u32 hash = i->first; - const vector &vlidx = i->second; - bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc); - for (vector::const_iterator i2 = vlidx.begin(), - e2 = vlidx.end(); i2 != e2; ++i2) { - LiteralIndex litIdx = *i2; + for (const auto &m : res2lits) { + const u32 hash = m.first; + const vector &vlidx = m.second; + bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base); + for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) { + LiteralIndex litIdx = *i; // Write LitInfo header. u8 *oldPtr = ptr; @@ -333,7 +338,7 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, } ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); - if (i2 + 1 == e2) { + if (next(i) == e) { finalLI.next = 0x0; } else { // our next field represents an adjustment on top of @@ -348,14 +353,13 @@ size_t getFDRConfirm(const vector &lits, FDRConfirm **fdrc_p, assert((size_t)(ptr - fdrc_base) <= size); } - *fdrc_p = fdrc; - // Return actual used size, not worst-case size. Must be rounded up to // FDRConfirm alignment so that the caller can lay out a sequence of these. size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base), alignof(FDRConfirm)); assert(actual_size <= size); - return actual_size; + + return {move(fdrc), actual_size}; } static @@ -377,12 +381,9 @@ u32 setupMultiConfirms(const vector &lits, u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { if (!bucketToLits[b].empty()) { - vector > vl(eng.getConfirmTopLevelSplit()); - for (vector::const_iterator - i = bucketToLits[b].begin(), - e = bucketToLits[b].end(); - i != e; ++i) { - hwlmLiteral lit = lits[*i]; // copy + vector> vl(eng.getConfirmTopLevelSplit()); + for (const LiteralIndex &lit_idx : bucketToLits[b]) { + hwlmLiteral lit = lits[lit_idx]; // copy // c is last char of this literal u8 c = *(lit.s.rbegin()); @@ -424,26 +425,27 @@ u32 setupMultiConfirms(const vector &lits, } for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) { - if (!vl[c].empty()) { - DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); - FDRConfirm *fdrc; - size_t size = getFDRConfirm(vl[c], &fdrc, - eng.typicallyHoldsOneCharLits(), - make_small, makeConfirm); - BucketSplitPair p = make_pair(b, c); - bc2Conf[p] = make_pair(fdrc, size); - totalConfirmSize += size; + if (vl[c].empty()) { + continue; } + DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); + auto key = make_pair(b, c); + auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(), + make_small, makeConfirm); + totalConfirmSize += fc.second; + assert(bc2Conf.find(key) == end(bc2Conf)); + bc2Conf.emplace(key, move(fc)); } } } return totalConfirmSize; } -pair setupFullMultiConfs(const vector &lits, - const EngineDescription &eng, - map > &bucketToLits, - bool make_small) { +pair, size_t> +setupFullMultiConfs(const vector &lits, + const EngineDescription &eng, + map> &bucketToLits, + bool make_small) { BC2CONF bc2Conf; u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, make_small); @@ -453,26 +455,24 @@ pair setupFullMultiConfs(const vector &lits, u32 
totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); - u8 *buf = (u8 *)aligned_zmalloc(totalSize); + auto buf = aligned_zmalloc_unique(totalSize); assert(buf); // otherwise would have thrown std::bad_alloc - u32 *confBase = (u32 *)buf; - u8 *ptr = buf + totalConfSwitchSize; + u32 *confBase = (u32 *)buf.get(); + u8 *ptr = buf.get() + totalConfSwitchSize; - for (BC2CONF::const_iterator i = bc2Conf.begin(), e = bc2Conf.end(); i != e; - ++i) { - const pair &p = i->second; + for (const auto &m : bc2Conf) { + const BucketIndex &b = m.first.first; + const u8 &c = m.first.second; + const pair, size_t> &p = m.second; // confirm offset is relative to the base of this structure, now - u32 confirm_offset = verify_u32(ptr - (u8 *)buf); - memcpy(ptr, p.first, p.second); + u32 confirm_offset = verify_u32(ptr - buf.get()); + memcpy(ptr, p.first.get(), p.second); ptr += p.second; - aligned_free(p.first); - BucketIndex b = i->first.first; - u8 c = i->first.second; u32 idx = c * nBuckets + b; confBase[idx] = confirm_offset; } - return make_pair(buf, totalSize); + return {move(buf), totalSize}; } } // namespace ue2 diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index cde13f6c..6272b69e 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -105,7 +105,6 @@ struct FDR_Runtime_Args { size_t start_offset; HWLMCallback cb; void *ctxt; - hwlm_group_t *groups; const u8 *firstFloodDetect; const u64a histBytes; }; diff --git a/src/fdr/fdr_streaming_compile.cpp b/src/fdr/fdr_streaming_compile.cpp index 34536eec..b2e1656c 100644 --- a/src/fdr/fdr_streaming_compile.cpp +++ b/src/fdr/fdr_streaming_compile.cpp @@ -94,14 +94,13 @@ static bool setupLongLits(const vector &lits, vector &long_lits, size_t max_len) { long_lits.reserve(lits.size()); - for (vector::const_iterator it = lits.begin(); - it != lits.end(); ++it) { - if (it->s.length() > max_len) { - hwlmLiteral tmp = *it; // copy - tmp.s.erase(tmp.s.size() - 1, 1); // erase last char + for (const auto &lit : lits) { + if (lit.s.length() > max_len) { + hwlmLiteral tmp = lit; // copy + tmp.s.pop_back(); tmp.id = 0; // recalc later tmp.groups = 0; // filled in later by hash bucket(s) - long_lits.push_back(tmp); + long_lits.push_back(move(tmp)); } } @@ -112,15 +111,12 @@ bool setupLongLits(const vector &lits, // sort long_literals by caseful/caseless and in lexicographical order, // remove duplicates stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder()); - vector::iterator new_end = - unique(long_lits.begin(), long_lits.end(), hwlmLitEqual); + auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual); long_lits.erase(new_end, long_lits.end()); // fill in ids; not currently used - for (vector::iterator i = long_lits.begin(), - e = long_lits.end(); - i != e; ++i) { - i->id = i - long_lits.begin(); + for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { + i->id = distance(long_lits.begin(), i); } return true; } @@ -143,23 +139,19 @@ void analyzeLits(const vector &long_lits, size_t max_len, hashedPositions[m] = 0; } - for (vector::const_iterator i = long_lits.begin(), - e = long_lits.end(); - i != e; ++i) { + for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { if (i->nocase) { - boundaries[CASEFUL] = verify_u32(i - long_lits.begin()); + boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i)); break; } } - for (vector::const_iterator i = long_lits.begin(), - e = long_lits.end(); - i != e; ++i) { - MODES m = 
i->nocase ? CASELESS : CASEFUL; - for (u32 j = 1; j < i->s.size() - max_len + 1; j++) { + for (const auto &lit : long_lits) { + Modes m = lit.nocase ? CASELESS : CASEFUL; + for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { hashedPositions[m]++; } - positions[m] += i->s.size(); + positions[m] += lit.s.size(); } for (u32 m = CASEFUL; m < MAX_MODES; m++) { @@ -170,7 +162,7 @@ void analyzeLits(const vector &long_lits, size_t max_len, #ifdef DEBUG_COMPILE printf("analyzeLits:\n"); - for (MODES m = CASEFUL; m < MAX_MODES; m++) { + for (Modes m = CASEFUL; m < MAX_MODES; m++) { printf("mode %s boundary %d positions %d hashedPositions %d " "hashEntries %d\n", (m == CASEFUL) ? "caseful" : "caseless", boundaries[m], @@ -181,7 +173,7 @@ void analyzeLits(const vector &long_lits, size_t max_len, } static -u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) { +u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) { return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m); } @@ -203,24 +195,21 @@ struct OffsetIDFromEndOrder { static void fillHashes(const vector &long_lits, size_t max_len, - FDRSHashEntry *tab, size_t numEntries, MODES m, + FDRSHashEntry *tab, size_t numEntries, Modes mode, map &litToOffsetVal) { const u32 nbits = lg2(numEntries); map > > bucketToLitOffPairs; map bucketToBitfield; - for (vector::const_iterator i = long_lits.begin(), - e = long_lits.end(); - i != e; ++i) { - const hwlmLiteral &l = *i; - if ((m == CASELESS) != i->nocase) { + for (const auto &lit : long_lits) { + if ((mode == CASELESS) != lit.nocase) { continue; } - for (u32 j = 1; j < i->s.size() - max_len + 1; j++) { - u32 h = hashLit(l, j, max_len, m); + for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) { + u32 h = hashLit(lit, j, max_len, mode); u32 h_ent = h & ((1U << nbits) - 1); u32 h_low = (h >> nbits) & 63; - bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j)); + bucketToLitOffPairs[h_ent].emplace_back(lit.id, j); bucketToBitfield[h_ent] |= (1ULL << h_low); } } @@ -231,11 +220,9 @@ void fillHashes(const vector &long_lits, size_t max_len, // sweep out bitfield entries and save the results swapped accordingly // also, anything with bitfield entries is put in filledBuckets - for (map::const_iterator i = bucketToBitfield.begin(), - e = bucketToBitfield.end(); - i != e; ++i) { - u32 bucket = i->first; - u64a contents = i->second; + for (const auto &m : bucketToBitfield) { + const u32 &bucket = m.first; + const u64a &contents = m.second; tab[bucket].bitfield = contents; filledBuckets.set(bucket); } @@ -243,12 +230,9 @@ void fillHashes(const vector &long_lits, size_t max_len, // store out all our chains based on free values in our hash table. // find nearest free locations that are empty (there will always be more // entries than strings, at present) - for (map > >::iterator - i = bucketToLitOffPairs.begin(), - e = bucketToLitOffPairs.end(); - i != e; ++i) { - u32 bucket = i->first; - deque > &d = i->second; + for (auto &m : bucketToLitOffPairs) { + u32 bucket = m.first; + deque> &d = m.second; // sort d by distance of the residual string (len minus our depth into // the string). We need to put the 'furthest back' string first... 
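As a side note on the layout being built by `fillHashes()` above: each hashed literal position maps to a table slot (low `nbits` of the hash) plus one bit of that slot's 64-bit bitfield (the next 6 bits), which appears to let the runtime reject most probes cheaply via the `has_bit()` helper seen later in this patch. A rough standalone sketch of that split; names other than `h_ent`/`h_low` are illustrative:

```c
#include <stdint.h>
#include <stdio.h>

/* Rough sketch of the hash split used by fillHashes(): the low nbits of
 * the hash select a table slot, and the next 6 bits set one bit of that
 * slot's 64-bit bitfield. */
static void add_entry(uint64_t *bitfields, uint32_t h, uint32_t nbits) {
    uint32_t h_ent = h & ((1u << nbits) - 1);  /* table slot */
    uint32_t h_low = (h >> nbits) & 63;        /* bit within the bitfield */
    bitfields[h_ent] |= 1ull << h_low;
}

int main(void) {
    uint64_t bitfields[1u << 10] = {0};
    add_entry(bitfields, 0xdeadbeefu, 10);     /* slot 0x2ef, bit 0x2f */
    printf("%016llx\n", (unsigned long long)bitfields[0x2ef]);
    return 0;
}
```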
@@ -299,31 +283,30 @@ void fillHashes(const vector &long_lits, size_t max_len, static size_t maxMaskLen(const vector &lits) { size_t rv = 0; - vector::const_iterator it, ite; - for (it = lits.begin(), ite = lits.end(); it != ite; ++it) { - rv = max(rv, it->msk.size()); + for (const auto &lit : lits) { + rv = max(rv, lit.msk.size()); } return rv; } -pair +pair, size_t> fdrBuildTableStreaming(const vector &lits, - hwlmStreamingControl *stream_control) { + hwlmStreamingControl &stream_control) { // refuse to compile if we are forced to have smaller than minimum // history required for long-literal support, full stop // otherwise, choose the maximum of the preferred history quantity // (currently a fairly extravagant 32) or the already used history - // quantity - subject to the limitation of stream_control->history_max + // quantity - subject to the limitation of stream_control.history_max const size_t MIN_HISTORY_REQUIRED = 32; - if (MIN_HISTORY_REQUIRED > stream_control->history_max) { + if (MIN_HISTORY_REQUIRED > stream_control.history_max) { throw std::logic_error("Cannot set history to minimum history required"); } size_t max_len = - MIN(stream_control->history_max, - MAX(MIN_HISTORY_REQUIRED, stream_control->history_min)); + MIN(stream_control.history_max, + MAX(MIN_HISTORY_REQUIRED, stream_control.history_min)); assert(max_len >= MIN_HISTORY_REQUIRED); size_t max_mask_len = maxMaskLen(lits); @@ -334,10 +317,10 @@ fdrBuildTableStreaming(const vector &lits, // we want enough history to manage the longest literal and the longest // mask. - stream_control->literal_history_required = + stream_control.literal_history_required = max(maxLen(lits), max_mask_len) - 1; - stream_control->literal_stream_state_required = 0; - return make_pair(nullptr, size_t{0}); + stream_control.literal_stream_state_required = 0; + return {nullptr, size_t{0}}; } // Ensure that we have enough room for the longest mask. 
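To make the history clamp just above concrete: the streaming table wants at least `MIN_HISTORY_REQUIRED` (32) bytes of history, uses a larger `history_min` if the caller already requires one, and is capped by `history_max` (compilation having been refused earlier if even 32 bytes are unavailable). A small standalone sketch of that computation; the function name is illustrative:

```c
#include <stdio.h>

#define MIN_HISTORY_REQUIRED 32

/* Sketch of: max_len = MIN(history_max, MAX(MIN_HISTORY_REQUIRED, history_min)) */
static unsigned clamp_history(unsigned history_min, unsigned history_max) {
    unsigned want = history_min > MIN_HISTORY_REQUIRED ? history_min
                                                       : MIN_HISTORY_REQUIRED;
    return want < history_max ? want : history_max;
}

int main(void) {
    /* e.g. (min=16, max=60) -> 32, (min=40, max=60) -> 40, (min=80, max=60) -> 60 */
    printf("%u %u %u\n", clamp_history(16, 60), clamp_history(40, 60),
           clamp_history(80, 60));
    return 0;
}
```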
@@ -381,11 +364,11 @@ fdrBuildTableStreaming(const vector &lits, streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2)); u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8; - u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize); + auto secondaryTable = aligned_zmalloc_unique(tabSize); assert(secondaryTable); // otherwise would have thrown std::bad_alloc // then fill it in - u8 * ptr = secondaryTable; + u8 * ptr = secondaryTable.get(); FDRSTableHeader * header = (FDRSTableHeader *)ptr; // fill in header header->pseudoEngineID = (u32)0xffffffff; @@ -407,11 +390,9 @@ fdrBuildTableStreaming(const vector &lits, ptr += litTabSize; map litToOffsetVal; - for (vector::const_iterator i = long_lits.begin(), - e = long_lits.end(); - i != e; ++i) { + for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) { u32 entry = verify_u32(i - long_lits.begin()); - u32 offset = verify_u32(ptr - secondaryTable); + u32 offset = verify_u32(ptr - secondaryTable.get()); // point the table entry to the string location litTabPtr[entry].offset = offset; @@ -425,20 +406,20 @@ fdrBuildTableStreaming(const vector &lits, } // fill in final lit table entry with current ptr (serves as end value) - litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable); + litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get()); // fill hash tables - ptr = secondaryTable + htOffset[CASEFUL]; + ptr = secondaryTable.get() + htOffset[CASEFUL]; for (u32 m = CASEFUL; m < MAX_MODES; ++m) { fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m], - (MODES)m, litToOffsetVal); + (Modes)m, litToOffsetVal); ptr += htSize[m]; } // tell the world what we did - stream_control->literal_history_required = max_len; - stream_control->literal_stream_state_required = tot_state_bytes; - return make_pair(secondaryTable, tabSize); + stream_control.literal_history_required = max_len; + stream_control.literal_stream_state_required = tot_state_bytes; + return {move(secondaryTable), tabSize}; } } // namespace ue2 diff --git a/src/fdr/fdr_streaming_internal.h b/src/fdr/fdr_streaming_internal.h index 26602ce1..11b07b56 100644 --- a/src/fdr/fdr_streaming_internal.h +++ b/src/fdr/fdr_streaming_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,11 +41,11 @@ // hash table (caseful) (FDRSHashEntry) // hash table (caseless) (FDRSHashEntry) -typedef enum { +enum Modes { CASEFUL = 0, CASELESS = 1, MAX_MODES = 2 -} MODES; +}; // We have one of these structures hanging off the 'link' of our secondary // FDR table that handles streaming strings @@ -91,12 +91,12 @@ struct FDRSHashEntry { }; static really_inline -u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) { +u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { return m == CASEFUL ? 
0 : h->boundary[m-1]; } static really_inline -u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) { +u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) { return h->boundary[m]; } @@ -107,17 +107,17 @@ const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) { } static really_inline -u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) { +u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) { return getLitTab(h)[get_start_lit_idx(h, m)].offset; } static really_inline -u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) { +u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { return v - getBaseOffsetOfLits(h, m) + 1; } static really_inline -u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) { +u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) { return v + getBaseOffsetOfLits(h, m) - 1; } @@ -127,7 +127,7 @@ u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) { } static really_inline -u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) { +u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) { const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL; const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL; assert(len >= 32); diff --git a/src/fdr/fdr_streaming_runtime.h b/src/fdr/fdr_streaming_runtime.h index fa5843c5..8e264c76 100644 --- a/src/fdr/fdr_streaming_runtime.h +++ b/src/fdr/fdr_streaming_runtime.h @@ -143,7 +143,7 @@ u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) { // binary search for the literal index that contains the current state static really_inline u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable, - u32 stateValue, MODES m) { + u32 stateValue, enum Modes m) { const struct FDRSLiteral * litTab = getLitTab(streamingTable); u32 lo = get_start_lit_idx(streamingTable, m); u32 hi = get_end_lit_idx(streamingTable, m); @@ -175,7 +175,7 @@ void fdrUnpackStateMode(struct FDR_Runtime_Args *a, const struct FDRSTableHeader *streamingTable, const struct FDRSLiteral * litTab, const u32 *state_table, - const MODES m) { + const enum Modes m) { if (!state_table[m]) { return; } @@ -213,8 +213,9 @@ void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a, } static really_inline -u32 do_single_confirm(const struct FDRSTableHeader * streamingTable, - const struct FDR_Runtime_Args * a, u32 hashState, MODES m) { +u32 do_single_confirm(const struct FDRSTableHeader *streamingTable, + const struct FDR_Runtime_Args *a, u32 hashState, + enum Modes m) { const struct FDRSLiteral * litTab = getLitTab(streamingTable); u32 idx = findLitTabEntry(streamingTable, hashState, m); size_t found_offset = litTab[idx].offset; @@ -279,7 +280,7 @@ void fdrFindStreamingHash(const struct FDR_Runtime_Args *a, static really_inline const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, - u32 h, const MODES m) { + u32 h, const enum Modes m) { u32 nbits = streamingTable->hashNBits[m]; if (!nbits) { return NULL; @@ -303,7 +304,7 @@ const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable, static really_inline void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a, const struct FDRSTableHeader *streamingTable, - const struct FDRSHashEntry *ent, const MODES m) { + const struct FDRSHashEntry *ent, const enum Modes m) { assert(ent); assert(streamingTable->hashNBits[m]); diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 
2c131788..62693c30 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -69,7 +69,7 @@ static void updateFloodSuffix(vector &tmpFlood, u8 c, u32 suffix) { FDRFlood &fl = tmpFlood[c]; fl.suffix = MAX(fl.suffix, suffix + 1); - DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix); + DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix); } static @@ -90,8 +90,9 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, } } -pair setupFDRFloodControl(const vector &lits, - const EngineDescription &eng) { +pair, size_t> +setupFDRFloodControl(const vector &lits, + const EngineDescription &eng) { vector tmpFlood(N_CHARS); u32 default_suffix = eng.getDefaultFloodSuffixLength(); @@ -124,8 +125,9 @@ pair setupFDRFloodControl(const vector &lits, for (u32 i = 0; i < iEnd; i++) { if (i < litSize) { if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) { - DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n", - i, c, lit.s[litSize - i - 1]); + DEBUG_PRINTF("non-flood char in literal[%u]: " + "0x%02x != 0x%02x\n", + i, c, lit.s[litSize - i - 1]); upSuffix = MIN(upSuffix, i); loSuffix = MIN(loSuffix, i); // makes sense only for case-less break; @@ -195,11 +197,12 @@ pair setupFDRFloodControl(const vector &lits, size_t floodHeaderSize = sizeof(u32) * N_CHARS; size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); - u8 *buf = (u8 *)aligned_zmalloc(totalSize); + + auto buf = aligned_zmalloc_unique(totalSize); assert(buf); // otherwise would have thrown std::bad_alloc - u32 *floodHeader = (u32 *)buf; - FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize); + u32 *floodHeader = (u32 *)buf.get(); + FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize); u32 currentFloodIndex = 0; for (const auto &m : flood2chars) { @@ -215,7 +218,7 @@ pair setupFDRFloodControl(const vector &lits, DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n", floodHeaderSize, floodStructSize, totalSize); - return make_pair((u8 *)buf, totalSize); + return {move(buf), totalSize}; } } // namespace ue2 diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index 08b761c0..e7a0fccd 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -36,7 +36,6 @@ #include "teddy_internal.h" #include "teddy_runtime_common.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -80,15 +79,15 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { do { \ if (unlikely(isnonzero128(var))) { \ u64a lo = movq(var); \ - u64a hi = movq(byteShiftRight128(var, 8)); \ + u64a hi = movq(rshiftbyte_m128(var, 8)); \ if (unlikely(lo)) { \ conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(hi)) { \ conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ @@ -98,27 +97,27 @@ do { \ do { \ if (unlikely(isnonzero128(var))) { \ u32 part1 = movd(var); \ - u32 part2 = movd(byteShiftRight128(var, 4)); \ - u32 part3 = movd(byteShiftRight128(var, 8)); \ - u32 part4 = movd(byteShiftRight128(var, 12)); \ + u32 part2 = movd(rshiftbyte_m128(var, 4)); \ + u32 part3 = movd(rshiftbyte_m128(var, 8)); \ + u32 part4 = movd(rshiftbyte_m128(var, 12)); \ if (unlikely(part1)) { \ 
conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part2)) { \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part3)) { \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part4)) { \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ @@ -126,36 +125,34 @@ do { \ #endif static really_inline -m128 prep_conf_teddy_m1(const m128 *maskBase, m128 p_mask, m128 val) { +m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); - m128 hi = and128(rshift2x64(val, 4), mask); - return and128(and128(pshufb(maskBase[0*2], lo), - pshufb(maskBase[0*2+1], hi)), p_mask); + m128 hi = and128(rshift64_m128(val, 4), mask); + return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi)); } static really_inline -m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 p_mask, - m128 val) { +m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); - m128 hi = and128(rshift2x64(val, 4), mask); - m128 r = prep_conf_teddy_m1(maskBase, p_mask, val); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m1(maskBase, val); m128 res_1 = and128(pshufb(maskBase[1*2], lo), pshufb(maskBase[1*2+1], hi)); m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); *old_1 = res_1; - return and128(and128(r, p_mask), res_shifted_1); + return and128(r, res_shifted_1); } static really_inline m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, - m128 p_mask, m128 val) { + m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); - m128 hi = and128(rshift2x64(val, 4), mask); - m128 r = prep_conf_teddy_m2(maskBase, old_1, p_mask, val); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m2(maskBase, old_1, val); m128 res_2 = and128(pshufb(maskBase[2*2], lo), pshufb(maskBase[2*2+1], hi)); @@ -166,11 +163,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, static really_inline m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, - m128 *old_3, m128 p_mask, m128 val) { + m128 *old_3, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); - m128 hi = and128(rshift2x64(val, 4), mask); - m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, p_mask, val); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); m128 res_3 = and128(pshufb(maskBase[3*2], lo), pshufb(maskBase[3*2+1], hi)); @@ -180,11 +177,10 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, } hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -203,13 +199,14 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, 
m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); ptr += 16; } @@ -217,9 +214,9 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); - m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); + m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); } @@ -227,19 +224,19 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -258,13 +255,14 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -272,9 +270,9 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr)); + m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m1(maskBase, ones128(), load128(ptr + 16)); + m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -282,19 +280,19 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, 
do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -314,14 +312,14 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr)); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -329,11 +327,9 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr)); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr + 16)); + m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -341,19 +337,19 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -373,14 +369,14 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr)); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -388,11 +384,9 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + 
(iterBytes*4)); CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr)); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, ones128(), - load128(ptr + 16)); + m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -400,19 +394,19 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -434,14 +428,15 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr)); + load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -450,10 +445,10 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr)); + load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr + 16)); + load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -461,20 +456,19 @@ hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -496,14 +490,15 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m128 r_0 = 
prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr)); + load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -512,10 +507,10 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr)); + load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones128(), load128(ptr + 16)); + load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -523,20 +518,19 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, m128 p_mask; m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -559,14 +553,15 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr)); + &res_old_3, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -575,10 +570,10 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr)); + &res_old_3, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr + 16)); + &res_old_3, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -587,19 +582,19 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + 
hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -622,14 +617,15 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr)); + &res_old_3, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -638,10 +634,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr)); + &res_old_3, load128(ptr)); CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones128(), load128(ptr + 16)); + &res_old_3, load128(ptr + 16)); CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -650,9 +646,10 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and128(r_0, p_mask); CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index f3902723..e2936723 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -33,64 +33,85 @@ #ifndef TEDDY_H_ #define TEDDY_H_ +#include "hwlm/hwlm.h" // for hwlm_group_t + struct FDR; // forward declaration from fdr_internal.h struct FDR_Runtime_Args; hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); #if defined(__AVX2__) hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, - const 
struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a); +hwlm_error_t +fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); #endif /* __AVX2__ */ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 33dd8a30..e4a836d4 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -36,7 +36,6 @@ #include "teddy_internal.h" #include "teddy_runtime_common.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" #if defined(__AVX2__) @@ -122,22 +121,22 @@ do { \ u64a part4 = extract64from256(r, 1); \ if (unlikely(part1)) { \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part2)) { \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part3)) { \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part4)) { \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ @@ -159,41 +158,41 @@ do { \ u32 part8 = extract32from256(r, 3); \ if (unlikely(part1)) { \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part2)) { \ conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ } \ if (unlikely(part3)) { \ conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part4)) { \ conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if 
(unlikely(part5)) { \ conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part6)) { \ conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part7)) { \ conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ if (unlikely(part8)) { \ conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \ - control, &last_match); \ + &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ @@ -205,11 +204,11 @@ do { \ if (unlikely(isnonzero256(var))) { \ u32 arrCnt = 0; \ m128 lo = cast256to128(var); \ - m128 hi = cast256to128(swap128in256(var)); \ + m128 hi = movdq_hi(var); \ bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \ bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \ for (u32 i = 0; i < arrCnt; i++) { \ - conf_fn(bitArr[i], confBase, reason, a, ptr, control, \ + conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \ &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ @@ -372,7 +371,7 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { 64 * (offset); *arrCnt += 1; } - u64a part_1 = movq(byteShiftRight128(var, 8)); + u64a part_1 = movq(rshiftbyte_m128(var, 8)); while (unlikely(part_1)) { bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + 64 * (offset + 1); @@ -385,19 +384,19 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { 32 * (offset * 2); *arrCnt += 1; } - u32 part_1 = movd(byteShiftRight128(var, 4)); + u32 part_1 = movd(rshiftbyte_m128(var, 4)); while (unlikely(part_1)) { bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + 32 * (offset * 2 + 1); *arrCnt += 1; } - u32 part_2 = movd(byteShiftRight128(var, 8)); + u32 part_2 = movd(rshiftbyte_m128(var, 8)); while (unlikely(part_2)) { bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) + 32 * (offset * 2 + 2); *arrCnt += 1; } - u32 part_3 = movd(byteShiftRight128(var, 12)); + u32 part_3 = movd(rshiftbyte_m128(var, 12)); while (unlikely(part_3)) { bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) + 32 * (offset * 2 + 3); @@ -408,36 +407,35 @@ void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { } static really_inline -m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 p_mask, m256 val) { +m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); - m256 hi = and256(rshift4x64(val, 4), mask); - return and256(and256(vpshufb(maskBase[0*2], lo), - vpshufb(maskBase[0*2+1], hi)), p_mask); + m256 hi = and256(rshift64_m256(val, 4), mask); + return and256(vpshufb(maskBase[0*2], lo), + vpshufb(maskBase[0*2+1], hi)); } static really_inline -m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 p_mask, - m256 val) { +m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); - m256 hi = and256(rshift4x64(val, 4), mask); - m256 r = prep_conf_fat_teddy_m1(maskBase, p_mask, val); + m256 hi = and256(rshift64_m256(val, 4), mask); + m256 r = prep_conf_fat_teddy_m1(maskBase, val); m256 res_1 = and256(vpshufb(maskBase[1*2], lo), vpshufb(maskBase[1*2+1], hi)); m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); *old_1 = res_1; - return and256(and256(r, p_mask), 
res_shifted_1); + return and256(r, res_shifted_1); } static really_inline m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, - m256 p_mask, m256 val) { + m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); - m256 hi = and256(rshift4x64(val, 4), mask); - m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, p_mask, val); + m256 hi = and256(rshift64_m256(val, 4), mask); + m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val); m256 res_2 = and256(vpshufb(maskBase[2*2], lo), vpshufb(maskBase[2*2+1], hi)); @@ -448,11 +446,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, static really_inline m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, - m256 *old_3, m256 p_mask, m256 val) { + m256 *old_3, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); - m256 hi = and256(rshift4x64(val, 4), mask); - m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, p_mask, val); + m256 hi = and256(rshift64_m256(val, 4), mask); + m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val); m256 res_3 = and256(vpshufb(maskBase[3*2], lo), vpshufb(maskBase[3*2+1], hi)); @@ -462,12 +460,10 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, } static really_inline -m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi, - m256 p_mask) { +m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) { m256 lo = and256(val, mask); - m256 hi = and256(rshift4x64(val, 4), mask); - m256 res = and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); - return and256(res, p_mask); + m256 hi = and256(rshift64_m256(val, 4), mask); + return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); } static really_inline @@ -482,11 +478,10 @@ const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { } hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -505,13 +500,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); ptr += 16; } @@ -519,10 +515,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); - m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), - load2x128(ptr + 16)); + m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, 
do_confWithBit1_teddy); } @@ -530,19 +525,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -561,13 +556,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -575,10 +571,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, ones256(), load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m1(maskBase, ones256(), - load2x128(ptr + 16)); + m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -586,19 +581,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -618,14 +613,14 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, 
do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), - load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -633,10 +628,9 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), - load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -645,19 +639,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -677,25 +671,24 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), - load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; + ptr += 16; } for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), - load2x128(ptr)); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, ones256(), + m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -704,19 +697,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, p_mask, val_0); + m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); + r_0 = and256(r_0, p_mask); 
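Across all of these Teddy kernels the partial-load mask p_mask is no longer threaded through prep_conf_*: previously even the steady-state loop passed ones128()/ones256() and paid an AND per block, whereas now only the cautious head/tail iterations AND p_mask in after the shuffle work. The sketch below shows the shape of that change with scalar stand-ins for the SIMD types; prep_block, load_block and load_block_cautious are illustrative names, not the real Teddy routines.

#include <cstddef>
#include <cstdint>
#include <cstring>

using block_t = uint64_t; // stand-in for an m128/m256 of per-byte hit bits

// Does only the shuffle/AND work; takes no partial-load mask any more.
block_t prep_block(block_t val) {
    return val & 0x8080808080808080ULL; // stand-in for the nibble-shuffle hit test
}

block_t load_block(const uint8_t *p) { // full, in-bounds load
    block_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

// Cautious load near the buffer end: reports which byte lanes are valid.
block_t load_block_cautious(block_t *p_mask, const uint8_t *p, const uint8_t *end) {
    block_t v = 0, m = 0;
    for (size_t i = 0; i < sizeof(v) && p + i < end; i++) {
        v |= static_cast<block_t>(p[i]) << (8 * i);
        m |= static_cast<block_t>(0xff) << (8 * i);
    }
    *p_mask = m;
    return v;
}

void scan(const uint8_t *buf, size_t len) {
    const uint8_t *ptr = buf, *end = buf + len;

    // Steady state: full loads, so no AND against an all-ones mask per block.
    for (; ptr + sizeof(block_t) <= end; ptr += sizeof(block_t)) {
        block_t r = prep_block(load_block(ptr));
        (void)r; // confirm(r) would run here
    }

    // Tail: only this cautious path pays for the extra AND with p_mask.
    if (ptr < end) {
        block_t p_mask;
        block_t r = prep_block(load_block_cautious(&p_mask, ptr, end));
        r &= p_mask;
        (void)r; // confirm(r) would run here
    }
}

int main() {
    const char text[] = "teddy-style scan over a short buffer";
    scan(reinterpret_cast<const uint8_t *>(text), sizeof(text) - 1);
    return 0;
}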
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -738,14 +731,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr)); + load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -754,10 +748,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr)); + load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr + 16)); + load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -766,19 +760,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -800,14 +794,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr)); + load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -816,10 +811,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr)); + load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); m256 r_1 = 
prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - ones256(), load2x128(ptr + 16)); + load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -828,19 +823,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 3); m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - p_mask, val_0); + val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -863,15 +858,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr)); + &res_old_3, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); ptr += 16; } @@ -880,12 +875,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr)); + &res_old_3, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr + 16)); + &res_old_3, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); } @@ -894,19 +887,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -929,15 +922,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and256(r_0, p_mask); 
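The other change repeated through every exec function here is how the HWLM group word travels: the local controlVal copied from *a->groups and written back on each exit is gone, the functions take an hwlm_group_t control argument by value, and they simply return HWLM_TERMINATED or HWLM_SUCCESS (CHECK_HWLM_TERMINATE_MATCHING now tests control directly, as the teddy_runtime_common.h hunk below shows). The sketch that follows is a compressed before/after of that control flow only; scan_old, scan_new, confirm_step and the TERMINATE sentinel are stand-ins, not the real FDR dispatch code.

#include <cstdint>

using hwlm_group_t = uint64_t;
enum hwlm_error_t { HWLM_SUCCESS, HWLM_TERMINATED };
const hwlm_group_t TERMINATE = 0; // stand-in for HWLM_TERMINATE_MATCHING

// Stand-in for the confirm path: reporting a match may squash group bits, and
// running out of groups is the signal to stop scanning.
void confirm_step(hwlm_group_t *control) {
    *control &= *control - 1; // drop one group bit per call (illustrative only)
}

struct runtime_args { hwlm_group_t *groups; };

// Before: the group word lives behind a->groups, is copied into a local and
// written back on every return path.
hwlm_error_t scan_old(runtime_args *a) {
    hwlm_group_t controlVal = *a->groups;
    for (int i = 0; i < 4; i++) {
        confirm_step(&controlVal);
        if (controlVal == TERMINATE) {
            *a->groups = controlVal;
            return HWLM_TERMINATED;
        }
    }
    *a->groups = controlVal;
    return HWLM_SUCCESS;
}

// After: the group word is passed by value and the outcome is simply returned;
// no write-back through the args structure on any path.
hwlm_error_t scan_new(hwlm_group_t control) {
    for (int i = 0; i < 4; i++) {
        confirm_step(&control);
        if (control == TERMINATE) {
            return HWLM_TERMINATED;
        }
    }
    return HWLM_SUCCESS;
}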
CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } if (ptr + 16 < buf_end) { m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr)); + &res_old_3, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); ptr += 16; } @@ -946,12 +939,10 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, __builtin_prefetch(ptr + (iterBytes*4)); CHECK_FLOOD; m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr)); + &res_old_3, load2x128(ptr)); CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, ones256(), - load2x128(ptr + 16)); + &res_old_3, load2x128(ptr + 16)); CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); } @@ -960,19 +951,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, a->buf_history, a->len_history, 4); m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, p_mask, val_0); + &res_old_3, val_0); + r_0 = and256(r_0, p_mask); CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -996,16 +987,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - p_mask); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); + res_0 = and256(res_0, p_mask); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); ptr += 32; } if (ptr + 32 < buf_end) { m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - ones256()); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); ptr += 32; } @@ -1015,13 +1005,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, CHECK_FLOOD; m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - ones256()); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, - ones256()); + m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); } @@ -1029,20 +1017,19 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - p_mask); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); + res_0 = 
and256(res_0, p_mask); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a) { + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { const u8 *buf_end = a->buf + a->len; const u8 *ptr = a->buf + a->start_offset; - hwlmcb_rv_t controlVal = *a->groups; - hwlmcb_rv_t *control = &controlVal; u32 floodBackoff = FLOOD_BACKOFF_START; const u8 *tryFloodDetect = a->firstFloodDetect; u32 last_match = (u32)-1; @@ -1066,16 +1053,15 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - p_mask); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); + res_0 = and256(res_0, p_mask); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); ptr += 32; } if (ptr + 32 < buf_end) { m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - ones256()); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); ptr += 32; } @@ -1085,13 +1071,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, CHECK_FLOOD; m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - ones256()); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy); m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi, - ones256()); + m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy); } @@ -1099,11 +1083,11 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, m256 p_mask; m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi, - p_mask); + m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); + res_0 = and256(res_0, p_mask); CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); } - *a->groups = controlVal; + return HWLM_SUCCESS; } diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index c1e46d85..15b9665b 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -74,12 +74,11 @@ public: const TeddyEngineDescription &eng_in, bool make_small_in) : eng(eng_in), lits(lits_in), make_small(make_small_in) {} - aligned_unique_ptr build(pair link); + aligned_unique_ptr build(pair, size_t> &link); bool pack(map > &bucketToLits); }; class TeddySet { - const vector &lits; u32 len; // nibbleSets is a series of bitfields over 16 predicates // that represent the whether shufti nibble set @@ -89,8 +88,7 @@ class TeddySet { vector nibbleSets; set litIds; public: - TeddySet(const vector &lits_in, u32 len_in) - : lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {} + explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {} const set & getLits() const { return litIds; } size_t litCount() const { return litIds.size(); } @@ -106,8 +104,8 @@ public: } printf("\nnlits: %zu\nLit ids: ", litCount()); printf("Prob: %llu\n", probability()); - 
for (set::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) { - printf("%u ", *i); + for (const auto &id : litIds) { + printf("%u ", id); } printf("\n"); printf("Flood prone : %s\n", isRunProne()?"yes":"no"); @@ -118,15 +116,15 @@ public: return nibbleSets == ts.nibbleSets; } - void addLiteral(u32 lit_id) { - const string &s = lits[lit_id].s; + void addLiteral(u32 lit_id, const hwlmLiteral &lit) { + const string &s = lit.s; for (u32 i = 0; i < len; i++) { if (i < s.size()) { u8 c = s[s.size() - i - 1]; u8 c_hi = (c >> 4) & 0xf; u8 c_lo = c & 0xf; nibbleSets[i*2] = 1 << c_lo; - if (lits[lit_id].nocase && ourisalpha(c)) { + if (lit.nocase && ourisalpha(c)) { nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2)); } else { nibbleSets[i*2+1] = 1 << c_hi; @@ -185,28 +183,26 @@ bool TeddyCompiler::pack(map sts; for (u32 i = 0; i < lits.size(); i++) { - TeddySet ts(lits, eng.numMasks); - ts.addLiteral(i); + TeddySet ts(eng.numMasks); + ts.addLiteral(i, lits[i]); sts.insert(ts); } while (1) { #ifdef TEDDY_DEBUG printf("Size %zu\n", sts.size()); - for (set::const_iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) { - printf("\n"); i1->dump(); + for (const TeddySet &ts : sts) { + printf("\n"); ts.dump(); } printf("\n===============================================\n"); #endif - set::iterator m1 = sts.end(), m2 = sts.end(); + auto m1 = sts.end(), m2 = sts.end(); u64a best = 0xffffffffffffffffULL; - for (set::iterator i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) { - set::iterator i2 = i1; - ++i2; + for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) { const TeddySet &s1 = *i1; - for (set::iterator e2 = sts.end(); i2 != e2; ++i2) { + for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) { const TeddySet &s2 = *i2; // be more conservative if we don't absolutely need to @@ -216,7 +212,7 @@ bool TeddyCompiler::pack(map eng.getNumBuckets()) { return false; } - for (set::const_iterator i = sts.begin(), e = sts.end(); i != e; - ++i) { - for (set::const_iterator i2 = i->getLits().begin(), - e2 = i->getLits().end(); - i2 != e2; ++i2) { - bucketToLits[cnt].push_back(*i2); - } - cnt++; + u32 bucket_id = 0; + for (const TeddySet &ts : sts) { + const auto &ts_lits = ts.getLits(); + auto &bucket_lits = bucketToLits[bucket_id]; + bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits)); + bucket_id++; } return true; } -aligned_unique_ptr TeddyCompiler::build(pair link) { +aligned_unique_ptr +TeddyCompiler::build(pair, size_t> &link) { if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { DEBUG_PRINTF("too many literals: %zu\n", lits.size()); return nullptr; @@ -314,9 +308,8 @@ aligned_unique_ptr TeddyCompiler::build(pair link) { size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; - pair floodControlTmp = setupFDRFloodControl(lits, eng); - pair confirmTmp - = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto floodControlTmp = setupFDRFloodControl(lits, eng); + auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); size_t size = ROUNDUP_N(sizeof(Teddy) + maskLen + @@ -334,38 +327,29 @@ aligned_unique_ptr TeddyCompiler::build(pair link) { teddy->maxStringLen = verify_u32(maxLen(lits)); u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; - memcpy(ptr, confirmTmp.first, confirmTmp.second); + memcpy(ptr, confirmTmp.first.get(), confirmTmp.second); ptr += confirmTmp.second; - aligned_free(confirmTmp.first); teddy->floodOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, floodControlTmp.first, floodControlTmp.second); + memcpy(ptr, 
floodControlTmp.first.get(), floodControlTmp.second); ptr += floodControlTmp.second; - aligned_free(floodControlTmp.first); if (link.first) { teddy->link = verify_u32(ptr - teddy_base); - memcpy(ptr, link.first, link.second); - aligned_free(link.first); + memcpy(ptr, link.first.get(), link.second); } else { teddy->link = 0; } u8 *baseMsk = teddy_base + sizeof(Teddy); - for (map >::const_iterator - i = bucketToLits.begin(), - e = bucketToLits.end(); - i != e; ++i) { - const u32 bucket_id = i->first; - const vector &ids = i->second; + for (const auto &b2l : bucketToLits) { + const u32 &bucket_id = b2l.first; + const vector &ids = b2l.second; const u8 bmsk = 1U << (bucket_id % 8); - for (vector::const_iterator i2 = ids.begin(), - e2 = ids.end(); - i2 != e2; ++i2) { - LiteralIndex lit_id = *i2; - const hwlmLiteral & l = lits[lit_id]; + for (const LiteralIndex &lit_id : ids) { + const hwlmLiteral &l = lits[lit_id]; DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id); const u32 sz = verify_u32(l.s.size()); @@ -439,10 +423,10 @@ aligned_unique_ptr TeddyCompiler::build(pair link) { } // namespace -aligned_unique_ptr teddyBuildTableHinted(const vector &lits, - bool make_small, u32 hint, - const target_t &target, - pair link) { +aligned_unique_ptr +teddyBuildTableHinted(const vector &lits, bool make_small, + u32 hint, const target_t &target, + pair, size_t> &link) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index fba6a3d1..276c1347 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,7 +49,7 @@ struct hwlmLiteral; ue2::aligned_unique_ptr teddyBuildTableHinted(const std::vector &lits, bool make_small, u32 hint, const target_t &target, - std::pair link); + std::pair, size_t> &link); } // namespace ue2 diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index c50b4d16..dc65c70a 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -51,8 +51,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32]; #define CHECK_HWLM_TERMINATE_MATCHING \ do { \ - if (unlikely(controlVal == HWLM_TERMINATE_MATCHING)) { \ - *a->groups = controlVal; \ + if (unlikely(control == HWLM_TERMINATE_MATCHING)) { \ return HWLM_TERMINATED; \ } \ } while (0); @@ -61,8 +60,7 @@ do { \ do { \ if (unlikely(ptr > tryFloodDetect)) { \ tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, \ - &floodBackoff, &controlVal, \ - iterBytes); \ + &floodBackoff, &control, iterBytes); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } while (0); diff --git a/src/grey.cpp b/src/grey.cpp index 69dab627..bad56b56 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include #include -#define DEFAULT_MAX_HISTORY 60 +#define DEFAULT_MAX_HISTORY 110 using namespace std; @@ -50,8 +50,11 @@ Grey::Grey(void) : allowLitHaig(true), allowLbr(true), allowMcClellan(true), + allowSheng(true), allowPuff(true), + allowLiteral(true), allowRose(true), + 
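/*
 * Minimal sketch (assumed 64-byte alignment, C++17) of the ownership pattern
 * behind the hunk above: the explicit aligned_free() calls disappear because
 * the confirm/flood/link buffers now arrive as smart pointers, so only .get()
 * is needed for the memcpy and the memory is released automatically. The
 * library's actual aligned_unique_ptr/allocator are not reproduced here.
 */
#include <cstddef>
#include <memory>
#include <new>

struct AlignedDelete {
    void operator()(unsigned char *p) const noexcept {
        ::operator delete[](p, std::align_val_t{64});
    }
};
using aligned_bytes = std::unique_ptr<unsigned char[], AlignedDelete>;

static aligned_bytes make_aligned(std::size_t bytes) {
    // over-aligned array allocation; freed by AlignedDelete when the owner dies
    return aligned_bytes(new (std::align_val_t{64}) unsigned char[bytes]);
}

// usage shape: auto buf = make_aligned(sz); memcpy(dst, buf.get(), sz);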
allowViolet(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), allowAnchoredAcyclic(true), @@ -60,6 +63,13 @@ Grey::Grey(void) : allowDecoratedLiteral(true), allowNoodle(true), fdrAllowTeddy(true), + violetAvoidSuffixes(true), + violetAvoidWeakInfixes(true), + violetDoubleCut(true), + violetExtractStrongLiterals(true), + violetLiteralChains(true), + violetDoubleCutLiteralLen(3), + violetEarlyCleanLiteralLen(6), puffImproveHead(true), castleExclusive(true), mergeSEP(true), /* short exhaustible passthroughs */ @@ -81,7 +91,6 @@ Grey::Grey(void) : allowZombies(true), floodAsPuffette(false), nfaForceSize(0), - nfaForceShifts(0), maxHistoryAvailable(DEFAULT_MAX_HISTORY), minHistoryAvailable(0), /* debugging only */ maxAnchoredRegion(63), /* for rose's atable to run over */ @@ -119,6 +128,7 @@ Grey::Grey(void) : equivalenceEnable(true), allowSmallWrite(true), // McClellan dfas for small patterns + allowSmallWriteSheng(false), // allow use of Sheng for SMWR smallWriteLargestBuffer(70), // largest buffer that can be // considered a small write @@ -126,6 +136,10 @@ Grey::Grey(void) : // are given to rose &co smallWriteLargestBufferBad(35), limitSmallWriteOutfixSize(1048576), // 1 MB + smallWriteMaxPatterns(10000), + smallWriteMaxLiterals(10000), + allowTamarama(true), // Tamarama engine + tamaChunkSize(100), dumpFlags(0), limitPatternCount(8000000), // 8M patterns limitPatternLength(16000), // 16K bytes @@ -202,8 +216,11 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowLitHaig); G_UPDATE(allowLbr); G_UPDATE(allowMcClellan); + G_UPDATE(allowSheng); G_UPDATE(allowPuff); + G_UPDATE(allowLiteral); G_UPDATE(allowRose); + G_UPDATE(allowViolet); G_UPDATE(allowExtendedNFA); G_UPDATE(allowLimExNFA); G_UPDATE(allowAnchoredAcyclic); @@ -212,6 +229,13 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowDecoratedLiteral); G_UPDATE(allowNoodle); G_UPDATE(fdrAllowTeddy); + G_UPDATE(violetAvoidSuffixes); + G_UPDATE(violetAvoidWeakInfixes); + G_UPDATE(violetDoubleCut); + G_UPDATE(violetExtractStrongLiterals); + G_UPDATE(violetLiteralChains); + G_UPDATE(violetDoubleCutLiteralLen); + G_UPDATE(violetEarlyCleanLiteralLen); G_UPDATE(puffImproveHead); G_UPDATE(castleExclusive); G_UPDATE(mergeSEP); @@ -232,7 +256,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowZombies); G_UPDATE(floodAsPuffette); G_UPDATE(nfaForceSize); - G_UPDATE(nfaForceShifts); G_UPDATE(highlanderSquash); G_UPDATE(maxHistoryAvailable); G_UPDATE(minHistoryAvailable); @@ -270,9 +293,14 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(miracleHistoryBonus); G_UPDATE(equivalenceEnable); G_UPDATE(allowSmallWrite); + G_UPDATE(allowSmallWriteSheng); G_UPDATE(smallWriteLargestBuffer); G_UPDATE(smallWriteLargestBufferBad); G_UPDATE(limitSmallWriteOutfixSize); + G_UPDATE(smallWriteMaxPatterns); + G_UPDATE(smallWriteMaxLiterals); + G_UPDATE(allowTamarama); + G_UPDATE(tamaChunkSize); G_UPDATE(limitPatternCount); G_UPDATE(limitPatternLength); G_UPDATE(limitGraphVertices); @@ -309,7 +337,9 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowLitHaig = false; g->allowMcClellan = false; g->allowPuff = false; + g->allowLiteral = false; g->allowRose = false; + g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; done = true; @@ -325,7 +355,9 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowLitHaig = false; g->allowMcClellan = true; g->allowPuff = false; + g->allowLiteral = false; g->allowRose = false; + 
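/*
 * Rough sketch of what the G_UPDATE block above implies: each registered Grey
 * field (now including the Violet, Sheng and Tamarama controls) can be
 * overridden by name from a "name:value;" string. The exact separator and
 * error handling of applyGreyOverrides are assumed here, not copied from the
 * library.
 */
#include <cctype>
#include <map>
#include <sstream>
#include <string>

static std::map<std::string, unsigned long>
parse_overrides(const std::string &s) {
    std::map<std::string, unsigned long> kv;
    std::istringstream ss(s);
    std::string item;
    while (std::getline(ss, item, ';')) {
        const auto colon = item.find(':');
        if (colon == std::string::npos || colon + 1 >= item.size() ||
            !std::isdigit(static_cast<unsigned char>(item[colon + 1]))) {
            continue; // skip malformed entries in this sketch
        }
        kv[item.substr(0, colon)] = std::stoul(item.substr(colon + 1));
    }
    return kv;
}

// e.g. parse_overrides("allowViolet:0;tamaChunkSize:50") maps
// "allowViolet" -> 0 and "tamaChunkSize" -> 50.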
g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; done = true; @@ -341,7 +373,9 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowLitHaig = false; g->allowMcClellan = true; g->allowPuff = false; + g->allowLiteral = false; g->allowRose = false; + g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; done = true; diff --git a/src/grey.h b/src/grey.h index a2261052..90f5f826 100644 --- a/src/grey.h +++ b/src/grey.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,8 +50,11 @@ struct Grey { bool allowLitHaig; bool allowLbr; bool allowMcClellan; + bool allowSheng; bool allowPuff; + bool allowLiteral; bool allowRose; + bool allowViolet; bool allowExtendedNFA; bool allowLimExNFA; bool allowAnchoredAcyclic; @@ -62,6 +65,14 @@ struct Grey { bool allowNoodle; bool fdrAllowTeddy; + u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */ + bool violetAvoidWeakInfixes; + bool violetDoubleCut; + bool violetExtractStrongLiterals; + bool violetLiteralChains; + u32 violetDoubleCutLiteralLen; + u32 violetEarlyCleanLiteralLen; + bool puffImproveHead; bool castleExclusive; // enable castle mutual exclusion analysis @@ -88,7 +99,6 @@ struct Grey { bool floodAsPuffette; u32 nfaForceSize; - u32 nfaForceShifts; u32 maxHistoryAvailable; u32 minHistoryAvailable; @@ -140,9 +150,16 @@ struct Grey { // SmallWrite engine bool allowSmallWrite; + bool allowSmallWriteSheng; u32 smallWriteLargestBuffer; // largest buffer that can be small write u32 smallWriteLargestBufferBad;// largest buffer that can be small write u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs + u32 smallWriteMaxPatterns; // only try small writes if fewer patterns + u32 smallWriteMaxLiterals; // only try small writes if fewer literals + + // Tamarama engine + bool allowTamarama; + u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama enum DumpFlags { DUMP_NONE = 0, diff --git a/src/hs.cpp b/src/hs.cpp index 3680e79e..07f6d2c1 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -219,7 +219,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, : get_current_target(); CompileContext cc(isStreaming, isVectored, target_info, g); - NG ng(cc, somPrecision); + NG ng(cc, elements, somPrecision); try { for (unsigned int i = 0; i < elements; i++) { diff --git a/src/hs_compile.h b/src/hs_compile.h index 48168cc2..c5212cbe 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -98,6 +98,12 @@ extern "C" * The library was unable to allocate temporary storage used during * compilation time. 
* + * - *Allocator returned misaligned memory* + * + * The memory allocator (either malloc() or the allocator set with @ref + * hs_set_allocator()) did not correctly return memory suitably aligned + * for the largest representable data type on this platform. + * * - *Internal error* * * An unexpected error occurred: if this error is reported, please contact diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 054f05c4..2e16f1ac 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include "fdr/fdr.h" #include "nfa/accel.h" #include "nfa/shufti.h" +#include "nfa/truffle.h" #include "nfa/vermicelli.h" #include @@ -64,8 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr, case ACCEL_SHUFTI: DEBUG_PRINTF("single shufti\n"); return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); + case ACCEL_TRUFFLE: + DEBUG_PRINTF("truffle\n"); + return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); default: /* no acceleration, fall through and return current ptr */ + DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type); + assert(aux->accel_type == ACCEL_NONE); return ptr; } } diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index b3978017..b1814245 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -35,9 +35,11 @@ #include "hwlm_internal.h" #include "noodle_engine.h" #include "noodle_build.h" +#include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" #include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" #include "util/alloc.h" #include "util/bitutils.h" #include "util/charreach.h" @@ -62,6 +64,28 @@ namespace ue2 { static const unsigned int MAX_ACCEL_OFFSET = 16; static const unsigned int MAX_SHUFTI_WIDTH = 240; +static +size_t mask_overhang(const hwlmLiteral &lit) { + size_t msk_true_size = lit.msk.size(); + assert(msk_true_size <= HWLM_MASKLEN); + assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); + for (u8 c : lit.msk) { + if (!c) { + msk_true_size--; + } else { + break; + } + } + + if (lit.s.length() >= msk_true_size) { + return 0; + } + + /* only short literals should be able to have a mask which overhangs */ + assert(lit.s.length() < MAX_ACCEL_OFFSET); + return msk_true_size - lit.s.length(); +} + static bool findDVerm(const vector &lits, AccelAux *aux) { const hwlmLiteral &first = *lits.front(); @@ -167,7 +191,8 @@ bool findDVerm(const vector &lits, AccelAux *aux) { } if (found) { - curr.max_offset = MAX(curr.max_offset, j); + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); break; } } @@ -288,8 +313,8 @@ bool findSVerm(const vector &lits, AccelAux *aux) { } if (found) { - curr.max_offset = MAX(curr.max_offset, j); - break; + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); } } } @@ -346,6 +371,25 @@ void filterLits(const vector &lits, hwlm_group_t expected_groups, } } +static +bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit, + u32 max_offset) { + for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { + unsigned char c = lit.s[i]; + if (lit.nocase) { + if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { + return true; + } + } else { + if (cr.test(c)) { + return true; + } + } + } + + 
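/*
 * Self-contained restatement, using plain std types instead of hwlmLiteral, of
 * the mask_overhang() helper introduced above: leading zero bytes of the msk
 * are ignored, and any remaining mask bytes beyond the literal's own length
 * "overhang" the start of the match, which is why the accel code below widens
 * max_offset by that amount.
 */
#include <cstddef>
#include <string>
#include <vector>

static std::size_t mask_overhang(const std::string &lit,
                                 const std::vector<unsigned char> &msk) {
    std::size_t true_size = msk.size();
    for (unsigned char c : msk) {
        if (c) {
            break;
        }
        true_size--; // leading zero mask bytes constrain nothing
    }
    return lit.size() >= true_size ? 0 : true_size - lit.size();
}

// e.g. a 2-byte literal whose 8-byte mask has 3 trailing non-zero bytes
// overhangs the literal start by one byte:
// mask_overhang("ab", {0, 0, 0, 0, 0, 0xff, 0xff, 0xff}) == 1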
return false; +} + static void findForwardAccelScheme(const vector &lits, hwlm_group_t expected_groups, AccelAux *aux) { @@ -363,29 +407,45 @@ void findForwardAccelScheme(const vector &lits, return; } + /* look for shufti/truffle */ + vector reach(MAX_ACCEL_OFFSET, CharReach()); for (const auto &lit : lits) { if (!(lit.groups & expected_groups)) { continue; } - for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) { - unsigned char c = lit.s[i]; + u32 overhang = mask_overhang(lit); + for (u32 i = 0; i < overhang; i++) { + /* this offset overhangs the start of the real literal; look at the + * msk/cmp */ + for (u32 j = 0; j < N_CHARS; j++) { + if ((j & lit.msk[i]) == lit.cmp[i]) { + reach[i].set(j); + } + } + } + for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { + CharReach &reach_i = reach[i]; + u32 i_effective = i - overhang; + + if (litGuardedByCharReach(reach_i, lit, i_effective)) { + continue; + } + unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective] + : lit.s.back(); if (lit.nocase) { - DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i); - DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i); - reach[i].set(mytoupper(c)); - reach[i].set(mytolower(c)); + reach_i.set(mytoupper(c)); + reach_i.set(mytolower(c)); } else { - DEBUG_PRINTF("adding %02hhx to %u\n", c, i); - reach[i].set(c); + reach_i.set(c); } } } u32 min_count = ~0U; u32 min_offset = ~0U; - for (u32 i = 0; i < min_len; i++) { + for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { size_t count = reach[i].count(); DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, describeClass(reach[i]).c_str(), count); @@ -394,10 +454,9 @@ void findForwardAccelScheme(const vector &lits, min_offset = i; } } - assert(min_offset <= min_len); if (min_count > MAX_SHUFTI_WIDTH) { - DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count); + DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); return; } @@ -410,7 +469,11 @@ void findForwardAccelScheme(const vector &lits, return; } - DEBUG_PRINTF("fail\n"); + truffleBuildMasks(cr, &aux->truffle.mask1, &aux->truffle.mask2); + DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->truffle.accel_type = ACCEL_TRUFFLE; + aux->truffle.offset = verify_u8(min_offset); } static @@ -466,6 +529,10 @@ bool isNoodleable(const vector &lits, stream_control->history_max); return false; } + if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) { + assert(0); + return false; + } } if (!lits.front().msk.empty()) { diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index e2f80a59..1d1ab4e6 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -37,7 +37,6 @@ #include "util/compare.h" #include "util/masked_move.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" #include #include diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c index b3673246..40575409 100644 --- a/src/hwlm/noodle_engine_sse.c +++ b/src/hwlm/noodle_engine_sse.c @@ -115,7 +115,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, v = and128(v, caseMask); } - u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v))); + u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), + eq128(mask2, v))); // mask out where we can't match u32 mask = (0xFFFF >> (16 - l)); @@ -142,7 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, v = and128(v, caseMask); } - u32 z = 
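/*
 * Simplified sketch of the selection logic in findForwardAccelScheme() above,
 * ignoring msk/cmp handling, case-insensitivity and the overhang bookkeeping:
 * for each lookahead offset, union the bytes any literal may present there,
 * then accelerate on the offset whose byte class is narrowest (shufti if it is
 * small enough, otherwise the newly added truffle fallback).
 */
#include <bitset>
#include <cstddef>
#include <string>
#include <vector>

struct OffsetPick {
    std::size_t offset;
    std::size_t count;
};

static OffsetPick pick_accel_offset(const std::vector<std::string> &lits,
                                    std::size_t max_offset) {
    std::vector<std::bitset<256>> reach(max_offset);
    for (const std::string &s : lits) {
        if (s.empty()) {
            continue;
        }
        for (std::size_t i = 0; i < max_offset; i++) {
            // mirrors the lit.s.back() fallback above for offsets past the end
            // of a short literal
            const unsigned char c =
                static_cast<unsigned char>(i < s.size() ? s[i] : s.back());
            reach[i].set(c);
        }
    }
    OffsetPick best{0, 257}; // wider than any real byte class
    for (std::size_t i = 0; i < max_offset; i++) {
        if (reach[i].count() < best.count) {
            best = {i, reach[i].count()};
        }
    }
    return best; // caller: shufti if best.count is small, else truffle
}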
movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v))); + u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), + eq128(mask2, v))); // mask out where we can't match u32 buf_off = start - offset; diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/accel_dfa_build_strat.cpp old mode 100644 new mode 100755 similarity index 58% rename from src/nfa/mcclellancompile_accel.cpp rename to src/nfa/accel_dfa_build_strat.cpp index c5325fcc..ba21adc7 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,18 +26,20 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "mcclellancompile_accel.h" - -#include "mcclellancompile_util.h" +#include "accel_dfa_build_strat.h" +#include "accel.h" #include "grey.h" #include "nfagraph/ng_limex_accel.h" +#include "shufticompile.h" +#include "trufflecompile.h" #include "util/charreach.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/verify_types.h" -#include #include +#include #define PATHS_LIMIT 500 @@ -46,14 +48,13 @@ using namespace std; namespace ue2 { namespace { - struct path { vector reach; dstate_id_t dest = DEAD_STATE; - explicit path(dstate_id_t base) : dest(base) {} + explicit path(dstate_id_t base) : dest(base) { + } +}; }; - -} static UNUSED string describeClasses(const vector &v) { @@ -85,8 +86,8 @@ bool is_useful_path(const vector &good, const path &p) { goto next; } } - DEBUG_PRINTF("better: [%s] -> %u\n", - describeClasses(g.reach).c_str(), g.dest); + DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(), + g.dest); return false; next:; @@ -106,8 +107,7 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) { static void extend(const raw_dfa &rdfa, const path &p, - map > &all, - vector &out) { + map> &all, vector &out) { dstate s = rdfa.states[p.dest]; if (!p.reach.empty() && p.reach.back().none()) { @@ -147,17 +147,17 @@ void extend(const raw_dfa &rdfa, const path &p, } DEBUG_PRINTF("----good: [%s] -> %u\n", - describeClasses(pp.reach).c_str(), pp.dest); + describeClasses(pp.reach).c_str(), pp.dest); all[e.first].push_back(pp); out.push_back(pp); } } static -vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, - u32 len) { - vector paths{ path(base) }; - map > all; +vector> generate_paths(const raw_dfa &rdfa, + dstate_id_t base, u32 len) { + vector paths{path(base)}; + map> all; all[base].push_back(path(base)); for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { vector next_gen; @@ -170,7 +170,7 @@ vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, dump_paths(paths); - vector > rv; + vector> rv; for (auto &p : paths) { rv.push_back(move(p.reach)); } @@ -181,16 +181,58 @@ static AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, u32 max_allowed_accel_offset) { DEBUG_PRINTF("looking for accel for %hu\n", base); - vector > paths = generate_paths(rdfa, base, - max_allowed_accel_offset + 1); + vector> paths = + generate_paths(rdfa, base, max_allowed_accel_offset + 1); AccelScheme as = findBestAccelScheme(paths, CharReach(), true); DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); return as; } +static UNUSED +bool better(const AccelScheme &a, const AccelScheme &b) { + if (!a.double_byte.empty() && 
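/*
 * Sketch of the two-byte "noodle" probe that the hunk above rewrites in terms
 * of lshiftbyte_m128(): mark bytes equal to c1, shift those marks up one lane,
 * and AND with the marks for c2, so a set bit in the movemask means c1 is
 * immediately followed by c2 at that position. Raw SSE2 intrinsics are used
 * here instead of the library's simd_utils wrappers.
 */
#include <emmintrin.h>
#include <cstdint>

static uint32_t double_byte_hits(const uint8_t *p /* >= 16 readable bytes */,
                                 uint8_t c1, uint8_t c2) {
    const __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
    const __m128i e1 = _mm_cmpeq_epi8(v, _mm_set1_epi8(static_cast<char>(c1)));
    const __m128i e2 = _mm_cmpeq_epi8(v, _mm_set1_epi8(static_cast<char>(c2)));
    const __m128i pair = _mm_and_si128(_mm_slli_si128(e1, 1), e2);
    // bit i set => p[i - 1] == c1 && p[i] == c2
    return static_cast<uint32_t>(_mm_movemask_epi8(pair));
}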
b.double_byte.empty()) { + return true; + } + + if (!b.double_byte.empty()) { + return false; + } + + return a.cr.count() < b.cr.count(); +} + +static +vector reverse_alpha_remapping(const raw_dfa &rdfa) { + vector rv(rdfa.alpha_size - 1); /* TOP not required */ + + for (u32 i = 0; i < N_CHARS; i++) { + rv.at(rdfa.alpha_remap[i]).set(i); + } + + return rv; +} + +static +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() && + info.double_cr.count() < info.double_byte.size() && + info.double_cr.count() <= 2 && !info.double_byte.empty(); +} + +static +bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { + u16 top_remap = raw.alpha_remap[TOP]; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + if (i != top_remap && raw.states[s].next[i] == s) { + return true; + } + } + return false; +} + static vector find_nonexit_symbols(const raw_dfa &rdfa, - const CharReach &escape) { + const CharReach &escape) { set rv; CharReach nonexit = ~escape; for (auto i = nonexit.find_first(); i != CharReach::npos; @@ -201,9 +243,58 @@ vector find_nonexit_symbols(const raw_dfa &rdfa, return vector(rv.begin(), rv.end()); } +static +dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { + if (raw.start_floating != DEAD_STATE) { + DEBUG_PRINTF("has floating start\n"); + return raw.start_floating; + } + + DEBUG_PRINTF("looking for SDS proxy\n"); + + dstate_id_t s = raw.start_anchored; + + if (has_self_loop(s, raw)) { + return s; + } + + u16 top_remap = raw.alpha_remap[TOP]; + + ue2::unordered_set seen; + while (true) { + seen.insert(s); + DEBUG_PRINTF("basis %hu\n", s); + + /* check if we are connected to a state with a self loop */ + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t t = raw.states[s].next[i]; + if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { + return t; + } + } + + /* find a neighbour to use as a basis for looking for the sds proxy */ + dstate_id_t t = DEAD_STATE; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t tt = raw.states[s].next[i]; + if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { + t = tt; + break; + } + } + + if (t == DEAD_STATE) { + /* we were unable to find a state to use as a SDS proxy */ + return DEAD_STATE; + } + + s = t; + } +} + static set find_region(const raw_dfa &rdfa, dstate_id_t base, - const AccelScheme &ei) { + const AccelScheme &ei) { DEBUG_PRINTF("looking for region around %hu\n", base); set region = {base}; @@ -236,98 +327,10 @@ set find_region(const raw_dfa &rdfa, dstate_id_t base, return region; } -static -bool better(const AccelScheme &a, const AccelScheme &b) { - if (!a.double_byte.empty() && b.double_byte.empty()) { - return true; - } - - if (!b.double_byte.empty()) { - return false; - } - - return a.cr.count() < b.cr.count(); -} - -static -vector reverse_alpha_remapping(const raw_dfa &rdfa) { - vector rv(rdfa.alpha_size - 1); /* TOP not required */ - - for (u32 i = 0; i < N_CHARS; i++) { - rv.at(rdfa.alpha_remap[i]).set(i); - } - - return rv; -} - -map populateAccelerationInfo(const raw_dfa &rdfa, - const dfa_build_strat &strat, - const Grey &grey) { - map rv; - if (!grey.accelerateDFA) { - return rv; - } - - dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); - DEBUG_PRINTF("sds %hu\n", sds_proxy); - - for (size_t i = 0; i < rdfa.states.size(); i++) { - if (i == DEAD_STATE) { - continue; - } - - /* Note on report acceleration states: While we can't accelerate while we - * are spamming out callbacks, the QR code paths don't raise reports - * during scanning so they 
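/*
 * Toy model of get_sds_or_proxy() above, without TOP-symbol handling: if the
 * DFA has no floating start state, walk out from the anchored start looking
 * for a reachable state with a self loop; that state then stands in for the
 * "start of dot-star" state when deciding how aggressive acceleration may be.
 * The transition-table shape and DEAD encoding here are assumptions for the
 * sketch, not the library's raw_dfa layout.
 */
#include <cstdint>
#include <unordered_set>
#include <vector>

using State = std::uint32_t;
static constexpr State DEAD = 0;

static bool has_self_loop(State s, const std::vector<std::vector<State>> &next) {
    for (State t : next[s]) {
        if (t == s) {
            return true;
        }
    }
    return false;
}

static State sds_proxy(State anchored_start,
                       const std::vector<std::vector<State>> &next) {
    State s = anchored_start;
    std::unordered_set<State> seen;
    while (s != DEAD && !seen.count(s)) {
        if (has_self_loop(s, next)) {
            return s;
        }
        seen.insert(s);
        // prefer a neighbour that already has a self loop, otherwise take any
        // unvisited neighbour as the next basis for the search
        State basis = DEAD;
        for (State t : next[s]) {
            if (t == DEAD || seen.count(t)) {
                continue;
            }
            if (has_self_loop(t, next)) {
                return t;
            }
            if (basis == DEAD) {
                basis = t;
            }
        }
        s = basis; // DEAD here means no usable proxy was found
    }
    return DEAD;
}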
can accelerate report states. */ - if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { - continue; - } - - size_t single_limit = i == sds_proxy ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR - : ACCEL_DFA_MAX_STOP_CHAR; - DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); - - AccelScheme ei = strat.find_escape_strings(i); - if (ei.cr.count() > single_limit) { - DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, - ei.cr.count()); - continue; - } - - DEBUG_PRINTF("state %zu should be accelerable %zu\n", - i, ei.cr.count()); - - rv[i] = ei; - } - - /* provide accleration states to states in the region of sds */ - if (contains(rv, sds_proxy)) { - AccelScheme sds_ei = rv[sds_proxy]; - sds_ei.double_byte.clear(); /* region based on single byte scheme - * may differ from double byte */ - DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", - sds_ei.cr.count()); - auto sds_region = find_region(rdfa, sds_proxy, sds_ei); - for (auto s : sds_region) { - if (!contains(rv, s) || better(sds_ei, rv[s])) { - rv[s] = sds_ei; - } - } - } - - return rv; -} - -static -bool double_byte_ok(const AccelScheme &info) { - return !info.double_byte.empty() - && info.double_cr.count() < info.double_byte.size() - && info.double_cr.count() <= 2 && !info.double_byte.empty(); -} - -AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx, - u32 max_allowed_accel_offset) { +AccelScheme +accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { AccelScheme rv; + const raw_dfa &rdfa = get_raw(); rv.cr.clear(); rv.offset = 0; const dstate &raw = rdfa.states[this_idx]; @@ -354,7 +357,7 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { DEBUG_PRINTF("leads to report\n"); - outs2_broken = true; /* cannot accelerate over reports */ + outs2_broken = true; /* cannot accelerate over reports */ continue; } succs[next_id] |= cr_i; @@ -402,14 +405,12 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); DEBUG_PRINTF("broken %d\n", outs2_broken); - if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) - && this_idx == rdfa.start_floating - && this_idx != DEAD_STATE) { + if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) && + this_idx == rdfa.start_floating && this_idx != DEAD_STATE) { DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); - auto offset = look_for_offset_accel(rdfa, this_idx, - max_allowed_accel_offset); - DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), - rv.cr.count()); + auto offset = + look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel()); + DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count()); if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { DEBUG_PRINTF("using offset accel\n"); rv = offset; @@ -419,4 +420,172 @@ AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx return rv; } +void +accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, + const AccelScheme &info, + void *accel_out) { + AccelAux *accel = (AccelAux *)accel_out; + + DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, + info.double_offset); + accel->generic.offset = verify_u8(info.offset); + + if (double_byte_ok(info) && info.double_cr.none() && + info.double_byte.size() == 1) { + accel->accel_type = ACCEL_DVERM; + accel->dverm.c1 = 
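/*
 * Thumbnail of the quantity both the old populateAccelerationInfo() and the
 * new find_escape_strings() are estimating: a state is worth accelerating when
 * the set of bytes that move the DFA off that state (its "escape" class) is
 * small, since the runtime can then skip ahead with vermicelli/shufti/truffle
 * until one of those bytes appears. Byte-indexed toy table; the real code works
 * over the remapped alphabet and merges successor classes.
 */
#include <array>
#include <bitset>
#include <cstdint>
#include <vector>

using Row = std::array<uint32_t, 256>; // one row of a transition table

static std::bitset<256> escape_set(uint32_t s, const std::vector<Row> &next) {
    std::bitset<256> esc;
    for (uint32_t c = 0; c < 256; c++) {
        if (next[s][c] != s) {
            esc.set(c); // byte c moves the DFA off state s
        }
    }
    return esc;
}

// escape_set(s).count() is compared against a budget; the SDS proxy state is
// allowed a larger budget (max_floating_stop_char) than ordinary states.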
info.double_byte.begin()->first; + accel->dverm.c2 = info.double_byte.begin()->second; + accel->dverm.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); + return; + } + + if (double_byte_ok(info) && info.double_cr.none() && + (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { + bool ok = true; + + assert(!info.double_byte.empty()); + u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; + u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; + + for (const pair &p : info.double_byte) { + if ((p.first & CASE_CLEAR) != firstC || + (p.second & CASE_CLEAR) != secondC) { + ok = false; + break; + } + } + + if (ok) { + accel->accel_type = ACCEL_DVERM_NOCASE; + accel->dverm.c1 = firstC; + accel->dverm.c2 = secondC; + accel->dverm.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); + return; + } + + u8 m1; + u8 m2; + if (buildDvermMask(info.double_byte, &m1, &m2)) { + accel->accel_type = ACCEL_DVERM_MASKED; + accel->dverm.offset = verify_u8(info.double_offset); + accel->dverm.c1 = info.double_byte.begin()->first & m1; + accel->dverm.c2 = info.double_byte.begin()->second & m2; + accel->dverm.m1 = m1; + accel->dverm.m2 = m2; + DEBUG_PRINTF( + "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + accel->dverm.c1, accel->dverm.c2); + return; + } + } + + if (double_byte_ok(info) && + shuftiBuildDoubleMasks(info.double_cr, info.double_byte, + &accel->dshufti.lo1, &accel->dshufti.hi1, + &accel->dshufti.lo2, &accel->dshufti.hi2)) { + accel->accel_type = ACCEL_DSHUFTI; + accel->dshufti.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is double shufti\n", this_idx); + return; + } + + if (info.cr.none()) { + accel->accel_type = ACCEL_RED_TAPE; + DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" + " from which there is no escape\n", + this_idx); + return; + } + + if (info.cr.count() == 1) { + accel->accel_type = ACCEL_VERM; + accel->verm.c = info.cr.find_first(); + DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); + return; + } + + if (info.cr.count() == 2 && info.cr.isCaselessChar()) { + accel->accel_type = ACCEL_VERM_NOCASE; + accel->verm.c = info.cr.find_first() & CASE_CLEAR; + DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); + return; + } + + if (info.cr.count() > max_floating_stop_char()) { + accel->accel_type = ACCEL_NONE; + DEBUG_PRINTF("state %hu is too broad\n", this_idx); + return; + } + + accel->accel_type = ACCEL_SHUFTI; + if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) { + DEBUG_PRINTF("state %hu is shufti\n", this_idx); + return; + } + + assert(!info.cr.none()); + accel->accel_type = ACCEL_TRUFFLE; + truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); + DEBUG_PRINTF("state %hu is truffle\n", this_idx); } + +map +accel_dfa_build_strat::getAccelInfo(const Grey &grey) { + map rv; + raw_dfa &rdfa = get_raw(); + if (!grey.accelerateDFA) { + return rv; + } + + dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); + DEBUG_PRINTF("sds %hu\n", sds_proxy); + + for (size_t i = 0; i < rdfa.states.size(); i++) { + if (i == DEAD_STATE) { + continue; + } + + /* Note on report acceleration states: While we can't accelerate while + * we + * are spamming out callbacks, the QR code paths don't raise reports + * during scanning so they can accelerate report states. 
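/*
 * Condensed view, with assumed thresholds, of the single-byte end of the
 * cascade in buildAccel() above: pick the cheapest scheme that can represent
 * the escape class, falling back to truffle, which can represent any class.
 * The real function also tries the double-byte (DVERM/DSHUFTI) schemes first
 * and emits ACCEL_NONE when the class exceeds the stop-character budget.
 */
#include <bitset>
#include <cstddef>

enum class AccelKind { RED_TAPE, VERM, VERM_NOCASE, SHUFTI, TRUFFLE };

static AccelKind classify(const std::bitset<256> &cr, bool caseless_pair,
                          std::size_t shufti_limit) {
    if (cr.none()) {
        return AccelKind::RED_TAPE;    // dead end: nothing can escape
    }
    if (cr.count() == 1) {
        return AccelKind::VERM;        // single byte: vermicelli
    }
    if (cr.count() == 2 && caseless_pair) {
        return AccelKind::VERM_NOCASE; // 'x'/'X': caseless vermicelli
    }
    if (cr.count() <= shufti_limit) {
        return AccelKind::SHUFTI;      // small class: shufti masks
    }
    return AccelKind::TRUFFLE;         // anything else: truffle masks
}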
*/ + if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { + continue; + } + + size_t single_limit = + i == sds_proxy ? max_floating_stop_char() : max_stop_char(); + DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); + + AccelScheme ei = find_escape_strings(i); + if (ei.cr.count() > single_limit) { + DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, + ei.cr.count()); + continue; + } + + DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count()); + + rv[i] = ei; + } + + /* provide accleration states to states in the region of sds */ + if (contains(rv, sds_proxy)) { + AccelScheme sds_ei = rv[sds_proxy]; + sds_ei.double_byte.clear(); /* region based on single byte scheme + * may differ from double byte */ + DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", + sds_ei.cr.count()); + auto sds_region = find_region(rdfa, sds_proxy, sds_ei); + for (auto s : sds_region) { + if (!contains(rv, s) || better(sds_ei, rv[s])) { + rv[s] = sds_ei; + } + } + } + + return rv; +} +}; diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h new file mode 100755 index 00000000..3cfaf272 --- /dev/null +++ b/src/nfa/accel_dfa_build_strat.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ACCEL_DFA_BUILD_STRAT_H +#define ACCEL_DFA_BUILD_STRAT_H + +#include "rdfa.h" +#include "dfa_build_strat.h" +#include "ue2common.h" +#include "util/accel_scheme.h" + +#include + +namespace ue2 { + +class ReportManager; +struct Grey; + +class accel_dfa_build_strat : public dfa_build_strat { +public: + explicit accel_dfa_build_strat(const ReportManager &rm_in) + : dfa_build_strat(rm_in) {} + virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const; + virtual size_t accelSize(void) const = 0; + virtual u32 max_allowed_offset_accel() const = 0; + virtual u32 max_stop_char() const = 0; + virtual u32 max_floating_stop_char() const = 0; + virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, + void *accel_out); + virtual std::map getAccelInfo(const Grey &grey); +}; + +} // namespace ue2 + +#endif // ACCEL_DFA_BUILD_STRAT_H diff --git a/src/nfa/callback.h b/src/nfa/callback.h index dfcd1b9f..9bdaa8d1 100644 --- a/src/nfa/callback.h +++ b/src/nfa/callback.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,30 +37,26 @@ /** \brief The type for an NFA callback. * - * This is a function that takes as arguments the current offset where the - * match occurs, the id of the match and the context pointer that was passed - * into the NFA API function that executed the NFA. + * This is a function that takes as arguments the current start and end offsets + * where the match occurs, the id of the match and the context pointer that was + * passed into the NFA API function that executed the NFA. * - * The offset where the match occurs will be the offset after the character - * that caused the match. Thus, if we have a buffer containing 'abc', then a - * pattern that matches an empty string will have an offset of 0, a pattern - * that matches 'a' will have an offset of 1, and a pattern that matches 'abc' - * will have an offset of 3, which will be a value that is 'beyond' the size of - * the buffer. That is, if we have n characters in the buffer, there are n+1 - * different potential offsets for matches. + * The start offset is the "start of match" (SOM) offset for the match. It is + * only provided by engines that natively support SOM tracking (e.g. Gough). + * + * The end offset will be the offset after the character that caused the match. + * Thus, if we have a buffer containing 'abc', then a pattern that matches an + * empty string will have an offset of 0, a pattern that matches 'a' will have + * an offset of 1, and a pattern that matches 'abc' will have an offset of 3, + * which will be a value that is 'beyond' the size of the buffer. That is, if + * we have n characters in the buffer, there are n+1 different potential + * offsets for matches. * * This function should return an int - currently the possible return values * are 0, which means 'stop running the engine' or non-zero, which means * 'continue matching'. */ -typedef int (*NfaCallback)(u64a offset, ReportID id, void *context); - -/** \brief The type for an NFA callback which also tracks start of match. 
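/*
 * Usage sketch for the unified callback documented in the callback.h hunk
 * below: NfaCallback now receives both a start ("SOM") and an end offset, with
 * engines that do not track SOM passing 0 for the start. The u64a/ReportID
 * stand-ins below only make the example self-contained; they mirror, but are
 * not, the library's internal typedefs.
 */
#include <cstdint>
#include <cstdio>

using u64a = std::uint64_t;
using ReportID = std::uint32_t;

static int onNfaMatch(u64a start, u64a end, ReportID id, void *context) {
    auto *count = static_cast<unsigned *>(context);
    ++*count;
    std::printf("report %u matched over [%llu, %llu)\n", id,
                static_cast<unsigned long long>(start),
                static_cast<unsigned long long>(end));
    return 1; // non-zero: keep matching; 0 halts the engine
}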
- * - * see \ref NfaCallback - */ -typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id, - void *context); +typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context); /** * standard \ref NfaCallback return value indicating that engine execution diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 13a44a97..6a72ae31 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -98,7 +98,7 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q, if (match == REPEAT_MATCH) { DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, subIdx, sub->report); - if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) { + if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } @@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state, i = mmbit_iterate(matching, c->numRepeats, i)) { const struct SubCastle *sub = getSubCastle(c, i); DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i); - if (cb(offset, sub->report, ctx) == MO_HALT_MATCHING) { + if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) { DEBUG_PRINTF("caller told us to halt\n"); return MO_HALT_MATCHING; } @@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, return castleInAccept(c, q, report, q_cur_offset(q)); } +char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + assert(n->type == CASTLE_NFA_0); + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); + const u64a offset = q_cur_offset(q); + DEBUG_PRINTF("offset=%llu\n", offset); + + if (c->exclusive) { + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + DEBUG_PRINTF("subcastle %u\n", activeIdx); + const struct SubCastle *sub = getSubCastle(c, activeIdx); + if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) { + return 1; + } + } + } + + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + DEBUG_PRINTF("subcastle %u\n", i); + const struct SubCastle *sub = getSubCastle(c, i); + if (subCastleInAccept(c, q, sub->report, offset, i)) { + return 1; + } + } + } + + return 0; +} + + char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) { assert(n && q); assert(n->type == CASTLE_NFA_0); diff --git a/src/nfa/castle.h b/src/nfa/castle.h index 8fc3514b..84d79097 100644 --- a/src/nfa/castle.h +++ b/src/nfa/castle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q); char 
nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index dd0e369f..fd1521a5 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,8 @@ namespace ue2 { -void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) { +void nfaExecCastle0_dumpDot(const struct NFA *, FILE *, + UNUSED const std::string &base) { // No GraphViz output for Castles. } diff --git a/src/nfa/castle_dump.h b/src/nfa/castle_dump.h index c0b1f899..94dadec0 100644 --- a/src/nfa/castle_dump.h +++ b/src/nfa/castle_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,14 @@ #if defined(DUMP_SUPPORT) #include +#include struct NFA; namespace ue2 { -void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file); +void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/dfa_build_strat.cpp b/src/nfa/dfa_build_strat.cpp new file mode 100755 index 00000000..d4d418aa --- /dev/null +++ b/src/nfa/dfa_build_strat.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "dfa_build_strat.h" + +namespace ue2 { + +// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa +raw_report_info::~raw_report_info() {} + +dfa_build_strat::~dfa_build_strat() {} + +raw_dfa::~raw_dfa() {} + +} // namespace ue2 diff --git a/src/nfa/dfa_build_strat.h b/src/nfa/dfa_build_strat.h new file mode 100644 index 00000000..cda00162 --- /dev/null +++ b/src/nfa/dfa_build_strat.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
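/*
 * Minimal illustration of the "prevent weak vtables" comment above: defining
 * one virtual member (here the destructor) out of line in exactly one
 * translation unit gives the class a single home for its vtable and RTTI,
 * instead of a weak copy emitted in every TU that includes the header. Names
 * below are illustrative, not the library's classes.
 */

// header (sketch): destructor declared but not defined inline
struct strat_base {
    virtual ~strat_base();
    virtual int cost() const = 0;
};

// exactly one .cpp file: the vtable is anchored here
strat_base::~strat_base() = default;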
+ */ + +#ifndef DFA_BUILD_STRAT_H +#define DFA_BUILD_STRAT_H + +#include "rdfa.h" +#include "ue2common.h" + +#include +#include + +struct NFA; + +namespace ue2 { + +class ReportManager; + +struct raw_report_info { + virtual ~raw_report_info(); + virtual u32 getReportListSize() const = 0; /* in bytes */ + virtual size_t size() const = 0; /* number of lists */ + virtual void fillReportLists(NFA *n, size_t base_offset, + std::vector &ro /* out */) const = 0; +}; + +class dfa_build_strat { +public: + explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} + virtual ~dfa_build_strat(); + virtual raw_dfa &get_raw() const = 0; + virtual std::unique_ptr gatherReports( + std::vector &reports /* out */, + std::vector &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const = 0; +protected: + const ReportManager &rm; +}; + +} // namespace ue2 + +#endif // DFA_BUILD_STRAT_H diff --git a/src/nfa/gough.c b/src/nfa/gough.c index c52bca06..520aca93 100644 --- a/src/nfa/gough.c +++ b/src/nfa/gough.c @@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset, } static really_inline -char doReports(SomNfaCallback cb, void *ctxt, const struct mcclellan *m, +char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m, const struct gough_som_info *som, u16 s, u64a loc, char eod, u16 * const cached_accept_state, u32 * const cached_accept_id, u32 * const cached_accept_som) { @@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset, static really_inline char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som, u16 *state, const u8 *buf, size_t len, u64a offAdj, - SomNfaCallback cb, void *ctxt, const u8 **c_final, + NfaCallback cb, void *ctxt, const u8 **c_final, enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); @@ -461,7 +461,7 @@ with_accel: static really_inline char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som, u8 *state, const u8 *buf, size_t len, u64a offAdj, - SomNfaCallback cb, void *ctxt, const u8 **c_final, + NfaCallback cb, void *ctxt, const u8 **c_final, enum MatchMode mode) { u8 s = *state; const u8 *c = buf, *c_end = buf + len; @@ -595,7 +595,7 @@ with_accel: static never_inline char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, u8 *state, const u8 *buf, size_t len, u64a offAdj, - SomNfaCallback cb, void *ctxt, const u8 **final_point, + NfaCallback cb, void *ctxt, const u8 **final_point, enum MatchMode mode) { return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, mode); @@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, static never_inline char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som, u16 *state, const u8 *buf, size_t len, u64a offAdj, - SomNfaCallback cb, void *ctxt, const u8 **final_point, + NfaCallback cb, void *ctxt, const u8 **final_point, enum MatchMode mode) { return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, mode); @@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) { static really_inline char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, SomNfaCallback cb, void *context, + const u8 *hend, NfaCallback cb, void *context, struct mq *q, s64a end, enum MatchMode mode) { DEBUG_PRINTF("enter\n"); struct gough_som_info *som = getSomInfo(q->state); @@ -755,7 +755,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, 
const u8 *buffer, static really_inline char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, SomNfaCallback cb, void *context, + const u8 *hend, NfaCallback cb, void *context, struct mq *q, s64a end, enum MatchMode mode) { struct gough_som_info *som = getSomInfo(q->state); assert(n->type == GOUGH_NFA_16); @@ -887,7 +887,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -899,7 +899,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -911,7 +911,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -923,7 +923,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -935,7 +935,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -952,7 +952,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) { u64a offset = q->offset; const u8 *buffer = q->buffer; - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -994,7 +994,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *ctxt = q->context; u8 s = *(u8 *)q->state; u64a offset = q_cur_offset(q); @@ -1016,7 +1016,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - SomNfaCallback cb = q->som_cb; + NfaCallback cb = q->cb; void *ctxt = q->context; u16 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); @@ -1048,10 +1048,18 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, return nfaExecMcClellan16_inAccept(n, report, q); } +char 
nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) { + return nfaExecMcClellan8_inAnyAccept(n, q); +} + +char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) { + return nfaExecMcClellan16_inAnyAccept(n, q); +} + static char goughCheckEOD(const struct NFA *nfa, u16 s, const struct gough_som_info *som, - u64a offset, SomNfaCallback cb, void *ctxt) { + u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); @@ -1062,21 +1070,19 @@ char goughCheckEOD(const struct NFA *nfa, u16 s, } char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - UNUSED NfaCallback callback, - SomNfaCallback som_callback, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { const struct gough_som_info *som = getSomInfoConst(state); - return goughCheckEOD(nfa, *(const u8 *)state, som, offset, som_callback, + return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback, context); } char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - UNUSED NfaCallback callback, - SomNfaCallback som_callback, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(ISALIGNED_N(state, 8)); const struct gough_som_info *som = getSomInfoConst(state); - return goughCheckEOD(nfa, *(const u16 *)state, som, offset, som_callback, + return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback, context); } diff --git a/src/nfa/gough.h b/src/nfa/gough.h index 41d4cb5a..a7f48892 100644 --- a/src/nfa/gough.h +++ b/src/nfa/gough.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,13 +39,13 @@ struct mq; char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context); + NfaCallback callback, void *context); char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context); + NfaCallback callback, void *context); char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char 
nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 647dc496..314b6fd0 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -79,9 +79,9 @@ namespace { class gough_build_strat : public mcclellan_build_strat { public: gough_build_strat( - raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm, + raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, const map &accel_info) - : mcclellan_build_strat(r, rm), rdfa(r), gg(g), + : mcclellan_build_strat(r, rm_in), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr gatherReports(vector &reports /* out */, vector &reports_eod /* out */, diff --git a/src/nfa/goughdump.cpp b/src/nfa/goughdump.cpp index f4f15eea..4e6e5425 100644 --- a/src/nfa/goughdump.cpp +++ b/src/nfa/goughdump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -259,7 +259,8 @@ void dumpTransitions(const NFA *nfa, FILE *f, fprintf(f, "\n"); } -void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) { +void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == GOUGH_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -302,7 +303,8 @@ void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) { dumpTextReverse(nfa, f); } -void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) { +void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == GOUGH_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); diff --git a/src/nfa/goughdump.h b/src/nfa/goughdump.h index 5e15356d..b96938e4 100644 --- a/src/nfa/goughdump.h +++ b/src/nfa/goughdump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,12 +33,16 @@ #include "ue2common.h" +#include + struct NFA; namespace ue2 { -void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file); -void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file); +void nfaExecGough8_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); +void nfaExecGough16_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); void nfaExecGough8_dumpText(const NFA *nfa, FILE *file); void nfaExecGough16_dumpText(const NFA *nfa, FILE *file); diff --git a/src/nfa/lbr.c b/src/nfa/lbr.c index 0d69cc2a..07e59239 100644 --- a/src/nfa/lbr.c +++ b/src/nfa/lbr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -293,7 +293,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end, } DEBUG_PRINTF("firing match at %llu\n", i); - if (cb(i, l->report, ctx) == MO_HALT_MATCHING) { + if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h index 
b770477d..a9e42046 100644 --- a/src/nfa/lbr.h +++ b/src/nfa/lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/src/nfa/lbr_common_impl.h b/src/nfa/lbr_common_impl.h index 917a8e91..5ae35431 100644 --- a/src/nfa/lbr_common_impl.h +++ b/src/nfa/lbr_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa, const struct lbr_common *l = getImplNfa(nfa); u64a offset = q_cur_offset(q); DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); - q->cb(offset, l->report, q->context); + q->cb(0, offset, l->report, q->context); return 0; } @@ -94,6 
+94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa, return lbrInAccept(l, lstate, q->streamState, offset, report); } +char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry\n"); + + const struct lbr_common *l = getImplNfa(nfa); + return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q); +} + char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); @@ -206,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q, if (q->report_current) { DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); - int rv = q->cb(q_cur_offset(q), l->report, q->context); + int rv = q->cb(0, q_cur_offset(q), l->report, q->context); q->report_current = 0; if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 3de75333..3412ddf5 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,23 +49,28 @@ namespace ue2 { -void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrDot_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrNVerm_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrShuf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } -void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f) { +void nfaExecLbrTruf_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *f, + UNUSED const std::string &base) { // No impl } diff --git a/src/nfa/lbr_dump.h b/src/nfa/lbr_dump.h index 5f6dd261..06ed51e2 100644 --- a/src/nfa/lbr_dump.h +++ b/src/nfa/lbr_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,16 +32,22 @@ #ifdef DUMP_SUPPORT #include +#include struct NFA; namespace ue2 { -void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecLbrDot_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrVerm_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrNVerm_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrShuf_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecLbrTruf_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void 
nfaExecLbrDot_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrVerm_dumpText(const struct NFA *nfa, FILE *file); void nfaExecLbrNVerm_dumpText(const struct NFA *nfa, FILE *file); diff --git a/src/nfa/limex.h b/src/nfa/limex.h index 2c429a67..ad53503c 100644 --- a/src/nfa/limex.h +++ b/src/nfa/limex.h @@ -30,6 +30,7 @@ #define LIMEX_H #ifdef __cplusplus +#include extern "C" { #endif @@ -40,7 +41,8 @@ extern "C" #define GENERATE_NFA_DUMP_DECL(gf_name) \ } /* extern "C" */ \ namespace ue2 { \ - void gf_name##_dumpDot(const struct NFA *nfa, FILE *file); \ + void gf_name##_dumpDot(const struct NFA *nfa, FILE *file, \ + const std::string &base); \ void gf_name##_dumpText(const struct NFA *nfa, FILE *file); \ } /* namespace ue2 */ \ extern "C" { @@ -52,14 +54,14 @@ extern "C" #define GENERATE_NFA_DECL(gf_name) \ char gf_name##_testEOD(const struct NFA *nfa, const char *state, \ const char *streamState, u64a offset, \ - NfaCallback callback, SomNfaCallback som_cb, \ - void *context); \ + NfaCallback callback, void *context); \ char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ char gf_name##_inAccept(const struct NFA *n, ReportID report, \ struct mq *q); \ + char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \ char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \ void *state, u8 key); \ @@ -74,41 +76,11 @@ extern "C" struct mq *q, s64a loc); \ GENERATE_NFA_DUMP_DECL(gf_name) -GENERATE_NFA_DECL(nfaExecLimEx32_1) -GENERATE_NFA_DECL(nfaExecLimEx32_2) -GENERATE_NFA_DECL(nfaExecLimEx32_3) -GENERATE_NFA_DECL(nfaExecLimEx32_4) -GENERATE_NFA_DECL(nfaExecLimEx32_5) -GENERATE_NFA_DECL(nfaExecLimEx32_6) -GENERATE_NFA_DECL(nfaExecLimEx32_7) -GENERATE_NFA_DECL(nfaExecLimEx128_1) -GENERATE_NFA_DECL(nfaExecLimEx128_2) -GENERATE_NFA_DECL(nfaExecLimEx128_3) -GENERATE_NFA_DECL(nfaExecLimEx128_4) -GENERATE_NFA_DECL(nfaExecLimEx128_5) -GENERATE_NFA_DECL(nfaExecLimEx128_6) -GENERATE_NFA_DECL(nfaExecLimEx128_7) -GENERATE_NFA_DECL(nfaExecLimEx256_1) -GENERATE_NFA_DECL(nfaExecLimEx256_2) -GENERATE_NFA_DECL(nfaExecLimEx256_3) -GENERATE_NFA_DECL(nfaExecLimEx256_4) -GENERATE_NFA_DECL(nfaExecLimEx256_5) -GENERATE_NFA_DECL(nfaExecLimEx256_6) -GENERATE_NFA_DECL(nfaExecLimEx256_7) -GENERATE_NFA_DECL(nfaExecLimEx384_1) -GENERATE_NFA_DECL(nfaExecLimEx384_2) -GENERATE_NFA_DECL(nfaExecLimEx384_3) -GENERATE_NFA_DECL(nfaExecLimEx384_4) -GENERATE_NFA_DECL(nfaExecLimEx384_5) -GENERATE_NFA_DECL(nfaExecLimEx384_6) -GENERATE_NFA_DECL(nfaExecLimEx384_7) -GENERATE_NFA_DECL(nfaExecLimEx512_1) -GENERATE_NFA_DECL(nfaExecLimEx512_2) -GENERATE_NFA_DECL(nfaExecLimEx512_3) -GENERATE_NFA_DECL(nfaExecLimEx512_4) -GENERATE_NFA_DECL(nfaExecLimEx512_5) -GENERATE_NFA_DECL(nfaExecLimEx512_6) -GENERATE_NFA_DECL(nfaExecLimEx512_7) +GENERATE_NFA_DECL(nfaExecLimEx32) +GENERATE_NFA_DECL(nfaExecLimEx128) +GENERATE_NFA_DECL(nfaExecLimEx256) +GENERATE_NFA_DECL(nfaExecLimEx384) +GENERATE_NFA_DECL(nfaExecLimEx512) #undef GENERATE_NFA_DECL #undef GENERATE_NFA_DUMP_DECL diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 2c73f9ff..28f37083 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -35,6 +35,7 @@ #include "accel.h" #include "limex_internal.h" #include "limex_limits.h" +#include "limex_shuffle.h" #include 
"nfa_internal.h" #include "shufti.h" #include "truffle.h" @@ -44,10 +45,7 @@ #include "ue2common.h" #include "vermicelli.h" #include "util/bitutils.h" -#include "util/shuffle.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" -#include "util/shuffle_ssse3.h" static really_inline size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, @@ -80,7 +78,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = shuffleDynamic32(s, accel); + u32 idx = packedExtract32(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -92,7 +90,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n"); m128 accelPerm = limex->accelPermute; m128 accelComp = limex->accelCompare; - idx = shufflePshufb128(s, accelPerm, accelComp); + idx = packedExtract128(s, accelPerm, accelComp); return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -105,17 +103,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, m256 accelPerm = limex->accelPermute; m256 accelComp = limex->accelCompare; #if !defined(__AVX2__) - u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi); -#else - // TODO: learn you some avx2 shuffles for great good - u32 idx1 = shufflePshufb128(movdq_lo(s), movdq_lo(accelPerm), - movdq_lo(accelComp)); - u32 idx2 = shufflePshufb128(movdq_hi(s), movdq_hi(accelPerm), - movdq_hi(accelComp)); -#endif + u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2) == 0); // should be no shared bits idx = idx1 | idx2; +#else + idx = packedExtract256(s, accelPerm, accelComp); +#endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -127,9 +121,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex, DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n"); m384 accelPerm = limex->accelPermute; m384 accelComp = limex->accelCompare; - u32 idx1 = shufflePshufb128(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = shufflePshufb128(s.mid, accelPerm.mid, accelComp.mid); - u32 idx3 = shufflePshufb128(s.hi, accelPerm.hi, accelComp.hi); + u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid); + u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2 & idx3) == 0); // should be no shared bits idx = idx1 | idx2 | idx3; return accelScanWrapper(accelTable, aux, input, idx, i, end); @@ -144,21 +138,17 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, m512 accelPerm = limex->accelPermute; m512 accelComp = limex->accelCompare; #if !defined(__AVX2__) - u32 idx1 = shufflePshufb128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); - u32 idx2 = shufflePshufb128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); - u32 idx3 = shufflePshufb128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); - u32 idx4 = shufflePshufb128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); -#else - u32 idx1 = shufflePshufb128(movdq_lo(s.lo), movdq_lo(accelPerm.lo), - movdq_lo(accelComp.lo)); - u32 idx2 = shufflePshufb128(movdq_hi(s.lo), movdq_hi(accelPerm.lo), - movdq_hi(accelComp.lo)); - u32 idx3 = shufflePshufb128(movdq_lo(s.hi), movdq_lo(accelPerm.hi), - 
movdq_lo(accelComp.hi)); - u32 idx4 = shufflePshufb128(movdq_hi(s.hi), movdq_hi(accelPerm.hi), - movdq_hi(accelComp.hi)); -#endif + u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); + u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); + u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); + u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits idx = idx1 | idx2 | idx3 | idx4; +#else + u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); + assert((idx1 & idx2) == 0); // should be no shared bits + idx = idx1 | idx2; +#endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h index 6e4b7718..9523b073 100644 --- a/src/nfa/limex_common_impl.h +++ b/src/nfa/limex_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #define TESTEOD_FN JOIN(moNfaTestEod, SIZE) #define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE) #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) +#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) #define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) #define INITIAL_FN JOIN(moNfaInitial, SIZE) @@ -118,7 +119,7 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, if (TESTBIT_STATE(s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); - int rv = callback(offset, a->externalId, context); + int rv = callback(0, offset, a->externalId, context); if (unlikely(rv == MO_HALT_MATCHING)) { return 1; } @@ -149,7 +150,7 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const STATE_T *s, if (TESTBIT_STATE(s, a->state)) { DEBUG_PRINTF("state %u is on, firing report id=%u, offset=%llu\n", a->state, a->externalId, offset); - int rv = callback(offset, a->externalId, context); + int rv = callback(0, offset, a->externalId, context); if (unlikely(rv == MO_HALT_MATCHING)) { return 1; } @@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, return 0; } +static really_inline +char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, + union RepeatControl *repeat_ctrl, char *repeat_state, + u64a offset) { + assert(limex); + + const STATE_T acceptMask = LOAD_STATE(&limex->accept); + STATE_T accstate = AND_STATE(state, acceptMask); + + // Are we in an accept state? 
+ if (ISZERO_STATE(accstate)) { + DEBUG_PRINTF("no accept states are on\n"); + return 0; + } + + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate); + + return ISNONZERO_STATE(accstate); +} + #undef TESTEOD_FN #undef TESTEOD_REV_FN #undef REPORTCURRENT_FN #undef EXPIRE_ESTATE_FN #undef LIMEX_INACCEPT_FN +#undef LIMEX_INANYACCEPT_FN #undef INITIAL_FN #undef TOP_FN #undef TOPN_FN diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 5d51feb9..77754e0b 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -167,12 +167,10 @@ struct build_info { limex_accel_info accel; }; +#define LAST_LIMEX_NFA LIMEX_NFA_512 + // Constants for scoring mechanism - -#define LAST_LIMEX_NFA LIMEX_NFA_512_7 - -const int LIMEX_INITIAL_SCORE = 2000; -const int SHIFT_COST = 20; // limex: cost per shift mask +const int SHIFT_COST = 10; // limex: cost per shift mask const int EXCEPTION_COST = 4; // limex: per exception template struct NFATraits { }; @@ -261,6 +259,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) { } } +template +bool isMaskZero(Mask &m) { + u8 *m8 = (u8 *)&m; + for (u32 i = 0; i < sizeof(m); i++) { + if (m8[i]) { + return false; + } + } + return true; +} + // Sets an entire byte in a mask to the given value template void maskSetByte(Mask &m, const unsigned int idx, const char val) { @@ -336,7 +345,7 @@ void buildReachMapping(const build_info &args, vector &reach, } struct AccelBuild { - AccelBuild() : v(NFAGraph::null_vertex()), state(0), offset(0), ma_len1(0), + AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0), ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} NFAVertex v; u32 state; @@ -999,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args, // Suffixes and outfixes can mask out leaf states, which should all be // accepts. Right now we can only do this when there is nothing in initDs, // as we switch that on unconditionally in the expand call. 
- if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) { + if (!inspects_states_for_accepts(h) + && !hasInitDsStates(h, args.state_ids)) { NFAStateSet nonleaf(args.num_states); for (const auto &e : edges_range(h)) { u32 from = args.state_ids.at(source(e, h)); @@ -1162,12 +1172,13 @@ u32 getReportListIndex(const flat_set &reports, } static -void buildExceptionMap(const build_info &args, - const ue2::unordered_set &exceptional, - map > &exceptionMap, - vector &exceptionReports) { +u32 buildExceptionMap(const build_info &args, + const ue2::unordered_set &exceptional, + map > &exceptionMap, + vector &exceptionReports) { const NGHolder &h = args.h; const u32 num_states = args.num_states; + u32 exceptionCount = 0; ue2::unordered_map pos_trigger; ue2::unordered_map tug_trigger; @@ -1297,10 +1308,13 @@ void buildExceptionMap(const build_info &args, assert(e.succ_states.size() == num_states); assert(e.squash_states.size() == num_states); exceptionMap[e].push_back(i); + exceptionCount++; } } - DEBUG_PRINTF("%zu unique exceptions found.\n", exceptionMap.size()); + DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount, + exceptionMap.size()); + return exceptionCount; } static @@ -1315,6 +1329,92 @@ u32 depth_to_u32(const depth &d) { return d_val; } +static +bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e, + const build_info &args, u32 maxShift) { + NFAVertex from = source(e, h); + NFAVertex to = target(e, h); + u32 f = args.state_ids.at(from); + u32 t = args.state_ids.at(to); + if (!isLimitedTransition(f, t, maxShift)) { + return true; + } + + // All transitions out of a tug trigger are exceptional. + if (contains(args.tugs, from)) { + return true; + } + return false; +} + +static +u32 findMaxVarShift(const build_info &args, u32 nShifts) { + const NGHolder &h = args.h; + u32 shiftMask = 0; + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) { + shiftMask |= (1UL << (to - from)); + } + } + + u32 maxVarShift = 0; + for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) { + maxVarShift = findAndClearLSB_32(&shiftMask); + } + + return maxVarShift; +} + +static +int getLimexScore(const build_info &args, u32 nShifts) { + const NGHolder &h = args.h; + u32 maxVarShift = nShifts; + int score = 0; + + score += SHIFT_COST * nShifts; + maxVarShift = findMaxVarShift(args, nShifts); + + NFAStateSet exceptionalStates(args.num_states); + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + if (isExceptionalTransition(h, e, args, maxVarShift)) { + exceptionalStates.set(from); + } + } + score += EXCEPTION_COST * exceptionalStates.count(); + return score; +} + +// This function finds the best shift scheme with highest score +// Returns number of shifts and score calculated for appropriate scheme +// Returns zero if no appropriate scheme was found +static +u32 findBestNumOfVarShifts(const build_info &args, + int *bestScoreRet = nullptr) { + u32 bestNumOfVarShifts = 0; + int bestScore = INT_MAX; + for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) { + int score = getLimexScore(args, shiftCount); + if (score < bestScore) { + bestScore = score; + bestNumOfVarShifts = shiftCount; + } + } + if (bestScoreRet != 
nullptr) { + *bestScoreRet = bestScore; + } + return bestNumOfVarShifts; +} + template struct Factory { // typedefs for readability, for types derived from traits @@ -1322,25 +1422,6 @@ struct Factory { typedef typename NFATraits::implNFA_t implNFA_t; typedef typename NFATraits::tableRow_t tableRow_t; - static - bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e, - const ue2::unordered_map &state_ids, - const ue2::unordered_set &tugs) { - NFAVertex from = source(e, h); - NFAVertex to = target(e, h); - u32 f = state_ids.at(from); - u32 t = state_ids.at(to); - if (!isLimitedTransition(f, t, NFATraits::maxShift)) { - return true; - } - - // All transitions out of a tug trigger are exceptional. - if (contains(tugs, from)) { - return true; - } - return false; - } - static void allocState(NFA *nfa, u32 repeatscratchStateSize, u32 repeatStreamState) { @@ -1504,6 +1585,9 @@ struct Factory { static void writeShiftMasks(const build_info &args, implNFA_t *limex) { const NGHolder &h = args.h; + u32 maxShift = findMaxVarShift(args, limex->shiftCount); + u32 shiftMask = 0; + int shiftMaskIdx = 0; for (const auto &e : edges_range(h)) { u32 from = args.state_ids.at(source(e, h)); @@ -1515,15 +1599,32 @@ struct Factory { // We check for exceptional transitions here, as we don't want tug // trigger transitions emitted as limited transitions (even if they // could be in this model). - if (!isExceptionalTransition(h, e, args.state_ids, args.tugs)) { - maskSetBit(limex->shift[to - from], from); + if (!isExceptionalTransition(h, e, args, maxShift)) { + u32 shift = to - from; + if ((shiftMask & (1UL << shift)) == 0UL) { + shiftMask |= (1UL << shift); + limex->shiftAmount[shiftMaskIdx++] = (u8)shift; + } + assert(limex->shiftCount <= MAX_SHIFT_COUNT); + for (u32 i = 0; i < limex->shiftCount; i++) { + if (limex->shiftAmount[i] == (u8)shift) { + maskSetBit(limex->shift[i], from); + break; + } + } + } + } + if (maxShift && limex->shiftCount > 1) { + for (u32 i = 0; i < limex->shiftCount; i++) { + assert(!isMaskZero(limex->shift[i])); } } } static void findExceptionalTransitions(const build_info &args, - ue2::unordered_set &exceptional) { + ue2::unordered_set &exceptional, + u32 maxShift) { const NGHolder &h = args.h; for (const auto &e : edges_range(h)) { @@ -1533,7 +1634,7 @@ struct Factory { continue; } - if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) { + if (isExceptionalTransition(h, e, args, maxShift)) { exceptional.insert(e); } } @@ -1545,19 +1646,25 @@ struct Factory { implNFA_t *limex, const u32 exceptionsOffset) { DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); - // to make testing easier, we pre-set the exceptionMap to all invalid - // values - memset(limex->exceptionMap, 0xff, sizeof(limex->exceptionMap)); - exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); assert(ISALIGNED(etable)); - u32 ecount = 0; + map exception_by_state; for (const auto &m : exceptionMap) { const ExceptionProto &proto = m.first; const vector &states = m.second; - DEBUG_PRINTF("exception %u, triggered by %zu states.\n", ecount, - states.size()); + for (u32 i : states) { + assert(!contains(exception_by_state, i)); + exception_by_state.emplace(i, proto); + } + } + + u32 ecount = 0; + for (const auto &m : exception_by_state) { + const ExceptionProto &proto = m.second; + u32 state_id = m.first; + DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount, + state_id); // Write the exception entry. 
exception_t &e = etable[ecount]; @@ -1571,13 +1678,10 @@ struct Factory { : repeatOffsets[proto.repeat_index]; e.repeatOffset = repeat_offset; - // for each state that can switch it on - for (auto state_id : states) { - // set this bit in the exception mask - maskSetBit(limex->exceptionMask, state_id); - // set this index in the exception map - limex->exceptionMap[state_id] = ecount; - } + // for the state that can switch it on + // set this bit in the exception mask + maskSetBit(limex->exceptionMask, state_id); + ecount++; } @@ -1778,16 +1882,17 @@ struct Factory { } ue2::unordered_set exceptional; - findExceptionalTransitions(args, exceptional); + u32 shiftCount = findBestNumOfVarShifts(args); + assert(shiftCount); + u32 maxShift = findMaxVarShift(args, shiftCount); + findExceptionalTransitions(args, exceptional, maxShift); map > exceptionMap; vector exceptionReports; - buildExceptionMap(args, exceptional, exceptionMap, exceptionReports); + u32 exceptionCount = buildExceptionMap(args, exceptional, exceptionMap, + exceptionReports); - if (exceptionMap.size() > ~0U) { - DEBUG_PRINTF("too many exceptions!\n"); - return nullptr; - } + assert(exceptionCount <= args.num_states); // Build reach table and character mapping. vector reach; @@ -1842,7 +1947,7 @@ struct Factory { offset = ROUNDUP_CL(offset); const u32 exceptionsOffset = offset; - offset += sizeof(exception_t) * exceptionMap.size(); + offset += sizeof(exception_t) * exceptionCount; const u32 exceptionReportsOffset = offset; offset += sizeof(ReportID) * exceptionReports.size(); @@ -1874,6 +1979,7 @@ struct Factory { writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, limex, acceptsOffset, acceptsEodOffset, squashOffset); + limex->shiftCount = shiftCount; writeShiftMasks(args, limex); // Determine the state required for our state vector. @@ -1907,8 +2013,6 @@ struct Factory { } static int score(const build_info &args) { - const NGHolder &h = args.h; - // LimEx NFAs are available in sizes from 32 to 512-bit. size_t num_states = args.num_states; @@ -1928,45 +2032,17 @@ struct Factory { sz = args.cc.grey.nfaForceSize; } - if (args.cc.grey.nfaForceShifts && - NFATraits::maxShift != args.cc.grey.nfaForceShifts) { - return -1; - } - if (sz != NFATraits::maxStates) { return -1; // fail, size not appropriate } // We are of the right size, calculate a score based on the number // of exceptions and the number of shifts used by this LimEx. 
- int score = LIMEX_INITIAL_SCORE; - if (NFATraits::maxShift != 0) { - score -= SHIFT_COST / 2; // first shift mask is cheap - score -= SHIFT_COST * (NFATraits::maxShift - 1); + int score; + u32 shiftCount = findBestNumOfVarShifts(args, &score); + if (shiftCount == 0) { + return -1; } - - NFAStateSet exceptionalStates(num_states); // outbound exc trans - - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE || to == NO_STATE) { - continue; - } - - if (isExceptionalTransition(h, e, args.state_ids, args.tugs)) { - exceptionalStates.set(from); - } - } - DEBUG_PRINTF("%zu exceptional states\n", exceptionalStates.count()); - score -= EXCEPTION_COST * exceptionalStates.count(); - - /* ensure that we always report a valid score if have the right number - * of states */ - if (score < 0) { - score = 0; - } - return score; } }; @@ -1985,50 +2061,19 @@ struct scoreNfa { } }; -#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \ - template<> struct NFATraits { \ - typedef LimExNFA##mlt_size implNFA_t; \ - typedef u_##mlt_size tableRow_t; \ - typedef NFAException##mlt_size exception_t; \ - static const size_t maxStates = mlt_size; \ - static const u32 maxShift = mlt_shift; \ - }; \ +#define MAKE_LIMEX_TRAITS(mlt_size) \ + template<> struct NFATraits { \ + typedef LimExNFA##mlt_size implNFA_t; \ + typedef u_##mlt_size tableRow_t; \ + typedef NFAException##mlt_size exception_t; \ + static const size_t maxStates = mlt_size; \ + }; -MAKE_LIMEX_TRAITS(32, 1) -MAKE_LIMEX_TRAITS(32, 2) -MAKE_LIMEX_TRAITS(32, 3) -MAKE_LIMEX_TRAITS(32, 4) -MAKE_LIMEX_TRAITS(32, 5) -MAKE_LIMEX_TRAITS(32, 6) -MAKE_LIMEX_TRAITS(32, 7) -MAKE_LIMEX_TRAITS(128, 1) -MAKE_LIMEX_TRAITS(128, 2) -MAKE_LIMEX_TRAITS(128, 3) -MAKE_LIMEX_TRAITS(128, 4) -MAKE_LIMEX_TRAITS(128, 5) -MAKE_LIMEX_TRAITS(128, 6) -MAKE_LIMEX_TRAITS(128, 7) -MAKE_LIMEX_TRAITS(256, 1) -MAKE_LIMEX_TRAITS(256, 2) -MAKE_LIMEX_TRAITS(256, 3) -MAKE_LIMEX_TRAITS(256, 4) -MAKE_LIMEX_TRAITS(256, 5) -MAKE_LIMEX_TRAITS(256, 6) -MAKE_LIMEX_TRAITS(256, 7) -MAKE_LIMEX_TRAITS(384, 1) -MAKE_LIMEX_TRAITS(384, 2) -MAKE_LIMEX_TRAITS(384, 3) -MAKE_LIMEX_TRAITS(384, 4) -MAKE_LIMEX_TRAITS(384, 5) -MAKE_LIMEX_TRAITS(384, 6) -MAKE_LIMEX_TRAITS(384, 7) -MAKE_LIMEX_TRAITS(512, 1) -MAKE_LIMEX_TRAITS(512, 2) -MAKE_LIMEX_TRAITS(512, 3) -MAKE_LIMEX_TRAITS(512, 4) -MAKE_LIMEX_TRAITS(512, 5) -MAKE_LIMEX_TRAITS(512, 6) -MAKE_LIMEX_TRAITS(512, 7) +MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(128) +MAKE_LIMEX_TRAITS(256) +MAKE_LIMEX_TRAITS(384) +MAKE_LIMEX_TRAITS(512) } // namespace @@ -2133,20 +2178,18 @@ aligned_unique_ptr generate(NGHolder &h, // Acceleration analysis. fillAccelInfo(arg); - typedef pair EngineScore; - vector scores; + vector> scores; if (hint != INVALID_NFA) { // The caller has told us what to (attempt to) build. - scores.push_back(make_pair(0, (NFAEngineType)hint)); + scores.emplace_back(0, (NFAEngineType)hint); } else { for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { NFAEngineType ntype = (NFAEngineType)i; - int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); if (score >= 0) { DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score); - scores.push_back(make_pair(score, ntype)); + scores.emplace_back(score, ntype); } } } @@ -2156,22 +2199,22 @@ aligned_unique_ptr generate(NGHolder &h, return nullptr; } - sort(scores.begin(), scores.end(), greater()); + // Sort acceptable models in priority order, lowest score first. 
+ sort(scores.begin(), scores.end()); - aligned_unique_ptr nfa; - for (auto i = scores.begin(); !nfa && i != scores.end(); ++i) { - assert(i->first >= 0); - nfa = DISPATCH_BY_LIMEX_TYPE(i->second, generateNfa, arg); + for (const auto &elem : scores) { + assert(elem.first >= 0); + NFAEngineType limex_model = elem.second; + auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg); + if (nfa) { + DEBUG_PRINTF("successful build with NFA engine: %s\n", + nfa_type_name(limex_model)); + return nfa; + } } - if (!nfa) { - DEBUG_PRINTF("NFA build failed.\n"); - return nullptr; - } - - DEBUG_PRINTF("successful build with NFA engine: %s\n", - nfa_type_name((NFAEngineType)nfa->type)); - return nfa; + DEBUG_PRINTF("NFA build failed.\n"); + return nullptr; } u32 countAccelStates(NGHolder &h, diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 084f35dd..c52adc46 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,6 +80,23 @@ void dumpMask(FILE *f, const char *name, const u8 *mask, u32 mask_bits) { fprintf(f, "MSK %-20s %s\n", name, dumpMask(mask, mask_bits).c_str()); } +template +static +u32 rank_in_mask(mask_t mask, u32 bit) { + assert(bit < 8 * sizeof(mask)); + + u32 chunks[sizeof(mask)/sizeof(u32)]; + memcpy(chunks, &mask, sizeof(mask)); + u32 base_rank = 0; + for (u32 i = 0; i < bit / 32; i++) { + base_rank += popcount32(chunks[i]); + } + u32 chunk = chunks[bit / 32]; + u32 local_bit = bit % 32; + assert(chunk & (1U << local_bit)); + return base_rank + popcount32(chunk & ((1U << local_bit) - 1)); +} + template static void dumpRepeats(const limex_type *limex, u32 model_size, FILE *f) { @@ -244,6 +261,16 @@ void dumpLimexExceptions(const limex_type *limex, FILE *f) { } } +template +static +void dumpLimexShifts(const limex_type *limex, FILE *f) { + u32 size = limex_traits::size; + fprintf(f, "Shift Masks:\n"); + for(u32 i = 0; i < limex->shiftCount; i++) { + fprintf(f, "\t Shift %u(%hhu)\t\tMask: %s\n", i, limex->shiftAmount[i], + dumpMask((const u8 *)&limex->shift[i], size).c_str()); + } +} template static void dumpLimexText(const limex_type *limex, FILE *f) { @@ -270,6 +297,9 @@ void dumpLimexText(const limex_type *limex, FILE *f) { topMask += size / 8; } + // Dump shift masks + dumpLimexShifts(limex, f); + dumpSquash(limex, f); dumpLimexReachMap(limex->reachMap, f); @@ -325,7 +355,7 @@ struct limex_labeller : public nfa_labeller { return; } - u32 ex_index = limex->exceptionMap[state]; + u32 ex_index = rank_in_mask(limex->exceptionMask, state); const typename limex_traits::exception_type *e = &exceptions[ex_index]; @@ -396,7 +426,7 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) { const typename limex_traits::exception_type *exceptions = getExceptionTable(limex); - u32 ex_index = limex->exceptionMap[state]; + u32 ex_index = rank_in_mask(limex->exceptionMask, state); const typename limex_traits::exception_type *e = &exceptions[ex_index]; @@ -420,78 +450,45 @@ void dumpExDotInfo(const limex_type *limex, u32 state, FILE *f) { template static void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { - for (u32 j = 0; j < MAX_MAX_SHIFT; j++) { + for (u32 j = 0; j < limex->shiftCount; j++) { + const u32 shift_amount = limex->shiftAmount[j]; if (testbit((const u8 *)&limex->shift[j], 
limex_traits::size, state)) { - fprintf(f, "%u -> %u;\n", state, state + j); + fprintf(f, "%u -> %u;\n", state, state + shift_amount); } } } -#define DUMP_TEXT_FN(ddf_u, ddf_n, ddf_s) \ - void nfaExecLimEx##ddf_n##_##ddf_s##_dumpText(const NFA *nfa, FILE *f) { \ +#define DUMP_TEXT_FN(ddf_n) \ + void nfaExecLimEx##ddf_n##_dumpText(const NFA *nfa, FILE *f) { \ dumpLimexText((const LimExNFA##ddf_n *)getImplNfa(nfa), f); \ } -#define DUMP_DOT_FN(ddf_u, ddf_n, ddf_s) \ - void nfaExecLimEx##ddf_n##_##ddf_s##_dumpDot(const NFA *nfa, FILE *f) { \ +#define DUMP_DOT_FN(ddf_n) \ + void nfaExecLimEx##ddf_n##_dumpDot(const NFA *nfa, FILE *f, \ + UNUSED const string &base) { \ const LimExNFA##ddf_n *limex = \ (const LimExNFA##ddf_n *)getImplNfa(nfa); \ \ dumpDotPreamble(f); \ - u32 state_count = nfa->nPositions; \ + u32 state_count = nfa->nPositions; \ dumpVertexDotInfo(limex, state_count, f, \ limex_labeller(limex)); \ for (u32 i = 0; i < state_count; i++) { \ dumpLimDotInfo(limex, i, f); \ dumpExDotInfo(limex, i, f); \ } \ - \ dumpDotTrailer(f); \ } -#define LIMEX_DUMP_FNS(ntype, size, shifts) \ - DUMP_TEXT_FN(ntype, size, shifts) \ - DUMP_DOT_FN(ntype, size, shifts) +#define LIMEX_DUMP_FNS(size) \ + DUMP_TEXT_FN(size) \ + DUMP_DOT_FN(size) -LIMEX_DUMP_FNS(u32, 32, 1) -LIMEX_DUMP_FNS(u32, 32, 2) -LIMEX_DUMP_FNS(u32, 32, 3) -LIMEX_DUMP_FNS(u32, 32, 4) -LIMEX_DUMP_FNS(u32, 32, 5) -LIMEX_DUMP_FNS(u32, 32, 6) -LIMEX_DUMP_FNS(u32, 32, 7) - -LIMEX_DUMP_FNS(m128, 128, 1) -LIMEX_DUMP_FNS(m128, 128, 2) -LIMEX_DUMP_FNS(m128, 128, 3) -LIMEX_DUMP_FNS(m128, 128, 4) -LIMEX_DUMP_FNS(m128, 128, 5) -LIMEX_DUMP_FNS(m128, 128, 6) -LIMEX_DUMP_FNS(m128, 128, 7) - -LIMEX_DUMP_FNS(m256, 256, 1) -LIMEX_DUMP_FNS(m256, 256, 2) -LIMEX_DUMP_FNS(m256, 256, 3) -LIMEX_DUMP_FNS(m256, 256, 4) -LIMEX_DUMP_FNS(m256, 256, 5) -LIMEX_DUMP_FNS(m256, 256, 6) -LIMEX_DUMP_FNS(m256, 256, 7) - -LIMEX_DUMP_FNS(m384, 384, 1) -LIMEX_DUMP_FNS(m384, 384, 2) -LIMEX_DUMP_FNS(m384, 384, 3) -LIMEX_DUMP_FNS(m384, 384, 4) -LIMEX_DUMP_FNS(m384, 384, 5) -LIMEX_DUMP_FNS(m384, 384, 6) -LIMEX_DUMP_FNS(m384, 384, 7) - -LIMEX_DUMP_FNS(m512, 512, 1) -LIMEX_DUMP_FNS(m512, 512, 2) -LIMEX_DUMP_FNS(m512, 512, 3) -LIMEX_DUMP_FNS(m512, 512, 4) -LIMEX_DUMP_FNS(m512, 512, 5) -LIMEX_DUMP_FNS(m512, 512, 6) -LIMEX_DUMP_FNS(m512, 512, 7) +LIMEX_DUMP_FNS(32) +LIMEX_DUMP_FNS(128) +LIMEX_DUMP_FNS(256) +LIMEX_DUMP_FNS(384) +LIMEX_DUMP_FNS(512) } // namespace ue2 diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h index 26c5e5a5..175ca393 100644 --- a/src/nfa/limex_exceptional.h +++ b/src/nfa/limex_exceptional.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,9 +79,13 @@ #ifdef ARCH_64_BIT #define CHUNK_T u64a #define FIND_AND_CLEAR_FN findAndClearLSB_64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 #else #define CHUNK_T u32 #define FIND_AND_CLEAR_FN findAndClearLSB_32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 #endif /** \brief Process a single exception. Returns 1 if exception handling should @@ -206,13 +210,13 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, #ifndef RUN_EXCEPTION_FN_ONLY -/** \brief Process all of the exceptions associated with the states in the \a estate. */ +/** \brief Process all of the exceptions associated with the states in the \a + * estate. 
*/ static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, - const struct IMPL_NFA_T *limex, - const u32 *exceptionMap, const EXCEPTION_T *exceptions, - const ReportID *exReports, - u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { + const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, + const ReportID *exReports, u64a offset, struct CONTEXT_T *ctx, + char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro if (EQ_STATE(estate, LOAD_STATE(&ctx->cached_estate))) { @@ -237,15 +241,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, // A copy of the estate as an array of GPR-sized chunks. CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; #ifdef ESTATE_ON_STACK memcpy(chunks, &estate, sizeof(STATE_T)); #else memcpy(chunks, estatep, sizeof(STATE_T)); #endif + memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); struct proto_cache new_cache = {0, NULL}; enum CacheResult cacheable = CACHE_RESULT; + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; + base_index[0] = 0; + for (u32 i = 0; i < ARRAY_LENGTH(base_index) - 1; i++) { + base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); + } + do { u32 t = findAndClearLSB_32(&diffmask); #ifdef ARCH_64_BIT @@ -254,10 +266,10 @@ int PE_FN(STATE_ARG, ESTATE_ARG, u32 diffmask, STATE_T *succ, assert(t < ARRAY_LENGTH(chunks)); CHUNK_T word = chunks[t]; assert(word != 0); - u32 base = t * sizeof(CHUNK_T) * 8; do { - u32 bit = FIND_AND_CLEAR_FN(&word) + base; - u32 idx = exceptionMap[bit]; + u32 bit = FIND_AND_CLEAR_FN(&word); + u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); + u32 idx = local_index + base_index[t]; const EXCEPTION_T *e = &exceptions[idx]; if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index adae6ab7..c37f5f40 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -68,6 +68,9 @@ The value of NFA.stateSize gives the total state size in bytes (the sum of all the above). + Number of shifts should be always greater or equal to 1 + Number of shifts 0 means that no appropriate NFA engine was found. 
+ */ #ifndef LIMEX_INTERNAL_H @@ -77,7 +80,8 @@ #include "repeat_internal.h" // Constants -#define MAX_MAX_SHIFT 8 /**< largest maxshift used by a LimEx NFA */ +#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */ +#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */ #define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ @@ -95,24 +99,6 @@ enum LimExSquash { LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised }; -struct LimExNFABase { - u8 reachMap[N_CHARS]; - u32 reachSize; - u32 accelCount; - u32 accelTableOffset; - u32 accelAuxCount; - u32 accelAuxOffset; - u32 acceptCount; - u32 acceptOffset; - u32 acceptEodCount; - u32 acceptEodOffset; - u32 exceptionCount; - u32 exceptionOffset; - u32 exReportOffset; - u32 repeatCount; - u32 repeatOffset; -}; - /* uniform looking types for the macros */ typedef u8 u_8; typedef u16 u_16; @@ -133,7 +119,7 @@ struct NFAException##size { \ u8 trigger; /**< from enum LimExTrigger */ \ }; \ \ -struct LimExNFA##size { /* MUST align with LimExNFABase */ \ +struct LimExNFA##size { \ u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \ u32 reachSize; /**< number of reach masks */ \ u32 accelCount; /**< number of entries in accel table */ \ @@ -149,7 +135,6 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \ u32 exReportOffset; /* rel. to start of LimExNFA */ \ u32 repeatCount; \ u32 repeatOffset; \ - u32 exceptionMap[size]; \ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ u32 squashCount; \ u32 topCount; \ @@ -168,8 +153,10 @@ struct LimExNFA##size { /* MUST align with LimExNFABase */ \ u_##size compressMask; /**< switch off before compress */ \ u_##size exceptionMask; \ u_##size repeatCyclicMask; \ - u_##size shift[MAX_MAX_SHIFT]; \ u_##size zombieMask; /**< zombie if in any of the set states */ \ + u_##size shift[MAX_SHIFT_COUNT]; \ + u32 shiftCount; /**< number of shift masks used */ \ + u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \ }; CREATE_NFA_LIMEX(32) diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c index 471e4bf0..8a0a8acd 100644 --- a/src/nfa/limex_native.c +++ b/src/nfa/limex_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -74,7 +74,6 @@ static really_inline int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, const struct LimExNFA32 *limex, - const u32 *exceptionMap, const struct NFAException32 *exceptions, const ReportID *exReports, u64a offset, struct NFAContext32 *ctx, char in_rev, char flags) { @@ -104,7 +103,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, do { u32 bit = findAndClearLSB_32(&estate); - u32 idx = exceptionMap[bit]; + u32 idx = rank_in_mask32(limex->exceptionMask, bit); const struct NFAException32 *e = &exceptions[idx]; if (!runException32(e, s, succ, &local_succ, limex, exReports, offset, ctx, &new_cache, &cacheable, in_rev, flags)) { @@ -132,35 +131,4 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, #define SIZE 32 #define STATE_T u32 -#define SHIFT 1 -#include "limex_runtime_impl.h" - -#define SIZE 32 -#define STATE_T u32 -#define SHIFT 2 -#include "limex_runtime_impl.h" - -#define SIZE 32 
-#define STATE_T u32 -#define SHIFT 3 -#include "limex_runtime_impl.h" - -#define SIZE 32 -#define STATE_T u32 -#define SHIFT 4 -#include "limex_runtime_impl.h" - -#define SIZE 32 -#define STATE_T u32 -#define SHIFT 5 -#include "limex_runtime_impl.h" - -#define SIZE 32 -#define STATE_T u32 -#define SHIFT 6 -#include "limex_runtime_impl.h" - -#define SIZE 32 -#define STATE_T u32 -#define SHIFT 7 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_runtime.h b/src/nfa/limex_runtime.h index 4e111aa6..e0c182fc 100644 --- a/src/nfa/limex_runtime.h +++ b/src/nfa/limex_runtime.h @@ -73,34 +73,35 @@ struct proto_cache { }; // Shift macros for Limited NFAs. Defined in terms of uniform ops. +// LimExNFAxxx ptr in 'limex' and the current state in 's' #define NFA_EXEC_LIM_SHIFT(nels_type, nels_i) \ - (JOIN(shift_, nels_type)( \ + (JOIN(lshift_, nels_type)( \ JOIN(and_, nels_type)(s, \ JOIN(load_, nels_type)(&limex->shift[nels_i])), \ - nels_i)) + limex->shiftAmount[nels_i])) -// Calculate the (limited model) successors for a given max shift. Assumes -// LimExNFAxxx ptr in 'l', current state in 's' and successors in 'succ'. +// Calculate the (limited model) successors for a number of variable shifts. +// Assumes current state in 's' and successors in 'succ'. -#define NFA_EXEC_GET_LIM_SUCC(gls_type, gls_shift) \ +#define NFA_EXEC_GET_LIM_SUCC(gls_type) \ do { \ - succ = \ - JOIN(and_, gls_type)(s, JOIN(load_, gls_type)(&limex->shift[0])); \ - switch (gls_shift) { \ - case 7: \ + succ = NFA_EXEC_LIM_SHIFT(gls_type, 0); \ + switch (limex->shiftCount) { \ + case 8: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 7)); \ - case 6: \ + case 7: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 6)); \ - case 5: \ + case 6: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 5)); \ - case 4: \ + case 5: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 4)); \ - case 3: \ + case 4: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 3)); \ - case 2: \ + case 3: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 2)); \ - case 1: \ + case 2: \ succ = JOIN(or_, gls_type)(succ, NFA_EXEC_LIM_SHIFT(gls_type, 1)); \ + case 1: \ case 0: \ ; \ } \ @@ -129,7 +130,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, for (; *reports != MO_INVALID_IDX; ++reports) { DEBUG_PRINTF("firing report for id %u at offset %llu\n", *reports, offset); - int rv = callback(offset, *reports, context); + int rv = callback(0, offset, *reports, context); if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 676ed370..881e41fd 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -37,11 +37,11 @@ * Version 2.0: now with X-Macros, so you get line numbers in your debugger. */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(SHIFT) -# error Must define SIZE and STATE_T and SHIFT in includer. +#if !defined(SIZE) || !defined(STATE_T) +# error Must define SIZE and STATE_T in includer. 
#endif -#define LIMEX_API_ROOT JOIN(JOIN(JOIN(nfaExecLimEx, SIZE), _), SHIFT) +#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -73,6 +73,7 @@ #define ANDNOT_STATE JOIN(andnot_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) +#define CLEARBIT_STATE JOIN(clearbit_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T) #define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T) @@ -104,8 +105,8 @@ // continue, 1 if an accept was fired and the user instructed us to halt. static really_inline char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - const ReportID *exReports, const u32 *exceptionMap, - STATE_T s, const STATE_T emask, size_t i, u64a offset, + const ReportID *exReports, STATE_T s, + const STATE_T emask, size_t i, u64a offset, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, const char flags, const char in_rev, const char first_match) { @@ -132,8 +133,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( - pass_state, pass_estate, diffmask, succ, limex, exceptionMap, - exceptions, exReports, callback_offset, ctx, in_rev, localflags); + pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports, + callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. } @@ -175,7 +176,6 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ @@ -201,11 +201,11 @@ without_accel: u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); + NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 0, first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, + i, offset, &succ, final_loc, ctx, flags, 0, + first_match)) { return MO_HALT_MATCHING; } @@ -252,11 +252,11 @@ with_accel: u8 c = input[i]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); + NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, - EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, - flags, 0, first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, + i, offset, &succ, final_loc, ctx, flags, 0, + first_match)) { return MO_HALT_MATCHING; } @@ -300,7 +300,6 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); - const u32 *exceptionMap = limex->exceptionMap; STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ @@ -318,9 +317,9 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, u8 c = input[i-1]; STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(STATE_T, SHIFT); + NFA_EXEC_GET_LIM_SUCC(STATE_T); - if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, exceptionMap, s, + if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, 
final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; @@ -349,36 +348,57 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, } static really_inline -void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, +void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, u64a offset) { if (!limex->repeatCount) { return; } - // Note: we compress all repeats, as they may have *just* had their - // cyclic states switched off a moment ago. TODO: is this required + STATE_T s = LOAD_STATE(src); + + if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) { + DEBUG_PRINTF("no cyclics are on\n"); + return; + } const union RepeatControl *ctrl = getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); char *state_base = (char *)dest + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { + DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + + if (!TESTBIT_STATE(&s, info->cyclicState)) { + DEBUG_PRINTF("is dead\n"); + continue; + } + const struct RepeatInfo *repeat = getRepeatInfo(info); + if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset, + offset) == REPEAT_STALE) { + DEBUG_PRINTF("is stale, clearing state\n"); + CLEARBIT_STATE(&s, info->cyclicState); + continue; + } + + DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n", + info->packedCtrlOffset); repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], offset); } + + STORE_STATE(src, s); } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, - const struct mq *q, - s64a loc) { + const struct mq *q, s64a loc) { void *dest = q->streamState; - const void *src = q->state; + void *src = q->state; u8 key = queue_prev_byte(q, loc); const IMPL_NFA_T *limex = getImplNfa(n); - COMPRESS_FN(limex, dest, src, key); COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc); + COMPRESS_FN(limex, dest, src, key); return 0; } @@ -389,15 +409,29 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, return; } - // Note: we expand all repeats, as they may have *just* had their - // cyclic states switched off a moment ago. TODO: is this required? + // Note: state has already been expanded into 'dest'. + const STATE_T cyclics = + AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask)); + if (ISZERO_STATE(cyclics)) { + DEBUG_PRINTF("no cyclics are on\n"); + return; + } union RepeatControl *ctrl = getRepeatControlBase((char *)dest, sizeof(STATE_T)); const char *state_base = (const char *)src + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { + DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + + if (!TESTBIT_STATE(&cyclics, info->cyclicState)) { + DEBUG_PRINTF("is dead\n"); + continue; + } + + DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n", + info->packedCtrlOffset); const struct RepeatInfo *repeat = getRepeatInfo(info); repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, &ctrl[i]); @@ -650,7 +684,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ep = MIN(ep, end_abs); assert(ep >= sp); - assert(sp >= offset); // We no longer do history buffer scans here. 
+ if (sp < offset) { + DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); + assert(offset - sp <= q->hlength); + u64a local_ep = MIN(offset, ep); + u64a final_look = 0; + /* we are starting inside the history buffer */ + if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset, + local_ep - sp, &ctx, sp, + &final_look) == MO_HALT_MATCHING) { + DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu " + "offset:%llu\n", final_look, sp, end_abs, offset); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp + final_look - offset; + STORE_STATE(q->state, LOAD_STATE(&ctx.s)); + return MO_MATCHES_PENDING; + } + + sp = local_ep; + } if (sp >= ep) { goto scan_done; @@ -789,10 +843,8 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, - UNUSED SomNfaCallback som_callback, - void *context) { + const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(n && state); const IMPL_NFA_T *limex = getImplNfa(n); @@ -868,6 +920,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, offset, report); } +char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { + assert(nfa && q); + assert(q->state && q->streamState); + + const IMPL_NFA_T *limex = getImplNfa(nfa); + union RepeatControl *repeat_ctrl = + getRepeatControlBase(q->state, sizeof(STATE_T)); + char *repeat_state = q->streamState + limex->stateSize; + STATE_T state = LOAD_STATE(q->state); + u64a offset = q->offset + q_last_loc(q) + 1; + + return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, + offset); +} + enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( const struct NFA *nfa, struct mq *q, @@ -920,6 +987,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef ANDNOT_STATE #undef OR_STATE #undef TESTBIT_STATE +#undef CLEARBIT_STATE #undef ZERO_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE @@ -935,5 +1003,4 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( // Parameters. #undef SIZE #undef STATE_T -#undef SHIFT #undef LIMEX_API_ROOT diff --git a/src/util/shuffle.h b/src/nfa/limex_shuffle.h similarity index 74% rename from src/util/shuffle.h rename to src/nfa/limex_shuffle.h index ba85fb5d..e45e4331 100644 --- a/src/util/shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,20 +34,19 @@ * be faster and actually correct if these assumptions don't hold true. */ -#ifndef SHUFFLE_H -#define SHUFFLE_H +#ifndef LIMEX_SHUFFLE_H +#define LIMEX_SHUFFLE_H -#include "config.h" -#include "bitutils.h" -#include "simd_utils.h" #include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" #if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) #define HAVE_PEXT #endif static really_inline -u32 shuffleDynamic32(u32 x, u32 mask) { +u32 packedExtract32(u32 x, u32 mask) { #if defined(HAVE_PEXT) // Intel BMI2 can do this operation in one instruction. 
return _pext_u32(x, mask); @@ -67,7 +66,7 @@ u32 shuffleDynamic32(u32 x, u32 mask) { } static really_inline -u32 shuffleDynamic64(u64a x, u64a mask) { +u32 packedExtract64(u64a x, u64a mask) { #if defined(HAVE_PEXT) && defined(ARCH_64_BIT) // Intel BMI2 can do this operation in one instruction. return _pext_u64(x, mask); @@ -88,4 +87,24 @@ u32 shuffleDynamic64(u64a x, u64a mask) { #undef HAVE_PEXT -#endif // SHUFFLE_H +static really_inline +u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { + m128 shuffled = pshufb(s, permute); + m128 compared = and128(shuffled, compare); + u16 rv = ~movemask128(eq128(compared, shuffled)); + return (u32)rv; +} + +#if defined(__AVX2__) +static really_inline +u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { + // vpshufb doesn't cross lanes, so this is a bit of a cheat + m256 shuffled = vpshufb(s, permute); + m256 compared = and256(shuffled, compare); + u32 rv = ~movemask256(eq256(compared, shuffled)); + // stitch the lane-wise results back together + return (u32)((rv >> 16) | (rv & 0xffffU)); +} +#endif // AVX2 + +#endif // LIMEX_SHUFFLE_H diff --git a/src/nfa/limex_simd128.c b/src/nfa/limex_simd128.c index 781c7972..f0fb1dd4 100644 --- a/src/nfa/limex_simd128.c +++ b/src/nfa/limex_simd128.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,37 +61,6 @@ #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 1 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 2 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 3 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 4 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 5 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 6 -#include "limex_runtime_impl.h" - -#define SIZE 128 -#define STATE_T m128 -#define SHIFT 7 +#define SIZE 128 +#define STATE_T m128 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd256.c b/src/nfa/limex_simd256.c index b4df1459..57648b69 100644 --- a/src/nfa/limex_simd256.c +++ b/src/nfa/limex_simd256.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,37 +58,6 @@ #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 1 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 2 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 3 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 4 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 5 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 6 -#include "limex_runtime_impl.h" - -#define SIZE 256 -#define STATE_T m256 -#define SHIFT 7 +#define SIZE 256 +#define STATE_T m256 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd384.c b/src/nfa/limex_simd384.c index 
4b4b44bb..84061f61 100644 --- a/src/nfa/limex_simd384.c +++ b/src/nfa/limex_simd384.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,37 +58,6 @@ #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 1 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 2 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 3 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 4 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 5 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 6 -#include "limex_runtime_impl.h" - -#define SIZE 384 -#define STATE_T m384 -#define SHIFT 7 +#define SIZE 384 +#define STATE_T m384 #include "limex_runtime_impl.h" diff --git a/src/nfa/limex_simd512b.c b/src/nfa/limex_simd512.c similarity index 88% rename from src/nfa/limex_simd512b.c rename to src/nfa/limex_simd512.c index a3b705df..a6646d83 100644 --- a/src/nfa/limex_simd512b.c +++ b/src/nfa/limex_simd512.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,12 +58,6 @@ #define INLINE_ATTR really_inline #include "limex_common_impl.h" -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 4 -#include "limex_runtime_impl.h" - -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 5 +#define SIZE 512 +#define STATE_T m512 #include "limex_runtime_impl.h" diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index 314e88e7..88da27c0 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -42,13 +42,13 @@ static really_inline char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, - u16 s, u64a loc, char eod, u16 * const cached_accept_state, - u32 * const cached_accept_id) { + u16 s, u64a loc, char eod, u16 *const cached_accept_state, + u32 *const cached_accept_id) { DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", (u16)(s & STATE_MASK), loc, eod); if (!eod && s == *cached_accept_state) { - if (cb(loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } @@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, *cached_accept_id = rl->report[0]; DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } @@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, for (u32 i = 0; i < count; i++) { DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } } @@ -146,7 +146,7 @@ without_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, 
m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, @@ -186,7 +186,7 @@ with_accel: if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, @@ -328,7 +328,7 @@ without_accel: u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, @@ -360,7 +360,7 @@ with_accel: u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, @@ -475,7 +475,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, int rv; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(q_cur_offset(q), m->arb_report, context); + rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; u16 cached_accept_state = 0; @@ -632,7 +632,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, int rv; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(q_cur_offset(q), m->arb_report, context); + rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; u16 cached_accept_state = 0; @@ -836,7 +836,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { if (s >= m->accept_limit_8) { if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(offset, m->arb_report, ctxt); + cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; u16 cached_accept_state = 0; @@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { } char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; u16 s = *(u16 *)q->state; @@ -864,7 +864,7 @@ char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { if (aux->accept) { if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(offset, m->arb_report, ctxt); + cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; u16 cached_accept_state = 0; @@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); u8 s = *(u8 *)q->state; DEBUG_PRINTF("checking accepts for %hhu\n", s); if (s < m->accept_limit_8) { @@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, return mcclellanHasAccept(m, get_aux(m, s), report); } +char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcclellan *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + assert(s < 
m->accept_limit_8 || get_aux(m, s)->accept); + + return s >= m->accept_limit_8; +} char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); return mcclellanHasAccept(m, get_aux(m, s), report); } +char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcclellan *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux(m, s)->accept; +} + char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, @@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, @@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, @@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, @@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u8 s = offset ? m->start_floating : m->start_anchored; if (s) { *(u8 *)state = s; @@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? 
m->start_floating : m->start_anchored; if (s) { unaligned_store_u16(state, s); @@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u8 s = top ? m->start_anchored : *(u8 *)state; @@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u16 s = top ? m->start_anchored : unaligned_load_u16(state); @@ -1053,17 +1073,15 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, } char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, - u64a offset, NfaCallback callback, - UNUSED SomNfaCallback som_cb, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, context); } char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, - u64a offset, NfaCallback callback, - UNUSED SomNfaCallback som_cb, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(ISALIGNED_N(state, 2)); return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, context); diff --git a/src/nfa/mcclellan.h b/src/nfa/mcclellan.h index 6b4ec2d5..9c6b3eec 100644 --- a/src/nfa/mcclellan.h +++ b/src/nfa/mcclellan.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,14 +39,14 @@ struct NFA; char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context); + NfaCallback callback, void *context); char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest, char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context); + NfaCallback callback, void *context); char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_QR(const struct NFA *n, 
struct mq *q, ReportID report); char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index a9fbce94..09006d5b 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -32,7 +32,6 @@ #include "accelcompile.h" #include "grey.h" #include "mcclellan_internal.h" -#include "mcclellancompile_accel.h" #include "mcclellancompile_util.h" #include "nfa_internal.h" #include "shufticompile.h" @@ -65,6 +64,17 @@ using namespace std; using boost::adaptors::map_keys; +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + namespace ue2 { namespace /* anon */ { @@ -75,7 +85,7 @@ struct dstate_extra { }; struct dfa_info { - dfa_build_strat &strat; + accel_dfa_build_strat &strat; raw_dfa &raw; vector &states; vector extra; @@ -85,7 +95,7 @@ struct dfa_info { u8 getAlphaShift() const; - explicit dfa_info(dfa_build_strat &s) + explicit dfa_info(accel_dfa_build_strat &s) : strat(s), raw(s.get_raw()), states(raw.states), @@ -128,13 +138,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { return aux; } -static -bool double_byte_ok(const AccelScheme &info) { - return !info.double_byte.empty() - && info.double_cr.count() < info.double_byte.size() - && info.double_cr.count() <= 2 && !info.double_byte.empty(); -} - static void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { assert((size_t)succ_table % 2 == 0); @@ -190,120 +193,12 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const { return ACCEL_DFA_MAX_OFFSET_DEPTH; } -AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) - const { - return find_mcclellan_escape_info(rdfa, this_idx, - max_allowed_offset_accel()); +u32 mcclellan_build_strat::max_stop_char() const { + return ACCEL_DFA_MAX_STOP_CHAR; } -/** builds acceleration schemes for states */ -void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, - const AccelScheme &info, - void *accel_out) { - AccelAux *accel = (AccelAux *)accel_out; - - DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, - info.double_offset); - accel->generic.offset = verify_u8(info.offset); - - if (double_byte_ok(info) && info.double_cr.none() - && info.double_byte.size() == 1) { - accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = info.double_byte.begin()->first; - accel->dverm.c2 = info.double_byte.begin()->second; - accel->dverm.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); - return; - } - - if (double_byte_ok(info) && info.double_cr.none() - && (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { - bool ok = true; - - assert(!info.double_byte.empty()); - u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; - u8 secondC = 
info.double_byte.begin()->second & CASE_CLEAR; - - for (const pair &p : info.double_byte) { - if ((p.first & CASE_CLEAR) != firstC - || (p.second & CASE_CLEAR) != secondC) { - ok = false; - break; - } - } - - if (ok) { - accel->accel_type = ACCEL_DVERM_NOCASE; - accel->dverm.c1 = firstC; - accel->dverm.c2 = secondC; - accel->dverm.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); - return; - } - - u8 m1; - u8 m2; - if (buildDvermMask(info.double_byte, &m1, &m2)) { - accel->accel_type = ACCEL_DVERM_MASKED; - accel->dverm.offset = verify_u8(info.double_offset); - accel->dverm.c1 = info.double_byte.begin()->first & m1; - accel->dverm.c2 = info.double_byte.begin()->second & m2; - accel->dverm.m1 = m1; - accel->dverm.m2 = m2; - DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", - accel->dverm.c1, accel->dverm.c2); - return; - } - } - - if (double_byte_ok(info) - && shuftiBuildDoubleMasks(info.double_cr, info.double_byte, - &accel->dshufti.lo1, &accel->dshufti.hi1, - &accel->dshufti.lo2, &accel->dshufti.hi2)) { - accel->accel_type = ACCEL_DSHUFTI; - accel->dshufti.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is double shufti\n", this_idx); - return; - } - - if (info.cr.none()) { - accel->accel_type = ACCEL_RED_TAPE; - DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" - " from which there is no escape\n", this_idx); - return; - } - - if (info.cr.count() == 1) { - accel->accel_type = ACCEL_VERM; - accel->verm.c = info.cr.find_first(); - DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); - return; - } - - if (info.cr.count() == 2 && info.cr.isCaselessChar()) { - accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = info.cr.find_first() & CASE_CLEAR; - DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); - return; - } - - if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { - accel->accel_type = ACCEL_NONE; - DEBUG_PRINTF("state %hu is too broad\n", this_idx); - return; - } - - accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, - &accel->shufti.hi)) { - DEBUG_PRINTF("state %hu is shufti\n", this_idx); - return; - } - - assert(!info.cr.none()); - accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); - DEBUG_PRINTF("state %hu is truffle\n", this_idx); +u32 mcclellan_build_strat::max_floating_stop_char() const { + return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; } static @@ -343,15 +238,6 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, } } -raw_dfa::~raw_dfa() { -} - -raw_report_info::raw_report_info() { -} - -raw_report_info::~raw_report_info() { -} - namespace { struct raw_report_list { @@ -592,7 +478,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info - = populateAccelerationInfo(info.raw, info.strat, cc.grey); + = info.strat.getAccelInfo(cc.grey); size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; @@ -811,7 +697,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info - = populateAccelerationInfo(info.raw, info.strat, cc.grey); + = info.strat.getAccelInfo(cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t aux_size = sizeof(mstate_aux) * info.size(); @@ -1053,7 +939,7 @@ bool is_cyclic_near(const 
raw_dfa &raw, dstate_id_t root) { return false; } -aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, +aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, set *accel_states) { u16 total_daddy = 0; @@ -1123,12 +1009,9 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) { return out.count(); } -bool has_accel_dfa(const NFA *nfa) { +bool has_accel_mcclellan(const NFA *nfa) { const mcclellan *m = (const mcclellan *)getImplNfa(nfa); return m->has_accel; } -dfa_build_strat::~dfa_build_strat() { -} - } // namespace ue2 diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index ba519cac..e6f548a7 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -29,6 +29,7 @@ #ifndef MCCLELLANCOMPILE_H #define MCCLELLANCOMPILE_H +#include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" #include "util/accel_scheme.h" @@ -47,48 +48,20 @@ namespace ue2 { class ReportManager; struct CompileContext; -struct raw_report_info { - raw_report_info(); - virtual ~raw_report_info(); - virtual u32 getReportListSize() const = 0; /* in bytes */ - virtual size_t size() const = 0; /* number of lists */ - virtual void fillReportLists(NFA *n, size_t base_offset, - std::vector &ro /* out */) const = 0; -}; - -class dfa_build_strat { -public: - explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} - virtual ~dfa_build_strat(); - virtual raw_dfa &get_raw() const = 0; - virtual std::unique_ptr gatherReports( - std::vector &reports /* out */, - std::vector &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const = 0; - virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0; - virtual size_t accelSize(void) const = 0; - virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, - void *accel_out) = 0; -protected: - const ReportManager &rm; -}; - -class mcclellan_build_strat : public dfa_build_strat { +class mcclellan_build_strat : public accel_dfa_build_strat { public: mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) - : dfa_build_strat(rm_in), rdfa(rdfa_in) {} + : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( std::vector &reports /* out */, std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override; - void buildAccel(dstate_id_t this_idx,const AccelScheme &info, - void *accel_out) override; - virtual u32 max_allowed_offset_accel() const; + u32 max_allowed_offset_accel() const override; + u32 max_stop_char() const override; + u32 max_floating_stop_char() const override; private: raw_dfa &rdfa; @@ -103,7 +76,7 @@ mcclellanCompile(raw_dfa &raw, const CompileContext &cc, /* used internally by mcclellan/haig/gough compile process */ ue2::aligned_unique_ptr -mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, +mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, std::set *accel_states = nullptr); @@ -114,7 +87,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw); std::set all_reports(const raw_dfa &rdfa); -bool has_accel_dfa(const NFA *nfa); +bool has_accel_mcclellan(const NFA *nfa); } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 234574d8..a61a19ab 100644 --- 
a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -337,62 +337,35 @@ size_t hash_dfa(const raw_dfa &rdfa) { } static -bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { - u16 top_remap = raw.alpha_remap[TOP]; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - if (i != top_remap && raw.states[s].next[i] == s) { +bool can_die_early(const raw_dfa &raw, dstate_id_t s, + map &visited, u32 age_limit) { + if (contains(visited, s) && visited[s] >= age_limit) { + /* we have already visited (or are in the process of visiting) here with + * a looser limit. */ + return false; + } + visited[s] = age_limit; + + if (s == DEAD_STATE) { + return true; + } + + if (age_limit == 0) { + return false; + } + + for (const auto &next : raw.states[s].next) { + if (can_die_early(raw, next, visited, age_limit - 1)) { return true; } } + return false; } -dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { - if (raw.start_floating != DEAD_STATE) { - DEBUG_PRINTF("has floating start\n"); - return raw.start_floating; - } - - DEBUG_PRINTF("looking for SDS proxy\n"); - - dstate_id_t s = raw.start_anchored; - - if (has_self_loop(s, raw)) { - return s; - } - - u16 top_remap = raw.alpha_remap[TOP]; - - ue2::unordered_set seen; - while (true) { - seen.insert(s); - DEBUG_PRINTF("basis %hu\n", s); - - /* check if we are connected to a state with a self loop */ - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t t = raw.states[s].next[i]; - if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { - return t; - } - } - - /* find a neighbour to use as a basis for looking for the sds proxy */ - dstate_id_t t = DEAD_STATE; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t tt = raw.states[s].next[i]; - if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { - t = tt; - break; - } - } - - if (t == DEAD_STATE) { - /* we were unable to find a state to use as a SDS proxy */ - return DEAD_STATE; - } - - s = t; - } +bool can_die_early(const raw_dfa &raw, u32 age_limit) { + map visited; + return can_die_early(raw, raw.start_anchored, visited, age_limit); } } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 7b6c033a..554c1efd 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -55,7 +55,7 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa); /** \brief Compute a simple hash of this raw_dfa, including its reports. 
*/ size_t hash_dfa(const raw_dfa &rdfa); -dstate_id_t get_sds_or_proxy(const raw_dfa &raw); +bool can_die_early(const raw_dfa &raw, u32 age_limit); } // namespace ue2 diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index 52711bf1..dcbb0915 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -267,7 +267,8 @@ void dumpDotPreambleDfa(FILE *f) { fprintf(f, "0 [style=invis];\n"); } -void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { +void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == MCCLELLAN_NFA_16); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); @@ -286,7 +287,8 @@ void nfaExecMcClellan16_dumpDot(const NFA *nfa, FILE *f) { fprintf(f, "}\n"); } -void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f) { +void nfaExecMcClellan8_dumpDot(const NFA *nfa, FILE *f, + UNUSED const string &base) { assert(nfa->type == MCCLELLAN_NFA_8); const mcclellan *m = (const mcclellan *)getImplNfa(nfa); diff --git a/src/nfa/mcclellandump.h b/src/nfa/mcclellandump.h index d74a6b6d..efa61544 100644 --- a/src/nfa/mcclellandump.h +++ b/src/nfa/mcclellandump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "rdfa.h" #include +#include struct mcclellan; struct mstate_aux; @@ -42,8 +43,10 @@ union AccelAux; namespace ue2 { -void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file); -void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecMcClellan8_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecMcClellan16_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void nfaExecMcClellan8_dumpText(const struct NFA *nfa, FILE *file); void nfaExecMcClellan16_dumpText(const struct NFA *nfa, FILE *file); diff --git a/src/nfa/mpv.c b/src/nfa/mpv.c index 4bae7b18..c6c8cb88 100644 --- a/src/nfa/mpv.c +++ b/src/nfa/mpv.c @@ -131,7 +131,8 @@ char processReports(const struct mpv *m, u8 *reporters, rl_count++; } - if (cb(report_offset, curr->report, ctxt) == MO_HALT_MATCHING) { + if (cb(0, report_offset, curr->report, ctxt) == + MO_HALT_MATCHING) { DEBUG_PRINTF("bailing\n"); return MO_HALT_MATCHING; } @@ -180,7 +181,7 @@ char processReportsForRange(const struct mpv *m, u8 *reporters, for (size_t i = 2; i <= length; i++) { for (u32 j = 0; j < rl_count; j++) { - if (cb(first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { + if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { DEBUG_PRINTF("bailing\n"); return MO_HALT_MATCHING; } diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h index dc5dad6f..a3f90719 100644 --- a/src/nfa/mpv.h +++ b/src/nfa/mpv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ struct NFA; char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq 
*q); -char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src, #define nfaExecMpv0_testEOD NFA_API_NO_IMPL #define nfaExecMpv0_inAccept NFA_API_NO_IMPL +#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL #define nfaExecMpv0_QR NFA_API_NO_IMPL #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index 504cc677..da21d7cf 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -48,7 +48,8 @@ namespace ue2 { -void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file) { +void nfaExecMpv0_dumpDot(UNUSED const NFA *nfa, UNUSED FILE *file, + UNUSED const std::string &base) { } static really_inline diff --git a/src/nfa/mpv_dump.h b/src/nfa/mpv_dump.h index 5dcd9f8b..23910dce 100644 --- a/src/nfa/mpv_dump.h +++ b/src/nfa/mpv_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,14 @@ #if defined(DUMP_SUPPORT) #include +#include struct NFA; namespace ue2 { -void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file); +void nfaExecMpv0_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); void nfaExecMpv0_dumpText(const struct NFA *nfa, FILE *file); } // namespace ue2 diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp index f1cf2a4c..4c1f8101 100644 --- a/src/nfa/multiaccel_compilehelper.cpp +++ b/src/nfa/multiaccel_compilehelper.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -347,9 +347,9 @@ void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) { } } -MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, u32 off, - unsigned max_len) : - cr(ref_cr), offset(off), max_len(max_len) { +MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, + u32 off, unsigned max_length) + : cr(ref_cr), offset(off), max_len(max_length) { int accel_num = (int) MultibyteAccelInfo::MAT_MAX; accels.resize(accel_num); diff --git a/src/nfa/multishufti_avx2.h b/src/nfa/multishufti_avx2.h index e9980872..042f5570 100644 --- a/src/nfa/multishufti_avx2.h +++ b/src/nfa/multishufti_avx2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #include "ue2common.h" #include "util/bitutils.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" static really_inline const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, diff --git a/src/nfa/multishufti_sse.h b/src/nfa/multishufti_sse.h index 7ea5946d..0a9b543e 100644 --- a/src/nfa/multishufti_sse.h +++ b/src/nfa/multishufti_sse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel 
Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #include "ue2common.h" #include "util/bitutils.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" /* Normal SSSE3 shufti */ diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c index 3af6394a..381bda93 100644 --- a/src/nfa/multitruffle.c +++ b/src/nfa/multitruffle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,6 @@ #include "multitruffle.h" #include "util/bitutils.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" #include "multiaccel_common.h" diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h index 84f5c4a0..e3f7f743 100644 --- a/src/nfa/nfa_api.h +++ b/src/nfa/nfa_api.h @@ -120,6 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, */ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); +/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExec() and just dispatches directly to the nfa implementations. It is + * intended to be used by the Tamarama engine. + */ +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); + +/** Return value indicating that the engine is dead. */ +#define MO_DEAD 0 + /** Return value indicating that the engine is alive. */ #define MO_ALIVE 1 @@ -155,6 +165,13 @@ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); */ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); +/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExecToMatch() and just dispatches directly to the nfa + * implementations. It is intended to be used by the Tamarama engine. + */ +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end); + /** * Report matches at the current queue location. * @@ -175,10 +192,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q); */ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); +/** + * Returns non-zero if the NFA is in any accept state regardless of report + * ID. + */ +char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q); + /** * Process the queued commands on the given NFA up to end or the first match. * - * Note: This version is meant for rose prefix NFAs: + * Note: This version is meant for rose prefix/infix NFAs: * - never uses a callback * - loading of state at a point in history is not special cased * @@ -187,9 +210,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); * end with some variant of end. The location field of the events must * be monotonically increasing. If not all the data was processed during * the call, the queue is updated to reflect the remaining work. - * @param report we are interested in, if set at the end of the scan returns - * @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should - * be passed in. + * @param report we are interested in. If the given report will be raised at + * the end location, the function returns @ref MO_MATCHES_PENDING. If no + * match information is desired, MO_INVALID_IDX should be passed in. 
* @return @ref MO_ALIVE if the nfa is still active with no matches pending, * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not * alive @@ -205,6 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen) * to hbuf (main buffer and history buffer). * + * Note: provides the match location as the "end" offset when the callback is + * called. + * * @param nfa engine to run * @param offset base offset of buf * @param buf main buffer @@ -229,7 +255,6 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, * (including br region) * @param offset the offset to return (via the callback) with each match * @param callback the callback to call for each match raised - * @param som_cb the callback to call for each match raised (Haig) * @param context context pointer passed to each callback * * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise @@ -237,8 +262,7 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, */ char nfaCheckFinalState(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context); + NfaCallback callback, void *context); /** * Indicates if an engine is a zombie. diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c index fb27e4eb..c67103b3 100644 --- a/src/nfa/nfa_api_dispatch.c +++ b/src/nfa/nfa_api_dispatch.c @@ -42,6 +42,8 @@ #include "limex.h" #include "mcclellan.h" #include "mpv.h" +#include "sheng.h" +#include "tamarama.h" #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_subtype, dc_func_call) \ case dc_ltype##_NFA_##dc_subtype: \ @@ -52,41 +54,11 @@ #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ - DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 
512_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ @@ -98,21 +70,22 @@ DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ + DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ } char nfaCheckFinalState(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, SomNfaCallback som_cb, - void *context) { + NfaCallback callback, void *context) { assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); // Caller should avoid calling us if we can never produce matches. assert(nfaAcceptsEod(nfa)); DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback, - som_cb, context)); + context)); return 0; } @@ -135,6 +108,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) { return 0; } +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec_i(nfa, q, end); +} + +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec2_i(nfa, q, end); +} + static really_inline char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); @@ -258,7 +239,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) { assert(q); assert(end >= 0); - assert(q->context); assert(q->state); assert(q->cur < q->end); assert(q->end <= MAX_MQE_LEN); @@ -315,6 +295,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) { return 0; } +char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) { + DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q)); + return 0; +} + char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { DEBUG_PRINTF("nfa=%p\n", nfa); #ifdef DEBUG diff --git a/src/nfa/nfa_api_queue.h b/src/nfa/nfa_api_queue.h index 1373425d..e3579a7e 100644 --- a/src/nfa/nfa_api_queue.h +++ b/src/nfa/nfa_api_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -91,12 +91,12 @@ struct mq { * history buffer; (logically) immediately before the * main buffer */ size_t hlength; /**< length of the history buffer */ + struct hs_scratch *scratch; /**< global scratch space */ char report_current; /**< * report_current matches at starting offset through * callback. 
If true, the queue must be located at a * point where MO_MATCHES_PENDING was returned */ NfaCallback cb; /**< callback to trigger on matches */ - SomNfaCallback som_cb; /**< callback with som info; used by haig */ void *context; /**< context to pass along with a callback */ struct mq_item items[MAX_MQE_LEN]; /**< queue items */ }; diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 2ac0505e..93376b01 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "limex_internal.h" #include "mcclellancompile.h" +#include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" #include "ue2common.h" @@ -78,7 +79,7 @@ struct DISPATCH_BY_NFA_TYPE_INT { decltype(arg), (NFAEngineType)0>::doOp(i, arg) } -typedef bool (*has_accel_fn)(const NFA *nfa); +typedef bool (*nfa_dispatch_fn)(const NFA *nfa); template static @@ -87,8 +88,37 @@ bool has_accel_limex(const NFA *nfa) { return limex->accelCount; } +template static -bool has_accel_generic(const NFA *) { +bool has_repeats_limex(const NFA *nfa) { + const T *limex = (const T *)getImplNfa(nfa); + return limex->repeatCount; +} + + +template +static +bool has_repeats_other_than_firsts_limex(const NFA *nfa) { + const T *limex = (const T *)getImplNfa(nfa); + const char *ptr = (const char *)limex; + + const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset); + + for (u32 i = 0; i < limex->repeatCount; i++) { + u32 offset = repeatOffset[i]; + const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset); + const RepeatInfo *repeat = + (const RepeatInfo *)((const char *)info + sizeof(*info)); + if (repeat->type != REPEAT_FIRST) { + return true; + } + } + + return false; +} + +static +bool dispatch_false(const NFA *) { return false; } @@ -140,72 +170,53 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; #define DO_IF_DUMP_SUPPORT(a) #endif -#define MAKE_LIMEX_TRAITS(mlt_size, mlt_shift) \ - template<> struct NFATraits { \ +#define MAKE_LIMEX_TRAITS(mlt_size) \ + template<> struct NFATraits { \ static UNUSED const char *name; \ static const NFACategory category = NFA_LIMEX; \ typedef LimExNFA##mlt_size implNFA_t; \ typedef u_##mlt_size tableRow_t; \ - static const has_accel_fn has_accel; \ + static const nfa_dispatch_fn has_accel; \ + static const nfa_dispatch_fn has_repeats; \ + static const nfa_dispatch_fn has_repeats_other_than_firsts; \ static const u32 stateAlign = \ MAX(alignof(tableRow_t), alignof(RepeatControl)); \ static const bool fast = mlt_size <= 64; \ }; \ - const has_accel_fn NFATraits::has_accel \ + const nfa_dispatch_fn NFATraits::has_accel \ = has_accel_limex; \ + const nfa_dispatch_fn NFATraits::has_repeats \ + = has_repeats_limex; \ + const nfa_dispatch_fn \ + NFATraits::has_repeats_other_than_firsts \ + = has_repeats_other_than_firsts_limex; \ DO_IF_DUMP_SUPPORT( \ - const char *NFATraits::name \ - = "LimEx (0-"#mlt_shift") "#mlt_size; \ - template<> struct getDescription { \ - static string call(const void *ptr) { \ - return getDescriptionLimEx((const NFA *)ptr); \ + const char *NFATraits::name \ + = "LimEx "#mlt_size; \ + template<> struct getDescription { \ + static string call(const void *ptr) { \ + return getDescriptionLimEx((const NFA *)ptr); \ } \ };) -MAKE_LIMEX_TRAITS(32, 1) 
-MAKE_LIMEX_TRAITS(32, 2) -MAKE_LIMEX_TRAITS(32, 3) -MAKE_LIMEX_TRAITS(32, 4) -MAKE_LIMEX_TRAITS(32, 5) -MAKE_LIMEX_TRAITS(32, 6) -MAKE_LIMEX_TRAITS(32, 7) -MAKE_LIMEX_TRAITS(128, 1) -MAKE_LIMEX_TRAITS(128, 2) -MAKE_LIMEX_TRAITS(128, 3) -MAKE_LIMEX_TRAITS(128, 4) -MAKE_LIMEX_TRAITS(128, 5) -MAKE_LIMEX_TRAITS(128, 6) -MAKE_LIMEX_TRAITS(128, 7) -MAKE_LIMEX_TRAITS(256, 1) -MAKE_LIMEX_TRAITS(256, 2) -MAKE_LIMEX_TRAITS(256, 3) -MAKE_LIMEX_TRAITS(256, 4) -MAKE_LIMEX_TRAITS(256, 5) -MAKE_LIMEX_TRAITS(256, 6) -MAKE_LIMEX_TRAITS(256, 7) -MAKE_LIMEX_TRAITS(384, 1) -MAKE_LIMEX_TRAITS(384, 2) -MAKE_LIMEX_TRAITS(384, 3) -MAKE_LIMEX_TRAITS(384, 4) -MAKE_LIMEX_TRAITS(384, 5) -MAKE_LIMEX_TRAITS(384, 6) -MAKE_LIMEX_TRAITS(384, 7) -MAKE_LIMEX_TRAITS(512, 1) -MAKE_LIMEX_TRAITS(512, 2) -MAKE_LIMEX_TRAITS(512, 3) -MAKE_LIMEX_TRAITS(512, 4) -MAKE_LIMEX_TRAITS(512, 5) -MAKE_LIMEX_TRAITS(512, 6) -MAKE_LIMEX_TRAITS(512, 7) +MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(128) +MAKE_LIMEX_TRAITS(256) +MAKE_LIMEX_TRAITS(384) +MAKE_LIMEX_TRAITS(512) template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 1; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "McClellan 8"; #endif @@ -215,9 +226,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 2; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "McClellan 16"; #endif @@ -227,9 +242,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Goughfish 8"; #endif @@ -239,9 +258,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = 
has_accel_dfa; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Goughfish 16"; #endif @@ -251,9 +274,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Mega-Puff-Vac"; #endif @@ -263,9 +290,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Castle"; #endif @@ -275,9 +306,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Lim Bounded Repeat (D)"; #endif @@ -287,9 +322,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Lim Bounded Repeat (V)"; #endif @@ -299,9 +338,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const 
nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Lim Bounded Repeat (NV)"; #endif @@ -311,9 +354,13 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Lim Bounded Repeat (S)"; #endif @@ -323,13 +370,49 @@ template<> struct NFATraits { static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; static const bool fast = true; - static const has_accel_fn has_accel; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const has_accel_fn NFATraits::has_accel = has_accel_generic; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits::name = "Lim Bounded Repeat (M)"; #endif +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Sheng"; +#endif + +template<> struct NFATraits { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 32; + static const bool fast = true; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits::name = "Tamarama"; +#endif + } // namespace #if defined(DUMP_SUPPORT) @@ -380,42 +463,39 @@ struct is_limex { }; } +namespace { +template +struct has_repeats_other_than_firsts_dispatch { + static nfa_dispatch_fn call(const void *) { + return NFATraits::has_repeats_other_than_firsts; + } +}; +} + bool has_bounded_repeats_other_than_firsts(const NFA &nfa) { - if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) { - return false; + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, + has_repeats_other_than_firsts_dispatch, + &nfa)(&nfa); +} + +namespace { +template +struct has_repeats_dispatch { + static 
nfa_dispatch_fn call(const void *) { + return NFATraits::has_repeats; } - - const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa); - const char *ptr = (const char *)limex; - - const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset); - - for (u32 i = 0; i < limex->repeatCount; i++) { - u32 offset = repeatOffset[i]; - const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset); - const RepeatInfo *repeat = - (const RepeatInfo *)((const char *)info + sizeof(*info)); - if (repeat->type != REPEAT_FIRST) { - return true; - } - } - - return false; +}; } bool has_bounded_repeats(const NFA &nfa) { - if (!DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa)) { - return false; - } - - const LimExNFABase *limex = (const LimExNFABase *)getImplNfa(&nfa); - return limex->repeatCount; + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch, + &nfa)(&nfa); } namespace { template struct has_accel_dispatch { - static has_accel_fn call(const void *) { + static nfa_dispatch_fn call(const void *) { return NFATraits::has_accel; } }; @@ -423,8 +503,7 @@ struct has_accel_dispatch { bool has_accel(const NFA &nfa) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch, - &nfa) - (&nfa); + &nfa)(&nfa); } bool requires_decompress_key(const NFA &nfa) { diff --git a/src/nfa/nfa_dump_api.h b/src/nfa/nfa_dump_api.h index 8675dd5d..1054a204 100644 --- a/src/nfa/nfa_dump_api.h +++ b/src/nfa/nfa_dump_api.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #if defined(DUMP_SUPPORT) #include +#include struct NFA; @@ -45,7 +46,7 @@ namespace ue2 { * \brief Dump (in Graphviz 'dot' format) a representation of the NFA into the * file pointed to by dotFile. */ -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile); +void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, const std::string &base); /** \brief Dump a textual representation of the NFA. 
*/ void nfaDumpText(const struct NFA *fact, FILE *textFile); diff --git a/src/nfa/nfa_dump_dispatch.cpp b/src/nfa/nfa_dump_dispatch.cpp index 4a59dc1e..388ac003 100644 --- a/src/nfa/nfa_dump_dispatch.cpp +++ b/src/nfa/nfa_dump_dispatch.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,8 @@ #include "limex.h" #include "mcclellandump.h" #include "mpv_dump.h" +#include "shengdump.h" +#include "tamarama_dump.h" #ifndef DUMP_SUPPORT #error "no dump support" @@ -57,41 +59,11 @@ namespace ue2 { #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ DEBUG_PRINTF("dispatch for NFA type %u\n", nfa->type); \ switch (nfa->type) { \ - DISPATCH_CASE(LIMEX, LimEx, 32_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 32_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 128_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 256_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 384_7, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_1, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_2, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_3, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_4, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_5, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_6, dbnt_func); \ - DISPATCH_CASE(LIMEX, LimEx, 512_7, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 32, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 128, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 256, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 384, dbnt_func); \ + DISPATCH_CASE(LIMEX, LimEx, 512, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 8, dbnt_func); \ DISPATCH_CASE(MCCLELLAN, McClellan, 16, dbnt_func); \ DISPATCH_CASE(GOUGH, Gough, 8, dbnt_func); \ @@ -103,12 +75,15 @@ namespace ue2 { DISPATCH_CASE(LBR, Lbr, Shuf, dbnt_func); \ DISPATCH_CASE(LBR, Lbr, Truf, dbnt_func); \ DISPATCH_CASE(CASTLE, Castle, 0, dbnt_func); \ + DISPATCH_CASE(SHENG, Sheng, 0, dbnt_func); \ + DISPATCH_CASE(TAMARAMA, Tamarama, 0, dbnt_func); \ default: \ assert(0); \ } -void nfaDumpDot(const struct NFA *nfa, FILE *dotFile) { - DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile)); +void nfaDumpDot(const struct NFA *nfa, FILE *dotFile, + const std::string &base) { + DISPATCH_BY_NFA_TYPE(_dumpDot(nfa, dotFile, base)); 
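/* A sketch of the dispatch being simplified here: DISPATCH_BY_NFA_TYPE
 * switches on nfa->type, and each DISPATCH_CASE token-pastes the engine
 * family and subtype into the matching dump entry point. The exact macro
 * spelling is not shown in this hunk, but a LimEx case now reads roughly:
 *
 *     case LIMEX_NFA_128:
 *         nfaExecLimEx128_dumpDot(nfa, dotFile, base);
 *         break;
 *
 * so only five LimEx cases remain, one per state size, rather than one per
 * (size, shift) variant. */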
} void nfaDumpText(const struct NFA *nfa, FILE *txtFile) { diff --git a/src/nfa/nfa_internal.h b/src/nfa/nfa_internal.h index 089e9683..41fee73e 100644 --- a/src/nfa/nfa_internal.h +++ b/src/nfa/nfa_internal.h @@ -51,41 +51,11 @@ extern "C" // Common data structures for NFAs enum NFAEngineType { - LIMEX_NFA_32_1, - LIMEX_NFA_32_2, - LIMEX_NFA_32_3, - LIMEX_NFA_32_4, - LIMEX_NFA_32_5, - LIMEX_NFA_32_6, - LIMEX_NFA_32_7, - LIMEX_NFA_128_1, - LIMEX_NFA_128_2, - LIMEX_NFA_128_3, - LIMEX_NFA_128_4, - LIMEX_NFA_128_5, - LIMEX_NFA_128_6, - LIMEX_NFA_128_7, - LIMEX_NFA_256_1, - LIMEX_NFA_256_2, - LIMEX_NFA_256_3, - LIMEX_NFA_256_4, - LIMEX_NFA_256_5, - LIMEX_NFA_256_6, - LIMEX_NFA_256_7, - LIMEX_NFA_384_1, - LIMEX_NFA_384_2, - LIMEX_NFA_384_3, - LIMEX_NFA_384_4, - LIMEX_NFA_384_5, - LIMEX_NFA_384_6, - LIMEX_NFA_384_7, - LIMEX_NFA_512_1, - LIMEX_NFA_512_2, - LIMEX_NFA_512_3, - LIMEX_NFA_512_4, - LIMEX_NFA_512_5, - LIMEX_NFA_512_6, - LIMEX_NFA_512_7, + LIMEX_NFA_32, + LIMEX_NFA_128, + LIMEX_NFA_256, + LIMEX_NFA_384, + LIMEX_NFA_512, MCCLELLAN_NFA_8, /**< magic pseudo nfa */ MCCLELLAN_NFA_16, /**< magic pseudo nfa */ GOUGH_NFA_8, /**< magic pseudo nfa */ @@ -97,6 +67,8 @@ enum NFAEngineType { LBR_NFA_Shuf, /**< magic pseudo nfa */ LBR_NFA_Truf, /**< magic pseudo nfa */ CASTLE_NFA_0, /**< magic pseudo nfa */ + SHENG_NFA_0, /**< magic pseudo nfa */ + TAMARAMA_NFA_0, /**< magic nfa container */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -175,50 +147,27 @@ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; } -/** \brief True if the given type (from NFA::type) is a McClellan or Gough DFA. - * */ +/** \brief True if the given type (from NFA::type) is a Sheng DFA. */ +static really_inline int isShengType(u8 t) { + return t == SHENG_NFA_0; +} + +/** + * \brief True if the given type (from NFA::type) is a McClellan, Gough or + * Sheng DFA. + */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t); } /** \brief True if the given type (from NFA::type) is an NFA. */ static really_inline int isNfaType(u8 t) { switch (t) { - case LIMEX_NFA_32_1: - case LIMEX_NFA_32_2: - case LIMEX_NFA_32_3: - case LIMEX_NFA_32_4: - case LIMEX_NFA_32_5: - case LIMEX_NFA_32_6: - case LIMEX_NFA_32_7: - case LIMEX_NFA_128_1: - case LIMEX_NFA_128_2: - case LIMEX_NFA_128_3: - case LIMEX_NFA_128_4: - case LIMEX_NFA_128_5: - case LIMEX_NFA_128_6: - case LIMEX_NFA_128_7: - case LIMEX_NFA_256_1: - case LIMEX_NFA_256_2: - case LIMEX_NFA_256_3: - case LIMEX_NFA_256_4: - case LIMEX_NFA_256_5: - case LIMEX_NFA_256_6: - case LIMEX_NFA_256_7: - case LIMEX_NFA_384_1: - case LIMEX_NFA_384_2: - case LIMEX_NFA_384_3: - case LIMEX_NFA_384_4: - case LIMEX_NFA_384_5: - case LIMEX_NFA_384_6: - case LIMEX_NFA_384_7: - case LIMEX_NFA_512_1: - case LIMEX_NFA_512_2: - case LIMEX_NFA_512_3: - case LIMEX_NFA_512_4: - case LIMEX_NFA_512_5: - case LIMEX_NFA_512_6: - case LIMEX_NFA_512_7: + case LIMEX_NFA_32: + case LIMEX_NFA_128: + case LIMEX_NFA_256: + case LIMEX_NFA_384: + case LIMEX_NFA_512: return 1; default: break; @@ -233,6 +182,12 @@ int isLbrType(u8 t) { t == LBR_NFA_Shuf || t == LBR_NFA_Truf; } +/** \brief True if the given type (from NFA::type) is a container engine. 
*/ +static really_inline +int isContainerType(u8 t) { + return t == TAMARAMA_NFA_0; +} + static really_inline int isMultiTopType(u8 t) { return !isDfaType(t) && !isLbrType(t); diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h index 46d0bc4c..f2ac6189 100644 --- a/src/nfa/nfa_kind.h +++ b/src/nfa/nfa_kind.h @@ -37,6 +37,8 @@ #include "ue2common.h" +#include + namespace ue2 { /** \brief Specify the use-case for an nfa engine. */ @@ -47,6 +49,7 @@ enum nfa_kind { NFA_OUTFIX, //!< "outfix" nfa not triggered by external events NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports NFA_REV_PREFIX, //! reverse running prefixes (for som) + NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches }; /** \brief True if this kind of engine is triggered by a top event. */ @@ -63,8 +66,10 @@ bool is_triggered(enum nfa_kind k) { } /** - * \brief True if this kind of engine generates callback events when it - * enters accept states. + * \brief True if this kind of engine actively checks for accept + * states, either to halt matching or to raise a callback. Only engines + * generated with this property should call nfaQueueExec() or + * nfaQueueExecToMatch(). */ inline bool generates_callbacks(enum nfa_kind k) { @@ -73,6 +78,24 @@ bool generates_callbacks(enum nfa_kind k) { case NFA_OUTFIX: case NFA_OUTFIX_RAW: case NFA_REV_PREFIX: + case NFA_EAGER_PREFIX: + return true; + default: + return false; + } +} + +/** + * \brief True if this kind of engine has its state inspected to see if it is in + * an accept state. Engines generated with this property will commonly call + * nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState(). + */ +inline +bool inspects_states_for_accepts(enum nfa_kind k) { + switch (k) { + case NFA_PREFIX: + case NFA_INFIX: + case NFA_EAGER_PREFIX: + return true; + default: + return false; @@ -94,6 +117,32 @@ bool has_managed_reports(enum nfa_kind k) { } } +#if defined(DEBUG) || defined(DUMP_SUPPORT) + +inline +std::string to_string(nfa_kind k) { + switch (k) { + case NFA_PREFIX: + return "PREFIX"; + case NFA_INFIX: + return "INFIX"; + case NFA_SUFFIX: + return "SUFFIX"; + case NFA_OUTFIX: + return "OUTFIX"; + case NFA_REV_PREFIX: + return "REV_PREFIX"; + case NFA_OUTFIX_RAW: + return "OUTFIX_RAW"; + case NFA_EAGER_PREFIX: + return "EAGER_PREFIX"; + } + assert(0); + return "?"; +} + +#endif + } // namespace ue2 #endif diff --git a/src/nfa/sheng.c b/src/nfa/sheng.c new file mode 100644 index 00000000..bbbf1f20 --- /dev/null +++ b/src/nfa/sheng.c @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sheng.h" + +#include "accel.h" +#include "sheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/join.h" +#include "util/simd_utils.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct sheng *get_sheng(const struct NFA *n) { + return (const struct sheng *)getImplNfa(n); +} + +static really_inline +const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { + u32 offset = sh->aux_offset - sizeof(struct NFA) + + (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); + DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", + id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); + return (const struct sstate_aux *)((const char *) sh + offset); +} + +static really_inline +const union AccelAux *get_accel(const struct sheng *sh, u8 id) { + const struct sstate_aux *saux = get_aux(sh, id); + DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); + const union AccelAux *aux = (const union AccelAux *) + ((const char *)sh + saux->accel - sizeof(struct NFA)); + return aux; +} + +static really_inline +const struct report_list *get_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept - sizeof(struct NFA)); +} + +static really_inline +const struct report_list *get_eod_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); +} + +static really_inline +char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, + ReportID report) { + assert(sh && aux); + + const struct report_list *rl = get_rl(sh, aux); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return 1; + } + } + + return 0; +} + +static really_inline +char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { + DEBUG_PRINTF("reporting %u\n", r); + if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +static really_inline +char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, + const u8 state, u64a loc, u8 *const cached_accept_state, + ReportID *const cached_accept_id, char eod) { + DEBUG_PRINTF("reporting matches @ 
%llu\n", loc); + + if (!eod && state == *cached_accept_state) { + DEBUG_PRINTF("reporting %u\n", *cached_accept_id); + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + const struct sstate_aux *aux = get_aux(sh, state); + const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = state; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +/* include Sheng function definitions */ +#include "sheng_defs.h" + +static really_inline +char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan and report all matches */ + if (can_die) { + if (has_accel) { + rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } else { + if (has_accel) { + rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + return MO_ALIVE; +} + +static really_inline +void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + /* just scan the buffer */ + if (can_die) { + if 
(has_accel) { + sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } else { + sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } + sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } else { + sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, start, end, scanned); + sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } +} + +static really_inline +char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, u8 has_accel, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan until first match */ + if (can_die) { + if (has_accel) { + rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, + end, scanned); + } else { + if (has_accel) { + rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + return MO_ALIVE; +} + +static never_inline +char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, + enum MatchMode mode) { + u8 state = *(u8 *)q->state; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + DEBUG_PRINTF("starting Sheng execution in state %u\n", + state & SHENG_STATE_MASK); + + if (q->report_current) { + DEBUG_PRINTF("reporting current pending matches\n"); + assert(sh); + + q->report_current = 0; + + int rv; + if (single) { + rv = fireSingleReport(q->cb, q->context, sh->report, + q_cur_offset(q)); + } else { + rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q), + &cached_accept_state, &cached_accept_id, 0); + } + 
if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("proceeding with matching\n"); + } + + assert(q_cur_type(q) == MQE_START); + s64a start = q_cur_loc(q); + + DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, + mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : + mode == NO_MATCHES ? "NO MATCHES" : + mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); + + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + + const u8* cur_buf; + if (start < 0) { + DEBUG_PRINTF("negative location, scanning history\n"); + DEBUG_PRINTF("min location: %zd\n", -q->hlength); + cur_buf = q->history + q->hlength; + } else { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max location: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->items[q->cur].location = b_end; + return MO_ALIVE; + } + + q->cur++; + + s64a cur_start = start; + + while (1) { + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + s64a end = q_cur_loc(q); + if (mode != NO_MATCHES) { + end = MIN(end, b_end); + } + assert(end <= (s64a) q->length); + s64a cur_end = end; + + /* we may cross the border between history and current buffer */ + if (cur_start < 0) { + cur_end = MIN(0, cur_end); + } + + DEBUG_PRINTF("start: %lli end: %lli\n", start, end); + + /* don't scan zero length buffer */ + if (cur_start != cur_end) { + const u8 * scanned = cur_buf; + char rv; + + /* if we're in nomatch mode or if we're scanning history buffer */ + if (mode == NO_MATCHES || + (cur_start < 0 && mode == CALLBACK_OUTPUT)) { + runShengNm(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, cur_buf, + cur_buf + cur_start, cur_buf + cur_end, can_die, + has_accel, single, &scanned, &state); + } else if (mode == CALLBACK_OUTPUT) { + rv = runShengCb(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, cur_buf + cur_end, + can_die, has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + } else if (mode == STOP_AT_MATCH) { + rv = runShengSam(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, + cur_buf + cur_end, can_die, has_accel, single, + &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } else if (rv == MO_MATCHES_PENDING) { + assert(q->cur); + DEBUG_PRINTF("found a match, setting q location to %zd\n", + scanned - cur_buf + 1); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = + scanned - cur_buf + 1; /* due to exiting early */ + *(u8 *)q->state = state; + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } + } else { + assert(!"invalid scanning mode!"); + } + assert(scanned == cur_buf + cur_end); + + cur_start = cur_end; + } + + 
/* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = b_end; + *(u8 *)q->state = state; + return MO_ALIVE; + } + + /* crossing over into actual buffer */ + if (cur_start == 0) { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max offset: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* continue scanning the same buffer */ + if (end != cur_end) { + continue; + } + + switch (q_cur_type(q)) { + case MQE_END: + *(u8 *)q->state = state; + q->cur++; + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + if (can_die) { + return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE; + } + return MO_ALIVE; + case MQE_TOP: + if (q->offset + cur_start == 0) { + DEBUG_PRINTF("Anchored start, going to state %u\n", + sh->anchored); + state = sh->anchored; + } else { + u8 new_state = get_aux(sh, state)->top; + DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK, + new_state & SHENG_STATE_MASK); + state = new_state; + } + break; + default: + assert(!"invalid queue event"); + break; + } + q->cur++; + } +} + +char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + DEBUG_PRINTF("smallwrite Sheng\n"); + assert(n->type == SHENG_NFA_0); + const struct sheng *sh = getImplNfa(n); + u8 state = sh->anchored; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + /* scan and report all matches */ + int rv; + s64a end = length; + const u8 *scanned; + + rv = runShengCb(sh, cb, context, offset, &cached_accept_state, + &cached_accept_id, buffer, buffer, buffer + end, can_die, + has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK); + + const struct sstate_aux *aux = get_aux(sh, state); + + if (aux->accept_eod) { + DEBUG_PRINTF("Reporting EOD matches\n"); + fireReports(sh, cb, context, state, end + offset, &cached_accept_state, + &cached_accept_id, 1); + } + + return state & SHENG_STATE_DEAD ? 
MO_DEAD : MO_ALIVE; +} + +char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); + return rv; +} + +char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, STOP_AT_MATCH); + return rv; +} + +char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report) { + assert(q_cur_type(q) == MQE_START); + + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); + + if (rv && nfaExecSheng0_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } + return rv; +} + +char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept) { + return 0; + } + + return shengHasAccept(sh, aux, report); +} + +char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + return !!aux->accept; +} + +char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { + assert(nfa); + + const struct sheng *sh = get_sheng(nfa); + u8 s = *(const u8 *)state; + DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + + return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); +} + +char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q) { + const struct sheng *sh = (const struct sheng *)getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + const struct sstate_aux *aux = get_aux(sh, s); + u64a offset = q_cur_offset(q); + u8 cached_state_id = 0; + ReportID cached_report_id = 0; + assert(q_cur_type(q) == MQE_START); + + if (aux->accept) { + if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { + fireSingleReport(cb, ctxt, sh->report, offset); + } else { + fireReports(sh, cb, ctxt, s, offset, &cached_state_id, + &cached_report_id, 1); + } + } + + return 0; +} + +char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct sheng *sh = get_sheng(nfa); + u8 *s = (u8 *)state; + *s = offset ? 
sh->floating: sh->anchored; + return !(*s & SHENG_STATE_DEAD); +} + +char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + + /* starting in floating state */ + const struct sheng *sh = get_sheng(nfa); + *(u8 *)q->state = sh->floating; + DEBUG_PRINTF("starting in floating state\n"); + return 0; +} + +char nfaExecSheng0_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecSheng0_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} diff --git a/src/nfa/sheng.h b/src/nfa/sheng.h new file mode 100644 index 00000000..46ead180 --- /dev/null +++ b/src/nfa/sheng.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SHENG_H_ +#define SHENG_H_ + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +#define nfaExecSheng0_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng0_zombie_status NFA_API_ZOMBIE_NO_IMPL + +char nfaExecSheng0_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng0_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng0_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng0_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecSheng0_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng0_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng0_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecSheng0_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng0_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng0_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng0_reportCurrent(const struct NFA *n, struct mq *q); + +char nfaExecSheng0_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +#endif /* SHENG_H_ */ diff --git a/src/nfa/sheng_defs.h b/src/nfa/sheng_defs.h new file mode 100644 index 00000000..26bdbcee --- /dev/null +++ b/src/nfa/sheng_defs.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SHENG_DEFS_H +#define SHENG_DEFS_H + +/* + * Utility functions used by various versions of Sheng engine + */ +static really_inline +u8 isDeadState(const u8 a) { + return a & SHENG_STATE_DEAD; +} + +static really_inline +u8 isAcceptState(const u8 a) { + return a & SHENG_STATE_ACCEPT; +} + +static really_inline +u8 isAccelState(const u8 a) { + return a & SHENG_STATE_ACCEL; +} + +static really_inline +u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { + return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); +} + +/* these functions should be optimized out, used by NO_MATCHES mode */ +static really_inline +u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c, + UNUSED const u8 d) { + return 0; +} + +static really_inline +u8 dummyFunc(UNUSED const u8 a) { + return 0; +} + +/* + * Sheng function definitions for single byte loops + */ +/* callback output, can die */ +#define SHENG_IMPL sheng_cod +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can't die */ +#define SHENG_IMPL sheng_co +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die */ +#define SHENG_IMPL sheng_samd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die */ +#define SHENG_IMPL sheng_sam +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can die */ +#define SHENG_IMPL sheng_nmd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can't die */ +#define SHENG_IMPL sheng_nm +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* + * Sheng function definitions for 4-byte loops + */ +/* callback output, can die, accelerated */ +#define SHENG_IMPL sheng4_coda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can die, not accelerated */ +#define SHENG_IMPL sheng4_cod +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef 
STOP_AT_MATCH + +/* callback output, can't die, accelerated */ +#define SHENG_IMPL sheng4_coa +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* callback output, can't die, not accelerated */ +#define SHENG_IMPL sheng4_co +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die, accelerated */ +#define SHENG_IMPL sheng4_samda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can die, not accelerated */ +#define SHENG_IMPL sheng4_samd +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die, accelerated */ +#define SHENG_IMPL sheng4_sama +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* stop at match, can't die, not accelerated */ +#define SHENG_IMPL sheng4_sam +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no-match have interesting func as dummy, and die/accel checks are outer */ + +/* no match, can die, accelerated */ +#define SHENG_IMPL sheng4_nmda +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC 
dummyFunc +#define OUTER_ACCEL_FUNC isAccelState +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* no match, can die, not accelerated */ +#define SHENG_IMPL sheng4_nmd +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +/* there is no performance benefit in accelerating a no-match case that can't + * die */ + +/* no match, can't die */ +#define SHENG_IMPL sheng4_nm +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC +#undef STOP_AT_MATCH + +#endif // SHENG_DEFS_H diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h new file mode 100644 index 00000000..fc3e54aa --- /dev/null +++ b/src/nfa/sheng_impl.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * In order to use this macro, the following things need to be defined: + * + * - SHENG_IMPL (name of the Sheng implementation function) + * - DEAD_FUNC (name of the function checking for dead states) + * - ACCEPT_FUNC (name of the function checking for accept state) + * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) + */ + +/* byte-by-byte version. we don't do byte-by-byte death checking as it's + * pretty pointless to do it over a buffer that's at most 3 bytes long */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFA execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + if (DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(cur_buf != end)) { + const u8 c = *cur_buf; + const m128 shuffle_mask = masks[c]; + cur_state = pshufb(shuffle_mask, cur_state); + const u8 tmp = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4, + tmp & 0xF); + + if (unlikely(ACCEPT_FUNC(tmp))) { + DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK); + u64a match_offset = base_offset + (cur_buf - buf) + 1; + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (u64a)(cur_buf - start)); + *state = tmp; + *scan_end = cur_buf; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, tmp, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + cur_buf++; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h new file mode 100644 index 00000000..2561e52d --- /dev/null +++ b/src/nfa/sheng_impl4.h @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
 IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * In order to use this macro, the following things need to be defined:
+ *
+ * - SHENG_IMPL (name of the Sheng implementation function)
+ * - INTERESTING_FUNC (name of the function checking for accept, accel or dead
+ *   states)
+ * - INNER_DEAD_FUNC (name of the inner function checking for dead states)
+ * - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
+ * - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
+ * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
+ * - ACCEPT_FUNC (name of the function checking for accept state)
+ * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
+ */
+
+/* unrolled 4-byte-at-a-time version.
+ *
+ * we put innerDeadFunc inside the interestingFunc() block so that we don't
+ * pay for dead state checking. however, if interestingFunc is a dummy,
+ * innerDeadFunc gets lost with it, so we need an additional check outside the
+ * interestingFunc() branch - it's normally a dummy so we don't pay for it,
+ * but when interestingFunc is a dummy, outerDeadFunc should be set if we want
+ * to check for dead states.
+ *
+ * also, the dead-state functions only check the last known state, but since
+ * we can never get out of the dead state and we don't really care where we
+ * died, it's not a problem.
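+ *
+ * e.g. the "no match, can die, not accelerated" variant in sheng_defs.h
+ * (sheng4_nmd) sets INTERESTING_FUNC to dummyFunc4 and OUTER_DEAD_FUNC to
+ * isDeadState, with every other hook defined as dummyFunc, before including
+ * this file.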
+ */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + const u8 *min_accel_dist = start; + base_offset++; + DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); + + if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) { + DEBUG_PRINTF("Accel state reached @ 0\n"); + const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf, end); + if (new_offset < cur_buf + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); + } + if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(end - cur_buf >= 4)) { + const u8 *b1 = cur_buf; + const u8 *b2 = cur_buf + 1; + const u8 *b3 = cur_buf + 2; + const u8 *b4 = cur_buf + 3; + const u8 c1 = *b1; + const u8 c2 = *b2; + const u8 c3 = *b3; + const u8 c4 = *b4; + + const m128 shuffle_mask1 = masks[c1]; + cur_state = pshufb(shuffle_mask1, cur_state); + const u8 a1 = movd(cur_state); + + const m128 shuffle_mask2 = masks[c2]; + cur_state = pshufb(shuffle_mask2, cur_state); + const u8 a2 = movd(cur_state); + + const m128 shuffle_mask3 = masks[c3]; + cur_state = pshufb(shuffle_mask3, cur_state); + const u8 a3 = movd(cur_state); + + const m128 shuffle_mask4 = masks[c4]; + cur_state = pshufb(shuffle_mask4, cur_state); + const u8 a4 = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? 
c4 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF); + + if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) { + if (ACCEPT_FUNC(a1)) { + u64a match_offset = base_offset + b1 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a1 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b1 - start)); + *scan_end = b1; + *state = a1; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a1, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a2)) { + u64a match_offset = base_offset + b2 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a2 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b2 - start)); + *scan_end = b2; + *state = a2; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a2, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a3)) { + u64a match_offset = base_offset + b3 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a3 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b3 - start)); + *scan_end = b3; + *state = a3; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a3, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a4)) { + u64a match_offset = base_offset + b4 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a4 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b4 - start)); + *scan_end = b4; + *state = a4; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a4, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (INNER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = + get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + 
continue; + } + } + if (OUTER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + }; + if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + }; + cur_buf += 4; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} diff --git a/src/nfa/limex_simd512c.c b/src/nfa/sheng_internal.h similarity index 65% rename from src/nfa/limex_simd512c.c rename to src/nfa/sheng_internal.h index 0918fca5..046eb759 100644 --- a/src/nfa/limex_simd512c.c +++ b/src/nfa/sheng_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,44 +26,45 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief LimEx NFA: 512-bit SIMD runtime implementations. - */ +#ifndef SHENG_INTERNAL_H_ +#define SHENG_INTERNAL_H_ -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" #include "ue2common.h" -#include "util/bitutils.h" #include "util/simd_utils.h" -// Common code -#include "limex_runtime.h" +#define SHENG_STATE_ACCEPT 0x10 +#define SHENG_STATE_DEAD 0x20 +#define SHENG_STATE_ACCEL 0x40 +#define SHENG_STATE_MASK 0xF +#define SHENG_STATE_FLAG_MASK 0x70 -#define SIZE 512 -#define STATE_T m512 -#include "limex_exceptional.h" +#define SHENG_FLAG_SINGLE_REPORT 0x1 +#define SHENG_FLAG_CAN_DIE 0x2 +#define SHENG_FLAG_HAS_ACCEL 0x4 -#define SIZE 512 -#define STATE_T m512 -#include "limex_state_impl.h" +struct report_list { + u32 count; + ReportID report[]; +}; -#define SIZE 512 -#define STATE_T m512 -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" +struct sstate_aux { + u32 accept; + u32 accept_eod; + u32 accel; + u32 top; +}; -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 6 -#include "limex_runtime_impl.h" +struct sheng { + m128 shuffle_masks[256]; + u32 length; + u32 aux_offset; + u32 report_offset; + u32 accel_offset; + u8 n_states; + u8 anchored; + u8 floating; + u8 flags; + ReportID report; +}; -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 7 -#include "limex_runtime_impl.h" +#endif /* SHENG_INTERNAL_H_ */ diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp new file mode 100644 index 00000000..911f6d70 --- /dev/null +++ b/src/nfa/shengcompile.cpp @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "shengcompile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/container.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/unaligned.h" + +#include "grey.h" +#include "nfa_internal.h" +#include "sheng_internal.h" +#include "ue2common.h" +#include "util/compile_context.h" +#include "util/make_unique.h" +#include "util/verify_types.h" +#include "util/simd_utils.h" + +#include +#include +#include + +#include + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + +struct dfa_info { + accel_dfa_build_strat &strat; + raw_dfa &raw; + vector &states; + dstate &floating; + dstate &anchored; + bool can_die; + + explicit dfa_info(accel_dfa_build_strat &s) + : strat(s), raw(strat.get_raw()), states(raw.states), + floating(states[raw.start_floating]), + anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {} + + // returns adjusted size + size_t size() const { + return can_die ? states.size() : states.size() - 1; + } + // expects adjusted index + dstate &operator[](dstate_id_t idx) { + return states[raw_id(idx)]; + } + dstate &top(dstate_id_t idx) { + if (isDead(idx)) { + return floating; + } + return next(idx, TOP); + } + dstate &next(dstate_id_t idx, u16 chr) { + auto &src = (*this)[idx]; + auto next_id = src.next[raw.alpha_remap[chr]]; + return states[next_id]; + } + // get original idx from adjusted idx + dstate_id_t raw_id(dstate_id_t idx) { + assert(idx < size()); + // if DFA can't die, shift all indices left by 1 + return can_die ? 
idx : idx + 1; + } + bool isDead(dstate &state) { + return raw_id(state.impl_id) == DEAD_STATE; + } + bool isDead(dstate_id_t idx) { + return raw_id(idx) == DEAD_STATE; + } + +private: + static bool dfaCanDie(raw_dfa &rdfa) { + for (unsigned chr = 0; chr < 256; chr++) { + for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { + auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; + if (succ == DEAD_STATE) { + return true; + } + } + } + return false; + } +}; + +namespace { + +struct raw_report_list { + flat_set reports; + + raw_report_list(const flat_set &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } + + bool operator<(const raw_report_list &b) const { + return reports < b.reports; + } +}; + +struct raw_report_info_impl : public raw_report_info { + vector rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + std::vector &ro /* out */) const override; +}; +} + +u32 raw_report_info_impl::getReportListSize() const { + u32 rv = 0; + + for (const auto &reps : rl) { + rv += sizeof(report_list); + rv += sizeof(ReportID) * reps.reports.size(); + } + + return rv; +} + +size_t raw_report_info_impl::size() const { + return rl.size(); +} + +void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector &ro) const { + for (const auto &reps : rl) { + ro.push_back(base_offset); + + report_list *p = (report_list *)((char *)n + base_offset); + + u32 i = 0; + for (const ReportID report : reps.reports) { + p->report[i++] = report; + } + p->count = verify_u32(reps.reports.size()); + + base_offset += sizeof(report_list); + base_offset += sizeof(ReportID) * reps.reports.size(); + } +} + +unique_ptr sheng_build_strat::gatherReports( + vector &reports, + vector &reports_eod, + u8 *isSingleReport, + ReportID *arbReport) const { + DEBUG_PRINTF("gathering reports\n"); + + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = ue2::make_unique(); + map rev; + + for (const dstate &s : rdfa.states) { + if (s.reports.empty()) { + reports.push_back(MO_INVALID_IDX); + continue; + } + + raw_report_list rrl(s.reports, rm, remap_reports); + DEBUG_PRINTF("non empty r\n"); + if (rev.find(rrl) != rev.end()) { + reports.push_back(rev[rrl]); + } else { + DEBUG_PRINTF("adding to rl %zu\n", ri->size()); + rev[rrl] = ri->size(); + reports.push_back(ri->size()); + ri->rl.push_back(rrl); + } + } + + for (const dstate &s : rdfa.states) { + if (s.reports_eod.empty()) { + reports_eod.push_back(MO_INVALID_IDX); + continue; + } + + DEBUG_PRINTF("non empty r eod\n"); + raw_report_list rrl(s.reports_eod, rm, remap_reports); + if (rev.find(rrl) != rev.end()) { + reports_eod.push_back(rev[rrl]); + continue; + } + + DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); + rev[rrl] = ri->size(); + reports_eod.push_back(ri->size()); + ri->rl.push_back(rrl); + } + + assert(!ri->rl.empty()); /* all components should be able to generate + reports */ + if (!ri->rl.empty()) { + *arbReport = *ri->rl.begin()->reports.begin(); + } else { + *arbReport = 0; + } + + /* if we have only a single report id generated from all accepts (not eod) + * we can take some short cuts */ + set reps; + + for (u32 rl_index : reports) { + if (rl_index == MO_INVALID_IDX) { + continue; + } + assert(rl_index < ri->size()); + insert(&reps, ri->rl[rl_index].reports); + } + + if (reps.size() == 1) 
{ + *isSingleReport = 1; + *arbReport = *reps.begin(); + DEBUG_PRINTF("single -- %u\n", *arbReport); + } else { + *isSingleReport = 0; + } + + return move(ri); +} + +u32 sheng_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; +} + +u32 sheng_build_strat::max_stop_char() const { + return ACCEL_DFA_MAX_STOP_CHAR; +} + +u32 sheng_build_strat::max_floating_stop_char() const { + return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; +} + +size_t sheng_build_strat::accelSize() const { + return sizeof(AccelAux); +} + +#ifdef DEBUG +static really_inline +void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { + stringstream o; + + for (unsigned i = 0; i < sz; i++) { + o.width(2); + o << (buf[i] & SHENG_STATE_MASK) << " "; + } + DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); +} +#endif + +static +void fillAccelOut(const map &accel_escape_info, + set *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + +static +u8 getShengState(dstate &state, dfa_info &info, + map &accelInfo) { + u8 s = state.impl_id; + if (!state.reports.empty()) { + s |= SHENG_STATE_ACCEPT; + } + if (info.isDead(state)) { + s |= SHENG_STATE_DEAD; + } + if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { + s |= SHENG_STATE_ACCEL; + } + return s; +} + +static +void fillAccelAux(struct NFA *n, dfa_info &info, + map &accelInfo) { + DEBUG_PRINTF("Filling accel aux structures\n"); + sheng *s = (sheng *)getMutableImplNfa(n); + u32 offset = s->accel_offset; + + for (dstate_id_t i = 0; i < info.size(); i++) { + dstate_id_t state_id = info.raw_id(i); + if (accelInfo.find(state_id) != accelInfo.end()) { + s->flags |= SHENG_FLAG_HAS_ACCEL; + AccelAux *aux = (AccelAux *)((char *)n + offset); + info.strat.buildAccel(state_id, accelInfo[state_id], aux); + sstate_aux *saux = + (sstate_aux *)((char *)n + s->aux_offset) + state_id; + saux->accel = offset; + DEBUG_PRINTF("Accel offset: %u\n", offset); + offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux)); + } + } +} + +static +void populateBasicInfo(struct NFA *n, dfa_info &info, + map &accelInfo, u32 aux_offset, + u32 report_offset, u32 accel_offset, u32 total_size, + u32 dfa_size) { + n->length = total_size; + n->scratchStateSize = 1; + n->streamStateSize = 1; + n->nPositions = info.size(); + n->type = SHENG_NFA_0; + n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; + + sheng *s = (sheng *)getMutableImplNfa(n); + s->aux_offset = aux_offset; + s->report_offset = report_offset; + s->accel_offset = accel_offset; + s->n_states = info.size(); + s->length = dfa_size; + s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; + + s->anchored = getShengState(info.anchored, info, accelInfo); + s->floating = getShengState(info.floating, info, accelInfo); +} + +static +void fillTops(NFA *n, dfa_info &info, dstate_id_t id, + map &accelInfo) { + sheng *s = (sheng *)getMutableImplNfa(n); + u32 aux_base = s->aux_offset; + + DEBUG_PRINTF("Filling tops for state %u\n", id); + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + /* we could conceivably end up in an accept/dead state on a top event, + * so mark top as accept/dead state if it indeed is. 
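+ * the top transition stored in the aux structure is encoded with
+ * getShengState(), so it carries the same SHENG_STATE_ACCEPT/DEAD/ACCEL flag
+ * bits in its upper nibble as an ordinary transition byte.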
+ */ + auto &top_state = info.top(id); + + DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id); + + aux->top = getShengState(top_state, info, accelInfo); +} + +static +void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector &reports, + vector &reports_eod, vector &report_offsets) { + sheng *s = (sheng *)getMutableImplNfa(n); + u32 aux_base = s->aux_offset; + auto raw_id = info.raw_id(id); + + auto &state = info[id]; + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Filling aux and report structures for state %u\n", id); + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]]; + aux->accept_eod = + state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]]; + + DEBUG_PRINTF("Report list offset: %u\n", aux->accept); + DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod); +} + +static +void fillSingleReport(NFA *n, ReportID r_id) { + sheng *s = (sheng *)getMutableImplNfa(n); + + DEBUG_PRINTF("Single report ID: %u\n", r_id); + s->report = r_id; + s->flags |= SHENG_FLAG_SINGLE_REPORT; +} + +static +void createShuffleMasks(sheng *s, dfa_info &info, + map &accelInfo) { + for (u16 chr = 0; chr < 256; chr++) { + u8 buf[16] = {0}; + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + auto &succ_state = info.next(idx, chr); + + buf[idx] = getShengState(succ_state, info, accelInfo); + } +#ifdef DEBUG + dumpShuffleMask(chr, buf, sizeof(buf)); +#endif + m128 mask = loadu128(buf); + s->shuffle_masks[chr] = mask; + } +} + +bool has_accel_sheng(const NFA *nfa) { + const sheng *s = (const sheng *)getImplNfa(nfa); + return s->flags & SHENG_FLAG_HAS_ACCEL; +} + +aligned_unique_ptr shengCompile(raw_dfa &raw, + const CompileContext &cc, + const ReportManager &rm, + set *accel_states) { + if (!cc.grey.allowSheng) { + DEBUG_PRINTF("Sheng is not allowed!\n"); + return nullptr; + } + + sheng_build_strat strat(raw, rm); + dfa_info info(strat); + + DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); + + DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", + raw.start_anchored, raw.start_floating); + + DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", + info.can_die ? 
"can" : "cannot", info.size()); + if (info.size() > 16) { + DEBUG_PRINTF("Too many states\n"); + return nullptr; + } + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + auto accelInfo = strat.getAccelInfo(cc.grey); + + // set impl_id of each dfa state + for (dstate_id_t i = 0; i < info.size(); i++) { + info[i].impl_id = i; + } + + DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n", + info.anchored.impl_id, info.floating.impl_id); + + u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng)); + vector reports, eod_reports, report_offsets; + u8 isSingle = 0; + ReportID single_report = 0; + + auto ri = + strat.gatherReports(reports, eod_reports, &isSingle, &single_report); + + u32 total_aux = sizeof(sstate_aux) * info.size(); + u32 total_accel = strat.accelSize() * accelInfo.size(); + u32 total_reports = ri->getReportListSize(); + + u32 reports_offset = nfa_size + total_aux; + u32 accel_offset = + ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux)); + u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64); + + DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", + nfa_size, total_aux, total_reports, total_accel, total_size); + + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + + populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, + accel_offset, total_size, total_size - sizeof(NFA)); + + DEBUG_PRINTF("Setting up aux and report structures\n"); + + ri->fillReportLists(nfa.get(), reports_offset, report_offsets); + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + fillTops(nfa.get(), info, idx, accelInfo); + fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets); + } + if (isSingle) { + fillSingleReport(nfa.get(), single_report); + } + + fillAccelAux(nfa.get(), info, accelInfo); + + if (accel_states) { + fillAccelOut(accelInfo, accel_states); + } + + createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo); + + return nfa; +} + +} // namespace ue2 diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h new file mode 100644 index 00000000..873b7c75 --- /dev/null +++ b/src/nfa/shengcompile.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
 IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SHENGCOMPILE_H_
+#define SHENGCOMPILE_H_
+
+#include "accel_dfa_build_strat.h"
+#include "rdfa.h"
+#include "util/alloc.h"
+#include "util/charreach.h"
+#include "util/ue2_containers.h"
+
+struct NFA;
+
+namespace ue2 {
+
+class ReportManager;
+struct CompileContext;
+struct raw_dfa;
+
+class sheng_build_strat : public accel_dfa_build_strat {
+public:
+    sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in)
+        : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {}
+    raw_dfa &get_raw() const override { return rdfa; }
+    std::unique_ptr<raw_report_info> gatherReports(
+        std::vector<u32> &reports /* out */,
+        std::vector<u32> &reports_eod /* out */,
+        u8 *isSingleReport /* out */,
+        ReportID *arbReport /* out */) const override;
+    size_t accelSize(void) const override;
+    u32 max_allowed_offset_accel() const override;
+    u32 max_stop_char() const override;
+    u32 max_floating_stop_char() const override;
+
+private:
+    raw_dfa &rdfa;
+};
+
+aligned_unique_ptr<NFA>
+shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm,
+             std::set<dstate_id_t> *accel_states = nullptr);
+
+struct sheng_escape_info {
+    CharReach outs;
+    CharReach outs2_single;
+    flat_set<std::pair<u8, u8>> outs2;
+    bool outs2_broken = false;
+};
+
+bool has_accel_sheng(const NFA *nfa);
+
+} // namespace ue2
+
+#endif /* SHENGCOMPILE_H_ */
diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp
new file mode 100644
index 00000000..037dfb05
--- /dev/null
+++ b/src/nfa/shengdump.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "shengdump.h" + +#include "accel_dump.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" +#include "sheng_internal.h" +#include "rdfa.h" +#include "ue2common.h" +#include "util/charreach.h" +#include "util/dump_charclass.h" +#include "util/simd_utils.h" + + +#ifndef DUMP_SUPPORT +#error No dump support! +#endif + +using namespace std; + +namespace ue2 { + +static +const sstate_aux *get_aux(const NFA *n, dstate_id_t i) { + assert(n && isShengType(n->type)); + + const sheng *s = (const sheng *)getImplNfa(n); + const sstate_aux *aux_base = + (const sstate_aux *)((const char *)n + s->aux_offset); + + const sstate_aux *aux = aux_base + i; + + assert((const char *)aux < (const char *)s + s->length); + + return aux; +} + +static +void dumpHeader(FILE *f, const sheng *s) { + fprintf(f, "number of states: %u, DFA engine size: %u\n", s->n_states, + s->length); + fprintf(f, "aux base offset: %u, reports base offset: %u, " + "accel offset: %u\n", + s->aux_offset, s->report_offset, s->accel_offset); + fprintf(f, "anchored start state: %u, floating start state: %u\n", + s->anchored & SHENG_STATE_MASK, s->floating & SHENG_STATE_MASK); + fprintf(f, "has accel: %u can die: %u single report: %u\n", + !!(s->flags & SHENG_FLAG_HAS_ACCEL), + !!(s->flags & SHENG_FLAG_CAN_DIE), + !!(s->flags & SHENG_FLAG_SINGLE_REPORT)); +} + +static +void dumpAux(FILE *f, u32 state, const sstate_aux *aux) { + fprintf(f, "state id: %u, reports offset: %u, EOD reports offset: %u, " + "accel offset: %u, top: %u\n", + state, aux->accept, aux->accept_eod, aux->accel, + aux->top & SHENG_STATE_MASK); +} + +static +void dumpReports(FILE *f, const report_list *rl) { + fprintf(f, "reports count: %u\n", rl->count); + for (u32 i = 0; i < rl->count; i++) { + fprintf(f, " report: %u, report ID: %u\n", i, rl->report[i]); + } +} + +static +void dumpMasks(FILE *f, const sheng *s) { + for (u32 chr = 0; chr < 256; chr++) { + u8 buf[16]; + m128 shuffle_mask = s->shuffle_masks[chr]; + store128(buf, shuffle_mask); + + fprintf(f, "%3u: ", chr); + for (u32 pos = 0; pos < 16; pos++) { + u8 c = buf[pos]; + if (c & SHENG_STATE_FLAG_MASK) { + fprintf(f, "%2u* ", c & SHENG_STATE_MASK); + } else { + fprintf(f, "%2u ", c & SHENG_STATE_MASK); + } + } + fprintf(f, "\n"); + } +} + +void nfaExecSheng0_dumpText(const NFA *nfa, FILE *f) { + assert(nfa->type == SHENG_NFA_0); + const sheng *s = (const sheng *)getImplNfa(nfa); + + fprintf(f, "sheng DFA\n"); + dumpHeader(f, s); + + for (u32 state = 0; state < s->n_states; state++) { + const sstate_aux *aux = get_aux(nfa, state); + dumpAux(f, state, aux); + if (aux->accept) { + fprintf(f, "report list:\n"); + const report_list *rl = + (const report_list *)((const char *)nfa + aux->accept); + dumpReports(f, rl); + } + if (aux->accept_eod) { + fprintf(f, "EOD report list:\n"); + const report_list *rl = + (const report_list *)((const char *)nfa + aux->accept_eod); + dumpReports(f, rl); + } + if (aux->accel) { + fprintf(f, "accel:\n"); + 
const AccelAux *accel = + (const AccelAux *)((const char *)nfa + aux->accel); + dumpAccelInfo(f, *accel); + } + } + + fprintf(f, "\n"); + + dumpMasks(f, s); + + fprintf(f, "\n"); +} + +static +void dumpDotPreambleDfa(FILE *f) { + dumpDotPreamble(f); + + // DFA specific additions. + fprintf(f, "STARTF [style=invis];\n"); + fprintf(f, "STARTA [style=invis];\n"); + fprintf(f, "0 [style=invis];\n"); +} + +static +void describeNode(const NFA *n, const sheng *s, u16 i, FILE *f) { + const sstate_aux *aux = get_aux(n, i); + + fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, " + "label = \"%u\" ]; \n", + i, i); + + if (aux->accept_eod) { + fprintf(f, "%u [ color = darkorchid ];\n", i); + } + + if (aux->accept) { + fprintf(f, "%u [ shape = doublecircle ];\n", i); + } + + if (aux->top && (aux->top & SHENG_STATE_MASK) != i) { + fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i, + aux->top & SHENG_STATE_MASK); + } + + if (i == (s->anchored & SHENG_STATE_MASK)) { + fprintf(f, "STARTA -> %u [color = blue ]\n", i); + } + + if (i == (s->floating & SHENG_STATE_MASK)) { + fprintf(f, "STARTF -> %u [color = red ]\n", i); + } +} + +static +void describeEdge(FILE *f, const u16 *t, u16 i) { + for (u16 s = 0; s < N_CHARS; s++) { + if (!t[s]) { + continue; + } + + u16 ss; + for (ss = 0; ss < s; ss++) { + if (t[s] == t[ss]) { + break; + } + } + + if (ss != s) { + continue; + } + + CharReach reach; + for (ss = s; ss < 256; ss++) { + if (t[s] == t[ss]) { + reach.set(ss); + } + } + + fprintf(f, "%u -> %u [ label = \"", i, t[s]); + + describeClass(f, reach, 5, CC_OUT_DOT); + + fprintf(f, "\" ];\n"); + } +} + +static +void shengGetTransitions(const NFA *n, u16 state, u16 *t) { + assert(isShengType(n->type)); + const sheng *s = (const sheng *)getImplNfa(n); + const sstate_aux *aux = get_aux(n, state); + + for (unsigned i = 0; i < N_CHARS; i++) { + u8 buf[16]; + m128 shuffle_mask = s->shuffle_masks[i]; + + store128(buf, shuffle_mask); + + t[i] = buf[state] & SHENG_STATE_MASK; + } + + t[TOP] = aux->top & SHENG_STATE_MASK; +} + +void nfaExecSheng0_dumpDot(const NFA *nfa, FILE *f, const string &) { + assert(nfa->type == SHENG_NFA_0); + const sheng *s = (const sheng *)getImplNfa(nfa); + + dumpDotPreambleDfa(f); + + for (u16 i = 1; i < s->n_states; i++) { + describeNode(nfa, s, i, f); + + u16 t[ALPHABET_SIZE]; + + shengGetTransitions(nfa, i, t); + + describeEdge(f, t, i); + } + + fprintf(f, "}\n"); +} + +} // namespace ue2 diff --git a/src/util/simd_utils_ssse3.c b/src/nfa/shengdump.h similarity index 79% rename from src/util/simd_utils_ssse3.c rename to src/nfa/shengdump.h index 50cbe007..5334894f 100644 --- a/src/util/simd_utils_ssse3.c +++ b/src/nfa/shengdump.h @@ -26,15 +26,24 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "simd_utils_ssse3.h" +#ifndef SHENGDUMP_H_ +#define SHENGDUMP_H_ -const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +#ifdef DUMP_SUPPORT - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +#include +#include - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, -}; +struct NFA; + +namespace ue2 { + +void nfaExecSheng0_dumpDot(const struct NFA *nfa, FILE *file, + const std::string &base); +void nfaExecSheng0_dumpText(const struct NFA *nfa, FILE *file); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif /* SHENGDUMP_H_ */ diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index b1fec488..903e04da 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,8 +40,6 @@ #include "shufti_common.h" -#include "util/simd_utils_ssse3.h" - /** \brief Naive byte-by-byte implementation. */ static really_inline const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, @@ -235,7 +233,7 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, m128 c2_lo = pshufb(mask2_lo, chars_lo); m128 c2_hi = pshufb(mask2_hi, chars_hi); - m128 t2 = or128(t, shiftRight8Bits(or128(c2_lo, c2_hi))); + m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); #ifdef DEBUG DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n"); @@ -472,7 +470,7 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, m256 c2_lo = vpshufb(mask2_lo, chars_lo); m256 c2_hi = vpshufb(mask2_hi, chars_hi); - m256 t2 = or256(t, shift256Right8Bits(or256(c2_lo, c2_hi))); + m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); #ifdef DEBUG DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n"); diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h index 9c11f2b9..e63ad27a 100644 --- a/src/nfa/shufti_common.h +++ b/src/nfa/shufti_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" #include "util/unaligned.h" -#include "util/simd_utils_ssse3.h" /* * Common stuff for all versions of shufti (single, multi and multidouble) @@ -94,7 +93,7 @@ DUMP_MSK(128) #endif #define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) static really_inline u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, @@ -120,7 +119,7 @@ DUMP_MSK(256) #endif #define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift4x64(andnot256(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) static really_inline u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, diff --git a/src/nfa/tamarama.c b/src/nfa/tamarama.c new file mode 100644 index 00000000..b5f90e85 --- /dev/null +++ b/src/nfa/tamarama.c @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2016, Intel 
Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Tamarama: container engine for exclusive engines, runtime code. +*/ +#include "config.h" + +#include "tamarama.h" + +#include "tamarama_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_api_util.h" +#include "nfa_internal.h" +#include "scratch.h" +#include "util/partial_store.h" + +static really_inline +u32 getSubOffset(const struct Tamarama *t, u32 num) { + DEBUG_PRINTF("subengine:%u\n", num); + assert(num < t->numSubEngines); + const u32 *sub = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + assert(ISALIGNED(sub)); + return sub[num]; +} + +static +const struct NFA *getSubEngine(const struct Tamarama *t, + const u32 activeIdx) { + const u32 offset = getSubOffset(t, activeIdx); + DEBUG_PRINTF("activeIdx:%u offsets:%u\n", activeIdx, offset); + const char *base = (const char *)t; + return (const struct NFA *)(base + offset); +} + +static +void storeActiveIdx(const struct Tamarama *t, char *state, + const u32 idx) { + assert(idx <= t->numSubEngines); + partial_store_u32(state, idx, t->activeIdxSize); +} + +static +u32 loadActiveIdx(const char *state, + const u32 activeIdxSize) { + return partial_load_u32(state, activeIdxSize); +} + +static really_inline +void copyQueueProperties(const struct mq *q1, struct mq *q2, + const u32 activeIdxSize) { + q2->state = q1->state; + q2->streamState = q1->streamState + activeIdxSize; + q2->offset = q1->offset; + q2->buffer = q1->buffer; + q2->length = q1->length; + q2->history = q1->history; + q2->hlength = q1->hlength; + q2->cb = q1->cb; + q2->context = q1->context; + q2->scratch = q1->scratch; + q2->report_current = q1->report_current; +} + +static +void copyQueueItems(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + + u32 lower = baseTop[activeIdx]; + u32 upper = activeIdx == t->numSubEngines - 1 ? 
+ ~0U : baseTop[activeIdx + 1]; + u32 event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP; + while (q1->cur < q1->end) { + u32 type = q1->items[q1->cur].type; + s64a loc = q1->items[q1->cur].location; + DEBUG_PRINTF("type:%u lower:%u upper:%u\n", type, lower, upper); + if (type >= lower && type < upper) { + u32 event = event_base; + if (event == MQE_TOP_FIRST) { + event += type - lower; + } + pushQueue(q2, event, loc); + } else { + pushQueueNoMerge(q2, MQE_END, loc); + break; + } + q1->cur++; + } +} + +static +void copyQueue(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + copyQueueProperties(q1, q2, t->activeIdxSize); + + // copy MQE_START item + u32 cur = q1->cur++; + q2->cur = cur; + q2->items[cur] = q1->items[cur]; + q2->end = cur + 1; + + copyQueueItems(t, sub, q1, q2, activeIdx); + // restore cur index of the main queue + q1->cur = cur; +} + +static +u32 findEngineForTop(const u32 *baseTop, const u32 cur, + const u32 numSubEngines) { + u32 i; + for (i = 0; i < numSubEngines; ++i) { + DEBUG_PRINTF("cur:%u base:%u\n", cur, baseTop[i]); + if (cur >= baseTop[i] && + (i == numSubEngines - 1 || cur < baseTop[i + 1])) { + break; + } + } + return i; +} + +static +void initSubQueue(const struct Tamarama *t, struct mq *q1, + struct mq *q2, const u32 lastActiveIdx, + const u32 activeIdx) { + // Push events to the new queue + const struct NFA *sub = getSubEngine(t, activeIdx); + assert(!isContainerType(sub->type)); + q2->nfa = sub; + + // Reinitialize state if the last active subengine is different + // from current one + if (lastActiveIdx == t->numSubEngines || + lastActiveIdx != activeIdx) { + nfaQueueInitState(q2->nfa, q2); + } + + copyQueueItems(t, sub, q1, q2, activeIdx); + if (q1->items[q1->cur].type == MQE_END) { + q1->cur++; + } + DEBUG_PRINTF("update lastIdx:%u\n", activeIdx); + storeActiveIdx(t, q1->streamState, activeIdx); +} + +static +void updateQueues(const struct Tamarama *t, struct mq *q1, struct mq *q2) { + q2->cur = q2->end = 0; + copyQueueProperties(q1, q2, t->activeIdxSize); + + const u32 numSubEngines = t->numSubEngines; + u32 lastActiveIdx = loadActiveIdx(q1->streamState, + t->activeIdxSize); +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q1); +#endif + + // Push MQE_START event to the subqueue + s64a loc = q1->items[q1->cur].location; + pushQueueAt(q2, 0, MQE_START, loc); + char hasStart = 0; + if (q1->items[q1->cur].type == MQE_START) { + hasStart = 1; + q1->cur++; + } + + u32 activeIdx = lastActiveIdx; + // If we have top events in the main queue, update current active id + if (q1->cur < q1->end - 1) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + u32 curTop = q1->items[q1->cur].type; + activeIdx = findEngineForTop(baseTop, curTop, numSubEngines); + } + + assert(activeIdx < numSubEngines); + DEBUG_PRINTF("last id:%u, current id:%u, num of subengines:%u\n", + lastActiveIdx, activeIdx, numSubEngines); + // Handle unfinished last alive subengine + if (lastActiveIdx != activeIdx && + lastActiveIdx != numSubEngines && hasStart) { + loc = q1->items[q1->cur].location; + pushQueueNoMerge(q2, MQE_END, loc); + q2->nfa = getSubEngine(t, lastActiveIdx); + return; + } + + initSubQueue(t, q1, q2, lastActiveIdx, activeIdx); + DEBUG_PRINTF("finish queues\n"); +} + +// After processing subqueue items for subengines, we need to copy back +// remaining items in subqueue if there are any to Tamarama main queue +static +void copyBack(const struct Tamarama *t, struct mq 
*q, struct mq *q1) { + DEBUG_PRINTF("copy back %u, %u\n", q1->cur, q1->end); + q->report_current = q1->report_current; + if (q->cur >= q->end && q1->cur >= q1->end) { + return; + } + + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + const u32 lastIdx = loadActiveIdx(q->streamState, + t->activeIdxSize); + u32 base = 0, event_base = 0; + if (lastIdx != t->numSubEngines) { + base = baseTop[lastIdx]; + const struct NFA *sub = getSubEngine(t, lastIdx); + event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP; + } + + u32 numItems = q1->end > q1->cur + 1 ? q1->end - q1->cur - 1 : 1; + // Also need to copy MQE_END if the main queue is empty + if (q->cur == q->end) { + assert(q->cur > 1 && q1->items[q1->end - 1].type == MQE_END); + q->items[--q->cur] = q1->items[q1->end - 1]; + } + u32 cur = q->cur - numItems; + q->items[cur] = q1->items[q1->cur++]; + q->items[cur].type = MQE_START; + q->cur = cur++; + for (u32 i = 0; i < numItems - 1; ++i) { + assert(q1->cur < q1->end); + u32 type = q1->items[q1->cur].type; + if (type > MQE_END) { + q1->items[q1->cur].type = type - event_base + base; + } + q->items[cur++] = q1->items[q1->cur++]; + } + +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q); +#endif +} + +char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return MO_CONTINUE_MATCHING; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + if (nfaAcceptsEod(sub)) { + assert(!isContainerType(sub->type)); + const char *subStreamState = streamState + t->activeIdxSize; + return nfaCheckFinalState(sub, state, subStreamState, offset, callback, + context); + } + + return MO_CONTINUE_MATCHING; +} + +char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, + ReportID report) { + DEBUG_PRINTF("exec rose\n"); + struct mq q1; + q1.cur = q1.end = 0; + char rv = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end) { + updateQueues(t, q, &q1); + } + + if (q1.cur < q1.end) { + rv = nfaQueueExecRose(q1.nfa, &q1, report); + } + + DEBUG_PRINTF("exec rose rv:%u\n", rv); + return rv; +} + +char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 1; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaReportCurrentMatches(sub, &q1); +} + +char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAcceptState(sub, report, &q1); +} + +char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAnyAcceptState(sub, &q1); +} + +char 
nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q) { + DEBUG_PRINTF("init state\n"); + const struct Tamarama *t = getImplNfa(n); + char *ptr = q->streamState; + // Use activeIdxSize as a sentinel value and initialize the state to + // an invalid engine as nothing has been triggered yet + storeActiveIdx(t, ptr, t->numSubEngines); + return 0; +} + +char nfaExecTamarama0_queueCompressState(const struct NFA *n, + const struct mq *q, s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueueProperties(q, &q1, t->activeIdxSize); + return nfaQueueCompressState(sub, &q1, loc); +} + +char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + const char *subStreamState = (const char *)src + t->activeIdxSize; + return nfaExpandState(sub, dest, subStreamState, offset, key); +} + +enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, + struct mq *q, s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return NFA_ZOMBIE_NO; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaGetZombieStatus(sub, &q1, loc); +} + +char nfaExecTamarama0_Q(const struct NFA *n, struct mq *q, s64a end) { + DEBUG_PRINTF("exec\n"); + struct mq q1; + char rv = MO_ALIVE; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end) { + updateQueues(t, q, &q1); + rv = nfaQueueExec_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + if (can_stop_matching(q->scratch)) { + break; + } + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + +char nfaExecTamarama0_Q2(const struct NFA *n, + struct mq *q, s64a end) { + DEBUG_PRINTF("exec to match\n"); + struct mq q1; + char rv = 0; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end && + rv != MO_MATCHES_PENDING) { + updateQueues(t, q, &q1); + rv = nfaQueueExec2_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + if (can_stop_matching(q->scratch)) { + break; + } + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + diff --git a/src/nfa/tamarama.h b/src/nfa/tamarama.h new file mode 100644 index 00000000..7ccfa5a0 --- /dev/null +++ b/src/nfa/tamarama.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TAMARAMA_H +#define TAMARAMA_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; +struct hs_scratch; + +char nfaExecTamarama0_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecTamarama0_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecTamarama0_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecTamarama0_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecTamarama0_queueCompressState(const struct NFA *n, + const struct mq *q, + s64a loc); +char nfaExecTamarama0_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key); +enum nfa_zombie_status nfaExecTamarama0_zombie_status(const struct NFA *n, + struct mq *q, s64a loc); +char nfaExecTamarama0_Q(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama0_Q2(const struct NFA *nfa, struct mq *q, s64a end); + +// only used by outfix and miracles, no implementation for tamarama +#define nfaExecTamarama0_initCompressedState NFA_API_NO_IMPL +#define nfaExecTamarama0_B_Reverse NFA_API_NO_IMPL + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp new file mode 100644 index 00000000..181fa9af --- /dev/null +++ b/src/nfa/tamarama_dump.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusve engines, dump code. + */ + +#include "config.h" + +#include "tamarama_dump.h" + +#include "tamarama_internal.h" +#include "nfa_dump_api.h" +#include "nfa_dump_internal.h" +#include "nfa_internal.h" + +#include +#include + +#ifndef DUMP_SUPPORT +#error No dump support! +#endif + +namespace ue2 { + +void nfaExecTamarama0_dumpDot(const struct NFA *nfa, UNUSED FILE *f, + const std::string &base) { + const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + const u32 *subOffset = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + for (u32 i = 0; i < t->numSubEngines; i++) { + std::stringstream ssdot; + ssdot << base << "rose_nfa_" << nfa->queueIndex + << "_sub_" << i << ".dot"; + const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); + FILE *f1 = fopen(ssdot.str().c_str(), "w"); + nfaDumpDot(sub, f1, base); + fclose(f1); + } +} + +void nfaExecTamarama0_dumpText(const struct NFA *nfa, FILE *f) { + const Tamarama *t = (const Tamarama *)getImplNfa(nfa); + + fprintf(f, "Tamarama container engine\n"); + fprintf(f, "\n"); + fprintf(f, "Number of subengine tenants: %u\n", t->numSubEngines); + + fprintf(f, "\n"); + dumpTextReverse(nfa, f); + fprintf(f, "\n"); + + const u32 *subOffset = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + for (u32 i = 0; i < t->numSubEngines; i++) { + fprintf(f, "Sub %u:\n", i); + const NFA *sub = (const struct NFA *)((const char *)t + subOffset[i]); + nfaDumpText(sub, f); + fprintf(f, "\n"); + } +} + +} // namespace ue2 diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/tamarama_dump.h similarity index 61% rename from src/nfa/mcclellancompile_accel.h rename to src/nfa/tamarama_dump.h index 427267d7..dc976004 100644 --- a/src/nfa/mcclellancompile_accel.h +++ b/src/nfa/tamarama_dump.h @@ -26,36 +26,24 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MCCLELLANCOMPILE_ACCEL_H -#define MCCLELLANCOMPILE_ACCEL_H +#ifndef TAMARAMA_DUMP_H +#define TAMARAMA_DUMP_H -#include "mcclellancompile.h" +#if defined(DUMP_SUPPORT) -#include +#include +#include + +struct NFA; namespace ue2 { -struct Grey; +void nfaExecTamarama0_dumpDot(const NFA *nfa, FILE *file, + const std::string &base); +void nfaExecTamarama0_dumpText(const NFA *nfa, FILE *file); -#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 +} // namespace ue2 -/** Maximum tolerated number of escape character from an accel state. - * This is larger than nfa, as we don't have a budget and the nfa cheats on stop - * characters for sets of states */ -#define ACCEL_DFA_MAX_STOP_CHAR 160 - -/** Maximum tolerated number of escape character from a sds accel state. 
Larger - * than normal states as accelerating sds is important. Matches NFA value */ -#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 - -std::map populateAccelerationInfo(const raw_dfa &rdfa, - const dfa_build_strat &strat, - const Grey &grey); - -AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, - dstate_id_t this_idx, - u32 max_allowed_accel_offset); - -} +#endif // DUMP_SUPPORT #endif diff --git a/src/nfa/tamarama_internal.h b/src/nfa/tamarama_internal.h new file mode 100644 index 00000000..5cdc70d4 --- /dev/null +++ b/src/nfa/tamarama_internal.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + *\brief Tamarama: container engine for exclusive engines, + * data structures. + */ + +/* Tamarama bytecode layout: + * * |-----| + * * | | struct NFA + * * |-----| + * * | | struct Tamarama + * * | | + * * |-----| + * * | | top remapping table: + * * | | stores top base for each subengine. + * * | | old_top = remapped_top - top_base; + * * | | The size of table is equal to the number of subengines. + * * ... + * * | | + * * |-----| + * * | | offsets from the start of struct Tamarama to subengines --\ + * * ... | + * * | | -----------\ | + * * |-----| | | + * * ||--| | subengine 1 (struct NFA + rest of subengine) <--/ | + * * || | | | + * * ||--| | | + * * || | | | + * * || | | | + * * ||--| | | + * * | | | + * * ||--| | subengine 2 (struct NFA + rest of subengine) <-------/ + * * || | | + * * ||--| | + * * || | | + * * || | | + * * ||--| | + * * | | + * * ... + * * | | + * * |-----| total size of tamarama + * * + * * Tamarama stream state: + * * + * * |---| + * * | | active subengine id + * * |---| + * * | | common pool of stream state for each engine + * * | | + * * | | + * * ... + * * | | + * * | | + * * |---| + * * + * * Tamarama scratch space: + * * + * * |---| + * * | | common pool of scratch for each engine + * * | | + * * | | + * * ... 
+ * * | | + * * | | + * * |---| + * */ + +#ifndef NFA_TAMARAMA_INTERNAL_H +#define NFA_TAMARAMA_INTERNAL_H + +#include "ue2common.h" + +struct ALIGN_AVX_DIRECTIVE Tamarama { + u32 numSubEngines; + u8 activeIdxSize; +}; + +#endif // NFA_TAMARAMA_INTERNAL_H diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp new file mode 100644 index 00000000..73d19595 --- /dev/null +++ b/src/nfa/tamaramacompile.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, + * compiler code. + */ + +#include "config.h" + +#include "tamaramacompile.h" + +#include "tamarama_internal.h" +#include "nfa_internal.h" +#include "nfa_api_queue.h" +#include "repeatcompile.h" +#include "util/container.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static +void remapTops(const TamaInfo &tamaInfo, + vector &top_base, + map, u32> &out_top_remap) { + u32 i = 0; + u32 cur = 0; + for (const auto &sub : tamaInfo.subengines) { + u32 base = cur; + top_base.push_back(base + MQE_TOP_FIRST); + DEBUG_PRINTF("subengine:%u\n", i); + for (const auto &t : tamaInfo.tops[i++]) { + cur = base + t; + DEBUG_PRINTF("top remapping %u:%u\n", t ,cur); + out_top_remap.emplace(make_pair(sub, t), cur++); + } + } +} + +/** + * update stream state and scratch state sizes and copy in + * subengines in Tamarama. 
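remapTops() above hands each subengine a private band of top values: the running base is recorded per subengine (offset by MQE_TOP_FIRST in the real code) and each original top becomes base + top, so the runtime can recover the original as remapped_top - top_base, as the layout comment earlier describes. A standalone sketch of the same arithmetic with the queue-event constant left out, so only the mapping itself is shown:

    // Illustration of the remapping performed by remapTops(); not Hyperscan code.
    #include <cstdio>
    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
        // two hypothetical subengines and the tops each of them accepts
        std::vector<std::set<unsigned>> tops = {{0, 2}, {1}};
        std::vector<unsigned> top_base;                          // per-subengine base
        std::map<std::pair<unsigned, unsigned>, unsigned> remap; // (sub, top) -> new top

        unsigned cur = 0;
        for (unsigned sub = 0; sub < tops.size(); sub++) {
            unsigned base = cur;
            top_base.push_back(base);      // the real code stores base + MQE_TOP_FIRST
            for (unsigned t : tops[sub]) {
                cur = base + t;
                remap[{sub, t}] = cur++;
            }
        }
        // sub 0: top 0 -> 0, top 2 -> 2; sub 1 (base 3): top 1 -> 4
        for (const auto &m : remap) {
            std::printf("sub %u top %u -> %u (recovered: %u)\n", m.first.first,
                        m.first.second, m.second,
                        m.second - top_base[m.first.first]);
        }
        return 0;
    }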
+ */ +static +void copyInSubnfas(const char *base_offset, NFA &nfa, + const TamaInfo &tamaInfo, u32 *offsets, + char *sub_nfa_offset, const u32 activeIdxSize) { + u32 maxStreamStateSize = 0; + u32 maxScratchStateSize = 0; + sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, 64); + bool infinite_max_width = false; + for (auto &sub : tamaInfo.subengines) { + u32 streamStateSize = verify_u32(sub->streamStateSize); + u32 scratchStateSize = verify_u32(sub->scratchStateSize); + maxStreamStateSize = max(maxStreamStateSize, streamStateSize); + maxScratchStateSize = max(maxScratchStateSize, scratchStateSize); + sub->queueIndex = nfa.queueIndex; + + memcpy(sub_nfa_offset, sub, sub->length); + *offsets = verify_u32(sub_nfa_offset - base_offset); + DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets); + ++offsets; + sub_nfa_offset += ROUNDUP_CL(sub->length); + + // update nfa properties + nfa.flags |= sub->flags; + if (!sub->maxWidth) { + infinite_max_width = true; + } else if (!infinite_max_width) { + nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth); + } + } + + if (infinite_max_width) { + nfa.maxWidth = 0; + } + nfa.maxBiAnchoredWidth = 0; + nfa.streamStateSize = activeIdxSize + maxStreamStateSize; + nfa.scratchStateSize = maxScratchStateSize; +} + +/** + * Take in a collection of exclusive sub engines and produces a tamarama, also + * returns via out_top_remap, a mapping indicating how tops in the subengines in + * relate to the tamarama's tops. + */ +aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + map, u32> &out_top_remap) { + vector top_base; + remapTops(tamaInfo, top_base, out_top_remap); + + size_t subSize = tamaInfo.subengines.size(); + DEBUG_PRINTF("subSize:%lu\n", subSize); + size_t total_size = + sizeof(NFA) + // initial NFA structure + sizeof(Tamarama) + // Tamarama structure + sizeof(u32) * subSize + // base top event value for subengines, + // used for top remapping at runtime + sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and + // padding for subengines + + for (const auto &sub : tamaInfo.subengines) { + total_size += ROUNDUP_CL(sub->length); + } + + // use subSize as a sentinel value for no active subengines, + // so add one to subSize here + u32 activeIdxSize = calcPackedBytes(subSize + 1); + aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + nfa->type = verify_u8(TAMARAMA_NFA_0); + nfa->length = verify_u32(total_size); + nfa->queueIndex = queue; + + char *ptr = (char *)nfa.get() + sizeof(NFA); + char *base_offset = ptr; + Tamarama *t = (Tamarama *)ptr; + t->numSubEngines = verify_u32(subSize); + t->activeIdxSize = verify_u8(activeIdxSize); + + ptr += sizeof(Tamarama); + copy_bytes(ptr, top_base); + ptr += byte_length(top_base); + + u32 *offsets = (u32*)ptr; + char *sub_nfa_offset = ptr + sizeof(u32) * subSize; + copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset, + activeIdxSize); + assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size); + return nfa; +} + +set all_reports(const TamaProto &proto) { + return proto.reports; +} + +void TamaInfo::add(NFA *sub, const set &top) { + assert(subengines.size() < max_occupancy); + subengines.push_back(sub); + tops.push_back(top); +} + +void TamaProto::add(const NFA *n, const u32 id, const u32 top, + const map, u32> &out_top_remap) { + top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top))); +} + +} // namespace ue2 + diff --git a/src/nfa/tamaramacompile.h b/src/nfa/tamaramacompile.h new file mode 100644 index 00000000..048b966b --- 
/dev/null +++ b/src/nfa/tamaramacompile.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Tamarama: container engine for exclusive engines, compiler code. + */ + +#ifndef NFA_TAMARAMACOMPILE_H +#define NFA_TAMARAMACOMPILE_H + +#include "ue2common.h" +#include "util/alloc.h" + +#include +#include +#include + +struct NFA; + +namespace ue2 { + +/** + * \brief A TamaProto that contains top remapping and reports info + */ +struct TamaProto { + void add(const NFA *n, const u32 id, const u32 top, + const std::map, u32> &out_top_remap); + /** Top remapping between and + ** remapped top value. */ + std::map, u32> top_remap; + + /** All the reports in subengines */ + std::set reports; +}; + +/** + * \brief Contruction info for a Tamarama engine: + * contains at least two subengines. + * + * A TamaInfo is converted into a single NFA, with each top triggering a + * subengine. A TamaInfo can contain at most TamaInfo::max_occupancy + * subengines. + */ +struct TamaInfo { + static constexpr size_t max_occupancy = 65536; // arbitrary limit + + /** \brief Add a new subengine. */ + void add(NFA* sub, const std::set &top); + + /** \brief All the subengines */ + std::vector subengines; + + /** \brief Tops of subengines */ + std::vector> tops; +}; + +std::set all_reports(const TamaProto &proto); + +/** + * Take in a collection of exclusive subengines and produces a tamarama, also + * returns via out_top_remap, a mapping indicating how tops in the subengines in + * relate to the tamarama's tops. 
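Read together with the runtime and compiler sources above, the intended compile-side flow appears to be: collect the provably exclusive subengines and their tops in a TamaInfo, call buildTamarama() to obtain the single container NFA plus the top remapping, then record that remapping per pattern through TamaProto::add(). A rough usage sketch follows; the template parameters stripped from the flattened text above are inferred here (tops as u32, the remap keyed by (subengine, top)), and subNfa0, subNfa1, queue and patternId are placeholder names, so treat the details as assumptions rather than the actual Rose integration:

    // Hypothetical usage of the TamaInfo/buildTamarama/TamaProto trio.
    static
    TamaProto buildExclusiveContainer(NFA *subNfa0, NFA *subNfa1, u32 queue,
                                      u32 patternId) {
        TamaInfo info;
        info.add(subNfa0, {0u, 2u});   // subengine 0 is triggered by tops 0 and 2
        info.add(subNfa1, {1u});       // subengine 1 is triggered by top 1

        std::map<std::pair<const NFA *, u32>, u32> top_remap;
        auto container = buildTamarama(info, queue, top_remap);
        // ... 'container' would be installed as the single engine for this queue ...

        TamaProto proto;
        // re-point (pattern id, original top) at the container's remapped top
        proto.add(subNfa0, patternId, 0u, top_remap);
        return proto;
    }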
+ */ +ue2::aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, + const u32 queue, + std::map, u32> &out_top_remap); +} // namespace ue2 + +#endif // NFA_TAMARAMACOMPILE_H diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 8863c71a..1eff269a 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "truffle.h" #include "util/bitutils.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" #include "truffle_common.h" diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h index 122f65c4..7368e550 100644 --- a/src/nfa/truffle_common.h +++ b/src/nfa/truffle_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" /* * Common stuff for all versions of truffle (single, multi and multidouble) @@ -49,7 +48,6 @@ const u8 *firstMatch(const u8 *buf, u32 z) { return NULL; // no match } -#define shift128r(a, b) _mm_srli_epi64((a), (b)) static really_inline u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { @@ -60,7 +58,7 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); m128 t1 = xor128(v, highconst); m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, shift128r(v, 4)); + m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); m128 shuf3 = pshufb(shuf_mask_hi, t2); m128 tmp = and128(or128(shuf1, shuf2), shuf3); m128 tmp2 = eq128(tmp, zeroes128()); @@ -103,7 +101,6 @@ const u8 *firstMatch(const u8 *buf, u32 z) { return NULL; // no match } -#define shift256r(a, b) _mm256_srli_epi64((a), (b)) static really_inline u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { @@ -114,7 +111,7 @@ u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); m256 t1 = xor256(v, highconst); m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); - m256 t2 = andnot256(highconst, shift256r(v, 4)); + m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); m256 shuf3 = vpshufb(shuf_mask_hi, t2); m256 tmp = and256(or256(shuf1, shuf2), shuf3); m256 tmp2 = eq256(tmp, zeroes256()); diff --git a/src/nfa/vermicelli_sse.h b/src/nfa/vermicelli_sse.h index 1883a44c..0749470f 100644 --- a/src/nfa/vermicelli_sse.h +++ b/src/nfa/vermicelli_sse.h @@ -138,7 +138,7 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, for (; buf + 16 < buf_end; buf += 16) { m128 data = load128(buf); u32 z = movemask128(and128(eq128(chars1, data), - shiftRight8Bits(eq128(chars2, data)))); + rshiftbyte_m128(eq128(chars2, data), 1))); if (buf[15] == c1 && buf[16] == c2) { z |= (1 << 15); } @@ -161,7 +161,7 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, m128 data = load128(buf); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars1, v), - shiftRight8Bits(eq128(chars2, v)))); + rshiftbyte_m128(eq128(chars2, v), 1))); if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & 
CASE_CLEAR) == c2) { z |= (1 << 15); } @@ -182,8 +182,10 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, for (; buf + 16 < buf_end; buf += 16) { m128 data = load128(buf); - u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), - shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + m128 v1 = eq128(chars1, and128(data, mask1)); + m128 v2 = eq128(chars2, and128(data, mask2)); + u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); + if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) { z |= (1 << 15); } @@ -201,7 +203,7 @@ static really_inline const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); // unaligned u32 z = movemask128(and128(eq128(chars1, data), - shiftRight8Bits(eq128(chars2, data)))); + rshiftbyte_m128(eq128(chars2, data), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -219,7 +221,7 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); // unaligned m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars1, v), - shiftRight8Bits(eq128(chars2, v)))); + rshiftbyte_m128(eq128(chars2, v), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -234,8 +236,9 @@ static really_inline const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, m128 mask1, m128 mask2, const u8 *buf) { m128 data = loadu128(buf); // unaligned - u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)), - shiftRight8Bits(eq128(chars2, and128(data, mask2))))); + m128 v1 = eq128(chars1, and128(data, mask1)); + m128 v2 = eq128(chars2, and128(data, mask2)); + u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -324,7 +327,7 @@ const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, for (; buf + 16 < buf_end; buf_end -= 16) { m128 data = load128(buf_end - 16); u32 z = movemask128(and128(eq128(chars2, data), - shiftLeft8Bits(eq128(chars1, data)))); + lshiftbyte_m128(eq128(chars1, data), 1))); if (buf_end[-17] == c1 && buf_end[-16] == c2) { z |= 1; } @@ -345,7 +348,7 @@ const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, m128 data = load128(buf_end - 16); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars2, v), - shiftLeft8Bits(eq128(chars1, v)))); + lshiftbyte_m128(eq128(chars1, v), 1))); if ((buf_end[-17] & CASE_CLEAR) == c1 && (buf_end[-16] & CASE_CLEAR) == c2) { z |= 1; @@ -362,7 +365,7 @@ static really_inline const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); u32 z = movemask128(and128(eq128(chars2, data), - shiftLeft8Bits(eq128(chars1, data)))); + lshiftbyte_m128(eq128(chars1, data), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -380,7 +383,7 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars2, v), - shiftLeft8Bits(eq128(chars1, v)))); + lshiftbyte_m128(eq128(chars1, v), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { return lastMatchOffset(buf + 16, z); diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index b4b34d74..deca3fd5 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -57,13 +57,14 @@ #include 
"ng_small_literal_set.h" #include "ng_som.h" #include "ng_vacuous.h" +#include "ng_violet.h" #include "ng_utf8.h" #include "ng_util.h" #include "ng_width.h" #include "ue2common.h" #include "nfa/goughcompile.h" -#include "smallwrite/smallwrite_build.h" #include "rose/rose_build.h" +#include "smallwrite/smallwrite_build.h" #include "util/compile_error.h" #include "util/container.h" #include "util/depth.h" @@ -75,14 +76,15 @@ using namespace std; namespace ue2 { -NG::NG(const CompileContext &in_cc, unsigned in_somPrecision) +NG::NG(const CompileContext &in_cc, size_t num_patterns, + unsigned in_somPrecision) : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength), minWidth(depth::infinity()), rm(in_cc.grey), ssm(in_somPrecision), cc(in_cc), - rose(makeRoseBuilder(rm, ssm, cc, boundary)), - smwr(makeSmallWriteBuilder(rm, cc)) { + smwr(makeSmallWriteBuilder(num_patterns, rm, cc)), + rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) { } NG::~NG() { @@ -103,6 +105,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, DEBUG_PRINTF("doing som\n"); dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey); assert(hasCorrectlyNumberedVertices(g)); + assert(allMatchStatesHaveReports(w)); // First, we try the "SOM chain" support in ng_som.cpp. @@ -206,6 +209,8 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey); + assert(allMatchStatesHaveReports(w)); + reduceGraph(g, som, w.utf8, cc); dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey); @@ -230,6 +235,8 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, } } + assert(allMatchStatesHaveReports(w)); + if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; } @@ -243,6 +250,10 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } + if (doViolet(*ng.rose, g, w.prefilter, cc)) { + return true; + } + if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { return true; } @@ -260,6 +271,10 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } + if (doViolet(*ng.rose, g, w.prefilter, cc)) { + return true; + } + if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { return true; } @@ -579,7 +594,8 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, minWidth = min(minWidth, depth(literal.length())); - smwr->add(literal, id); /* inform small write handler about this literal */ + /* inform small write handler about this literal */ + smwr->add(literal, id); return true; } diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index 52353da9..4aa6a7dc 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,7 +87,8 @@ class SmallWriteBuild; class NG : boost::noncopyable { public: - NG(const CompileContext &in_cc, unsigned in_somPrecision); + NG(const CompileContext &in_cc, size_t num_patterns, + unsigned in_somPrecision); ~NG(); /** \brief Consumes a pattern, returns false or throws a CompileError @@ -118,8 +119,8 @@ public: BoundaryReports boundary; const CompileContext cc; - const std::unique_ptr rose; //!< Rose builder. const std::unique_ptr smwr; //!< SmallWrite builder. + const std::unique_ptr rose; //!< Rose builder. 
}; /** \brief Run graph reduction passes. diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp index 1b6d8826..ba352e60 100644 --- a/src/nfagraph/ng_anchored_dots.cpp +++ b/src/nfagraph/ng_anchored_dots.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -119,7 +119,7 @@ NFAVertex findReformable(const NGHolder &g, const set &starts, } if (dotq.empty()) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } const DotInfo &dot = dotq.top(); @@ -165,10 +165,10 @@ void reformAnchoredRepeatsComponent(NGHolder &g, return; } - NFAVertex dotV = NFAGraph::null_vertex(); + NFAVertex dotV = NGHolder::null_vertex(); set otherV; dotV = findReformable(g, compAnchoredStarts, otherV); - if (dotV == NFAGraph::null_vertex()) { + if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; } @@ -268,10 +268,10 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } while (true) { - NFAVertex dotV = NFAGraph::null_vertex(); + NFAVertex dotV = NGHolder::null_vertex(); set otherV; dotV = findReformable(g, compUnanchoredStarts, otherV); - if (dotV == NFAGraph::null_vertex()) { + if (dotV == NGHolder::null_vertex()) { DEBUG_PRINTF("no candidate reformable dot found.\n"); return; } @@ -464,7 +464,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, // The first of our optional dots must be connected to start. The jump edge // past it will be verified in gatherParticipants(). If start is // graph.start, it should not be connected to startDs. - NFAVertex initialDot = NFAGraph::null_vertex(); + NFAVertex initialDot = NGHolder::null_vertex(); for (auto v : adjacent_vertices_range(start, g)) { if (is_special(v, g)) { continue; diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index 2d02751f..e9e39345 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -553,6 +553,7 @@ void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { add_edge(g.startDs, v_4, g); remove_edge(orig, g); g.renumberEdges(); + clearReports(g); } } diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 36ce80b0..8a92b7ee 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -131,7 +131,7 @@ NFABuilderImpl::~NFABuilderImpl() { NFAVertex NFABuilderImpl::getVertex(Position pos) const { assert(id2vertex.size() >= pos); const NFAVertex v = id2vertex[pos]; - assert(v != NFAGraph::null_vertex()); + assert(v != NGHolder::null_vertex()); assert(graph->g[v].index == pos); return v; } diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 5ca5ce3a..658e7001 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright 
(c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -219,8 +219,8 @@ vector findShellEdges(const NGHolder &g, static void removeVertices(const flat_set &verts, NFAUndirectedGraph &ug, - ue2::unordered_map &old2new, - ue2::unordered_map &new2old) { + ue2::unordered_map &old2new, + ue2::unordered_map &new2old) { for (auto v : verts) { assert(contains(old2new, v)); auto uv = old2new.at(v); @@ -280,7 +280,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, createUnGraph(g.g, true, true, ug, old2new, newIdx2old); // Construct reverse mapping. - ue2::unordered_map new2old; + ue2::unordered_map new2old; for (const auto &m : old2new) { new2old.emplace(m.second, m.first); } @@ -308,7 +308,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, // Collect vertex lists per component. for (const auto &m : split_components) { - NFAVertex uv = m.first; + NFAUndirectedVertex uv = m.first; u32 c = m.second; assert(contains(new2old, uv)); NFAVertex v = new2old.at(uv); @@ -363,6 +363,12 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, *shell_comp = true; } + // Ensure that only vertices with accept edges have reports. + for (auto &gc : comps) { + assert(gc); + clearReports(*gc); + } + // We should never produce empty component graphs. assert(all_of(begin(comps), end(comps), [](const unique_ptr &g_comp) { diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 60122cf3..57668caf 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -450,7 +450,13 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) { fprintf(f, " reverse nfa: %u", report.revNfaIndex); } if (isSomRelSetReport(report)) { - fprintf(f, " set, adjust: %lld", report.somDistance); + fprintf(f, " set, adjust: %llu", report.somDistance); + } + if (report.type == EXTERNAL_CALLBACK_SOM_REL) { + fprintf(f, " relative: %llu", report.somDistance); + } + if (report.type == EXTERNAL_CALLBACK_SOM_ABS) { + fprintf(f, " absolute: %llu", report.somDistance); } fprintf(f, "\n"); } diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index b8e5a8d6..d0ab7c4a 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "ng_util.h" #include "util/compile_context.h" #include "util/graph_range.h" -#include "util/order_check.h" +#include "util/ue2_containers.h" #include #include @@ -53,9 +53,8 @@ using boost::ptr_vector; namespace ue2 { enum EquivalenceType { - LEFT_EQUIVALENCE = 0, + LEFT_EQUIVALENCE, RIGHT_EQUIVALENCE, - MAX_EQUIVALENCE }; namespace { @@ -91,7 +90,6 @@ public: } typedef ue2::unordered_set VertexInfoSet; -typedef ue2::unordered_map ClassMap; // compare two vertex info pointers on their vertex index bool VertexInfoPtrCmp::operator()(const VertexInfo *a, @@ -118,27 +116,34 @@ public: DepthMinMax d1; DepthMinMax d2; }; - ClassInfo(const NGHolder &g, VertexInfo &vi, 
ClassDepth &d_in, + ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in, EquivalenceType eq) - : vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr), - depth(d_in) { + : /* reports only matter for right-equiv */ + rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set()), + vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr), + adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr), + /* treat non-special vertices the same */ + node_type(min(g[vi.v].index, u32{N_SPECIALS})), depth(d_in) {} - // hackety-hack! - node_type = g[vi.v].index; - if (node_type > N_SPECIALS) { - // we treat all regular vertices the same - node_type = N_SPECIALS; - } - - // get all the adjacent vertices' CharReach - adjacent_cr = eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr; - - if (eq == RIGHT_EQUIVALENCE) { - rs = g[vi.v].reports; - } + bool operator==(const ClassInfo &b) const { + return node_type == b.node_type && depth.d1 == b.depth.d1 && + depth.d2 == b.depth.d2 && cr == b.cr && + adjacent_cr == b.adjacent_cr && edge_top == b.edge_top && + vertex_flags == b.vertex_flags && rs == b.rs; } - bool operator<(const ClassInfo &b) const; + friend size_t hash_value(const ClassInfo &c) { + size_t val = 0; + boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs))); + boost::hash_combine(val, c.vertex_flags); + boost::hash_combine(val, c.edge_top); + boost::hash_combine(val, c.cr); + boost::hash_combine(val, c.adjacent_cr); + boost::hash_combine(val, c.node_type); + boost::hash_combine(val, c.depth.d1); + boost::hash_combine(val, c.depth.d2); + return val; + } private: flat_set rs; /* for right equiv only */ @@ -200,26 +205,12 @@ public: return q.capacity(); } private: - set ids; //!< stores id's, for uniqueness + unordered_set ids; //!< stores id's, for uniqueness vector q; //!< vector of id's that we use as FILO. }; } -bool ClassInfo::operator<(const ClassInfo &b) const { - const ClassInfo &a = *this; - - ORDER_CHECK(node_type); - ORDER_CHECK(depth.d1); - ORDER_CHECK(depth.d2); - ORDER_CHECK(cr); - ORDER_CHECK(adjacent_cr); - ORDER_CHECK(edge_top); - ORDER_CHECK(vertex_flags); - ORDER_CHECK(rs); - return false; -} - static bool outIsIrreducible(NFAVertex &v, const NGHolder &g) { unsigned nonSpecialVertices = 0; @@ -286,9 +277,14 @@ bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) { // populate VertexInfo table static -void getVertexInfos(const NGHolder &g, ptr_vector &infos) { +ptr_vector getVertexInfos(const NGHolder &g) { + const size_t num_verts = num_vertices(g); + + ptr_vector infos; + infos.reserve(num_verts * 2); + vector vertex_map; // indexed by vertex_index property - vertex_map.resize(num_vertices(g)); + vertex_map.resize(num_verts); for (auto v : vertices_range(g)) { VertexInfo *vi = new VertexInfo(v, g); @@ -323,14 +319,24 @@ void getVertexInfos(const NGHolder &g, ptr_vector &infos) { } assert(!hasEdgeAsserts(cur_vi.v, g)); } + + return infos; } // store equivalence class in VertexInfo for each vertex static -void partitionGraph(ptr_vector &infos, ClassMap &classes, - WorkQueue &work_queue, const NGHolder &g, - EquivalenceType eq) { - map classinfomap; +vector partitionGraph(ptr_vector &infos, + WorkQueue &work_queue, const NGHolder &g, + EquivalenceType eq) { + const size_t num_verts = infos.size(); + + vector classes; + unordered_map classinfomap; + + // assume we will have lots of classes, so we don't waste time resizing + // these structures. 
+ classes.reserve(num_verts); + classinfomap.reserve(num_verts); // get distances from start (or accept) for all vertices // only one of them is used at a time, never both @@ -356,28 +362,25 @@ void partitionGraph(ptr_vector &infos, ClassMap &classes, auto ii = classinfomap.find(ci); if (ii == classinfomap.end()) { - unsigned new_class = classinfomap.size(); - vi.equivalence_class = new_class; - - classinfomap[ci] = new_class; - - // insert this vertex into the class map - VertexInfoSet &vertices = classes[new_class]; - vertices.insert(&vi); + // vertex is in a new equivalence class by itself. + unsigned eq_class = classes.size(); + vi.equivalence_class = eq_class; + classes.push_back({&vi}); + classinfomap.emplace(move(ci), eq_class); } else { + // vertex is added to an existing class. unsigned eq_class = ii->second; vi.equivalence_class = eq_class; - - // insert this vertex into the class map - VertexInfoSet &vertices = classes[eq_class]; - vertices.insert(&vi); + classes.at(eq_class).insert(&vi); // we now know that this particular class has more than one // vertex, so we add it to the work queue work_queue.push(eq_class); } } - DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classinfomap.size()); + + DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size()); + return classes; } // generalized equivalence processing (left and right) @@ -388,7 +391,7 @@ void partitionGraph(ptr_vector &infos, ClassMap &classes, // equivalence, predecessors for right equivalence) classes get revalidated in // case of a split. static -void equivalence(ClassMap &classmap, WorkQueue &work_queue, +void equivalence(vector &classes, WorkQueue &work_queue, EquivalenceType eq_type) { // now, go through the work queue until it's empty map, VertexInfoSet> tentative_classmap; @@ -397,12 +400,11 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue, WorkQueue reval_queue(work_queue.capacity()); while (!work_queue.empty()) { - // dequeue our class from the work queue unsigned cur_class = work_queue.pop(); // get all vertices in current equivalence class - VertexInfoSet &cur_class_vertices = classmap[cur_class]; + VertexInfoSet &cur_class_vertices = classes.at(cur_class); if (cur_class_vertices.size() < 2) { continue; @@ -445,16 +447,20 @@ void equivalence(ClassMap &classmap, WorkQueue &work_queue, // start from the second class for (++tmi; tmi != tentative_classmap.end(); ++tmi) { - unsigned new_class = classmap.size(); const VertexInfoSet &vertices_to_split = tmi->second; - VertexInfoSet &new_class_vertices = classmap[new_class]; + unsigned new_class = classes.size(); + VertexInfoSet new_class_vertices; for (VertexInfo *vi : vertices_to_split) { vi->equivalence_class = new_class; - cur_class_vertices.erase(vi); + // note: we cannot use the cur_class_vertices ref, as it is + // invalidated by modifications to the classes vector. + classes[cur_class].erase(vi); new_class_vertices.insert(vi); } - if (tmi->first.find(cur_class) != tmi->first.end()) { + classes.push_back(move(new_class_vertices)); + + if (contains(tmi->first, cur_class)) { reval_queue.push(new_class); } } @@ -619,16 +625,15 @@ void mergeClass(ptr_vector &infos, NGHolder &g, unsigned eq_class, // vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the // report behaviour with a single vertex). 
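The reworked partitioning above keys its map directly on ClassInfo, which is why ClassInfo earlier in this file gained operator== and a friend hash_value() in place of the old operator< and ORDER_CHECK chain: an unordered map needs equality plus a hash rather than an ordering. A standalone sketch of that hashing protocol (generic field names, using the same boost hash utilities the diff uses):

    #include <boost/functional/hash.hpp>
    #include <unordered_map>
    #include <vector>

    struct Key {
        unsigned node_type;
        std::vector<unsigned> reports;

        bool operator==(const Key &b) const {
            return node_type == b.node_type && reports == b.reports;
        }

        // found via ADL by boost::hash<Key>, mirroring ClassInfo::hash_value
        friend std::size_t hash_value(const Key &k) {
            std::size_t val = 0;
            boost::hash_combine(val, k.node_type);
            boost::hash_combine(val, boost::hash_range(k.reports.begin(),
                                                       k.reports.end()));
            return val;
        }
    };

    int main() {
        std::unordered_map<Key, unsigned, boost::hash<Key>> classes;
        classes[Key{0, {1, 2}}] = 7;
        return classes.count(Key{0, {1, 2}}) == 1 ? 0 : 1; // found via hash + equality
    }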
static -bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector &infos, - NGHolder &g) { +bool mergeEquivalentClasses(vector &classes, + ptr_vector &infos, NGHolder &g) { bool merged = false; set toRemove; // go through all classes and merge classes with more than one vertex - for (auto &cm : classmap) { + for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) { // get all vertices in current equivalence class - unsigned eq_class = cm.first; - VertexInfoSet &cur_class_vertices = cm.second; + VertexInfoSet &cur_class_vertices = classes[eq_class]; // we don't care for single-vertex classes if (cur_class_vertices.size() > 1) { @@ -644,6 +649,26 @@ bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector &infos, return merged; } +static +bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) { + // create a list of equivalence classes to check + WorkQueue work_queue(num_vertices(g)); + + // get information on every vertex in the graph + // new vertices are allocated here, and stored in infos + ptr_vector infos = getVertexInfos(g); + + // partition the graph + auto classes = partitionGraph(infos, work_queue, g, eq_type); + + // do equivalence processing + equivalence(classes, work_queue, eq_type); + + // replace equivalent classes with single vertices + // new vertices are (possibly) allocated here, and stored in infos + return mergeEquivalentClasses(classes, infos, g); +} + bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { if (!cc.grey.equivalenceEnable) { DEBUG_PRINTF("equivalence processing disabled in grey box\n"); @@ -661,34 +686,8 @@ bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) { // take note if we have merged any vertices bool merge = false; - - for (int eqi = 0; eqi < MAX_EQUIVALENCE; ++eqi) { - // map of all information pertaining a vertex - ptr_vector infos; - ClassMap classes; - - // create a list of equivalence classes to check - WorkQueue work_queue(num_vertices(g)); - EquivalenceType eq_type = (EquivalenceType) eqi; - - // resize the vector, make room for twice the vertices we have - infos.reserve(num_vertices(g) * 2); - - // get information on every vertex in the graph - // new vertices are allocated here, and stored in infos - getVertexInfos(g, infos); - - // partition the graph - partitionGraph(infos, classes, work_queue, g, eq_type); - - // do equivalence processing - equivalence(classes, work_queue, eq_type); - - // replace equivalent classes with single vertices - // new vertices are (possibly) allocated here, and stored in infos - merge |= mergeEquivalentClasses(classes, infos, g); - } - + merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE); + merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE); return merge; } diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp index 92bef737..4ffd89c0 100644 --- a/src/nfagraph/ng_execute.cpp +++ b/src/nfagraph/ng_execute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,7 +58,7 @@ namespace ue2 { struct StateInfo { StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} - StateInfo() : vertex(NFAGraph::null_vertex()) {} + StateInfo() : vertex(NGHolder::null_vertex()) {} NFAVertex vertex; CharReach reach; }; @@ -324,4 +324,49 @@ flat_set execute_graph(const NGHolder &running_g, initial_states); } +static +bool 
can_die_early(const NGHolder &g, const vector &info, + const dynamic_bitset<> &s, + map, u32> &visited, u32 age_limit) { + if (contains(visited, s) && visited[s] >= age_limit) { + /* we have already (or are in the process) of visiting here with a + * looser limit. */ + return false; + } + visited[s] = age_limit; + + if (s.none()) { + DEBUG_PRINTF("dead\n"); + return true; + } + + if (age_limit == 0) { + return false; + } + + dynamic_bitset<> all_succ(s.size()); + step(g, info, s, &all_succ); + all_succ.reset(NODE_START_DOTSTAR); + + for (u32 i = 0; i < N_CHARS; i++) { + dynamic_bitset<> next = all_succ; + filter_by_reach(info, &next, CharReach(i)); + if (can_die_early(g, info, next, visited, age_limit - 1)) { + return true; + } + } + + return false; +} + +bool can_die_early(const NGHolder &g, u32 age_limit) { + if (proper_out_degree(g.startDs, g)) { + return false; + } + const vector &info = makeInfoTable(g); + map, u32> visited; + return can_die_early(g, info, makeStateBitset(g, {g.start}), visited, + age_limit); +} + } // namespace ue2 diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h index e2c7c72d..bdcfecfd 100644 --- a/src/nfagraph/ng_execute.h +++ b/src/nfagraph/ng_execute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -64,6 +64,9 @@ flat_set execute_graph(const NGHolder &g, const NGHolder &input_dag, const flat_set &input_start_states, const flat_set &initial); +/* returns true if it is possible for the nfa to die within age_limit bytes */ +bool can_die_early(const NGHolder &g, u32 age_limit); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index 17d2a513..bc101df2 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -294,21 +294,21 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, static NFAVertex findSingleCyclic(const NGHolder &g) { - NFAVertex v = NFAGraph::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); for (const auto &e : edges_range(g)) { if (source(e, g) == target(e, g)) { if (source(e, g) == g.startDs) { continue; } - if (v != NFAGraph::null_vertex()) { + if (v != NGHolder::null_vertex()) { // More than one cyclic vertex. - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } v = source(e, g); } } - if (v != NFAGraph::null_vertex()) { + if (v != NGHolder::null_vertex()) { DEBUG_PRINTF("cyclic is %u\n", g[v].index); assert(!is_special(v, g)); } @@ -359,11 +359,11 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // The graph must contain a single cyclic vertex (other than startDs), and // that vertex can have one pred and one successor. 
NFAVertex cyclic = findSingleCyclic(g); - if (cyclic == NFAGraph::null_vertex()) { + if (cyclic == NGHolder::null_vertex()) { return false; } - NFAGraph::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; tie(ai, ae) = adjacent_vertices(g.start, g); if (*ai == g.startDs) { ++ai; @@ -411,7 +411,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { // Check the cyclic state is A-OK. v = getSoleDestVertex(g, cyclic); - if (v == NFAGraph::null_vertex()) { + if (v == NGHolder::null_vertex()) { DEBUG_PRINTF("cyclic has more than one successor\n"); return false; } diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 8fe4889d..e70b7708 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -89,11 +89,11 @@ void populateInit(const NGHolder &g, const flat_set &unused, } v_by_index->clear(); - v_by_index->resize(num_vertices(g), NFAGraph::null_vertex()); + v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); for (auto v : vertices_range(g)) { u32 v_index = g[v].index; - assert((*v_by_index)[v_index] == NFAGraph::null_vertex()); + assert((*v_by_index)[v_index] == NGHolder::null_vertex()); (*v_by_index)[v_index] = v; } } diff --git a/src/nfagraph/ng_holder.cpp b/src/nfagraph/ng_holder.cpp index fd403378..53566891 100644 --- a/src/nfagraph/ng_holder.cpp +++ b/src/nfagraph/ng_holder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -154,7 +154,7 @@ void clear_out_edges(NFAVertex v, NGHolder &h) { } void clear_graph(NGHolder &h) { - NFAGraph::vertex_iterator vi, ve; + NGHolder::vertex_iterator vi, ve; for (tie(vi, ve) = vertices(h); vi != ve;) { NFAVertex v = *vi; ++vi; diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h index 3243f665..f0a387d0 100644 --- a/src/nfagraph/ng_holder.h +++ b/src/nfagraph/ng_holder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -171,7 +171,7 @@ bool is_special(const NFAVertex v, const GraphT &g) { } static really_inline -std::pair +std::pair adjacent_vertices(NFAVertex v, const NGHolder &h) { return adjacent_vertices(v, h.g); } @@ -182,7 +182,7 @@ std::pair edge(NFAVertex u, NFAVertex v, const NGHolder &h) { } static really_inline -std::pair +std::pair edges(const NGHolder &h) { return edges(h.g); } @@ -193,13 +193,13 @@ size_t in_degree(NFAVertex v, const NGHolder &h) { } static really_inline -std::pair +std::pair in_edges(NFAVertex v, const NGHolder &h) { return in_edges(v, h.g); } static really_inline -std::pair +std::pair inv_adjacent_vertices(NFAVertex v, const NGHolder &h) { return inv_adjacent_vertices(v, h.g); } @@ -210,7 +210,7 @@ size_t out_degree(NFAVertex v, const NGHolder &h) { } static really_inline -std::pair +std::pair out_edges(NFAVertex v, const NGHolder &h) { return out_edges(v, h.g); } @@ -226,7 +226,7 @@ NFAVertex target(const NFAEdge &e, const NGHolder &h) { } static really_inline 
-std::pair +std::pair vertices(const NGHolder &h) { return vertices(h.g); } @@ -239,6 +239,16 @@ vertices(const NGHolder &h) { */ void clear_graph(NGHolder &h); +inline +void renumber_edges(NGHolder &h) { + h.renumberEdges(); +} + +inline +void renumber_vertices(NGHolder &h) { + h.renumberVertices(); +} + /* * \brief Clear and remove all of the vertices pointed to by the given iterator * range. @@ -315,15 +325,26 @@ void remove_edges(const Container &c, NGHolder &h, bool renumber = true) { remove_edges(c.begin(), c.end(), h, renumber); } -static UNUSED +inline bool is_triggered(const NGHolder &g) { return is_triggered(g.kind); } -static UNUSED +inline bool generates_callbacks(const NGHolder &g) { return generates_callbacks(g.kind); } + +inline +bool has_managed_reports(const NGHolder &g) { + return has_managed_reports(g.kind); +} + +inline +bool inspects_states_for_accepts(const NGHolder &g) { + return inspects_states_for_accepts(g.kind); +} + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 713fe370..72efa43a 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -79,13 +79,17 @@ bool sanityCheckGraph(const NGHolder &g, } } - // Vertices with edges to accept or acceptEod must have reports. + // Vertices with edges to accept or acceptEod must have reports and + // other vertices must not have them. if (is_match_vertex(v, g) && v != g.accept) { if (g[v].reports.empty()) { - DEBUG_PRINTF("vertex %u has no reports\n", - g[v].index); + DEBUG_PRINTF("vertex %u has no reports\n", g[v].index); return false; } + } else if (!g[v].reports.empty()) { + DEBUG_PRINTF("vertex %u has reports but no accept edge\n", + g[v].index); + return false; } // Participant vertices should have distinct state indices. @@ -164,7 +168,7 @@ void makeTopStates(NGHolder &g, map &tops, assert(!contains(tops, t)); - NFAVertex s = NFAGraph::null_vertex(); + NFAVertex s = NGHolder::null_vertex(); flat_set succs; insert(&succs, top.second); @@ -373,7 +377,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, const map>> &triggers, bool compress_state, bool do_accel, bool impl_test_only, u32 hint, const CompileContext &cc) { - if (!generates_callbacks(h_in)) { + if (!has_managed_reports(h_in)) { rm = nullptr; } else { assert(rm); @@ -413,7 +417,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, set zombies = findZombies(*h, br_cyclic, state_ids, cc); - if (generates_callbacks(*h)) { + if (has_managed_reports(*h)) { assert(rm); remapReportsToPrograms(*h, *rm); } @@ -501,6 +505,9 @@ aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc) { + if (!cc.grey.allowLimExNFA) { + return false; + } // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. 
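The tightened check above is the invariant behind the clearReports() calls added in ng_asserts.cpp and ng_calc_components.cpp earlier in this diff: once a graph has been cut up, reports may survive only on vertices that still feed accept or acceptEod. Stated as a standalone assertion over an NGHolder-style graph, using helpers visible elsewhere in this diff (illustrative only; the shipped check also covers the special vertices and state indices):

    // Illustrative invariant check, not the shipped sanityCheckGraph().
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        bool feeds_accept = edge(v, g.accept, g).second ||
                            edge(v, g.acceptEod, g).second;
        assert(feeds_accept == !g[v].reports.empty());
    }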
@@ -508,7 +515,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, return true; } - if (!generates_callbacks(g)) { + if (!has_managed_reports(g)) { rm = nullptr; } else { assert(rm); @@ -547,7 +554,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm removeRedundancy(g, som); - if (rm && generates_callbacks(g)) { + if (rm && has_managed_reports(g)) { pruneHighlanderDominated(g, *rm); } @@ -560,7 +567,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const CompileContext &cc) { - if (!generates_callbacks(g)) { + if (!has_managed_reports(g)) { rm = nullptr; } else { assert(rm); diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 1f991f19..deaf2ffd 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -658,7 +658,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { return g.startDs; } - NFAVertex v = NFAGraph::null_vertex(); + NFAVertex v = NGHolder::null_vertex(); for (auto w : adjacent_vertices_range(g.start, g)) { if (w != g.startDs) { if (!v) { @@ -693,8 +693,8 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { static NFAVertex find_next(const NFAVertex v, const NGHolder &g) { - NFAVertex res = NFAGraph::null_vertex(); - for (NFAVertex u : adjacent_vertices_range(v, g)) { + NFAVertex res = NGHolder::null_vertex(); + for (NFAVertex u : adjacent_vertices_range(v, g)) { if (u != v) { res = u; break; @@ -736,7 +736,7 @@ MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, // find our start vertex NFAVertex cur = find_next(v, g); - if (cur == NFAGraph::null_vertex()) { + if (cur == NGHolder::null_vertex()) { DEBUG_PRINTF("invalid start vertex\n"); return MultibyteAccelInfo(); } diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index f9ef6061..9229457c 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -64,10 +64,6 @@ namespace { /* Small literal graph type used for the suffix tree used in * compressAndScore. */ -typedef boost::adjacency_list_traits LitGraphTraits; -typedef LitGraphTraits::vertex_descriptor LitVertex; -typedef LitGraphTraits::edge_descriptor LitEdge; struct LitGraphVertexProps { LitGraphVertexProps() {} @@ -79,11 +75,15 @@ struct LitGraphEdgeProps { LitGraphEdgeProps() {} explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; + size_t index; /* only initialised when the reverse edges are added. 
*/ }; +/* keep edgeList = listS as you cannot remove edges if edgeList = vecS */ typedef boost::adjacency_list LitGraph; +typedef LitGraph::vertex_descriptor LitVertex; +typedef LitGraph::edge_descriptor LitEdge; typedef pair VertexPair; typedef std::queue LitVertexQ; @@ -339,6 +339,12 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, g[source(e, g)].index, g[target(e, g)].index, s.size()); } +bool bad_mixed_sensitivity(const ue2_literal &s) { + /* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end, + * we should be able to handle it */ + return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH; +} + static u64a litUniqueness(const string &s) { CharReach seen(s); @@ -474,43 +480,36 @@ const char *describeColor(boost::default_color_type c) { /** * The BGL's boykov_kolmogorov_max_flow requires that all edges have their - * reverse edge in the graph. This function adds them, returning the new edges - * and constructing a map of (edge, rev edge). + * reverse edge in the graph. This function adds them, returning a vector + * mapping edge index to reverse edge. Note: LitGraph should be a DAG so there + * should be no existing reverse_edges. */ static -vector addReverseEdges(LitGraph &lg, - ue2::unordered_map &reverse_edge_map) { - vector reverseMe; - - reverse_edge_map.clear(); - reverse_edge_map.reserve(num_edges(lg) * 2); +vector add_reverse_edges_and_index(LitGraph &lg) { + vector fwd_edges; + size_t next_index = 0; for (const auto &e : edges_range(lg)) { - LitVertex u = source(e, lg), v = target(e, lg); - assert(u != v); - - bool exists; - LitEdge rev; - tie(rev, exists) = edge(v, u, lg); - if (exists) { - reverse_edge_map[e] = rev; - } else { - reverseMe.push_back(e); - } + lg[e].index = next_index++; + fwd_edges.push_back(e); } - vector reverseEdges; - reverseEdges.reserve(reverseMe.size()); + vector rev_map(2 * num_edges(lg)); - for (const auto &e : reverseMe) { - LitVertex u = source(e, lg), v = target(e, lg); - LitEdge rev = add_edge(v, u, lg[e], lg).first; - reverseEdges.push_back(rev); - reverse_edge_map[e] = rev; - reverse_edge_map[rev] = e; + for (const auto &e : fwd_edges) { + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); + + assert(!edge(v, u, lg).second); + + LitEdge rev = add_edge(v, u, lg).first; + lg[rev].score = 0; + lg[rev].index = next_index++; + rev_map[lg[e].index] = rev; + rev_map[lg[rev].index] = e; } - return reverseEdges; + return rev_map; } static @@ -522,33 +521,33 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, assert(!in_degree(root, lg)); assert(!out_degree(sink, lg)); + size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. 
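As an aside, the property-map pattern that the findMinCut() hunk below switches to can be shown in a small, self-contained form. The sketch that follows is not taken from the Hyperscan sources: the toy graph, EdgeProps and every other name are invented for illustration. It runs boost::boykov_kolmogorov_max_flow over an adjacency_list whose edges carry a hand-assigned index in a bundled property, and supplies residuals, reverse edges, predecessors, colours and distances through make_iterator_property_map keyed on that index.

#include <cstddef>
#include <cstdio>
#include <vector>

#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
#include <boost/property_map/property_map.hpp>

struct EdgeProps {
    long capacity = 0;
    std::size_t index = 0; /* assigned by hand; adjacency_list provides no
                            * automatic edge_index property */
};

typedef boost::adjacency_list<boost::listS, boost::vecS, boost::bidirectionalS,
                              boost::no_property, EdgeProps> Graph;
typedef Graph::vertex_descriptor Vertex;
typedef Graph::edge_descriptor Edge;

int main() {
    Graph g;
    Vertex s = add_vertex(g), a = add_vertex(g), t = add_vertex(g);

    std::vector<Edge> rev_edges;
    std::size_t next_index = 0;

    /* add a forward edge and its zero-capacity reverse edge, index both, and
     * record the pairing in rev_edges */
    auto add_pair = [&](Vertex u, Vertex v, long cap) {
        Edge fwd = add_edge(u, v, g).first;
        Edge bwd = add_edge(v, u, g).first;
        g[fwd].capacity = cap;
        g[bwd].capacity = 0;
        g[fwd].index = next_index++;
        g[bwd].index = next_index++;
        rev_edges.resize(next_index);
        rev_edges[g[fwd].index] = bwd;
        rev_edges[g[bwd].index] = fwd;
    };

    add_pair(s, a, 3);
    add_pair(a, t, 2);

    const auto v_index_map = get(boost::vertex_index, g);
    const auto e_index_map = get(&EdgeProps::index, g);

    const std::size_t num_verts = num_vertices(g);
    std::vector<boost::default_color_type> colors(num_verts);
    std::vector<long> distances(num_verts);
    std::vector<Edge> predecessors(num_verts);
    std::vector<long> residuals(num_edges(g));

    long flow = boost::boykov_kolmogorov_max_flow(
        g, get(&EdgeProps::capacity, g),
        boost::make_iterator_property_map(residuals.begin(), e_index_map),
        boost::make_iterator_property_map(rev_edges.begin(), e_index_map),
        boost::make_iterator_property_map(predecessors.begin(), v_index_map),
        boost::make_iterator_property_map(colors.begin(), v_index_map),
        boost::make_iterator_property_map(distances.begin(), v_index_map),
        v_index_map, s, t);

    std::printf("max flow: %ld\n", flow); /* expect 2 for this toy graph */
    return 0;
}

The hunk below takes the same shape: LitGraph keeps listS edge storage so the temporary reverse edges can be removed again afterwards, and since adjacency_list supplies no edge_index of its own, add_reverse_edges_and_index() numbers the edges explicitly and the iterator property maps are keyed on &LitGraphEdgeProps::index.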
- ue2::unordered_map reverse_edge_map; - vector tempEdges = addReverseEdges(lg, reverse_edge_map); + vector rev_edges = add_reverse_edges_and_index(lg); const auto v_index_map = get(vertex_index, lg); + const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); vector colors(num_verts); vector distances(num_verts); vector predecessors(num_verts); - ue2::unordered_map residuals; - residuals.reserve(num_edges(lg)); + vector residuals(num_edges(lg)); UNUSED u64a flow = boykov_kolmogorov_max_flow(lg, get(&LitGraphEdgeProps::score, lg), - make_assoc_property_map(residuals), - make_assoc_property_map(reverse_edge_map), + make_iterator_property_map(residuals.begin(), e_index_map), + make_iterator_property_map(rev_edges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), make_iterator_property_map(colors.begin(), v_index_map), make_iterator_property_map(distances.begin(), v_index_map), - get(vertex_index, lg), root, sink); + v_index_map, root, sink); DEBUG_PRINTF("done, flow = %llu\n", flow); - // Remove temporary reverse edges. - for (const auto &e : tempEdges) { - remove_edge(e, lg); - } + /* remove reverse edges */ + remove_edge_if([&](const LitEdge &e) { + return lg[e].index >= num_real_edges; + }, lg); vector white_cut, black_cut; u64a white_flow = 0, black_flow = 0; @@ -631,6 +630,48 @@ u64a compressAndScore(set &s) { return score; } +/* like compressAndScore, but replaces long mixed sensitivity literals with + * something weaker. */ +u64a sanitizeAndCompressAndScore(set &lits) { + const size_t maxExploded = 8; // only case-explode this far + + /* TODO: the whole compression thing could be made better by systematically + * considering replacing literal sets not just by common suffixes but also + * by nocase literals. */ + + vector replacements; + + for (auto it = lits.begin(); it != lits.end();) { + auto jt = it; + ++it; + + if (!bad_mixed_sensitivity(*jt)) { + continue; + } + + /* we have to replace *jt with something... 
*/ + ue2_literal s = *jt; + lits.erase(jt); + + vector exploded; + for (auto cit = caseIterateBegin(s); cit != caseIterateEnd(); ++cit) { + exploded.emplace_back(*cit, false); + if (exploded.size() > maxExploded) { + goto dont_explode; + } + } + insert(&replacements, replacements.end(), exploded); + + continue; + dont_explode: + make_nocase(&s); + replacements.push_back(s); + } + + insert(&lits, replacements); + return compressAndScore(lits); +} + u64a scoreSet(const set &s) { if (s.empty()) { return NO_LITERAL_AT_EDGE_SCORE; @@ -681,7 +722,7 @@ set getLiteralSet(const NGHolder &g, const NFAVertex &v, return s; } -vector scoreEdges(const NGHolder &g) { +vector scoreEdges(const NGHolder &g, const flat_set &known_bad) { assert(hasCorrectlyNumberedEdges(g)); vector scores(num_edges(g)); @@ -689,8 +730,12 @@ vector scoreEdges(const NGHolder &g) { for (const auto &e : edges_range(g)) { u32 eidx = g[e].index; assert(eidx < scores.size()); - set ls = getLiteralSet(g, e); - scores[eidx] = compressAndScore(ls); + if (contains(known_bad, e)) { + scores[eidx] = NO_LITERAL_AT_EDGE_SCORE; + } else { + set ls = getLiteralSet(g, e); + scores[eidx] = compressAndScore(ls); + } } return scores; @@ -849,4 +894,49 @@ bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { return true; } +bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { + NFAVertex v = g.accept; + + for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { + NGHolder::inv_adjacency_iterator ai, ae; + tie(ai, ae) = inv_adjacent_vertices(v, g); + if (ai == ae) { + assert(0); // no predecessors? + return false; + } + v = *ai++; + if (ai != ae) { + DEBUG_PRINTF("branch, fail\n"); + return false; + } + + if (is_special(v, g)) { + DEBUG_PRINTF("special found, fail\n"); + return false; + } + + const CharReach &cr_g = g[v].char_reach; + const CharReach &cr_l = *it; + + if (!cr_l.isSubsetOf(cr_g)) { + /* running over the prefix is needed to prevent false postives */ + DEBUG_PRINTF("reach fail\n"); + return false; + } + } + + // Our last value for v should have only start states for predecessors. + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (!is_any_start(u, g)) { + DEBUG_PRINTF("pred is not start\n"); + return false; + } + } + + assert(num_vertices(g) == lit.length() + N_SPECIALS); + + DEBUG_PRINTF("ok\n"); + return true; +} + } // namespace ue2 diff --git a/src/nfagraph/ng_literal_analysis.h b/src/nfagraph/ng_literal_analysis.h index 4fa72b9f..6fd9c525 100644 --- a/src/nfagraph/ng_literal_analysis.h +++ b/src/nfagraph/ng_literal_analysis.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,9 +42,7 @@ namespace ue2 { #define NO_LITERAL_AT_EDGE_SCORE 10000000ULL - -/* Score for special-to-special edges */ -#define INVALID_EDGE_CAP 100000000ULL +#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */ class NGHolder; @@ -59,9 +57,20 @@ std::set getLiteralSet(const NGHolder &g, const NFAVertex &v, bool only_first_encounter = true); std::set getLiteralSet(const NGHolder &g, const NFAEdge &e); -/** Score all the edges in the given graph, returning them in \p scores indexed +/** + * Returns true if we are unable to use a mixed sensitivity literal in rose (as + * our literal matchers are generally either case sensitive or not). 
+ * + * Shortish mixed sensitivity literals can be handled by confirm checks in rose + * and are not flagged as bad. + */ +bool bad_mixed_sensitivity(const ue2_literal &s); + +/** + * Score all the edges in the given graph, returning them in \p scores indexed * by edge_index. */ -std::vector scoreEdges(const NGHolder &h); +std::vector scoreEdges(const NGHolder &h, + const flat_set &known_bad = {}); /** Returns a score for a literal set. Lower scores are better. */ u64a scoreSet(const std::set &s); @@ -69,6 +78,12 @@ u64a scoreSet(const std::set &s); /** Compress a literal set to fewer literals. */ u64a compressAndScore(std::set &s); +/** + * Compress a literal set to fewer literals and replace any long mixed + * sensitivity literals with supported literals. + */ +u64a sanitizeAndCompressAndScore(std::set &s); + bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, NGHolder *rhs); @@ -77,6 +92,10 @@ bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); +/** \brief Returns true if the given literal is the only thing in the graph, + * from (start or startDs) to accept. */ +bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index 9ee4f151..871c8ac7 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -186,7 +186,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, /** \brief Split off literals. True if any changes were made to the graph. */ bool splitOffLiterals(NG &ng, NGWrapper &g) { - if (!ng.cc.grey.allowRose) { + if (!ng.cc.grey.allowLiteral) { return false; } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index b1c6ff96..39788570 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -173,11 +173,11 @@ void populateInit(const NGHolder &g, const flat_set &unused, } v_by_index->clear(); - v_by_index->resize(num_vertices(g), NFAGraph::null_vertex()); + v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); for (auto v : vertices_range(g)) { u32 vert_id = g[v].index; - assert((*v_by_index)[vert_id] == NFAGraph::null_vertex()); + assert((*v_by_index)[vert_id] == NGHolder::null_vertex()); (*v_by_index)[vert_id] = v; } @@ -531,9 +531,9 @@ unique_ptr buildMcClellan(const NGHolder &graph, DEBUG_PRINTF("attempting to build ?%d? 
mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); - bool prunable = grey.highlanderPruneDFA && generates_callbacks(graph); - assert(rm || !generates_callbacks(graph)); - if (!generates_callbacks(graph)) { + bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph); + assert(rm || !has_managed_reports(graph)); + if (!has_managed_reports(graph)) { rm = nullptr; } diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index c2b9eea9..8abc45b3 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -135,7 +135,7 @@ void findWidths(const NGHolder &g, // Wire our entries to start and our exits to accept. for (auto v : ri.vertices) { NFAVertex v_new = mapping[v]; - assert(v_new != NFAGraph::null_vertex()); + assert(v_new != NGHolder::null_vertex()); if (isRegionEntry(g, v, region_map) && !edge(rg.start, v_new, rg).second) { diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 540f4859..00b2e8ac 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -472,7 +472,7 @@ bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, } NFAVertex puffv = nodes.back(); - assert(puffv != NFAGraph::null_vertex()); + assert(puffv != NGHolder::null_vertex()); u32 width = countChain(g, nodes.back()); flat_set chain_reports; diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index b9b80c5b..26599251 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -158,7 +158,7 @@ void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { static void inplaceIntersection(vector &vset1, const flat_set &vset2) { - const NFAVertex GONE = NFAGraph::null_vertex(); + const NFAVertex GONE = NGHolder::null_vertex(); vector::iterator it = vset1.begin(), ite = vset1.end(); flat_set::const_iterator jt = vset2.begin(), jte = vset2.end(); diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 80434a0a..bc7e73d3 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1202,7 +1202,7 @@ CharReach predReach(const NGHolder &g, NFAVertex v) { static void filterMap(const NGHolder &subg, ue2::unordered_map &vmap) { - NFAGraph::vertex_iterator vi, ve; + NGHolder::vertex_iterator vi, ve; tie(vi, ve) = vertices(subg); const ue2::unordered_set remaining_verts(vi, ve); diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 3015af4c..137ac5cc 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following 
conditions are met: @@ -750,7 +750,7 @@ unique_ptr LitCollection::pickNext() { for (auto v : lits.back()->vv) { if (contains(poisoned, v)) { DEBUG_PRINTF("skipping '%s' as overlapped\n", - ((const string &)*lits.back()->lit.begin()).c_str()); + dumpString(*(lits.back()->lit.begin())).c_str()); lits.pop_back(); goto next_lit; } @@ -760,7 +760,7 @@ unique_ptr LitCollection::pickNext() { lits.pop_back(); poisonCandidates(*rv); DEBUG_PRINTF("best is '%s' %u a%d t%d\n", - ((const string &)*rv->lit.begin()).c_str(), + dumpString(*(rv->lit.begin())).c_str(), g[rv->vv.front()].index, (int)createsAnchoredLHS(g, rv->vv, depths, grey), (int)createsTransientLHS(g, rv->vv, depths, grey)); @@ -773,51 +773,6 @@ unique_ptr LitCollection::pickNext() { } -/** \brief Returns true if the given literal is the only thing in the graph, - * from start to accept. */ -static -bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) { - NFAVertex v = g.accept; - - for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { - NFAGraph::inv_adjacency_iterator ai, ae; - tie(ai, ae) = inv_adjacent_vertices(v, g); - if (ai == ae) { - assert(0); // no predecessors? - return false; - } - v = *ai++; - if (ai != ae) { - DEBUG_PRINTF("branch, fail\n"); - return false; - } - - if (is_special(v, g)) { - DEBUG_PRINTF("special found, fail\n"); - return false; - } - - const CharReach &cr = g[v].char_reach; - if (cr != *it) { - DEBUG_PRINTF("reach fail\n"); - return false; - } - } - - // Our last value for v should have only start states for predecessors. - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_any_start(u, g)) { - DEBUG_PRINTF("pred is not start\n"); - return false; - } - } - - assert(num_vertices(g) == lit.length() + N_SPECIALS); - - DEBUG_PRINTF("ok\n"); - return true; -} - static bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { set curr, next; @@ -860,7 +815,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, max_delay--; } - DEBUG_PRINTF("killing off '%s'\n", ((const string &)lit).c_str()); + DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); set curr, next; curr.insert(g.accept); @@ -917,6 +872,7 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } clear_in_edges(g.accept, g); + clearReports(g); vector verts(pred.begin(), pred.end()); sort(verts.begin(), verts.end(), VertexIndexOrdering(g)); @@ -933,19 +889,10 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, return delay; } -static void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay) { + u32 delay, const vector &preds) { assert(delay <= lit.length()); - DEBUG_PRINTF("adding on '%s' %u\n", ((const string &)lit).c_str(), delay); - - vector preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); - clear_in_edges(g.accept, g); - - for (auto v : preds) { - g[v].reports.clear(); /* clear report from old accepts */ - } + DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); NFAVertex prev = g.accept; auto it = lit.rbegin(); @@ -972,6 +919,19 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, assert(allMatchStatesHaveReports(g)); } +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay) { + vector preds; + insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); + clear_in_edges(g.accept, g); + + for (auto v : preds) { + g[v].reports.clear(); /* clear report from old accepts */ + } + + restoreTrailingLiteralStates(g, 
lit, delay, preds); +} + /* return false if we should get rid of the edge altogether */ static bool removeLiteralFromLHS(RoseInGraph &ig, const RoseInEdge &lhs, @@ -1824,9 +1784,6 @@ bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, set lits = getLiteralSet(h, e); compressAndScore(lits); cut_lits[e] = lits; - - DEBUG_PRINTF("cut lit '%s'\n", - ((const string &)*cut_lits[e].begin()).c_str()); } /* if literals are underlength bail or if it involves a forbidden edge*/ @@ -2245,7 +2202,7 @@ bool improveLHS(RoseInGraph &ig, const vector &edges, const vector &local = by_src[v]; vector graphs; - map > by_graph; + map > by_graph; for (const auto &e : local) { NGHolder *gp = ig[e].graph.get(); if (!contains(by_graph, gp)) { diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h index 4e16a3c4..d180e8a5 100644 --- a/src/nfagraph/ng_rose.h +++ b/src/nfagraph/ng_rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,11 @@ #ifndef NG_ROSE_H #define NG_ROSE_H +#include "ng_holder.h" #include "ue2common.h" +#include + namespace ue2 { class NGHolder; @@ -65,6 +68,13 @@ bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 max_delay, bool overhang_ok = true); +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay); + +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay, + const std::vector &preds); + } // namespace ue2 #endif // NG_ROSE_H diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 03a612a0..ed2942bb 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -384,7 +384,7 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, add_edge(prefix.accept, prefix.acceptEod, prefix); assert(!next_enters.empty()); - assert(next_enters.front() != NFAGraph::null_vertex()); + assert(next_enters.front() != NGHolder::null_vertex()); u32 dead_region = regions.at(next_enters.front()); DEBUG_PRINTF("curr_region %u, dead_region %u\n", regions.at(curr_exits.front()), dead_region); @@ -2064,8 +2064,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; - // This approach relies on Rose. 
- if (!cc.grey.allowRose) { + if (!cc.grey.allowHaigLit) { return SOMBE_FAIL; } @@ -2537,7 +2536,7 @@ bool doHaigLitHaigSom(NG &ng, NGHolder &g, RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); bool lhs_all_vac = true; - NFAGraph::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs); ai != ae && lhs_all_vac; ++ai) { if (!is_special(*ai, *lhs)) { diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index 42157e1e..bce638c0 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,7 +100,12 @@ void splitLHS(const NGHolder &base, const vector &pivots, add_edge((*lhs_map)[pivot], lhs->accept, *lhs); } - pruneUseless(*lhs); + /* should do the renumbering unconditionally as we know edges are already + * misnumbered */ + pruneUseless(*lhs, false); + renumber_edges(*lhs); + renumber_vertices(*lhs); + filterSplitMap(*lhs, lhs_map); switch (base.kind) { @@ -112,6 +117,12 @@ void splitLHS(const NGHolder &base, const vector &pivots, case NFA_SUFFIX: lhs->kind = NFA_INFIX; break; + case NFA_EAGER_PREFIX: + /* Current code should not be assigning eager until well after all the + * splitting is done. */ + assert(0); + lhs->kind = NFA_EAGER_PREFIX; + break; case NFA_REV_PREFIX: case NFA_OUTFIX_RAW: assert(0); @@ -142,7 +153,12 @@ void splitRHS(const NGHolder &base, const vector &pivots, assert(contains(*rhs_map, pivot)); add_edge(rhs->start, (*rhs_map)[pivot], *rhs); } - pruneUseless(*rhs); + + /* should do the renumbering unconditionally as we know edges are already + * misnumbered */ + pruneUseless(*rhs, false); + renumber_edges(*rhs); + renumber_vertices(*rhs); filterSplitMap(*rhs, rhs_map); switch (base.kind) { @@ -154,6 +170,12 @@ void splitRHS(const NGHolder &base, const vector &pivots, case NFA_OUTFIX: rhs->kind = NFA_SUFFIX; break; + case NFA_EAGER_PREFIX: + /* Current code should not be assigning eager until well after all the + * splitting is done. */ + assert(0); + rhs->kind = NFA_INFIX; + break; case NFA_REV_PREFIX: case NFA_OUTFIX_RAW: assert(0); diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index dd3693e5..6577673f 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -369,7 +369,7 @@ map findSquashers(const NGHolder &g, som_type som) { buildPDomTree(g, pdom_tree); // Build list of vertices by state ID and a set of init states. 
- vector vByIndex(numStates, NFAGraph::null_vertex()); + vector vByIndex(numStates, NGHolder::null_vertex()); NFAStateSet initStates(numStates); smgb_cache cache(g); @@ -394,7 +394,7 @@ map findSquashers(const NGHolder &g, som_type som) { for (u32 i = 0; i < numStates; i++) { NFAVertex v = vByIndex[i]; - assert(v != NFAGraph::null_vertex()); + assert(v != NGHolder::null_vertex()); const CharReach &cr = g[v].char_reach; /* only non-init cyclics can be squashers */ diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index abba09f9..217183de 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -184,7 +184,7 @@ u32 commonPrefixLength(const NGHolder &ga, size_t a_count = 0; size_t b_count = 0; - NFAGraph::out_edge_iterator ei, ee; + NGHolder::out_edge_iterator ei, ee; for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) { u32 sid = a_state_ids.at(target(*ei, ga)); if (sid == NO_STATE || sid >= max) { @@ -213,7 +213,7 @@ u32 commonPrefixLength(const NGHolder &ga, } } - NFAGraph::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae; ++ai) { u32 sid = b_state_ids.at(*ai); diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index bcf0ce29..c629d553 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,26 +78,26 @@ depth maxDistFromStartOfData(const NFAVertexDepth &vd) { } NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { - assert(a != NFAGraph::null_vertex()); + assert(a != NGHolder::null_vertex()); - NFAGraph::out_edge_iterator ii, iie; + NGHolder::out_edge_iterator ii, iie; tie(ii, iie) = out_edges(a, g); if (ii == iie) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } NFAVertex b = target(*ii, g); if (a == b) { ++ii; if (ii == iie) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } b = target(*ii, g); if (++ii != iie) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } assert(a != b); @@ -105,23 +105,23 @@ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { } NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) { - assert(a != NFAGraph::null_vertex()); + assert(a != NGHolder::null_vertex()); u32 idegree = in_degree(a, g); if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } - NFAGraph::in_edge_iterator ii, iie; + NGHolder::in_edge_iterator ii, iie; tie(ii, iie) = in_edges(a, g); if (ii == iie) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } NFAVertex b = source(*ii, g); if (a == b) { ++ii; if (ii == iie) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } b = source(*ii, g); @@ -209,6 +209,15 @@ bool isAnchored(const NGHolder &g) { return true; } 
+bool isFloating(const NGHolder &g) { + for (auto v : adjacent_vertices_range(g.start, g)) { + if (v != g.startDs && !edge(g.startDs, v, g).second) { + return false; + } + } + return true; +} + bool isAcyclic(const NGHolder &g) { try { depth_first_search( @@ -321,7 +330,7 @@ bool can_match_at_eod(const NGHolder &h) { } bool can_only_match_at_eod(const NGHolder &g) { - NFAGraph::in_edge_iterator ie, ee; + NGHolder::in_edge_iterator ie, ee; tie(ie, ee) = in_edges(g.accept, g); return ie == ee; @@ -622,16 +631,18 @@ unique_ptr cloneHolder(const NGHolder &in) { } #ifndef NDEBUG -/** \brief Used in sanity-checking assertions: returns true if all vertices - * leading to accept or acceptEod have at least one report ID. */ + bool allMatchStatesHaveReports(const NGHolder &g) { + unordered_set reporters; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (g[v].reports.empty()) { DEBUG_PRINTF("vertex %u has no reports!\n", g[v].index); return false; } + reporters.insert(v); } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; // stylised edge @@ -641,12 +652,20 @@ bool allMatchStatesHaveReports(const NGHolder &g) { g[v].index); return false; } + reporters.insert(v); } + + for (auto v : vertices_range(g)) { + if (!contains(reporters, v) && !g[v].reports.empty()) { + DEBUG_PRINTF("vertex %u is not a match state, but has reports!\n", + g[v].index); + return false; + } + } + return true; } -/** Assertion: returns true if the vertices in this graph are contiguously (and - * uniquely) numbered from zero. */ bool hasCorrectlyNumberedVertices(const NGHolder &g) { size_t count = num_vertices(g); vector ids(count, false); @@ -657,11 +676,10 @@ bool hasCorrectlyNumberedVertices(const NGHolder &g) { } ids[id] = true; } - return find(ids.begin(), ids.end(), false) == ids.end(); + return find(ids.begin(), ids.end(), false) == ids.end() + && num_vertices(g) == num_vertices(g.g); } -/** Assertion: returns true if the edges in this graph are contiguously (and - * uniquely) numbered from zero. */ bool hasCorrectlyNumberedEdges(const NGHolder &g) { size_t count = num_edges(g); vector ids(count, false); @@ -672,8 +690,10 @@ bool hasCorrectlyNumberedEdges(const NGHolder &g) { } ids[id] = true; } - return find(ids.begin(), ids.end(), false) == ids.end(); + return find(ids.begin(), ids.end(), false) == ids.end() + && num_edges(g) == num_edges(g.g); } + #endif // NDEBUG } // namespace ue2 diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 9eb621e8..4f58dc45 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,7 +65,7 @@ bool is_dot(NFAVertex v, const GraphT &g) { template static really_inline void succ(const NGHolder &g, NFAVertex v, U *s) { - NFAGraph::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; tie(ai, ae) = adjacent_vertices(v, g); s->insert(ai, ae); } @@ -74,14 +74,14 @@ void succ(const NGHolder &g, NFAVertex v, U *s) { template static really_inline void pred(const NGHolder &g, NFAVertex v, U *p) { - NFAGraph::inv_adjacency_iterator it, ite; + NGHolder::inv_adjacency_iterator it, ite; tie(it, ite) = inv_adjacent_vertices(v, g); p->insert(it, ite); } /** returns a vertex with an out edge from v and is not v. 
* v must have exactly one out-edge excluding self-loops. - * will return NFAGraph::null_vertex() if the preconditions don't hold. + * will return NGHolder::null_vertex() if the preconditions don't hold. */ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); @@ -228,6 +228,10 @@ bool isVacuous(const NGHolder &h); * proper successors). */ bool isAnchored(const NGHolder &h); +/** \brief True if the graph contains no anchored vertices (start has no + * successors aside from startDs or vertices connected to startDs). */ +bool isFloating(const NGHolder &h); + /** True if the graph contains no back-edges at all, other than the * startDs self-loop. */ bool isAcyclic(const NGHolder &g); @@ -293,15 +297,29 @@ void clearReports(NGHolder &g); void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); #ifndef NDEBUG -// Assertions: only available in internal builds -/** \brief Used in sanity-checking assertions: returns true if all vertices - * leading to accept or acceptEod have at least one report ID. */ +// Assertions: only available in internal builds. + +/** + * Used in sanity-checking assertions: returns true if all vertices + * with edges to accept or acceptEod have at least one report ID. Additionally, + * checks that ONLY vertices with edges to accept or acceptEod has reports. + */ bool allMatchStatesHaveReports(const NGHolder &g); +/** + * Assertion: returns true if the vertices in this graph are contiguously (and + * uniquely) numbered from zero. + */ bool hasCorrectlyNumberedVertices(const NGHolder &g); + +/** + * Assertion: returns true if the edges in this graph are contiguously (and + * uniquely) numbered from zero. + */ bool hasCorrectlyNumberedEdges(const NGHolder &g); -#endif + +#endif // NDEBUG } // namespace ue2 diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp new file mode 100644 index 00000000..94e0a998 --- /dev/null +++ b/src/nfagraph/ng_violet.cpp @@ -0,0 +1,2661 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include "ng_violet.h" + +#include "grey.h" +#include "ng_depth.h" +#include "ng_dominators.h" +#include "ng_dump.h" +#include "ng_equivalence.h" +#include "ng_holder.h" +#include "ng_is_equal.h" +#include "ng_literal_analysis.h" +#include "ng_netflow.h" +#include "ng_prune.h" +#include "ng_redundancy.h" +#include "ng_region.h" +#include "ng_reports.h" +#include "ng_rose.h" +#include "ng_split.h" +#include "ng_util.h" +#include "ng_width.h" +#include "rose/rose_build.h" +#include "rose/rose_build_util.h" +#include "rose/rose_in_dump.h" +#include "rose/rose_in_graph.h" +#include "rose/rose_in_util.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/make_unique.h" +#include "util/order_check.h" +#include "util/target_info.h" +#include "util/ue2string.h" +#include "util/ue2_containers.h" + +#include +#include +#include +#include +#include +#include +#include + +#define STAGE_DEBUG_PRINTF DEBUG_PRINTF + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +/* createsAnchoredLHS() is conservative as the depths take into account + * back edges that come from beyond the split point and would be missing after + * the graph is split. */ +static +bool createsAnchoredLHS(const NGHolder &g, const vector &vv, + const vector &depths, + const Grey &grey, depth max_depth = depth::infinity()) { + max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); + + for (auto v : vv) { + /* avoid issues of self loops blowing out depths: + * look at preds, add 1 */ + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + u32 idx = g[u].index; + assert(idx < depths.size()); + if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { + return false; + } + } + } + return true; +} + +/* createsTransientLHS() is conservative as the depths take into account + * back edges that come from beyond the split point and would be missing after + * the graph is split. */ +static +bool createsTransientLHS(const NGHolder &g, const vector &vv, + const vector &depths, + const Grey &grey) { + const depth max_depth(grey.maxHistoryAvailable); + + for (auto v : vv) { + /* avoid issues of self loops blowing out depths: + * look at preds, add 1 */ + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == v) { + continue; + } + + u32 idx = g[u].index; + assert(idx < depths.size()); + if (maxDistFromInit(depths.at(idx)) >= max_depth) { + return false; + } + } + } + return true; +} + +namespace { +/** + * Information on a cut: vertices and literals. + */ +struct VertLitInfo { + VertLitInfo() {} + VertLitInfo(NFAVertex v, const set &litlit, bool c_anch, + bool c_tran = false) + : vv(vector(1, v)), lit(litlit), creates_anchored(c_anch), + creates_transient(c_tran) {} + VertLitInfo(const vector &vv_in, const set &lit_in, + bool c_anch) + : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} + vector vv; + set lit; + + bool creates_anchored = false; + bool creates_transient = false; +}; + +/** + * \brief Comparator class for sorting LitCollection::lits. + * + * This is separated out from LitCollection itself as passing LitCollection to + * std::sort() would incur a (potentially expensive) copy. 
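The copy concern raised in the comment above is easy to demonstrate in isolation: std::sort() takes its comparator by value, so whatever state the comparator owns travels with every copy. A minimal sketch of the workaround described here, using invented names rather than the Hyperscan types, keeps the comparator down to a single reference to externally owned scoring data while ordering a vector of unique_ptr elements:

#include <algorithm>
#include <cstdio>
#include <memory>
#include <vector>

struct Cand {
    unsigned idx;
};

/* cheap to copy: holds only a reference to the (potentially large) scoring
 * state rather than owning it */
struct ScoreCmp {
    explicit ScoreCmp(const std::vector<unsigned> &scores_in)
        : scores(scores_in) {}
    bool operator()(const std::unique_ptr<Cand> &a,
                    const std::unique_ptr<Cand> &b) const {
        return scores[a->idx] < scores[b->idx];
    }
    const std::vector<unsigned> &scores;
};

int main() {
    std::vector<unsigned> scores = {30, 10, 20};
    std::vector<std::unique_ptr<Cand>> cands;
    for (unsigned i = 0; i < 3; i++) {
        cands.push_back(std::unique_ptr<Cand>(new Cand{i}));
    }
    std::sort(cands.begin(), cands.end(), ScoreCmp(scores));
    std::printf("lowest score: %u\n", cands.front()->idx); /* prints 1 */
    return 0;
}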
+ */ +class LitComparator { +public: + LitComparator(const NGHolder &g_in, bool sa, bool st) + : g(g_in), seeking_anchored(sa), seeking_transient(st) {} + bool operator()(const unique_ptr &a, + const unique_ptr &b) const { + assert(a && b); + + if (seeking_anchored) { + if (a->creates_anchored != b->creates_anchored) { + return a->creates_anchored < b->creates_anchored; + } + } + + if (seeking_transient) { + if (a->creates_transient != b->creates_transient) { + return a->creates_transient < b->creates_transient; + } + } + + u64a score_a = scoreSet(a->lit); + u64a score_b = scoreSet(b->lit); + + if (score_a != score_b) { + return score_a > score_b; + } + + /* vertices should only be in one candidate cut */ + assert(a->vv == b->vv || a->vv.front() != b->vv.front()); + return g[a->vv.front()].index > g[b->vv.front()].index; + } + +private: + const NGHolder &g; /**< graph on which cuts are found */ + + bool seeking_anchored; + bool seeking_transient; +}; +} + +static +size_t shorter_than(const set &s, size_t limit) { + size_t count = 0; + + for (const auto &lit : s) { + if (lit.length() < limit) { + count++; + } + } + + return count; +} + +static +u32 min_len(const set &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = min(rv, (u32)lit.length()); + } + + return rv; +} + +static +u32 min_period(const set &s) { + u32 rv = ~0U; + + for (const auto &lit : s) { + rv = min(rv, (u32)minStringPeriod(lit)); + } + DEBUG_PRINTF("min period %u\n", rv); + return rv; +} + +#define MIN_ANCHORED_LEN 2 + +static +bool validateRoseLiteralSetQuality(const set &s, u64a score, + bool anchored, u32 min_allowed_floating_len, + bool desperation) { + u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN + : min_allowed_floating_len; + + assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); + + if (score >= NO_LITERAL_AT_EDGE_SCORE) { + DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); + return false; + } + + assert(!s.empty()); + if (s.empty()) { + DEBUG_PRINTF("candidate is too bad/something went wrong\n"); + return false; + } + + u32 s_min_len = min_len(s); + u32 s_min_period = min_period(s); + size_t short_count = shorter_than(s, 5); + + DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" + " short_count=%zu desp=%d\n", + dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, + s_min_period, short_count, (int)desperation); + + bool ok = true; + + if (s.size() > 10 /* magic number is magic */ + || s_min_len < min_allowed_len + || (s_min_period <= 1 && min_allowed_len != 1)) { + ok = false; + } + + if (!ok && desperation + && s.size() <= 20 /* more magic numbers are magical */ + && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) + && s_min_period > 1) { + DEBUG_PRINTF("candidate is ok\n"); + ok = true; + } + + if (!ok && desperation + && s.size() <= 50 /* more magic numbers are magical */ + && s_min_len > 10 + && s_min_period > 1) { + DEBUG_PRINTF("candidate is ok\n"); + ok = true; + } + + if (!ok) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + return true; +} + +static UNUSED +void dumpRoseLiteralSet(const set &s) { + for (UNUSED const auto &lit : s) { + DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); + } +} + +static +void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, + const vector *depths, + const set &a_dom, + vector> *lits, + u32 min_allowed_len, bool desperation, + const CompileContext &cc) { + assert(depths || !seeking_anchored); + + map scores; + map> lit_info; + set s; + + for (auto v : a_dom) { + s = 
getLiteralSet(g, v, true); /* RHS will take responsibility for any + revisits to the target vertex */ + + if (s.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + continue; + } + + DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + u64a score = sanitizeAndCompressAndScore(s); + + bool anchored = false; + if (seeking_anchored) { + anchored = createsAnchoredLHS(g, {v}, *depths, cc.grey); + } + + if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, + desperation)) { + continue; + } + + DEBUG_PRINTF("candidate is a candidate\n"); + scores[v] = score; + lit_info[v] = make_unique(v, s, anchored); + } + + /* try to filter out cases where appending some characters produces worse + * literals. Only bother to look back one byte, TODO make better */ + for (auto u : a_dom) { + if (out_degree(u, g) != 1 || !scores[u]) { + continue; + } + NFAVertex v = *adjacent_vertices(u, g).first; + if (contains(scores, v) && scores[v] >= scores[u]) { + DEBUG_PRINTF("killing off v as score %llu >= %llu\n", + scores[v], scores[u]); + lit_info.erase(v); + } + } + + lits->reserve(lit_info.size()); + for (auto &m : lit_info) { + lits->push_back(move(m.second)); + } + DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); +} + +static +void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, + const vector *depths, + const set &bad, + const set *allowed, + vector> *lits, + u32 min_allowed_len, bool desperation, + const CompileContext &cc) { + /* This allows us to get more places to split the graph as we are not + limited to points where there is a single vertex to split at. */ + + assert(depths || !seeking_anchored); + + /* TODO: operate over 'proto-regions' which ignore back edges */ + auto regions = assignRegions(g); + + set mand, optional; + map > exits; + + for (auto v : vertices_range(g)) { + u32 region = regions[v]; + if (is_any_start(v, g) || region == 0) { + continue; + } + + if (is_any_accept(v, g)) { + continue; + } + + if (!generates_callbacks(g) && is_match_vertex(v, g)) { + /* we cannot leave a completely vacuous infix */ + continue; + } + + if (isRegionExit(g, v, regions)) { + exits[region].push_back(v); + } + + if (isRegionEntry(g, v, regions)) { + // Determine whether this region is mandatory or optional. We only + // need to do this check for the first entry vertex we encounter + // for this region. + if (!contains(mand, region) && !contains(optional, region)) { + if (isOptionalRegion(g, v, regions)) { + optional.insert(region); + } else { + mand.insert(region); + } + } + } + } + + for (const auto &m : exits) { + if (false) { + next_cand: + continue; + } + + const u32 region = m.first; + const vector &vv = m.second; + assert(!vv.empty()); + + if (!contains(mand, region)) { + continue; + } + + for (auto v : vv) { + /* if an exit is in bad, the region is already handled well + * by getSimpleRoseLiterals or is otherwise bad */ + if (contains(bad, v)) { + goto next_cand; + } + /* if we are only allowed to consider some vertices, v must be in + the list; */ + if (allowed && !contains(*allowed, v)) { + goto next_cand; + } + } + + /* the final region may not have a neat exit. 
validate that all exits + * have an edge to each accept or none do */ + bool edge_to_a = edge(vv[0], g.accept, g).second; + bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; + const auto &reports = g[vv[0]].reports; + for (auto v : vv) { + if (edge_to_a != edge(v, g.accept, g).second) { + goto next_cand; + } + + if (edge_to_aeod != edge(v, g.acceptEod, g).second) { + goto next_cand; + } + + if (g[v].reports != reports) { + goto next_cand; + } + } + + DEBUG_PRINTF("inspecting region %u\n", region); + set s; + for (auto v : vv) { + DEBUG_PRINTF(" exit vertex: %u\n", g[v].index); + /* Note: RHS can not be depended on to take all subsequent revisits + * to this vertex */ + set ss = getLiteralSet(g, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + goto next_cand; + } + insert(&s, ss); + } + + assert(!s.empty()); + + DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + u64a score = sanitizeAndCompressAndScore(s); + + DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); + dumpRoseLiteralSet(s); + + bool anchored = false; + if (seeking_anchored) { + anchored = createsAnchoredLHS(g, vv, *depths, cc.grey); + } + + if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, + desperation)) { + goto next_cand; + } + + DEBUG_PRINTF("candidate is a candidate\n"); + lits->push_back(make_unique(vv, s, anchored)); + } +} + +static +void filterCandPivots(const NGHolder &g, const set &cand_raw, + set *out) { + for (auto u : cand_raw) { + const CharReach &u_cr = g[u].char_reach; + if (u_cr.count() > 40) { + continue; /* too wide to be plausible */ + } + + if (u_cr.count() > 2) { + /* include u as a candidate as successor may have backed away from + * expanding through it */ + out->insert(u); + continue; + } + + NFAVertex v = getSoleDestVertex(g, u); + if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { + const CharReach &v_cr = g[v].char_reach; + if (v_cr.count() == 1 || v_cr.isCaselessChar()) { + continue; /* v will always generate better literals */ + } + } + + out->insert(u); + } +} + +/* cand_raw is the candidate set before filtering points which are clearly + * a bad idea. 
*/ +static +void getCandidatePivots(const NGHolder &g, set *cand, + set *cand_raw) { + ue2::unordered_map dominators = findDominators(g); + + set accepts; + + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (is_special(v, g)) { + continue; + } + accepts.insert(v); + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (is_special(v, g)) { + continue; + } + accepts.insert(v); + } + + assert(!accepts.empty()); + + vector dom_trace; + auto ait = accepts.begin(); + assert(ait != accepts.end()); + NFAVertex curr = *ait; + while (curr && !is_special(curr, g)) { + dom_trace.push_back(curr); + curr = dominators[curr]; + } + reverse(dom_trace.begin(), dom_trace.end()); + for (++ait; ait != accepts.end(); ++ait) { + curr = *ait; + vector dom_trace2; + while (curr && !is_special(curr, g)) { + dom_trace2.push_back(curr); + curr = dominators[curr]; + } + reverse(dom_trace2.begin(), dom_trace2.end()); + auto dti = dom_trace.begin(), dtie = dom_trace.end(); + auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); + while (dti != dtie && dtj != dtje && *dti == *dtj) { + ++dti; + ++dtj; + } + dom_trace.erase(dti, dtie); + } + + cand_raw->insert(dom_trace.begin(), dom_trace.end()); + + filterCandPivots(g, *cand_raw, cand); +} + +static +unique_ptr findBestSplit(const NGHolder &g, + const vector *depths, + bool for_prefix, u32 min_len, + const set *allowed_cand, + const set *disallowed_cand, + const CompileContext &cc) { + assert(!for_prefix || depths); + + /* look for a single simple split point */ + set cand; + set cand_raw; + + getCandidatePivots(g, &cand, &cand_raw); + + if (allowed_cand) { + set cand2; + set cand2_raw; + set_intersection(allowed_cand->begin(), allowed_cand->end(), + cand.begin(), cand.end(), + inserter(cand2, cand2.begin())); + + set_intersection(allowed_cand->begin(), allowed_cand->end(), + cand_raw.begin(), cand_raw.end(), + inserter(cand2_raw, cand2_raw.begin())); + + cand = std::move(cand2); + cand_raw = std::move(cand2_raw); + } + if (disallowed_cand) { + DEBUG_PRINTF("%zu disallowed candidates\n", disallowed_cand->size()); + DEBUG_PRINTF("|old cand| = %zu\n", cand.size()); + erase_all(&cand, *disallowed_cand); + insert(&cand_raw, *disallowed_cand); + } + + if (!generates_callbacks(g)) { + /* not output exposed so must leave some RHS */ + for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) { + cand.erase(v); + cand_raw.erase(v); + } + + for (NFAVertex v : inv_adjacent_vertices_range(g.acceptEod, g)) { + cand.erase(v); + cand_raw.erase(v); + } + } + + DEBUG_PRINTF("|cand| = %zu\n", cand.size()); + + bool seeking_anchored = for_prefix; + bool seeking_transient = for_prefix; //cc.streaming; + + /* TODO: revisit when backstop goes away */ + bool desperation = for_prefix && cc.streaming; + + vector> lits; /**< sorted list of potential cuts */ + + getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, + desperation, cc); + getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, + &lits, min_len, desperation, cc); + + if (lits.empty()) { + DEBUG_PRINTF("no literals found\n"); + return nullptr; + } + + if (seeking_transient) { + for (auto &a : lits) { + a->creates_transient + = createsTransientLHS(g, a->vv, *depths, cc.grey); + } + } + + auto cmp = LitComparator(g, seeking_anchored, seeking_transient); + + unique_ptr best = move(lits.back()); + lits.pop_back(); + while (!lits.empty()) { + if (cmp(best, lits.back())) { + best = move(lits.back()); + } + lits.pop_back(); + } + + DEBUG_PRINTF("best is '%s' %u a%d 
t%d\n", + dumpString(*best->lit.begin()).c_str(), + g[best->vv.front()].index, + depths ? (int)createsAnchoredLHS(g, best->vv, *depths, cc.grey) : 0, + depths ? (int)createsTransientLHS(g, best->vv, *depths, cc.grey) : 0); + + return best; +} + +static +void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, + bool overhang_ok, flat_set &bad) { + DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", + num_vertices(h), succ.length()); + + map > curr; + for (const auto &e : in_edges_range(h.accept, h)) { + curr[source(e, h)].insert(e); + } + + map > next; + for (auto it = succ.rbegin(); it != succ.rend(); ++it) { + for (const auto &path : curr) { + NFAVertex u = path.first; + const auto &path_set = path.second; + if (u == h.start && overhang_ok) { + DEBUG_PRINTF("poisoning early %zu [overhang]\n", + path_set.size()); + insert(&bad, path_set); + continue; + } + if (overlaps(h[u].char_reach, *it)) { + for (const auto &e : in_edges_range(u, h)) { + auto &new_path_set = next[source(e, h)]; + insert(&new_path_set, path_set); + new_path_set.insert(e); + } + } + } + DEBUG_PRINTF("succ char matches at %zu paths\n", next.size()); + assert(overhang_ok || !curr.empty()); + swap(curr, next); + next.clear(); + } + + assert(overhang_ok || !curr.empty()); + for (const auto &path : curr) { + insert(&bad, path.second); + DEBUG_PRINTF("poisoning %zu vertices\n", path.second.size()); + } +} + +static +void poisonForGoodPrefix(const NGHolder &h, + const vector &depths, + flat_set &bad, const Grey &grey) { + for (const auto &v : vertices_range(h)) { + if (!createsAnchoredLHS(h, {v}, depths, grey) + && !createsTransientLHS(h, {v}, depths, grey)) { + insert(&bad, in_edges_range(v, h)); + } + } +} + +static +flat_set poisonEdges(const NGHolder &h, + const vector *depths, + const RoseInGraph &vg, const vector &ee, + bool for_prefix, const Grey &grey) { + DEBUG_PRINTF("poisoning edges %zu successor edges\n", ee.size()); + + /* poison edges covered by successor literal */ + + set > succs; + for (const RoseInEdge &ve : ee) { + if (vg[target(ve, vg)].type != RIV_LITERAL) { + /* nothing to poison in suffixes/outfixes */ + assert(vg[target(ve, vg)].type == RIV_ACCEPT); + continue; + } + succs.insert({vg[target(ve, vg)].s, + vg[source(ve, vg)].type == RIV_LITERAL}); + + } + + DEBUG_PRINTF("poisoning edges %zu successor literals\n", succs.size()); + + flat_set bad; + for (const auto &p : succs) { + poisonFromSuccessor(h, p.first, p.second, bad); + } + + /* poison edges which don't significantly improve a prefix */ + + if (for_prefix) { + poisonForGoodPrefix(h, *depths, bad, grey); + } + + return bad; +} + +static +set poisonVertices(const NGHolder &h, const RoseInGraph &vg, + const vector &ee, const Grey &grey) { + flat_set bad_edges = poisonEdges(h, nullptr, vg, ee, false, grey); + set bad_vertices; + for (const NFAEdge &e : bad_edges) { + bad_vertices.insert(target(e, h)); + DEBUG_PRINTF("bad: %u->%u\n", h[source(e, h)].index, + h[target(e, h)].index); + } + + return bad_vertices; +} + +static +unique_ptr findBestNormalSplit(const NGHolder &g, + const RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); + set bad_vertices = poisonVertices(g, vg, ee, cc.grey); + + return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, cc); +} + +static +unique_ptr findSimplePrefixSplit(const NGHolder &g, + const CompileContext &cc) { + DEBUG_PRINTF("looking for simple prefix split\n"); + 
bool anchored = !proper_out_degree(g.startDs, g); + NFAVertex u = anchored ? g.start : g.startDs; + + if (out_degree(u, g) != 2) { /* startDs + succ */ + return nullptr; + } + + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(u, g)) { + if (t != g.startDs) { + assert(!v); + v = t; + } + } + assert(v); + + if (!anchored) { + if (out_degree(g.start, g) > 2) { + return nullptr; + } + if (out_degree(g.start, g) == 2 && !edge(g.start, v, g).second) { + return nullptr; + } + } + + NFAVertex best_v = NGHolder::null_vertex(); + ue2_literal best_lit; + + u32 limit = cc.grey.maxHistoryAvailable; + if (anchored) { + LIMIT_TO_AT_MOST(&limit, cc.grey.maxAnchoredRegion); + } + + ue2_literal curr_lit; + for (u32 i = 0; i < limit; i++) { + const auto &v_cr = g[v].char_reach; + if (v_cr.count() == 1 || v_cr.isCaselessChar()) { + curr_lit.push_back(v_cr.find_first(), v_cr.isCaselessChar()); + } else { + curr_lit.clear(); + } + + if (curr_lit.length() > best_lit.length()) { + best_lit = curr_lit; + best_v = v; + } + + if (out_degree(v, g) != 1) { + break; + } + v = *adjacent_vertices(v, g).first; + } + + if (best_lit.length() < cc.grey.minRoseLiteralLength) { + return nullptr; + } + + set best_lit_set({best_lit}); + if (bad_mixed_sensitivity(best_lit)) { + sanitizeAndCompressAndScore(best_lit_set); + } + + return ue2::make_unique(best_v, best_lit_set, anchored, true); +} + +static +unique_ptr findBestPrefixSplit(const NGHolder &g, + const vector &depths, + const RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + assert(g.kind == NFA_PREFIX); + set bad_vertices = poisonVertices(g, vg, ee, cc.grey); + auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, cc); + + /* large back edges may prevent us identifying anchored or transient cases + * properly - use a simple walk instead */ + if (!rv || !(rv->creates_transient || rv->creates_anchored)) { + auto rv2 = findSimplePrefixSplit(g, cc); + if (rv2) { + return rv2; + } + } + + return rv; +} + +static +unique_ptr findBestCleanSplit(const NGHolder &g, + const CompileContext &cc) { + assert(g.kind != NFA_PREFIX); + set cleanSplits; + for (NFAVertex v : vertices_range(g)) { + if (!g[v].char_reach.all() || !edge(v, v, g).second) { + continue; + } + insert(&cleanSplits, inv_adjacent_vertices(v, g)); + cleanSplits.erase(v); + } + cleanSplits.erase(g.start); + if (cleanSplits.empty()) { + return nullptr; + } + return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, + &cleanSplits, nullptr, cc); +} + +static +bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { + set curr, next; + curr.insert(g.accept); + + for (auto it = lit.rbegin(); it != lit.rend(); ++it) { + next.clear(); + + for (auto v : curr) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + if (overhang_ok) { + DEBUG_PRINTF("bail\n"); + return true; + } else { + continue; /* it is not possible for a lhs literal to + * overhang the start */ + } + } + + const CharReach &cr = g[u].char_reach; + if (!overlaps(*it, cr)) { + continue; + } + + next.insert(u); + } + } + + curr.swap(next); + } + + return !curr.empty(); +} + +static +bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, + const vector &ee, const VertLitInfo &split) { + const vector &splitters = split.vv; + assert(!splitters.empty()); + + shared_ptr lhs = make_shared(); + shared_ptr rhs = make_shared(); + + ue2::unordered_map lhs_map; + ue2::unordered_map rhs_map; + + 
splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map); + DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n", + to_string(base_graph.kind).c_str(), num_vertices(base_graph), + to_string(lhs->kind).c_str(), num_vertices(*lhs), + to_string(rhs->kind).c_str(), num_vertices(*rhs)); + + bool suffix = vg[target(ee.front(), vg)].type == RIV_ACCEPT; + + if (is_triggered(base_graph)) { + /* if we are already guarded, check if the split reduces the size of + * the problem before continuing with the split */ + if (num_vertices(*lhs) >= num_vertices(base_graph) + && !(suffix && isVacuous(*rhs))) { + DEBUG_PRINTF("split's lhs is no smaller\n"); + return false; + } + + if (num_vertices(*rhs) >= num_vertices(base_graph)) { + DEBUG_PRINTF("split's rhs is no smaller\n"); + return false; + } + } + + bool do_accept = false; + bool do_accept_eod = false; + assert(rhs); + if (isVacuous(*rhs) && suffix) { + if (edge(rhs->start, rhs->accept, *rhs).second) { + DEBUG_PRINTF("rhs has a cliche\n"); + do_accept = true; + remove_edge(rhs->start, rhs->accept, *rhs); + } + + if (edge(rhs->start, rhs->acceptEod, *rhs).second) { + DEBUG_PRINTF("rhs has an eod cliche\n"); + do_accept_eod = true; + remove_edge(rhs->start, rhs->acceptEod, *rhs); + } + + renumber_edges(*rhs); + } + + /* check if we still have a useful graph left over */ + bool do_norm = out_degree(rhs->start, *rhs) != 1; + + set splitter_reports; + for (auto v : splitters) { + insert(&splitter_reports, base_graph[v].reports); + } + + /* find the targets of each source vertex; note the use of vectors to + * preserve deterministic ordering */ + vector sources; + map> images; + for (const RoseInEdge &e : ee) { + RoseInVertex src = source(e, vg); + RoseInVertex dest = target(e, vg); + if (!contains(images, src)) { + sources.push_back(src); + } + images[src].push_back(dest); + remove_edge(e, vg); + } + + map, vector> verts_by_image; + + for (const auto &u : sources) { + const auto &image = images[u]; + + if (contains(verts_by_image, image)) { + for (RoseInVertex v : verts_by_image[image]) { + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + } + continue; + } + + for (const auto &lit : split.lit) { + assert(!bad_mixed_sensitivity(lit)); + + /* don't allow overhang in can_match() as literals should + * correspond to the edge graph being split; overhanging the graph + * would indicate a false path.*/ + if (!can_match(*lhs, lit, false)) { + DEBUG_PRINTF("'%s' did not match lhs\n", + escapeString(lit).c_str()); + continue; + } + + DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); + auto v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + + /* work out delay later */ + if (do_accept) { + DEBUG_PRINTF("rhs has a cliche\n"); + auto tt = add_vertex(RoseInVertexProps::makeAccept( + splitter_reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (do_accept_eod) { + DEBUG_PRINTF("rhs has an eod cliche\n"); + auto tt = add_vertex(RoseInVertexProps::makeAcceptEod( + splitter_reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (do_norm) { + assert(out_degree(rhs->start, *rhs) > 1); + for (RoseInVertex dest : image) { + add_edge(v, dest, RoseInEdgeProps(rhs, 0U), vg); + } + } + verts_by_image[image].push_back(v); + } + } + + assert(hasCorrectlyNumberedVertices(*rhs)); + assert(hasCorrectlyNumberedEdges(*rhs)); + assert(hasCorrectlyNumberedVertices(*lhs)); + assert(hasCorrectlyNumberedEdges(*lhs)); + + return true; +} + +#define MAX_NETFLOW_CUT_WIDTH 40 /* 
magic number is magic */ +#define MAX_LEN_2_LITERALS_PER_CUT 3 + +static +bool checkValidNetflowLits(NGHolder &h, const vector &scores, + const map> &cut_lits, + u32 min_allowed_length) { + DEBUG_PRINTF("cut width %zu; min allowed %u\n", cut_lits.size(), + min_allowed_length); + if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { + return false; + } + + u32 len_2_count = 0; + + for (const auto &cut : cut_lits) { + if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { + DEBUG_PRINTF("cut uses a forbidden edge\n"); + return false; + } + + if (min_len(cut.second) < min_allowed_length) { + DEBUG_PRINTF("cut uses a bad literal\n"); + return false; + } + + for (const auto &lit : cut.second) { + if (lit.length() == 2) { + len_2_count++; + } + } + } + + if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { + return false; + } + + return true; +} + +static +void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, + const vector &to_cut, + const vector &cut, + const map> &cut_lits) { + DEBUG_PRINTF("splitting %s:\n", to_string(h.kind).c_str()); + + /* create literal vertices and connect preds */ + unordered_set done_sources; + map>> verts_by_source; + for (const RoseInEdge &ve : to_cut) { + assert(&h == &*vg[ve].graph); + RoseInVertex src = source(ve, vg); + if (!done_sources.insert(src).second) { + continue; /* already processed */ + } + + /* iterate over cut for determinism */ + for (const auto &e : cut) { + NFAVertex prev_v = source(e, h); + NFAVertex pivot = target(e, h); + + DEBUG_PRINTF("splitting on pivot %u\n", h[pivot].index); + ue2::unordered_map temp_map; + shared_ptr new_lhs = make_shared(); + splitLHS(h, pivot, new_lhs.get(), &temp_map); + + /* want to cut off paths to pivot from things other than the pivot - + * makes a more svelte graphy */ + clear_in_edges(temp_map[pivot], *new_lhs); + add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs); + + pruneUseless(*new_lhs, false); + renumber_vertices(*new_lhs); + renumber_edges(*new_lhs); + + DEBUG_PRINTF(" into lhs %s\n", to_string(new_lhs->kind).c_str()); + + assert(hasCorrectlyNumberedVertices(*new_lhs)); + assert(hasCorrectlyNumberedEdges(*new_lhs)); + + const set &lits = cut_lits.at(e); + for (const auto &lit : lits) { + if (!can_match(*new_lhs, lit, is_triggered(h))) { + continue; + } + + RoseInVertex v + = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + + /* if this is a prefix/infix an edge directly to accept should + * represent a false path as we have poisoned vertices covered + * by the literals. 
*/ + if (generates_callbacks(h)) { + if (edge(pivot, h.accept, h).second) { + DEBUG_PRINTF("adding acceptEod\n"); + /* literal has a direct connection to accept */ + const flat_set &reports = h[pivot].reports; + auto tt = add_vertex( + RoseInVertexProps::makeAccept(reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + + if (edge(pivot, h.acceptEod, h).second) { + assert(generates_callbacks(h)); + DEBUG_PRINTF("adding acceptEod\n"); + /* literal has a direct connection to accept */ + const flat_set &reports = h[pivot].reports; + auto tt = add_vertex( + RoseInVertexProps::makeAcceptEod(reports), vg); + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + } + } + + add_edge(src, v, RoseInEdgeProps(new_lhs, 0), vg); + verts_by_source[src].push_back({v, pivot}); + } + } + } + + /* wire the literal vertices up to successors */ + map, shared_ptr > done_rhs; + for (const RoseInEdge &ve : to_cut) { + RoseInVertex src = source(ve, vg); + RoseInVertex dest = target(ve, vg); + + /* iterate over cut for determinism */ + for (const auto &elem : verts_by_source[src]) { + NFAVertex pivot = elem.second; + RoseInVertex v = elem.first; + + vector adj; + insert(&adj, adj.end(), adjacent_vertices(pivot, h)); + /* we can ignore presence of accept, accepteod in adj as it is best + effort */ + + if (!contains(done_rhs, adj)) { + ue2::unordered_map temp_map; + shared_ptr new_rhs = make_shared(); + splitRHS(h, adj, new_rhs.get(), &temp_map); + remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); + remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); + renumber_edges(*new_rhs); + DEBUG_PRINTF(" into rhs %s\n", + to_string(new_rhs->kind).c_str()); + done_rhs.emplace(adj, new_rhs); + } + + assert(done_rhs[adj].get()); + shared_ptr new_rhs = done_rhs[adj]; + + assert(hasCorrectlyNumberedVertices(*new_rhs)); + assert(hasCorrectlyNumberedEdges(*new_rhs)); + + if (vg[dest].type == RIV_LITERAL + && !can_match(*new_rhs, vg[dest].s, true)) { + continue; + } + + if (out_degree(new_rhs->start, *new_rhs) != 1) { + add_edge(v, dest, RoseInEdgeProps(new_rhs, 0), vg); + } + } + + remove_edge(ve, vg); + } +} + +static +bool doNetflowCut(NGHolder &h, + const vector *depths, + RoseInGraph &vg, + const vector &ee, bool for_prefix, + const Grey &grey, u32 min_allowed_length = 0U) { + ENSURE_AT_LEAST(&min_allowed_length, grey.minRoseNetflowLiteralLength); + + DEBUG_PRINTF("doing netflow cut\n"); + /* TODO: we should really get literals/scores from the full graph as this + * allows us to overlap with previous cuts. */ + assert(!ee.empty()); + assert(&h == &*vg[ee.front()].graph); + assert(!for_prefix || depths); + + if (num_edges(h) > grey.maxRoseNetflowEdges) { + /* We have a limit on this because scoring edges and running netflow + * gets very slow for big graphs. 
*/ + DEBUG_PRINTF("too many edges, skipping netflow cut\n"); + return false; + } + + assert(hasCorrectlyNumberedVertices(h)); + assert(hasCorrectlyNumberedEdges(h)); + + auto known_bad = poisonEdges(h, depths, vg, ee, for_prefix, grey); + + /* Step 1: Get scores for all edges */ + vector scores = scoreEdges(h, known_bad); /* scores by edge_index */ + + /* Step 2: Find cutset based on scores */ + vector cut = findMinCut(h, scores); + + /* Step 3: Get literals corresponding to cut edges */ + map> cut_lits; + for (const auto &e : cut) { + set lits = getLiteralSet(h, e); + sanitizeAndCompressAndScore(lits); + + cut_lits[e] = lits; + } + + /* if literals are underlength bail or if it involves a forbidden edge*/ + if (!checkValidNetflowLits(h, scores, cut_lits, min_allowed_length)) { + return false; + } + DEBUG_PRINTF("splitting\n"); + + /* Step 4: Split graph based on cuts */ + splitEdgesByCut(h, vg, ee, cut, cut_lits); + + return true; +} + +static +bool deanchorIfNeeded(NGHolder &g) { + DEBUG_PRINTF("hi\n"); + if (proper_out_degree(g.startDs, g)) { + return false; + } + + /* look for a non-special dot with a loop following start */ + set succ_g; + insert(&succ_g, adjacent_vertices(g.start, g)); + succ_g.erase(g.startDs); + + for (auto v : adjacent_vertices_range(g.start, g)) { + DEBUG_PRINTF("inspecting cand %u || = %zu\n", g[v].index, + g[v].char_reach.count()); + + if (v == g.startDs || !g[v].char_reach.all()) { + continue; + } + + set succ_v; + insert(&succ_v, adjacent_vertices(v, g)); + + if (succ_v == succ_g) { + DEBUG_PRINTF("found ^.*\n"); + for (auto succ : adjacent_vertices_range(g.start, g)) { + if (succ == g.startDs) { + continue; + } + add_edge(g.startDs, succ, g); + } + clear_vertex(v, g); + remove_vertex(v, g); + renumber_vertices(g); + return true; + } + + if (succ_g.size() == 1 && hasSelfLoop(v, g)) { + DEBUG_PRINTF("found ^.+\n"); + add_edge(g.startDs, v, g); + remove_edge(v, v, g); + return true; + } + } + + return false; +} + +static +RoseInGraph populateTrivialGraph(const NGHolder &h) { + RoseInGraph g; + shared_ptr root_g = cloneHolder(h); + bool orig_anch = isAnchored(*root_g); + orig_anch |= deanchorIfNeeded(*root_g); + + DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); + + auto start = add_vertex(RoseInVertexProps::makeStart(orig_anch), g); + auto accept = add_vertex(RoseInVertexProps::makeAccept(set()), g); + + add_edge(start, accept, RoseInEdgeProps(root_g, 0), g); + + return g; +} + +static +void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { + STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); + if (num_vertices(vg) > 2) { + /* must be at least one literal aside from start and accept */ + return; + } + + RoseInEdge e = *edges(vg).first; + + NGHolder &h = *vg[e].graph; + + renumber_vertices(h); + renumber_edges(h); + + unique_ptr split = findBestNormalSplit(h, vg, {e}, cc); + + if (split && splitRoseEdge(h, vg, {e}, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + } else { + doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); + } +} + +static +void removeRedundantPrefixes(RoseInGraph &g) { + STAGE_DEBUG_PRINTF("REMOVING REDUNDANT PREFIXES\n"); + + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_START || g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + assert(!g[t].delay); + const ue2_literal &lit = g[t].s; + + if (!literalIsWholeGraph(*g[e].graph, lit)) { + DEBUG_PRINTF("not whole graph\n"); + continue; + } + + if 
(!isFloating(*g[e].graph)) { + DEBUG_PRINTF("not floating\n"); + continue; + } + g[e].graph.reset(); + } +} + +static +u32 maxDelay(const CompileContext &cc) { + if (!cc.streaming) { + return MO_INVALID_IDX; + } + return cc.grey.maxHistoryAvailable; +} + +static +void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, + const CompileContext &cc) { + STAGE_DEBUG_PRINTF("REMOVING LITERALS FROM PREFIXES\n"); + + vector to_anchor; + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_START && g[s].type != RIV_ANCHORED_START) { + continue; + } + + if (g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + assert(!g[t].delay); + const ue2_literal &lit = g[t].s; + + DEBUG_PRINTF("removing states for literal: %s\n", + dumpString(lit).c_str()); + + unique_ptr h = cloneHolder(*g[e].graph); + const u32 max_delay = maxDelay(cc); + + u32 delay = removeTrailingLiteralStates(*h, lit, max_delay, + false /* can't overhang start */); + + DEBUG_PRINTF("got delay %u (max allowed %u)\n", delay, max_delay); + + if (edge(h->startDs, h->accept, *h).second) { + /* we should have delay == lit.length(), but in really complex + * cases we may fail to identify that we can remove the whole + * graph. Regardless, the fact that sds is wired to accept means the + * graph serves no purpose. */ + DEBUG_PRINTF("whole graph\n"); + g[e].graph.reset(); + continue; + } + + if (delay == lit.length() && edge(h->start, h->accept, *h).second + && num_vertices(*h) == N_SPECIALS) { + to_anchor.push_back(e); + continue; + } + + /* if we got here we should still have an interesting graph */ + assert(delay == max_delay || num_vertices(*h) > N_SPECIALS); + + if (delay && delay != MO_INVALID_IDX) { + DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); + + g[e].graph = move(h); + g[e].graph_lag = delay; + } + } + + if (!to_anchor.empty()) { + RoseInVertex anch = add_vertex(RoseInVertexProps::makeStart(true), g); + + for (RoseInEdge e : to_anchor) { + DEBUG_PRINTF("rehoming to anchor\n"); + RoseInVertex v = target(e, g); + add_edge(anch, v, g); + remove_edge(e, g); + } + } +} + +static +bool isStarCliche(const NGHolder &g) { + DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); + + bool nonspecials_seen = false; + + for (auto v : vertices_range(g)) { + if (is_special(v, g)) { + continue; + } + + if (nonspecials_seen) { + return false; + } + nonspecials_seen = true; + + if (!g[v].char_reach.all()) { + return false; + } + + if (!hasSelfLoop(v, g)) { + return false; + } + if (!edge(v, g.accept, g).second) { + return false; + } + } + + if (!nonspecials_seen) { + return false; + } + + if (!edge(g.start, g.accept, g).second) { + return false; + } + + return true; +} + +static +void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, + const vector &ee, + const CompileContext &cc) { + /* TODO: This could be better by not creating a separate graph for each + * successor literal. This would require using distinct report ids and also + * taking into account overlap of successor literals. 
*/ + + set preds; + for (const RoseInEdge &e : ee) { + RoseInVertex u = source(e, ig); + assert(ig[u].type == RIV_LITERAL); + assert(!ig[e].graph_lag); + assert(!ig[u].delay); + preds.insert(ig[u].s); + } + + set succs; + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, ig); + assert(ig[v].type == RIV_LITERAL); + assert(!ig[v].delay); + succs.insert(ig[v].s); + } + + map, u32> > graphs; /* + delay */ + + for (const ue2_literal &right : succs) { + size_t max_overlap = 0; + for (const ue2_literal &left : preds) { + size_t overlap = maxOverlap(left, right, 0); + ENSURE_AT_LEAST(&max_overlap, overlap); + } + + u32 max_allowed_delay = right.length() - max_overlap; + + if (cc.streaming) { + LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); + } + + if (!max_allowed_delay) { + continue; + } + + shared_ptr h_new = cloneHolder(h); + + u32 delay = removeTrailingLiteralStates(*h_new, right, + max_allowed_delay); + + if (delay == MO_INVALID_IDX) { + /* successor literal could not match infix -> ignore false path */ + assert(0); + continue; + } + + graphs[right] = make_pair(h_new, delay); + } + + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, ig); + const ue2_literal &succ = ig[v].s; + if (!contains(graphs, succ)) { + continue; + } + + ig[e].graph = graphs[succ].first; + ig[e].graph_lag = graphs[succ].second; + + if (isStarCliche(*ig[e].graph)) { + DEBUG_PRINTF("is a X star!\n"); + ig[e].graph.reset(); + ig[e].graph_lag = 0; + } + } +} + +static +void removeRedundantLiteralsFromInfixes(RoseInGraph &g, + const CompileContext &cc) { + vector seen_order; + map> infixes; + + for (const RoseInEdge &e : edges_range(g)) { + RoseInVertex s = source(e, g); + RoseInVertex t = target(e, g); + + if (g[s].type != RIV_LITERAL || g[t].type != RIV_LITERAL) { + continue; + } + + if (!g[e].graph) { + continue; + } + + assert(!g[t].delay); + + NGHolder *h = g[e].graph.get(); + if (!contains(infixes, h)) { + seen_order.push_back(h); + } + infixes[h].push_back(e); + } + + for (NGHolder *h : seen_order) { + removeRedundantLiteralsFromInfix(*h, g, infixes[h], cc); + } +} + + +static +void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { + removeRedundantLiteralsFromPrefixes(g, cc); + removeRedundantLiteralsFromInfixes(g, cc); +} + +static +RoseInVertex getStart(RoseInGraph &vg) { + for (RoseInVertex v : vertices_range(vg)) { + if (vg[v].type == RIV_START || vg[v].type == RIV_ANCHORED_START) { + return v; + } + } + assert(0); + return RoseInGraph::null_vertex(); +} + +/** + * Finds the initial accept vertex created to which suffix/outfixes are + * attached. 
+ */ +static +RoseInVertex getPrimaryAccept(RoseInGraph &vg) { + for (RoseInVertex v : vertices_range(vg)) { + if (vg[v].type == RIV_ACCEPT && vg[v].reports.empty()) { + return v; + } + } + assert(0); + return RoseInGraph::null_vertex(); +} + +static +bool willBeTransient(const depth &max_depth, const CompileContext &cc) { + if (!cc.streaming) { + return max_depth <= depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH); + } else { + return max_depth <= depth(cc.grey.maxHistoryAvailable + 1); + } +} + +static +bool willBeAnchoredTable(const depth &max_depth, const Grey &grey) { + return max_depth <= depth(grey.maxAnchoredRegion); +} + +static +unique_ptr make_chain(u32 count) { + assert(count); + + auto rv = make_unique(NFA_INFIX); + + NGHolder &h = *rv; + + NFAVertex u = h.start; + for (u32 i = 0; i < count; i++) { + NFAVertex v = add_vertex(h); + h[v].char_reach = CharReach::dot(); + add_edge(u, v, h); + u = v; + } + h[u].reports.insert(0); + add_edge(u, h.accept, h); + + return rv; +} + +#define SHORT_TRIGGER_LEN 16 + +static +bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + /* check max width and literal lengths to see if possible */ + size_t min_lit = (size_t)~0ULL; + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, vg); + LIMIT_TO_AT_MOST(&min_lit, vg[v].s.length()); + } + + if (min_lit <= SHORT_TRIGGER_LEN || min_lit >= UINT_MAX) { + return false; + } + + depth max_width = findMaxWidth(h); + + u32 delta = min_lit - SHORT_TRIGGER_LEN; + + if (!willBeTransient(max_width - depth(delta), cc) + && !willBeAnchoredTable(max_width - depth(delta), cc.grey)) { + return false; + } + + DEBUG_PRINTF("candidate for splitting long literal (len %zu)\n", min_lit); + DEBUG_PRINTF("delta = %u\n", delta); + + /* try split */ + map > graphs; + for (const RoseInEdge &e : ee) { + RoseInVertex v = target(e, vg); + + shared_ptr h_new = cloneHolder(h); + + u32 delay = removeTrailingLiteralStates(*h_new, vg[v].s, delta); + + DEBUG_PRINTF("delay %u\n", delay); + + if (delay != delta) { + DEBUG_PRINTF("unable to trim literal\n"); + return false; + } + + if (in_degree(v, vg) != 1) { + DEBUG_PRINTF("complicated\n"); + return false; + } + + DEBUG_PRINTF("new mw = %u\n", (u32)findMaxWidth(*h_new)); + assert(willBeTransient(findMaxWidth(*h_new), cc) + || willBeAnchoredTable(findMaxWidth(*h_new), cc.grey)); + + graphs[v] = h_new; + } + + /* add .{repeats} from prefixes to long literals */ + for (const RoseInEdge &e : ee) { + RoseInVertex s = source(e, vg); + RoseInVertex t = target(e, vg); + + remove_edge(e, vg); + const ue2_literal &orig_lit = vg[t].s; + + ue2_literal lit(orig_lit.begin(), orig_lit.end() - delta); + + ue2_literal lit2(orig_lit.end() - delta, orig_lit.end()); + + assert(lit.length() + delta == orig_lit.length()); + + vg[t].s = lit2; + + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + add_edge(s, v, RoseInEdgeProps(graphs[t], 0), vg); + add_edge(v, t, RoseInEdgeProps(make_chain(delta), 0), vg); + } + + DEBUG_PRINTF("success\n"); + /* TODO: alter split point to avoid pathological splits */ + return true; +} + +static +bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, + const CompileContext &cc) { + DEBUG_PRINTF("trying to improve prefix %p, %zu verts\n", &h, + num_vertices(h)); + + renumber_vertices(h); + renumber_edges(h); + + vector depths; + calcDepths(h, depths); + + /* If the reason the prefix is not transient is due to a very long literal + * following, we can make it transient by restricting 
ourselves to using + * just the head of the literal. */ + if (makeTransientFromLongLiteral(h, vg, ee, cc)) { + return true; + } + + unique_ptr split = findBestPrefixSplit(h, depths, vg, ee, cc); + + if (split && (split->creates_transient || split->creates_anchored) + && splitRoseEdge(h, vg, ee, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + /* large back edges may prevent us identifing anchored or transient cases + * properly - use a simple walk instead */ + + if (doNetflowCut(h, &depths, vg, ee, true, cc.grey)) { + return true; + } + + if (split && splitRoseEdge(h, vg, ee, *split)) { + /* use the simple split even though it doesn't create a transient + * prefix */ + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + /* look for netflow cuts which don't produce good prefixes */ + if (doNetflowCut(h, &depths, vg, ee, false, cc.grey)) { + return true; + } + + if (ee.size() > 1) { + DEBUG_PRINTF("split the prefix apart based on succ literals\n"); + unordered_map, vector >, + NGHolderHasher, NGHolderEqual> trimmed; + + for (const auto &e : ee) { + shared_ptr hh = cloneHolder(h); + auto succ_lit = vg[target(e, vg)].s; + u32 delay = removeTrailingLiteralStates(*hh, succ_lit, + succ_lit.length(), + false /* can't overhang start */); + if (!delay) { + DEBUG_PRINTF("could not remove any literal, skip over\n"); + continue; + } + + trimmed[hh].emplace_back(e, delay); + } + + if (trimmed.size() == 1) { + return false; + } + + /* shift the contents to a vector so we can modify the graphs without + * violating the map's invariants. */ + vector, vector > > > + trimmed_vec(trimmed.begin(), trimmed.end()); + trimmed.clear(); + for (auto &elem : trimmed_vec) { + shared_ptr &hp = elem.first; + NGHolder &eh = *hp; + + vector base_states; + insert(&base_states, base_states.end(), + inv_adjacent_vertices(eh.accept, eh)); + clear_in_edges(eh.accept, eh); + + for (auto v : base_states) { + eh[v].reports.clear(); /* clear report from old accepts */ + } + + for (const auto &edge_delay : elem.second) { + const RoseInEdge &e = edge_delay.first; + u32 delay = edge_delay.second; + auto succ_lit = vg[target(e, vg)].s; + + vg[e].graph = hp; + assert(delay <= succ_lit.length()); + restoreTrailingLiteralStates(*vg[e].graph, succ_lit, delay, + base_states); + } + } + return true; + } + + return false; +} + +#define MAX_FIND_BETTER_PREFIX_GEN 4 +#define MAX_FIND_BETTER_PREFIX_COUNT 100 + +static +void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { + STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n"); + RoseInVertex start = getStart(vg); + + bool changed; + u32 gen = 0; + do { + DEBUG_PRINTF("gen %u\n", gen); + changed = false; + vector seen_order; + map > prefixes; + + /* find prefixes */ + for (const RoseInEdge &e : out_edges_range(start, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[target(e, vg)].type == RIV_LITERAL); + if (vg[e].graph) { + NGHolder *h = vg[e].graph.get(); + if (!contains(prefixes, h)) { + seen_order.push_back(h); + } + prefixes[h].push_back(e); + } + } + + if (prefixes.size() > MAX_FIND_BETTER_PREFIX_COUNT) { + break; + } + + /* look for bad prefixes and try to split */ + for (NGHolder *h : seen_order) { + depth max_width = findMaxWidth(*h); + if (willBeTransient(max_width, cc) + || willBeAnchoredTable(max_width, cc.grey)) { + continue; + } + + changed = improvePrefix(*h, vg, prefixes[h], cc); + } + } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); +} + +#define STRONG_LITERAL_LENGTH 20 +#define 
MAX_EXTRACT_STRONG_LITERAL_GRAPHS 10 + +static +bool extractStrongLiteral(NGHolder &h, RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + DEBUG_PRINTF("looking for string literal\n"); + unique_ptr split = findBestNormalSplit(h, vg, ee, cc); + + if (split && min_len(split->lit) >= STRONG_LITERAL_LENGTH) { + DEBUG_PRINTF("splitting simple literal\n"); + return splitRoseEdge(h, vg, ee, *split); + } + + return false; +} + +static +void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetExtractStrongLiterals) { + return; + } + STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); + set stuck; + + bool changed; + do { + changed = false; + + vector seen_order; + map > edges_by_graph; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[source(ve, vg)].type != RIV_LITERAL) { + continue; + } + if (vg[ve].graph) { + if (!contains(edges_by_graph, vg[ve].graph.get())) { + seen_order.push_back(vg[ve].graph.get()); + } + edges_by_graph[vg[ve].graph.get()].push_back(ve); + } + } + + if (edges_by_graph.size() > MAX_EXTRACT_STRONG_LITERAL_GRAPHS) { + DEBUG_PRINTF("too many graphs, stopping\n"); + return; + } + + for (NGHolder *g : seen_order) { + if (contains(stuck, g)) { + DEBUG_PRINTF("already known to be bad\n"); + continue; + } + bool rv = extractStrongLiteral(*g, vg, edges_by_graph[g], cc); + if (rv) { + changed = true; + } else { + stuck.insert(g); + } + } + } while (changed); +} + +#define INFIX_STRONG_GUARD_LEN 8 +#define INFIX_MIN_SPLIT_LITERAL_LEN 12 + +static +bool improveInfix(NGHolder &h, RoseInGraph &vg, const vector &ee, + const CompileContext &cc) { + unique_ptr split = findBestNormalSplit(h, vg, ee, cc); + + if (split && min_len(split->lit) >= INFIX_MIN_SPLIT_LITERAL_LEN + && splitRoseEdge(h, vg, ee, *split)) { + DEBUG_PRINTF("splitting simple literal\n"); + return true; + } + + DEBUG_PRINTF("trying for a netflow cut\n"); + /* look for netflow cuts which don't produce good prefixes */ + bool rv = doNetflowCut(h, nullptr, vg, ee, false, cc.grey, 8); + + DEBUG_PRINTF("did netflow cut? = %d\n", (int)rv); + + return rv; +} + +/** + * Infixes which are weakly guarded can, in effect, act like prefixes as they + * will often be live. We should try to split these infixes further if they + * contain strong literals so that we are at least running smaller weak infixes + * which can hopefully be accelerated/miracled. 
+ */ +static +void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetAvoidWeakInfixes) { + return; + } + STAGE_DEBUG_PRINTF("IMPROVE WEAK INFIXES\n"); + + RoseInVertex start = getStart(vg); + + set weak; + vector ordered_weak; + + for (RoseInVertex vv : adjacent_vertices_range(start, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[vv].type == RIV_LITERAL); + if (vg[vv].s.length() >= INFIX_STRONG_GUARD_LEN) { + continue; + } + + for (const RoseInEdge &e : out_edges_range(vv, vg)) { + if (vg[target(e, vg)].type != RIV_LITERAL || !vg[e].graph) { + continue; + } + + NGHolder *h = vg[e].graph.get(); + DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h); + if (!contains(weak, h)) { + weak.insert(h); + ordered_weak.push_back(h); + } + } + } + + map > weak_edges; + for (const RoseInEdge &ve : edges_range(vg)) { + if (contains(weak, vg[ve].graph.get())) { + weak_edges[vg[ve].graph.get()].push_back(ve); + } + } + + for (NGHolder *h : ordered_weak) { + improveInfix(*h, vg, weak_edges[h], cc); + } +} + +static +void splitEdgesForSuffix(const NGHolder &base_graph, RoseInGraph &vg, + const vector &ee, const VertLitInfo &split, + bool eod, const flat_set &reports) { + const vector &splitters = split.vv; + assert(!splitters.empty()); + + shared_ptr lhs = make_shared(); + unordered_map v_map; + cloneHolder(*lhs, base_graph, &v_map); + lhs->kind = NFA_INFIX; + clear_in_edges(lhs->accept, *lhs); + clear_in_edges(lhs->acceptEod, *lhs); + add_edge(lhs->accept, lhs->acceptEod, *lhs); + clearReports(*lhs); + for (NFAVertex v : splitters) { + add_edge(v_map[v], lhs->accept, *lhs); + (*lhs)[v_map[v]].reports.insert(0); + } + pruneUseless(*lhs); + + /* create literal vertices and connect preds */ + for (const auto &lit : split.lit) { + if (!can_match(*lhs, lit, is_triggered(*lhs))) { + continue; + } + + DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); + RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), vg); + + RoseInVertex tt; + if (eod) { + DEBUG_PRINTF("doing eod\n"); + tt = add_vertex(RoseInVertexProps::makeAcceptEod(reports), vg); + } else { + DEBUG_PRINTF("doing non-eod\n"); + tt = add_vertex(RoseInVertexProps::makeAccept(reports), vg); + } + add_edge(v, tt, RoseInEdgeProps(0U, 0U), vg); + + for (const RoseInEdge &e : ee) { + RoseInVertex u = source(e, vg); + assert(!edge(u, v, vg).second); + add_edge(u, v, RoseInEdgeProps(lhs, 0U), vg); + } + } +} + +#define MIN_SUFFIX_LEN 6 + +static +bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, + const vector &suffix_edges, + const CompileContext &cc) { + DEBUG_PRINTF("inspecting suffix : %p on %zu edges\n", &h, + suffix_edges.size()); + /* + * We would, in general, rather not have output exposed engines because + * once they are triggered, they must be run while infixes only have to run + * if the successor literal is seen. Matches from output exposed engines + * also have to be placed in a priority queue and interleaved with matches + * from other sources. + * + * Note: + * - if the LHS is extremely unlikely we may be better off leaving + * a suffix unguarded. + * + * - limited width suffixes may be less bad as they won't be continuously + * active, we may want to have (a) stronger controls on if we want to pick + * a trailing literal in these cases and/or (b) look also for literals + * near accept as well as right on accept + * + * TODO: improve heuristics, splitting logic. 
+ */ + + /* we may do multiple splits corresponding to different report behaviour */ + set seen; + map >, VertLitInfo> by_reports; /* eod, rep */ + + for (NFAVertex v : inv_adjacent_vertices_range(h.accept, h)) { + set ss = getLiteralSet(h, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + VertLitInfo &vli = by_reports[make_pair(false, h[v].reports)]; + insert(&vli.lit, ss); + vli.vv.push_back(v); + seen.insert(v); + } + + seen.insert(h.accept); + for (NFAVertex v : inv_adjacent_vertices_range(h.acceptEod, h)) { + if (contains(seen, v)) { + continue; + } + + set ss = getLiteralSet(h, v, false); + if (ss.empty()) { + DEBUG_PRINTF("candidate is too shitty\n"); + return false; + } + + VertLitInfo &vli = by_reports[make_pair(true, h[v].reports)]; + insert(&vli.lit, ss); + vli.vv.push_back(v); + } + + assert(!by_reports.empty()); + + /* TODO: how strong a min len do we want here ? */ + u32 min_len = cc.grey.minRoseLiteralLength; + ENSURE_AT_LEAST(&min_len, MIN_SUFFIX_LEN); + + for (auto &vli : by_reports | map_values) { + u64a score = sanitizeAndCompressAndScore(vli.lit); + + if (vli.lit.empty() + || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, + false)) { + return false; + } + } + + for (const auto &info : by_reports) { + DEBUG_PRINTF("splitting on simple literals\n"); + splitEdgesForSuffix(h, vg, suffix_edges, info.second, + info.first.first /* eod */, + info.first.second /* reports */); + } + + for (const RoseInEdge &e : suffix_edges) { + remove_edge(e, vg); + } + return true; +} + +static +void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetAvoidSuffixes) { + return; + } + + STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n"); + + RoseInVertex accept = getPrimaryAccept(vg); + map > suffixes; + vector ordered_suffixes; + + /* find suffixes */ + for (const RoseInEdge &e : in_edges_range(accept, vg)) { + /* outfixes shouldn't have made it this far */ + assert(vg[source(e, vg)].type == RIV_LITERAL); + assert(vg[e].graph); /* non suffix paths should be wired to other + accepts */ + const NGHolder *h = vg[e].graph.get(); + if (!contains(suffixes, h)) { + ordered_suffixes.push_back(h); + } + suffixes[h].push_back(e); + } + + /* look at suffixes and try to split */ + for (const NGHolder *h : ordered_suffixes) { + replaceSuffixWithInfix(*h, vg, suffixes[h], cc); + } +} + +static +bool leadingDotStartLiteral(const NGHolder &h, VertLitInfo *out) { + if (out_degree(h.start, h) != 3) { + return false; + } + + NFAVertex v = NGHolder::null_vertex(); + NFAVertex ds = NGHolder::null_vertex(); + + for (NFAVertex a : adjacent_vertices_range(h.start, h)) { + if (a == h.startDs) { + continue; + } + if (h[a].char_reach.all()) { + ds = a; + if (out_degree(ds, h) != 2 || !edge(ds, ds, h).second) { + return false; + } + } else { + v = a; + } + } + + if (!v || !ds || !edge(ds, v, h).second) { + return false; + } + + if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { + return false; + } + + ue2_literal lit; + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + while (out_degree(v, h) == 1) { + NFAVertex vv = *adjacent_vertices(v, h).first; + if (h[vv].char_reach.count() != 1 + && !h[vv].char_reach.isCaselessChar()) { + break; + } + + v = vv; + + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + } + + if (is_match_vertex(v, h) && h.kind != NFA_SUFFIX) { + /* we have rediscovered the post-infix literal */ + return false; + } + + if 
(bad_mixed_sensitivity(lit)) { + make_nocase(&lit); + } + + DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + out->vv = {v}; + out->lit = {lit}; + return true; +} + +static +bool lookForDoubleCut(const NGHolder &h, const vector &ee, + RoseInGraph &vg, const Grey &grey) { + VertLitInfo info; + if (!leadingDotStartLiteral(h, &info) + || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { + return false; + } + DEBUG_PRINTF("performing split\n"); + return splitRoseEdge(h, vg, ee, {info}); +} + +static +void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetDoubleCut) { + return; + } + + map > right_edges; + vector ordered_graphs; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { + const NGHolder *h = vg[ve].graph.get(); + if (!contains(right_edges, h)) { + ordered_graphs.push_back(h); + } + right_edges[h].push_back(ve); + } + } + + for (const NGHolder *h : ordered_graphs) { + lookForDoubleCut(*h, right_edges[h], vg, cc.grey); + } +} + +static +pair findLiteralBefore(const NGHolder &h, NFAVertex v) { + ue2_literal lit; + if (h[v].char_reach.count() != 1 && !h[v].char_reach.isCaselessChar()) { + return {v, std::move(lit) }; + } + lit.push_back(h[v].char_reach.find_first(), + h[v].char_reach.isCaselessChar()); + + while (in_degree(v, h) == 1) { + NFAVertex vv = *inv_adjacent_vertices(v, h).first; + if (h[vv].char_reach.count() != 1 + && !h[vv].char_reach.isCaselessChar()) { + break; + } + + lit.push_back(h[vv].char_reach.find_first(), + h[vv].char_reach.isCaselessChar()); + v = vv; + } + + return {v, std::move(lit) }; +} + +static +bool lookForDotStarPred(NFAVertex v, const NGHolder &h, + NFAVertex *u, NFAVertex *ds) { + *u = NGHolder::null_vertex(); + *ds = NGHolder::null_vertex(); + for (NFAVertex a : inv_adjacent_vertices_range(v, h)) { + if (h[a].char_reach.all()) { + if (!edge(a, a, h).second) { + return false; + } + + if (*ds) { + return false; + } + + *ds = a; + } else { + if (*u) { + return false; + } + *u = a; + } + } + + if (!*u || !*ds) { + return false; + } + + return true; +} + +static +bool trailingDotStarLiteral(const NGHolder &h, VertLitInfo *out) { + /* Note: there is no delay yet - so the final literal is the already + * discovered successor literal - we are in fact interested in the literal + * before it. 
*/ + + if (in_degree(h.accept, h) != 1) { + return false; + } + + if (in_degree(h.acceptEod, h) != 1) { + assert(0); + return false; + } + + NFAVertex v + = findLiteralBefore(h, *inv_adjacent_vertices(h.accept, h).first).first; + + NFAVertex u; + NFAVertex ds; + + if (!lookForDotStarPred(v, h, &u, &ds)) { + return false; + } + + v = u; + auto rv = findLiteralBefore(h, v); + + if (!lookForDotStarPred(v, h, &u, &ds)) { + return false; + } + + ue2_literal lit = reverse_literal(rv.second); + DEBUG_PRINTF("%u found %s\n", h[v].index, dumpString(lit).c_str()); + + if (bad_mixed_sensitivity(lit)) { + make_nocase(&lit); + } + + out->vv = {v}; + out->lit = {lit}; + return true; +} + +static +bool lookForTrailingLiteralDotStar(const NGHolder &h, + const vector &ee, + RoseInGraph &vg, const Grey &grey) { + VertLitInfo info; + if (!trailingDotStarLiteral(h, &info) + || min_len(info.lit) < grey.violetDoubleCutLiteralLen) { + return false; + } + DEBUG_PRINTF("performing split\n"); + return splitRoseEdge(h, vg, ee, info); +} + +/* In streaming mode, active engines have to be caught up at stream boundaries + * and have to be stored in stream state, so we prefer to decompose patterns + * in to literals with no state between them if possible. */ +static +void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) { + if (!cc.grey.violetLiteralChains) { + return; + } + + bool changed; + do { + changed = false; + + map > right_edges; + vector ordered_graphs; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { + const NGHolder *h = vg[ve].graph.get(); + if (!contains(right_edges, h)) { + ordered_graphs.push_back(h); + } + right_edges[h].push_back(ve); + } + } + + for (const NGHolder *h : ordered_graphs) { + const vector &ee = right_edges[h]; + bool rv = lookForDoubleCut(*h, ee, vg, cc.grey); + if (!rv && h->kind != NFA_SUFFIX) { + rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey); + } + changed |= rv; + } + } while (changed); +} + +static +bool lookForCleanSplit(const NGHolder &h, const vector &ee, + RoseInGraph &vg, const CompileContext &cc) { + unique_ptr split = findBestCleanSplit(h, cc); + + if (split) { + return splitRoseEdge(h, vg, {ee}, *split); + } + + return false; +} + +#define MAX_DESIRED_CLEAN_SPLIT_DEPTH 4 + +static +void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { + u32 gen = 0; + + vector prev = {getStart(vg)}; + + while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) { + /* collect vertices in edge order for determinism */ + vector curr; + set curr_seen; + for (RoseInVertex u : prev) { + for (auto v : adjacent_vertices_range(u, vg)) { + if (curr_seen.insert(v).second) { + curr.push_back(v); + } + } + } + + map> rightfixes; + vector ordered_graphs; + for (RoseInVertex v : curr) { + for (const RoseInEdge &e : out_edges_range(v, vg)) { + if (vg[e].graph) { + NGHolder *h = vg[e].graph.get(); + if (!contains(rightfixes, h)) { + ordered_graphs.push_back(h); + } + rightfixes[h].push_back(e); + } + } + } + + for (const NGHolder *h : ordered_graphs) { + lookForCleanSplit(*h, rightfixes[h], vg, cc); + } + + prev = curr; + gen++; + } +} + +static +void rehomeEodSuffixes(RoseInGraph &vg) { + // Find edges to accept with EOD-anchored graphs that we can move over to + // acceptEod. 
+ vector acc_edges; + for (const auto &e : edges_range(vg)) { + if (vg[target(e, vg)].type != RIV_ACCEPT) { + continue; + } + if (vg[e].haig || !vg[e].graph) { + continue; + } + + const NGHolder &h = *vg[e].graph; + + if (in_degree(h.accept, h)) { + DEBUG_PRINTF("graph isn't eod anchored\n"); + continue; + } + + acc_edges.push_back(e); + } + + for (const RoseInEdge &e : acc_edges) { + // Move this edge from accept to acceptEod + RoseInVertex w = add_vertex(RoseInVertexProps::makeAcceptEod(), vg); + add_edge(source(e, vg), w, vg[e], vg); + remove_edge(e, vg); + } + + /* old accept vertices will be tidied up by final pruneUseless() call */ +} + +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + const CompileContext &cc) { + assert(!can_never_match(h)); + + if (!cc.grey.allowViolet) { + return false; + } + + DEBUG_PRINTF("hello world\n"); + + RoseInGraph vg = populateTrivialGraph(h); + + /* Step 1: avoid outfixes as we always have to run them. */ + avoidOutfixes(vg, cc); + + if (num_vertices(vg) <= 2) { + /* only have an outfix; leave for ng_rose for now */ + return false; + } + + removeRedundantPrefixes(vg); + dumpPreRoseGraph(vg, cc.grey, "pre_prefix_rose.dot"); + + /* Step 2: avoid non-transient prefixes (esp in streaming mode) */ + findBetterPrefixes(vg, cc); + + dumpPreRoseGraph(vg, cc.grey, "post_prefix_rose.dot"); + + extractStrongLiterals(vg, cc); + dumpPreRoseGraph(vg, cc.grey, "post_extract_rose.dot"); + improveWeakInfixes(vg, cc); + dumpPreRoseGraph(vg, cc.grey, "post_infix_rose.dot"); + + /* Step 3: avoid output exposed engines if there is a strong trailing + literal) */ + avoidSuffixes(vg, cc); + + /* Step 4: look for infixes/suffixes with leading .*literals + * This can reduce the amount of work a heavily picked literal has to do and + * reduce the amount of state used as .* is handled internally to rose. */ + lookForDoubleCut(vg, cc); + + if (cc.streaming) { + lookForCleanEarlySplits(vg, cc); + decomposeLiteralChains(vg, cc); + } + + /* Step 5: avoid unimplementable, or overly large engines if possible */ + /* TODO: later - ng_rose is currently acting as a backstop */ + + /* Step 6: send to rose */ + rehomeEodSuffixes(vg); + removeRedundantLiterals(vg, cc); + + pruneUseless(vg); + dumpPreRoseGraph(vg, cc.grey); + calcVertexOffsets(vg); + bool rv = rose.addRose(vg, prefilter); + DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); + return rv; +} + +} diff --git a/src/nfagraph/ng_violet.h b/src/nfagraph/ng_violet.h new file mode 100644 index 00000000..fb62bfc0 --- /dev/null +++ b/src/nfagraph/ng_violet.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Violet method of rose construction from NGHolder. + */ + +#ifndef NG_VIOLET_H +#define NG_VIOLET_H + +#include "ue2common.h" + +namespace ue2 { + +class NGHolder; +class RoseBuild; + +struct CompileContext; + +/** \brief Attempt to consume the entire pattern in graph \a h with Rose. + * Returns true if successful. */ +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + const CompileContext &cc); + +} // namespace ue2 + +#endif diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 65cd7c1a..53130ddf 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1226,9 +1226,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) { '\\Q' => { fgoto readQuotedLiteral; }; - '\\E' => { - throw LocatedParseError("Unmatched \\E"); - }; + # An \E that is not preceded by a \Q is ignored + '\\E' => { /* noop */ }; # Match any character '\.' => { currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode)); @@ -1447,12 +1446,12 @@ unichar readUtf8CodePoint4c(const u8 *ts) { // Otherwise, we interpret the first three digits as an // octal escape, and the remaining characters stand for // themselves as literals. - const u8 *p = ts; + const u8 *s = ts; unsigned int accum = 0; unsigned int oct_digits = 0; - assert(*p == '\\'); // token starts at backslash - for (++p; p < te && oct_digits < 3; ++oct_digits, ++p) { - u8 digit = *p - '0'; + assert(*s == '\\'); // token starts at backslash + for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) { + u8 digit = *s - '0'; if (digit < 8) { accum = digit + accum * 8; } else { @@ -1465,8 +1464,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) { } // And then the rest of the digits, if any, are literal. 
- for (; p < te; ++p) { - addLiteral(currentSeq, *p, mode); + for (; s < te; ++s) { + addLiteral(currentSeq, *s, mode); } } }; diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index f6f5d383..3f58d752 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -162,7 +162,7 @@ ConstructLiteralVisitor::~ConstructLiteralVisitor() {} bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { assert(expr.component); - if (!ng.cc.grey.allowRose) { + if (!ng.cc.grey.allowLiteral) { return false; } diff --git a/src/report.h b/src/report.h index d037d11b..4a5f401e 100644 --- a/src/report.h +++ b/src/report.h @@ -115,6 +115,42 @@ enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, return DEDUPE_CONTINUE; } +/** \brief Test whether the given key (\a ekey) is set in the exhaustion vector + * \a evec. */ +static really_inline +int isExhausted(const struct RoseEngine *rose, const char *evec, u32 ekey) { + DEBUG_PRINTF("checking exhaustion %p %u\n", evec, ekey); + assert(ekey != INVALID_EKEY); + assert(ekey < rose->ekeyCount); + return mmbit_isset((const u8 *)evec, rose->ekeyCount, ekey); +} + +/** \brief Returns 1 if all exhaustion keys in the bitvector are on. */ +static really_inline +int isAllExhausted(const struct RoseEngine *rose, const char *evec) { + if (!rose->canExhaust) { + return 0; /* pattern set is inexhaustible */ + } + + return mmbit_all((const u8 *)evec, rose->ekeyCount); +} + +/** \brief Mark key \a ekey on in the exhaustion vector. */ +static really_inline +void markAsMatched(const struct RoseEngine *rose, char *evec, u32 ekey) { + DEBUG_PRINTF("marking as exhausted key %u\n", ekey); + assert(ekey != INVALID_EKEY); + assert(ekey < rose->ekeyCount); + mmbit_set((u8 *)evec, rose->ekeyCount, ekey); +} + +/** \brief Clear all keys in the exhaustion vector. */ +static really_inline +void clearEvec(const struct RoseEngine *rose, char *evec) { + DEBUG_PRINTF("clearing evec %p %u\n", evec, rose->ekeyCount); + mmbit_clear((u8 *)evec, rose->ekeyCount); +} + /** * \brief Deliver the given report to the user callback. * diff --git a/src/rose/block.c b/src/rose/block.c index 5fc5c8a1..fc72c6e9 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -29,13 +29,14 @@ #include "catchup.h" #include "init.h" #include "match.h" +#include "program_runtime.h" +#include "rose.h" +#include "rose_common.h" #include "nfa/nfa_api.h" #include "nfa/nfa_internal.h" #include "nfa/nfa_rev_api.h" #include "nfa/mcclellan.h" #include "util/fatbit.h" -#include "rose.h" -#include "rose_common.h" static rose_inline void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, @@ -157,13 +158,213 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, init_outfixes_for_block(t, scratch, state, is_small_block); } -void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch) { +static rose_inline +void roseBlockEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + assert(t->requiresEodCheck); + assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF + || offset <= t->maxBiAnchoredWidth); + + assert(!can_stop_matching(scratch)); + assert(t->eodProgramOffset); + + // Ensure that history is correct before we look for EOD matches. 
+ roseFlushLastByteHistory(t, scratch, offset); + scratch->tctxt.lastEndOffset = offset; + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. + assert(!scratch->tctxt.filledDelayedSlots); + + const u64a som = 0; + const size_t match_len = 0; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + // Note: we ignore the result, as this is the last thing to ever happen on + // a scan. + roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, + flags); +} + +/** + * \brief Run the anchored matcher, if any. Returns non-zero if matching should + * halt. + */ +static rose_inline +int roseBlockAnchored(const struct RoseEngine *t, struct hs_scratch *scratch) { + const void *atable = getALiteralMatcher(t); + if (!atable) { + DEBUG_PRINTF("no anchored table\n"); + return 0; + } + + const size_t length = scratch->core_info.len; + + if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && + length > t->amatcherMaxBiAnchoredWidth) { + return 0; + } + + if (length < t->amatcherMinWidth) { + return 0; + } + + runAnchoredTableBlock(t, atable, scratch); + + return can_stop_matching(scratch); +} + +/** + * \brief Run the floating matcher, if any. Returns non-zero if matching should + * halt. + */ +static rose_inline +int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) { + const struct HWLM *ftable = getFLiteralMatcher(t); + if (!ftable) { + return 0; + } + + const size_t length = scratch->core_info.len; + char *state = scratch->core_info.state; + struct RoseContext *tctxt = &scratch->tctxt; + + DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, + t->floatingMinDistance); + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { + DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); + return 0; + } + + if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && + length > t->fmatcherMaxBiAnchoredWidth) { + return 0; + } + + if (length < t->fmatcherMinWidth) { + return 0; + } + + const u8 *buffer = scratch->core_info.buf; + size_t flen = length; + if (t->floatingDistance != ROSE_BOUND_INF) { + flen = MIN(t->floatingDistance, length); + } + if (flen <= t->floatingMinDistance) { + return 0; + } + + DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); + DEBUG_PRINTF("-- %016llx\n", tctxt->groups); + hwlmExec(ftable, buffer, flen, t->floatingMinDistance, roseFloatingCallback, + scratch, tctxt->groups & t->floating_group_mask); + + return can_stop_matching(scratch); +} + +static rose_inline +void runEagerPrefixesBlock(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (!t->eagerIterOffset) { + return; + } + + char *state = scratch->core_info.state; + u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into + * left_table */ + const u32 arCount = t->activeLeftCount; + const u32 qCount = t->queueCount; + const struct LeftNfaInfo *left_table = getLeftTable(t); + const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + u32 idx = 0; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { + const struct LeftNfaInfo *left = left_table + ri; + u32 qi = ri + t->leftfixBeginQueue; + DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag); + + assert(!fatbit_isset(scratch->aqa, qCount, qi)); + assert(left->eager); + assert(!left->infix); + + struct mq *q = 
scratch->queues + qi; + const struct NFA *nfa = getNfaByQueue(t, qi); + + if (scratch->core_info.len < nfa->minWidth) { + /* we know that there is not enough data for this to ever match, so + * we can immediately squash. */ + mmbit_unset(ara, arCount, ri); + scratch->tctxt.groups &= left->squash_mask; + } + + s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET); + + fatbit_set(scratch->aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(nfa, q); + + char alive = nfaQueueExecToMatch(q->nfa, q, loc); + + if (!alive) { + DEBUG_PRINTF("queue %u dead, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else if (q->cur == q->end) { + assert(alive != MO_MATCHES_PENDING); + if (loc == (s64a)scratch->core_info.len) { + /* We know that the prefix does not match in the block so we + * can squash the groups anyway even though it did not die */ + /* TODO: if we knew the minimum lag the leftfix is checked at we + * could make this check tighter */ + DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else { + DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } else { + assert(alive == MO_MATCHES_PENDING); + DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); + q->end--; /* remove end item */ + } + } +} + +void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(t); assert(scratch); assert(scratch->core_info.buf); assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount) < MAX_SPARSE_ITER_STATES); + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + + // If this block is shorter than our minimum width, then no pattern in this + // RoseEngine could match. + /* minWidth checks should have already been performed by the caller */ + assert(scratch->core_info.len >= t->minWidth); + + // Similarly, we may have a maximum width (for engines constructed entirely + // of bi-anchored patterns). 
+ /* This check is now handled by the interpreter */ + assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF + || scratch->core_info.len <= t->maxBiAnchoredWidth); + const size_t length = scratch->core_info.len; // We have optimizations for small block scans: we run a single coalesced @@ -189,65 +390,17 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("-- %016llx\n", tctxt->groups); hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback, scratch, tctxt->groups); - goto exit; + } else { + runEagerPrefixesBlock(t, scratch); + + if (roseBlockAnchored(t, scratch)) { + return; + } + if (roseBlockFloating(t, scratch)) { + return; + } } - const void *atable = getALiteralMatcher(t); - - if (atable) { - if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF - && length > t->amatcherMaxBiAnchoredWidth) { - goto skip_atable; - } - - if (length < t->amatcherMinWidth) { - goto skip_atable; - } - - - runAnchoredTableBlock(t, atable, scratch); - - if (can_stop_matching(scratch)) { - goto exit; - } - - skip_atable:; - } - - const struct HWLM *ftable = getFLiteralMatcher(t); - if (ftable) { - DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, - t->floatingMinDistance); - if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { - DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); - goto exit; - } - - if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF - && length > t->fmatcherMaxBiAnchoredWidth) { - goto exit; - } - - if (length < t->fmatcherMinWidth) { - goto exit; - } - - const u8 *buffer = scratch->core_info.buf; - size_t flen = length; - if (t->floatingDistance != ROSE_BOUND_INF) { - flen = MIN(t->floatingDistance, length); - } - if (flen <= t->floatingMinDistance) { - goto exit; - } - - DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - DEBUG_PRINTF("-- %016llx\n", tctxt->groups); - hwlmExec(ftable, buffer, flen, t->floatingMinDistance, - roseCallback, scratch, tctxt->groups); - } - -exit:; if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) { return; } @@ -255,4 +408,16 @@ exit:; assert(!can_stop_matching(scratch)); roseCatchUpTo(t, scratch, length); + + if (!t->requiresEodCheck || !t->eodProgramOffset) { + DEBUG_PRINTF("no eod check required\n"); + return; + } + + if (can_stop_matching(scratch)) { + DEBUG_PRINTF("bailing, already halted\n"); + return; + } + + roseBlockEodExec(t, length, scratch); } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index dba9629e..017a6bf0 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -39,6 +39,7 @@ #include "nfa/mpv.h" #include "som/som_runtime.h" #include "util/fatbit.h" +#include "report.h" typedef struct queue_match PQ_T; #define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc) @@ -51,14 +52,49 @@ int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a som, u64a offset, ReportID id, const char from_mpv) { const u32 program = id; const size_t match_len = 0; // Unused in this path. - const char in_anchored = 0; - const char in_catchup = 1; - roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored, - in_catchup, from_mpv, 0); + u8 flags = ROSE_PROG_FLAG_IN_CATCHUP; + if (from_mpv) { + flags |= ROSE_PROG_FLAG_FROM_MPV; + } + + roseRunProgram(rose, scratch, program, som, offset, match_len, flags); return can_stop_matching(scratch) ? 
MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } +static rose_inline +char roseSuffixInfoIsExhausted(const struct RoseEngine *rose, + const struct NfaInfo *info, + const char *exhausted) { + if (!info->ekeyListOffset) { + return 0; + } + + DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset); + + /* INVALID_EKEY terminated list */ + const u32 *ekeys = getByOffset(rose, info->ekeyListOffset); + while (*ekeys != INVALID_EKEY) { + DEBUG_PRINTF("check %u\n", *ekeys); + if (!isExhausted(rose, exhausted, *ekeys)) { + DEBUG_PRINTF("not exhausted -> alive\n"); + return 0; + } + ++ekeys; + } + + DEBUG_PRINTF("all ekeys exhausted -> dead\n"); + return 1; +} + +static really_inline +char roseSuffixIsExhausted(const struct RoseEngine *rose, u32 qi, + const char *exhausted) { + DEBUG_PRINTF("check queue %u\n", qi); + const struct NfaInfo *info = getNfaInfoByQueue(rose, qi); + return roseSuffixInfoIsExhausted(rose, info, exhausted); +} + static really_inline void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi, struct hs_scratch *scratch) { @@ -245,14 +281,14 @@ restart: /* for use by mpv (chained) only */ static -int roseNfaFinalBlastAdaptor(u64a offset, ReportID id, void *context) { +int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); - int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, 1); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -358,7 +394,6 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, assert(!q->report_current); q->cb = roseNfaFinalBlastAdaptor; - q->som_cb = NULL; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q->items[q->cur].location, loc); @@ -413,113 +448,47 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { } static -int roseNfaBlastAdaptor(u64a offset, ReportID id, void *context) { +int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; + assert(scratch && scratch->magic == SCRATCH_MAGIC); const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); const char from_mpv = in_mpv(t, scratch); - int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, from_mpv); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, + return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi, scratch->core_info.exhaustionVector); } } -static -int roseNfaBlastAdaptorNoInternal(u64a offset, ReportID id, void *context) { +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; + 
assert(scratch && scratch->magic == SCRATCH_MAGIC); - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); - - assert(!in_mpv(t, scratch)); - - int cb_rv = roseNfaRunProgram(t, scratch, 0, offset, id, 0); - if (cb_rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return MO_CONTINUE_MATCHING; - } else { - assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); - } -} - -static -int roseNfaBlastSomAdaptor(u64a from_offset, u64a offset, ReportID id, - void *context) { - struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; - - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); - - assert(!in_mpv(t, scratch)); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); /* must be a external report as haig cannot directly participate in chain */ - int cb_rv = roseNfaRunProgram(scratch->core_info.rose, scratch, from_offset, - offset, id, 0); - if (cb_rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return MO_CONTINUE_MATCHING; - } else { - assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, - scratch->core_info.exhaustionVector); - } -} - -int roseNfaAdaptor(u64a offset, ReportID id, void *context) { - struct hs_scratch *scratch = context; - DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); - - return roseNfaRunProgram(scratch->core_info.rose, scratch, 0, offset, id, + return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id, 0); } -int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, - void *context) { - struct hs_scratch *scratch = context; - DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); - - /* must be a external report as haig cannot directly participate in chain */ - return roseNfaRunProgram(scratch->core_info.rose, scratch, from_offset, - offset, id, 0); -} - static really_inline -char blast_queue(const struct RoseEngine *t, struct hs_scratch *scratch, - struct mq *q, u32 qi, s64a to_loc, char report_current) { - struct RoseContext *tctxt = &scratch->tctxt; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - - tctxt->curr_qi = qi; - if (info->only_external) { - q->cb = roseNfaBlastAdaptorNoInternal; - } else { - q->cb = roseNfaBlastAdaptor; - } +char blast_queue(struct hs_scratch *scratch, struct mq *q, u32 qi, s64a to_loc, + char report_current) { + scratch->tctxt.curr_qi = qi; + q->cb = roseNfaBlastAdaptor; q->report_current = report_current; - q->som_cb = roseNfaBlastSomAdaptor; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q_cur_loc(q), to_loc); char alive = nfaQueueExec(q->nfa, q, to_loc); q->cb = roseNfaAdaptor; - q->som_cb = roseNfaSomAdaptor; assert(!q->report_current); return alive; @@ -549,7 +518,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, ensureEnd(q, a_qi, final_loc); - char alive = blast_queue(t, scratch, q, a_qi, second_place_loc, 0); + char alive = blast_queue(scratch, q, a_qi, second_place_loc, 0); /* We have three possible outcomes: * (1) the nfa died @@ -754,7 +723,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, = scratch->catchup_pq.qm_size ? 
pq_top_loc(&scratch->catchup_pq) : safe_loc; second_place_loc = MIN(second_place_loc, safe_loc); - if (n_qi == MMB_INVALID && report_ok_loc < second_place_loc) { + if (n_qi == MMB_INVALID && report_ok_loc <= second_place_loc) { if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc, scratch, aa, a_qi) == HWLM_TERMINATE_MATCHING) { @@ -845,7 +814,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, continue; } - char alive = blast_queue(t, scratch, q, qi, second_place_loc, 1); + char alive = blast_queue(scratch, q, qi, second_place_loc, 1); if (!alive) { if (can_stop_matching(scratch)) { diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h index d36ed272..76db5a77 100644 --- a/src/rose/counting_miracle.h +++ b/src/rose/counting_miracle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ #include "rose_internal.h" #include "nfa/nfa_api_queue.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" /** \brief Maximum number of bytes to scan when looking for a "counting miracle" * stop character. */ @@ -83,7 +82,7 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end, } #define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) static really_inline u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, diff --git a/src/rose/eod.c b/src/rose/eod.c deleted file mode 100644 index 7e8d4b3d..00000000 --- a/src/rose/eod.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "catchup.h" -#include "match.h" -#include "program_runtime.h" -#include "rose.h" -#include "util/fatbit.h" - -static really_inline -void initContext(const struct RoseEngine *t, char *state, u64a offset, - struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; - tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */ - tctxt->lit_offset_adjust = scratch->core_info.buf_offset - - scratch->core_info.hlen - + 1; // index after last byte - tctxt->delayLastEndOffset = offset; - tctxt->lastEndOffset = offset; - tctxt->filledDelayedSlots = 0; - tctxt->lastMatchOffset = 0; - tctxt->minMatchOffset = offset; - tctxt->minNonMpvMatchOffset = offset; - tctxt->next_mpv_offset = offset; - - scratch->catchup_pq.qm_size = 0; - scratch->al_log_sum = 0; /* clear the anchored logs */ - - fatbit_clear(scratch->aqa); -} - -static rose_inline -hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch, - const char is_streaming) { - assert(t->ematcherOffset); - - size_t eod_len; - const u8 *eod_data; - if (!is_streaming) { /* Block */ - eod_data = scratch->core_info.buf; - eod_len = scratch->core_info.len; - } else { /* Streaming */ - eod_len = scratch->core_info.hlen; - eod_data = scratch->core_info.hbuf; - } - - assert(eod_data); - assert(eod_len); - - // If we don't have enough bytes to produce a match from an EOD table scan, - // there's no point scanning. - if (eod_len < t->eodmatcherMinWidth) { - DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len, - t->eodmatcherMinWidth); - return HWLM_CONTINUE_MATCHING; - } - - // Ensure that we only need scan the last N bytes, where N is the length of - // the eod-anchored matcher region. - size_t adj = eod_len - MIN(eod_len, t->ematcherRegionSize); - - DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len); - - struct RoseContext *tctxt = &scratch->tctxt; - const struct HWLM *etable = getELiteralMatcher(t); - - hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, - tctxt->groups); - - // We may need to fire delayed matches - return cleanUpDelayed(t, scratch, 0, offset); -} - -static rose_inline -int roseEodRunIterator(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - if (!t->eodIterProgramOffset) { - return MO_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); - - const u64a som = 0; - const size_t match_len = 0; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 1; - if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset, - match_len, in_anchored, in_catchup, - from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - -/** - * \brief Check for (and deliver) reports from active output-exposed (suffix - * or outfix) NFAs. - * - * \return MO_HALT_MATCHING if the user instructs us to stop. - */ -static rose_inline -int roseCheckNfaEod(const struct RoseEngine *t, char *state, - struct hs_scratch *scratch, u64a offset, - const char is_streaming) { - if (!t->eodNfaIterOffset) { - DEBUG_PRINTF("no engines that report at EOD\n"); - return MO_CONTINUE_MATCHING; - } - - /* data, len is used for state decompress, should be full available data */ - u8 key = 0; - if (is_streaming) { - const u8 *eod_data = scratch->core_info.hbuf; - size_t eod_len = scratch->core_info.hlen; - key = eod_len ? 
eod_data[eod_len - 1] : 0; - } - - const u8 *aa = getActiveLeafArray(t, state); - const u32 aaCount = t->activeArrayCount; - - const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset); - assert(ISALIGNED(it)); - - u32 idx = 0; - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); - qi != MMB_INVALID; - qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - DEBUG_PRINTF("checking nfa %u\n", qi); - assert(nfaAcceptsEod(nfa)); - - char *fstate = scratch->fullState + info->fullStateOffset; - const char *sstate = (const char *)state + info->stateOffset; - - if (is_streaming) { - // Decompress stream state. - nfaExpandState(nfa, fstate, sstate, offset, key); - } - - if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor, - roseReportSomAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -static rose_inline -void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - // Flush history to make sure it's consistent. - roseFlushLastByteHistory(t, scratch, offset); -} - -static rose_inline -void roseCheckEodSuffixes(const struct RoseEngine *t, char *state, u64a offset, - struct hs_scratch *scratch) { - const u8 *aa = getActiveLeafArray(t, state); - const u32 aaCount = t->activeArrayCount; - UNUSED u32 qCount = t->queueCount; - - for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; - qi = mmbit_iterate(aa, aaCount, qi)) { - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - assert(nfaAcceptsEod(nfa)); - - DEBUG_PRINTF("checking nfa %u\n", qi); - - assert(fatbit_isset(scratch->aqa, qCount, qi)); /* we have just been - triggered */ - - char *fstate = scratch->fullState + info->fullStateOffset; - const char *sstate = (const char *)state + info->stateOffset; - - struct mq *q = scratch->queues + qi; - - pushQueueNoMerge(q, MQE_END, scratch->core_info.len); - - q->context = NULL; - /* rose exec is used as we don't want to / can't raise matches in the - * history buffer. */ - char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - if (rv) { /* nfa is still alive */ - if (nfaCheckFinalState(nfa, fstate, sstate, offset, - roseReportAdaptor, roseReportSomAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return; - } - } - } -} - -static rose_inline -int roseRunEodProgram(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - if (!t->eodProgramOffset) { - return MO_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); - - // There should be no pending delayed literals. 
- assert(!scratch->tctxt.filledDelayedSlots); - - const u64a som = 0; - const size_t match_len = 0; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 1; - if (roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, - in_anchored, in_catchup, from_mpv, - skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, - struct hs_scratch *scratch, const char is_streaming) { - assert(t); - assert(scratch->core_info.buf || scratch->core_info.hbuf); - assert(!scratch->core_info.buf || !scratch->core_info.hbuf); - assert(!can_stop_matching(scratch)); - - // Run the unconditional EOD program. - if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) { - return; - } - - if (roseCheckNfaEod(t, state, scratch, offset, is_streaming) == - MO_HALT_MATCHING) { - return; - } - - if (!t->eodIterProgramOffset && !t->ematcherOffset) { - DEBUG_PRINTF("no eod accepts\n"); - return; - } - - // Handle pending EOD reports. - if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { - return; - } - - // Run the EOD anchored matcher if there is one. - if (t->ematcherOffset) { - assert(t->ematcherRegionSize); - // Unset the reports we just fired so we don't fire them again below. - mmbit_clear(getRoleState(state), t->rolesWithStateCount); - mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount); - - if (roseEodRunMatcher(t, offset, scratch, is_streaming) == - HWLM_TERMINATE_MATCHING) { - return; - } - - cleanupAfterEodMatcher(t, offset, scratch); - - // Fire any new EOD reports. - if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { - return; - } - - roseCheckEodSuffixes(t, state, offset, scratch); - } -} - -void roseEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - assert(scratch); - assert(t->requiresEodCheck); - DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, - scratch->core_info.len, scratch->core_info.hbuf, - scratch->core_info.hlen); - - // We should not have been called if we've already been told to terminate - // matching. 
- assert(!told_to_stop_matching(scratch)); - - if (t->maxBiAnchoredWidth != ROSE_BOUND_INF - && offset > t->maxBiAnchoredWidth) { - DEBUG_PRINTF("bailing, we are beyond max width\n"); - /* also some of the history/state may be stale */ - return; - } - - char *state = scratch->core_info.state; - assert(state); - - initContext(t, state, offset, scratch); - - roseEodExec_i(t, state, offset, scratch, 1); -} - -static rose_inline -void prepForEod(const struct RoseEngine *t, struct hs_scratch *scratch, - size_t length) { - roseFlushLastByteHistory(t, scratch, length); - scratch->tctxt.lastEndOffset = length; -} - -void roseBlockEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - assert(t->requiresEodCheck); - assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF - || offset <= t->maxBiAnchoredWidth); - - assert(!can_stop_matching(scratch)); - - char *state = scratch->core_info.state; - - // Ensure that history is correct before we look for EOD matches - prepForEod(t, scratch, scratch->core_info.len); - - roseEodExec_i(t, state, offset, scratch, 0); -} diff --git a/src/rose/match.c b/src/rose/match.c index 4e9e72a6..b641e39d 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -27,14 +27,9 @@ */ #include "catchup.h" -#include "counting_miracle.h" -#include "infix.h" #include "match.h" -#include "miracle.h" #include "program_runtime.h" -#include "rose_program.h" #include "rose.h" -#include "som/som_runtime.h" #include "util/bitutils.h" #include "util/fatbit.h" @@ -98,13 +93,9 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, if (program) { const u64a som = 0; const size_t match_len = end - start + 1; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 0; - UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, scratch, program, som, real_end, match_len, - in_anchored, in_catchup, from_mpv, skip_mpv_catchup); + const u8 flags = 0; + UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som, + real_end, match_len, flags); assert(rv != HWLM_TERMINATE_MATCHING); } @@ -121,28 +112,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained); } -static rose_inline -void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 literal_id, - u64a end) { - assert(end); - struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - - DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); - - if (!bf64_set(&scratch->al_log_sum, end - 1)) { - // first time, clear row - DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); - fatbit_clear(anchoredLiteralRows[end - 1]); - } - - u32 rel_idx = literal_id - t->anchored_base_id; - DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, - t->anchored_count); - assert(rel_idx < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); -} - hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, struct hs_scratch *scratch, u32 event, u64a top_squash_distance, u64a end, @@ -220,8 +189,9 @@ event_enqueued: return HWLM_CONTINUE_MATCHING; } -int roseAnchoredCallback(u64a end, u32 id, void *ctx) { +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; + assert(scratch && scratch->magic == SCRATCH_MAGIC); struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; @@ -250,16 
+220,10 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { tctxt->lastEndOffset = real_end; } - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); - const u64a som = 0; - const char in_anchored = 1; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 0; - if (roseRunProgram(t, scratch, programs[id], som, real_end, match_len, - in_anchored, in_catchup, from_mpv, - skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { + // Note that the "id" we have been handed is the program offset. + const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; + if (roseRunProgram(t, scratch, id, start, real_end, match_len, + flags) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; @@ -267,15 +231,34 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); - if (real_end > t->floatingMinLiteralMatchOffset) { - recordAnchoredLiteralMatch(t, scratch, id, real_end); - } - return MO_CONTINUE_MATCHING; } -// Rose match-processing workhorse -/* assumes not in_anchored */ +/** + * \brief Run the program for the given literal ID, with the interpreter + * inlined into this call. + * + * Assumes not in_anchored. + */ +static really_inline +hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id) { + DEBUG_PRINTF("id=%u\n", id); + const u32 *programs = getByOffset(t, t->litProgramOffset); + assert(id < t->literalCount); + const u64a som = 0; + const u8 flags = 0; + return roseRunProgram_i(t, scratch, programs[id], som, end, match_len, + flags); +} + +/** + * \brief Run the program for the given literal ID, with the interpreter + * out of line. + * + * Assumes not in_anchored. + */ static really_inline hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, struct hs_scratch *scratch, u64a end, @@ -284,12 +267,8 @@ hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, const u32 *programs = getByOffset(t, t->litProgramOffset); assert(id < t->literalCount); const u64a som = 0; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 0; - return roseRunProgram(t, scratch, programs[id], som, end, match_len, - in_anchored, in_catchup, from_mpv, skip_mpv_catchup); + const u8 flags = 0; + return roseRunProgram(t, scratch, programs[id], som, end, match_len, flags); } static rose_inline @@ -516,7 +495,8 @@ anchored_leftovers:; return rv; } -hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { +static really_inline +hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) { struct hs_scratch *scratch = ctxt; struct RoseContext *tctx = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; @@ -551,7 +531,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { } size_t match_len = end - start + 1; - rv = roseProcessMatch(t, scratch, real_end, match_len, id); + rv = roseProcessMatchInline(t, scratch, real_end, match_len, id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); @@ -564,30 +544,15 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } -/** - * \brief Match callback adaptor used for matches from pure-literal cases. 
- * - * Literal match IDs in this path run limited Rose programs that do not use - * Rose state (which is not initialised in the pure-literal path). They can - * still, for example, check lookarounds or literal masks. - */ -hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, - void *context) { - DEBUG_PRINTF("start=%zu, end=%zu, id=%u\n", start, end, id); - struct hs_scratch *scratch = context; - struct core_info *ci = &scratch->core_info; - const u64a real_end = (u64a)end + ci->buf_offset + 1; - const u64a som = 0; - const size_t match_len = end - start + 1; - const struct RoseEngine *rose = ci->rose; - const u32 *programs = getByOffset(rose, rose->litProgramOffset); - assert(id < rose->literalCount); - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 0; - return roseRunProgram(rose, scratch, programs[id], som, real_end, match_len, - in_anchored, in_catchup, from_mpv, skip_mpv_catchup); +hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { + return roseCallback_i(start, end, id, ctxt); +} + +hwlmcb_rv_t roseFloatingCallback(size_t start, size_t end, u32 id, void *ctxt) { + struct hs_scratch *scratch = ctxt; + const struct RoseEngine *t = scratch->core_info.rose; + + return roseCallback_i(start, end, id, ctxt) & t->floating_group_mask; } /** @@ -623,13 +588,9 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, const u64a som = 0; const size_t match_len = 0; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 0; - hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, program, som, stream_offset, match_len, - in_anchored, in_catchup, from_mpv, skip_mpv_catchup); + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset, + match_len, flags); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -637,36 +598,23 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, return MO_CONTINUE_MATCHING; } -static really_inline -int roseReportAdaptor_i(u64a som, u64a offset, ReportID id, void *context) { +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; assert(scratch && scratch->magic == SCRATCH_MAGIC); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + const struct RoseEngine *rose = scratch->core_info.rose; // Our match ID is the program offset. const u32 program = id; const size_t match_len = 0; // Unused in this path. - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 1; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, program, som, offset, match_len, - in_anchored, in_catchup, from_mpv, skip_mpv_catchup); + roseRunProgram(rose, scratch, program, start, end, match_len, flags); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } return can_stop_matching(scratch) ? 
MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } - -int roseReportAdaptor(u64a offset, ReportID id, void *context) { - DEBUG_PRINTF("offset=%llu, id=%u\n", offset, id); - return roseReportAdaptor_i(0, offset, id, context); -} - -int roseReportSomAdaptor(u64a som, u64a offset, ReportID id, void *context) { - DEBUG_PRINTF("som=%llu, offset=%llu, id=%u\n", som, offset, id); - return roseReportAdaptor_i(som, offset, id, context); -} diff --git a/src/rose/match.h b/src/rose/match.h index cee32fc2..b69ff158 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -29,31 +29,34 @@ #ifndef ROSE_MATCH_H #define ROSE_MATCH_H -#include "hwlm/hwlm.h" +#include "catchup.h" #include "runtime.h" #include "scratch.h" +#include "report.h" #include "rose_common.h" #include "rose_internal.h" #include "ue2common.h" +#include "hwlm/hwlm.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_api_util.h" #include "som/som_runtime.h" #include "util/bitutils.h" +#include "util/exhaust.h" #include "util/fatbit.h" #include "util/multibit.h" /* Callbacks, defined in catchup.c */ -int roseNfaAdaptor(u64a offset, ReportID id, void *context); -int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context); +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); /* Callbacks, defined in match.c */ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx); +hwlmcb_rv_t roseFloatingCallback(size_t start, size_t end, u32 id, void *ctx); hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx); -int roseAnchoredCallback(u64a end, u32 id, void *ctx); +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ @@ -78,7 +81,6 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; q->cb = roseNfaAdaptor; - q->som_cb = roseNfaSomAdaptor; q->context = scratch; q->report_current = 0; @@ -294,4 +296,85 @@ int roseHasInFlightMatches(const struct RoseEngine *t, char *state, return 0; } +static rose_inline +hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, + struct hs_scratch *scratch) { + struct core_info *ci = &scratch->core_info; + if (isAllExhausted(t, ci->exhaustionVector)) { + ci->status |= STATUS_EXHAUSTED; + scratch->tctxt.groups = 0; + DEBUG_PRINTF("all exhausted, termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char is_mpv, char in_catchup) { + struct RoseContext *tctxt = &scratch->tctxt; + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + struct fatbit *activeQueues = scratch->aqa; + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; + + struct mq *q = &scratch->queues[qi]; + DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", + q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); + if (q_cur_loc(q) == loc) { + /* too many tops enqueued at the one spot; need to flatten this queue. + * We can use the full catchups as it will short circuit as we are + * already at this location. 
It also saves waking everybody up */ + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExec(q->nfa, q, loc); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (!in_catchup) { + if (is_mpv) { + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (loc + scratch->core_info.buf_offset + <= tctxt->minNonMpvMatchOffset) { + DEBUG_PRINTF("flushing chained\n"); + if (roseCatchUpMPV(t, loc, scratch) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + goto done_queue_empty; + } + } + + if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } else { + /* we must be a chained nfa */ + assert(is_mpv); + DEBUG_PRINTF("flushing chained\n"); + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } +done_queue_empty: + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, scratch); + nfaQueueInitState(q->nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(activeQueues, qCount, qi); + } + + assert(!isQueueFull(q)); + + return roseHaltIfExhausted(t, scratch); +} + +static rose_inline +hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0); +} + #endif diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c new file mode 100644 index 00000000..23532d40 --- /dev/null +++ b/src/rose/program_runtime.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose runtime: program interpreter. 
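+ * + * This file provides the out-of-line roseRunProgram() entry point; the + * interpreter itself (roseRunProgram_i) is defined in program_runtime.h so + * that hot call sites can use the inlined form directly.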
+ */ + +#include "program_runtime.h" + +int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, + void *context) { + assert(context); + u64a *som = context; + *som = MIN(*som, start); + return MO_CONTINUE_MATCHING; +} + +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, size_t match_len, + u8 prog_flags) { + return roseRunProgram_i(t, scratch, programOffset, som, end, match_len, + prog_flags); +} diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 78397070..8bf41715 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -26,6 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief Rose runtime: program interpreter. + */ + #ifndef PROGRAM_RUNTIME_H #define PROGRAM_RUNTIME_H @@ -39,13 +44,32 @@ #include "rose_internal.h" #include "rose_program.h" #include "rose_types.h" +#include "validate_mask.h" #include "runtime.h" #include "scratch.h" #include "ue2common.h" +#include "hwlm/hwlm.h" // for hwlmcb_rv_t #include "util/compare.h" #include "util/fatbit.h" #include "util/multibit.h" +/* + * Program context flags, which control the behaviour of some instructions at + * based on runtime contexts (whether the program is triggered by the anchored + * matcher, engine catchup, etc). + */ + +#define ROSE_PROG_FLAG_IN_ANCHORED 1 +#define ROSE_PROG_FLAG_IN_CATCHUP 2 +#define ROSE_PROG_FLAG_FROM_MPV 4 +#define ROSE_PROG_FLAG_SKIP_MPV_CATCHUP 8 + +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, size_t match_len, u8 prog_flags); + +/* Inline implementation follows. */ + static rose_inline int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, const u8 *and_mask, const u8 *exp_mask) { @@ -141,6 +165,33 @@ void rosePushDelayedMatch(const struct RoseEngine *t, fatbit_set(slot, delay_count, delay_index); } +static rose_inline +void recordAnchoredLiteralMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 literal_id, + u64a end) { + assert(end); + + if (end <= t->floatingMinLiteralMatchOffset) { + return; + } + + struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); + + DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); + + if (!bf64_set(&scratch->al_log_sum, end - 1)) { + // first time, clear row + DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count); + fatbit_clear(anchoredLiteralRows[end - 1]); + } + + u32 rel_idx = literal_id - t->anchored_base_id; + DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, + t->anchored_count); + assert(rel_idx < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); +} + static rose_inline char roseLeftfixCheckMiracles(const struct RoseEngine *t, const struct LeftNfaInfo *left, @@ -208,87 +259,6 @@ found_miracle: return 1; } -static rose_inline -hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, - struct hs_scratch *scratch) { - struct core_info *ci = &scratch->core_info; - if (isAllExhausted(t, ci->exhaustionVector)) { - ci->status |= STATUS_EXHAUSTED; - scratch->tctxt.groups = 0; - DEBUG_PRINTF("all exhausted, termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char is_mpv, char in_catchup) { - struct RoseContext *tctxt = &scratch->tctxt; - 
u8 *aa = getActiveLeafArray(t, scratch->core_info.state); - struct fatbit *activeQueues = scratch->aqa; - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - - struct mq *q = &scratch->queues[qi]; - DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", - q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); - if (q_cur_loc(q) == loc) { - /* too many tops enqueued at the one spot; need to flatten this queue. - * We can use the full catchups as it will short circuit as we are - * already at this location. It also saves waking everybody up */ - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExec(q->nfa, q, loc); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (!in_catchup) { - if (is_mpv) { - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (loc + scratch->core_info.buf_offset - <= tctxt->minNonMpvMatchOffset) { - DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, loc, scratch) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - goto done_queue_empty; - } - } - - if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } else { - /* we must be a chained nfa */ - assert(is_mpv); - DEBUG_PRINTF("flushing chained\n"); - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } -done_queue_empty: - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, scratch); - nfaQueueInitState(q->nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(activeQueues, qCount, qi); - } - - assert(!isQueueFull(q)); - - return roseHaltIfExhausted(t, scratch); -} - -static rose_inline -hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0); -} - static rose_inline hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, u32 top, @@ -424,7 +394,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, } s64a loc = (s64a)end - ci->buf_offset - leftfixLag; - assert(loc >= q_cur_loc(q)); + assert(loc >= q_cur_loc(q) || left->eager); assert(leftfixReport != MO_INVALID_IDX); if (!is_infix && left->transient) { @@ -471,7 +441,13 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch, DEBUG_PRINTF("checking for report %u\n", leftfixReport); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); return rv == MO_MATCHES_PENDING; + } else if (q_cur_loc(q) > loc) { + /* an eager leftfix may have already progressed past loc if there is no + * match at loc. */ + assert(left->eager); + return 0; } else { + assert(q_cur_loc(q) == loc); DEBUG_PRINTF("checking for report %u\n", leftfixReport); char rv = nfaInAcceptState(q->nfa, leftfixReport, q); DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); @@ -660,6 +636,153 @@ int reachHasBit(const u8 *reach, u8 c) { return !!(reach[c / 8U] & (u8)1U << (c % 8U)); } +/* + * Generate a 8-byte valid_mask with #high bytes 0 from the highest side + * and #low bytes 0 from the lowest side + * and (8 - high - low) bytes '0xff' in the middle. 
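+ * e.g. high = 2, low = 1 gives 0x0000ffffffffff00: the two highest bytes and + * the single lowest byte are invalid, the five bytes in between are 0xff.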
+ */ +static rose_inline +u64a generateValidMask(const s32 high, const s32 low) { + assert(high + low < 8); + DEBUG_PRINTF("high %d low %d\n", high, low); + const u64a ones = ~0ull; + return (ones << ((high + low) * 8)) >> (high * 8); +} + +/* + * Do the single-byte check if only one lookaround entry exists + * and it's a single mask. + * Return success if the byte is in the future or before history + * (offset is greater than (history) buffer length). + */ +static rose_inline +int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask, + u8 negation, s32 checkOffset, u64a end) { + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + const s64a base_offset = end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset); + u8 c; + if (offset >= 0) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + return 1; + } else { + assert(offset < (s64a)ci->len); + DEBUG_PRINTF("check byte in buffer\n"); + c = ci->buf[offset]; + } + } else { + if (offset >= -(s64a) ci->hlen) { + DEBUG_PRINTF("check byte in history\n"); + c = ci->hbuf[ci->hlen + offset]; + } else { + DEBUG_PRINTF("before history and return\n"); + return 1; + } + } + + if (((and_mask & c) != cmp_mask) ^ negation) { + DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n", + c, offset); + return 0; + } + + DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c); + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static rose_inline +int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask, + u64a neg_mask, s32 checkOffset, u64a end) { + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("rel offset %lld\n",base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u64a data = 0; + u64a valid_data_mask = ~0ULL; // mask for validate check. + //A 0xff byte means that this byte is in the buffer. + s32 shift_l = 0; // size of bytes in the future. + s32 shift_r = 0; // size of bytes before the history. + s32 h_len = 0; // size of bytes in the history buffer. + s32 c_len = 8; // size of bytes in the current buffer. + if (offset < 0) { + // in or before history buffer. + if (offset + 8 <= -(s64a)ci->hlen) { + DEBUG_PRINTF("before history and return\n"); + return 1; + } + const u8 *h_start = ci->hbuf; // start pointer in history buffer. + if (offset < -(s64a)ci->hlen) { + // some bytes are before history. + shift_r = -(offset + (s64a)ci->hlen); + DEBUG_PRINTF("shift_r %d", shift_r); + } else { + h_start += ci->hlen + offset; + } + if (offset + 7 < 0) { + DEBUG_PRINTF("all in history buffer\n"); + data = partial_load_u64a(h_start, 8 - shift_r); + } else { + // history part + c_len = offset + 8; + h_len = -offset - shift_r; + DEBUG_PRINTF("%d bytes in history\n", h_len); + s64a data_h = 0; + data_h = partial_load_u64a(h_start, h_len); + // current part + if (c_len > (s64a)ci->len) { + shift_l = c_len - ci->len; + c_len = ci->len; + } + data = partial_load_u64a(ci->buf, c_len); + data <<= h_len << 3; + data |= data_h; + } + if (shift_r) { + data <<= shift_r << 3; + } + } else { + // current buffer. 
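+ // offset >= 0 here: the eight-byte window starts inside the current + // buffer and may run past its end (those bytes count as being in the + // future).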
+ if (offset + c_len > (s64a)ci->len) { + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("all in the future\n"); + return 1; + } + // some bytes in the future. + shift_l = offset + c_len - ci->len; + c_len = ci->len - offset; + data = partial_load_u64a(ci->buf + offset, c_len); + } else { + data = unaligned_load_u64a(ci->buf + offset); + } + } + + if (shift_l || shift_r) { + valid_data_mask = generateValidMask(shift_l, shift_r); + } + DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); + + if (validateMask(data, valid_data_mask, + and_mask, cmp_mask, neg_mask)) { + DEBUG_PRINTF("check mask successfully\n"); + return 1; + } else { + return 0; + } +} /** * \brief Scan around a literal, checking that that "lookaround" reach masks * are satisfied. @@ -754,13 +877,7 @@ int roseCheckLookaround(const struct RoseEngine *t, return 1; } -static -int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, - void *context) { - u64a *som = context; - *som = MIN(*som, from_offset); - return MO_CONTINUE_MATCHING; -} +int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); static rose_inline u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, @@ -780,13 +897,13 @@ u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, u64a start = ~0ULL; /* switch the callback + context for a fun one */ - q->som_cb = roseNfaEarliestSom; + q->cb = roseNfaEarliestSom; q->context = &start; nfaReportCurrentMatches(q->nfa, q); /* restore the old callback + context */ - q->som_cb = roseNfaSomAdaptor; + q->cb = roseNfaAdaptor; q->context = NULL; DEBUG_PRINTF("earliest som is %llu\n", start); return start; @@ -800,6 +917,144 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { return end >= min_bound && end <= max_bound; } +static rose_inline +hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset, + u32 iter_offset) { + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + /* data, len is used for state decompress, should be full available data */ + u8 key = 0; + if (is_streaming) { + const u8 *eod_data = scratch->core_info.hbuf; + size_t eod_len = scratch->core_info.hlen; + key = eod_len ? eod_data[eod_len - 1] : 0; + } + + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + + const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); + assert(ISALIGNED(it)); + + u32 idx = 0; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); + qi != MMB_INVALID; + qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + if (is_streaming) { + // Decompress stream state. 
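+ // (the engine's state is stored compressed in stream state; expand it + // so nfaCheckFinalState() below can inspect the full state)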
+ nfaExpandState(q->nfa, q->state, q->streamState, offset, key); + } + + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + + for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; + qi = mmbit_iterate(aa, aaCount, qi)) { + DEBUG_PRINTF("checking nfa %u\n", qi); + struct mq *q = scratch->queues + qi; + assert(q->nfa == getNfaByQueue(rose, qi)); + assert(nfaAcceptsEod(q->nfa)); + + /* We have just been triggered. */ + assert(fatbit_isset(scratch->aqa, rose->queueCount, qi)); + + pushQueueNoMerge(q, MQE_END, scratch->core_info.len); + q->context = NULL; + + /* rose exec is used as we don't want to / can't raise matches in the + * history buffer. */ + if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) { + DEBUG_PRINTF("nfa is dead\n"); + continue; + } + if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset, + roseReportAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + assert(rose->ematcherOffset); + assert(rose->ematcherRegionSize); + + // Clear role state and active engines, since we have already handled all + // outstanding work there. + DEBUG_PRINTF("clear role state and active leaf array\n"); + char *state = scratch->core_info.state; + mmbit_clear(getRoleState(state), rose->rolesWithStateCount); + mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); + + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + size_t eod_len; + const u8 *eod_data; + if (!is_streaming) { /* Block */ + eod_data = scratch->core_info.buf; + eod_len = scratch->core_info.len; + } else { /* Streaming */ + eod_len = scratch->core_info.hlen; + eod_data = scratch->core_info.hbuf; + } + + assert(eod_data); + assert(eod_len); + + DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, + offset); + + // If we don't have enough bytes to produce a match from an EOD table scan, + // there's no point scanning. + if (eod_len < rose->eodmatcherMinWidth) { + DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); + return HWLM_CONTINUE_MATCHING; + } + + // Ensure that we only need scan the last N bytes, where N is the length of + // the eod-anchored matcher region. + size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); + + const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset); + hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, + scratch->tctxt.groups); + + // We may need to fire delayed matches. 
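+ // (the eod-anchored literal scan above may have queued delayed literals)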
+ if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + + roseFlushLastByteHistory(rose, scratch, offset); + return HWLM_CONTINUE_MATCHING; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -823,16 +1078,21 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, } static rose_inline -hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, size_t match_len, - char in_anchored, char in_catchup, char from_mpv, - char skip_mpv_catchup) { - DEBUG_PRINTF("program=%u, offsets [%llu,%llu]\n", programOffset, som, end); +hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, size_t match_len, + u8 prog_flags) { + DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, + som, end, prog_flags); assert(programOffset >= sizeof(struct RoseEngine)); assert(programOffset < t->size); + const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED; + const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; + const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; + const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + const char *pc_base = getByOffset(t, programOffset); const char *pc = pc_base; @@ -880,9 +1140,9 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LIT_EARLY) { - if (end < t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("halt: too soon, min offset=%u\n", - t->floatingMinLiteralMatchOffset); + if (end < ri->min_offset) { + DEBUG_PRINTF("halt: before min_offset=%u\n", + ri->min_offset); return HWLM_CONTINUE_MATCHING; } } @@ -941,6 +1201,30 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_MASK) { + struct core_info *ci = &scratch->core_info; + if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask, + ri->neg_mask, ri->offset, end)) { + DEBUG_PRINTF("failed mask check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BYTE) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask, + ri->negation, ri->offset, end)) { + DEBUG_PRINTF("failed byte check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_INFIX) { if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report, end)) { @@ -968,6 +1252,11 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(RECORD_ANCHORED) { + recordAnchoredLiteralMatch(t, scratch, ri->id, end); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) { if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -1301,6 +1590,30 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { + if (roseEnginesEod(t, scratch, end, ri->iter_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) { + if (roseSuffixesEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) { + if 
(roseMatcherEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { DEBUG_PRINTF("finished\n"); return HWLM_CONTINUE_MATCHING; diff --git a/src/rose/rose.h b/src/rose/rose.h index e90d2f21..9a50f0e9 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -29,106 +29,26 @@ #ifndef ROSE_H #define ROSE_H -#include "rose_types.h" -#include "rose_internal.h" -#include "runtime.h" -#include "scratch.h" #include "ue2common.h" -#include "util/multibit.h" + +struct RoseEngine; +struct hs_scratch; // Initialise state space for engine use. void roseInitState(const struct RoseEngine *t, char *state); -void roseBlockEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch); -void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch); - -static really_inline -int roseBlockHasEodWork(const struct RoseEngine *t, - struct hs_scratch *scratch) { - if (t->ematcherOffset) { - DEBUG_PRINTF("eod matcher to run\n"); - return 1; - } - - if (t->eodProgramOffset) { - DEBUG_PRINTF("has eod program\n"); - return 1; - } - - void *state = scratch->core_info.state; - if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { - DEBUG_PRINTF("active outfix/suffix engines\n"); - return 1; - } - - if (t->eodIterOffset) { - u32 idx; - const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount, - &idx, it, si_state) != MMB_INVALID) { - DEBUG_PRINTF("eod iter has states on\n"); - return 1; - } - } - - return 0; -} - /* assumes core_info in scratch has been init to point to data */ -static really_inline -void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { - assert(t); - assert(scratch); - assert(scratch->core_info.buf); - - // We should not have been called if we've already been told to terminate - // matching. - assert(!told_to_stop_matching(scratch)); - - // If this block is shorter than our minimum width, then no pattern in this - // RoseEngine could match. - /* minWidth checks should have already been performed by the caller */ - const size_t length = scratch->core_info.len; - assert(length >= t->minWidth); - - // Similarly, we may have a maximum width (for engines constructed entirely - // of bi-anchored patterns). 
- /* This check is now handled by the interpreter */ - assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF - || length <= t->maxBiAnchoredWidth); - - roseBlockExec_i(t, scratch); - - if (!t->requiresEodCheck) { - return; - } - - if (can_stop_matching(scratch)) { - DEBUG_PRINTF("bailing, already halted\n"); - return; - } - - if (!roseBlockHasEodWork(t, scratch)) { - DEBUG_PRINTF("no eod work\n"); - return; - } - - roseBlockEodExec(t, length, scratch); -} +void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch); /* assumes core_info in scratch has been init to point to data */ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); -void roseEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch); +void roseStreamEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch); -hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, - void *context); +hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *context); -int roseReportAdaptor(u64a offset, ReportID id, void *context); -int roseReportSomAdaptor(u64a som, u64a offset, ReportID id, void *context); +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, u64a stream_offset, struct hs_scratch *scratch); diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index bef2114f..c71671fa 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +65,7 @@ struct raw_som_dfa; class CharReach; class NGHolder; class ReportManager; +class SmallWriteBuild; class SomSlotManager; class RoseDedupeAux { @@ -128,6 +129,7 @@ public: // Construct a usable Rose builder. std::unique_ptr makeRoseBuilder(ReportManager &rm, SomSlotManager &ssm, + SmallWriteBuild &smwr, const CompileContext &cc, const BoundaryReports &boundary); @@ -140,9 +142,6 @@ size_t roseSize(const RoseEngine *t); * intended to indicate a lightweight rose. 
*/ u32 roseQuality(const RoseEngine *t); -ue2::aligned_unique_ptr -roseAddSmallWrite(const RoseEngine *t, const SmallWriteEngine *smwr); - bool roseIsPureLiteral(const RoseEngine *t); size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay); diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 23c122a7..0f0e8d18 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -315,7 +315,7 @@ void createVertices(RoseBuildImpl *tbi, w = created[key]; } - NFAVertex p = pv.first; + RoseVertex p = pv.first; RoseEdge e; bool added; @@ -375,7 +375,7 @@ void createVertices(RoseBuildImpl *tbi, /* ensure the holder does not accept any paths which do not end with lit */ static void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { - DEBUG_PRINTF("strip '%s'\n", ((const string &)lit).c_str()); + DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str()); set curr, next; curr.insert(g.accept); curr.insert(g.acceptEod); @@ -418,6 +418,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { } pruneUseless(g); + clearReports(g); assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1); assert(allMatchStatesHaveReports(g)); @@ -651,26 +652,93 @@ floating: } static -unique_ptr makeRoseEodPrefix(const NGHolder &h, - ReportID prefix_report) { +unique_ptr makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build, + map, ReportID> &remap) { assert(generates_callbacks(h)); - auto g = cloneHolder(h); - g->kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX; - setReportId(*g, prefix_report); + assert(!in_degree(h.accept, h)); + auto gg = cloneHolder(h); + NGHolder &g = *gg; + g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX; // Move acceptEod edges over to accept. vector dead; - for (const auto &e : in_edges_range(g->acceptEod, *g)) { - NFAVertex u = source(e, *g); - if (u == g->accept) { + for (const auto &e : in_edges_range(g.acceptEod, g)) { + NFAVertex u = source(e, g); + if (u == g.accept) { continue; } - add_edge_if_not_present(u, g->accept, *g); + add_edge_if_not_present(u, g.accept, g); dead.push_back(e); + + if (!contains(remap, g[u].reports)) { + remap[g[u].reports] = build.getNewNfaReport(); + } + + g[u].reports = { remap[g[u].reports] }; } - remove_edges(dead, *g); - return g; + remove_edges(dead, g); + return gg; +} + +static +u32 getEodEventID(RoseBuildImpl &build) { + // Allocate the EOD event if it hasn't been already. 
+ if (build.eod_event_literal_id == MO_INVALID_IDX) { + build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT); + } + + return build.eod_event_literal_id; +} + +static +void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, + const NGHolder &h) { + assert(!build.isInETable(u)); + + RoseGraph &g = build.g; + map, ReportID> report_remap; + shared_ptr eod_leftfix + = makeRoseEodPrefix(h, build, report_remap); + + u32 eod_event = getEodEventID(build); + + for (const auto &report_mapping : report_remap) { + RoseVertex v = add_vertex(g); + g[v].idx = build.vertexIndex++; + g[v].literals.insert(eod_event); + build.literal_info[eod_event].vertices.insert(v); + + g[v].left.graph = eod_leftfix; + g[v].left.leftfix_report = report_mapping.second; + g[v].left.lag = 0; + RoseEdge e1 = add_edge(u, v, g).first; + g[e1].minBound = 0; + g[e1].maxBound = ROSE_BOUND_INF; + g[v].min_offset = add_rose_depth(g[u].min_offset, + findMinWidth(*g[v].left.graph)); + g[v].max_offset = ROSE_BOUND_INF; + + depth max_width = findMaxWidth(*g[v].left.graph); + if (u != build.root && max_width.is_finite() + && (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) { + g[e1].maxBound = max_width; + g[v].max_offset = add_rose_depth(g[u].max_offset, max_width); + } + + g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix + RoseVertex w = add_vertex(g); + g[w].idx = build.vertexIndex++; + g[w].eod_accept = true; + g[w].reports = report_mapping.first; + g[w].min_offset = g[v].min_offset; + g[w].max_offset = g[v].max_offset; + RoseEdge e = add_edge(v, w, g).first; + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + } } static @@ -686,8 +754,20 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, RoseVertex u = pv.first; const RoseInEdgeProps &edge_props = bd.ig[pv.second]; + /* We need to duplicate the parent vertices if: + * + * 1) It already has a suffix, etc as we are going to add the specified + * suffix, etc to the parents and we do not want to overwrite the + * existing information. + * + * 2) We are making the an EOD accept and the vertex already has other + * out-edges - The LAST_BYTE history used for EOD accepts is + * incompatible with normal successors. As accepts are processed last we + * do not need to worry about other normal successors being added later. 
+ */ if (g[u].suffix || !g[u].reports.empty() - /* also poss accept eod edge: TODO check properly */ + || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) + && !edge_props.graph) || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) { DEBUG_PRINTF("duplicating for parent %zu\n", g[u].idx); assert(!tbi->isAnyStart(u)); @@ -719,74 +799,37 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, } } else { assert(ig[iv].type == RIV_ACCEPT_EOD); + assert(!edge_props.haig); - if (edge_props.graph && tbi->isInETable(u)) { + if (!edge_props.graph) { + RoseVertex w = add_vertex(g); + g[w].idx = tbi->vertexIndex++; + g[w].eod_accept = true; + g[w].reports = ig[iv].reports; + g[w].min_offset = g[u].min_offset; + g[w].max_offset = g[u].max_offset; + RoseEdge e = add_edge(u, w, g).first; + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + continue; + } + + const NGHolder &h = *edge_props.graph; + assert(!in_degree(h.accept, h)); + assert(generates_callbacks(h)); + + if (tbi->isInETable(u)) { + assert(h.kind == NFA_SUFFIX); assert(!tbi->isAnyStart(u)); /* etable can't/shouldn't use eod event */ DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx); g[u].suffix.graph = edge_props.graph; - assert(g[u].suffix.graph->kind == NFA_SUFFIX); - dumpHolder(*g[u].suffix.graph, 98, "eod_suffix", tbi->cc.grey); - assert(!in_degree(g[u].suffix.graph->accept, - *g[u].suffix.graph)); - set reports = all_reports(*g[u].suffix.graph); - tbi->rm.getReport(*reports.begin()); - assert(reports.size() == 1); - /* TODO: set dfa_(min|max)_width */ continue; - } else if (edge_props.graph) { - assert(!edge_props.haig); - assert(!tbi->isInETable(u)); - - // Allocate the EOD event if it hasn't been already. - if (tbi->eod_event_literal_id == MO_INVALID_IDX) { - tbi->eod_event_literal_id = - tbi->getLiteralId(ue2_literal(), 0, ROSE_EVENT); - } - - RoseVertex v = add_vertex(g); - g[v].idx = tbi->vertexIndex++; - g[v].literals.insert(tbi->eod_event_literal_id); - tbi->literal_info[tbi->eod_event_literal_id].vertices.insert(v); - - ReportID prefix_report = tbi->getNewNfaReport(); - g[v].left.graph - = makeRoseEodPrefix(*edge_props.graph, prefix_report); - g[v].left.leftfix_report = prefix_report; - g[v].left.lag = 0; - RoseEdge e1 = add_edge(u, v, g).first; - g[e1].minBound = 0; - g[e1].maxBound = ROSE_BOUND_INF; - g[v].min_offset = add_rose_depth( - g[u].min_offset, findMinWidth(*g[v].left.graph)); - g[v].max_offset = ROSE_BOUND_INF; - - DEBUG_PRINTF("hi\n"); - depth max_width = findMaxWidth(*g[v].left.graph); - if (u != tbi->root - && max_width.is_finite() - && (!tbi->isAnyStart(u) - || isPureAnchored(*g[v].left.graph))) { - g[e1].maxBound = max_width; - g[v].max_offset = add_rose_depth(g[u].max_offset, max_width); - } - - g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix - u = v; } - assert(!edge_props.haig); - RoseVertex w = add_vertex(g); - g[w].idx = tbi->vertexIndex++; - g[w].eod_accept = true; - g[w].reports = ig[iv].reports; - g[w].min_offset = g[u].min_offset; - g[w].max_offset = g[u].max_offset; - RoseEdge e = add_edge(u, w, g).first; - g[e].minBound = 0; - g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx); + makeEodEventLeftfix(*tbi, u, h); } } } @@ -887,7 +930,8 @@ bool suitableForEod(const RoseInGraph &ig, vector topo, ENSURE_AT_LEAST(&v_depth, (u32)max_width); } - if (v_depth == ROSE_BOUND_INF || v_depth > cc.grey.maxHistoryAvailable) { + if (v_depth == 
ROSE_BOUND_INF + || v_depth > cc.grey.maxHistoryAvailable) { DEBUG_PRINTF("not suitable for eod table %u\n", v_depth); return false; } @@ -900,6 +944,13 @@ bool suitableForEod(const RoseInGraph &ig, vector topo, return true; } +static +void shift_accepts_to_end(const RoseInGraph &ig, + vector &topo_order) { + stable_partition(begin(topo_order), end(topo_order), + [&](RoseInVertex v){ return !is_any_accept(v, ig); }); +} + static void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { const RoseInGraph &ig = bd.ig; @@ -912,6 +963,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { map > vertex_map; vector v_order = topo_order(ig); + shift_accepts_to_end(ig, v_order); u32 eod_space_required; bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required, @@ -943,7 +995,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { const vector &images = vertex_map[u]; // We should have no dupes. - assert(set(images.begin(), images.end()).size() + assert(set(images.begin(), images.end()).size() == images.size()); for (auto v_image : images) { @@ -1038,6 +1090,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, return false; } break; + case NFA_EAGER_PREFIX: case NFA_REV_PREFIX: case NFA_OUTFIX_RAW: DEBUG_PRINTF("kind %u\n", (u32)h.kind); @@ -1133,7 +1186,7 @@ u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) { } static -u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseVertex v) { +u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) { // First, check the overlap constraints on (u,v). size_t max_delay; if (ig[v].type == RIV_LITERAL) { diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index d8eb939a..45333a38 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -336,7 +336,8 @@ void buildLiteralMask(const vector &mask, vector &msk, } static -bool validateTransientMask(const vector &mask, bool eod, const Grey &grey) { +bool validateTransientMask(const vector &mask, bool anchored, + bool eod, const Grey &grey) { assert(!mask.empty()); // An EOD anchored mask requires that everything fit into history, while an @@ -348,6 +349,12 @@ bool validateTransientMask(const vector &mask, bool eod, const Grey & return false; } + /* although anchored masks cannot be transient, short masks may be placed + * into the atable. 
*/ + if (anchored && mask.size() > grey.maxAnchoredRegion) { + return false; + } + vector lits; u32 lit_minBound; /* minBound of each literal in lit */ u32 lit_length; /* length of each literal in lit */ @@ -703,7 +710,7 @@ bool checkAllowMask(const vector &mask, ue2_literal *lit, bool RoseBuildImpl::add(bool anchored, const vector &mask, const ue2::flat_set &reports) { - if (validateTransientMask(mask, false, cc.grey)) { + if (validateTransientMask(mask, anchored, false, cc.grey)) { bool eod = false; addTransientMask(*this, mask, reports, anchored, eod); return true; @@ -726,8 +733,8 @@ bool RoseBuildImpl::add(bool anchored, const vector &mask, bool RoseBuildImpl::validateMask(const vector &mask, UNUSED const ue2::flat_set &reports, - UNUSED bool anchored, bool eod) const { - return validateTransientMask(mask, eod, cc.grey); + bool anchored, bool eod) const { + return validateTransientMask(mask, anchored, eod, cc.grey); } static diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 35ff7138..60732ff9 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -173,34 +173,54 @@ void mergeAnchoredDfas(vector> &dfas, } static -void translateReportSet(flat_set *rset, const RoseBuildImpl &tbi) { - flat_set old; - old.swap(*rset); - for (auto report_id : old) { - DEBUG_PRINTF("updating %u -> %u\n", report_id, - tbi.literal_info[report_id].final_id); - rset->insert(tbi.literal_info[report_id].final_id); - } -} - -static -void remapAnchoredReports(raw_dfa &dfa, const RoseBuildImpl &tbi) { - for (dstate &ds : dfa.states) { - translateReportSet(&ds.reports, tbi); - translateReportSet(&ds.reports_eod, tbi); - } -} - -/* Replaces the report ids currently in the dfas (rose graph literal ids) with - * the final id used by the runtime. */ -static -void remapAnchoredReports(RoseBuildImpl &tbi) { - for (auto it = tbi.anchored_nfas.begin(); it != tbi.anchored_nfas.end(); - ++it) { - for (auto &rdfa : it->second) { - assert(rdfa); - remapAnchoredReports(*rdfa, tbi); +void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { + for (dstate &ds : rdfa.states) { + assert(ds.reports_eod.empty()); // Not used in anchored matcher. + if (ds.reports.empty()) { + continue; } + + flat_set new_reports; + for (auto id : ds.reports) { + assert(id < build.literal_info.size()); + new_reports.insert(build.literal_info.at(id).final_id); + } + ds.reports = move(new_reports); + } +} + +/** + * \brief Replaces the report ids currently in the dfas (rose graph literal + * ids) with the final id for each literal. + */ +static +void remapAnchoredReports(RoseBuildImpl &build) { + for (auto &m : build.anchored_nfas) { + for (auto &rdfa : m.second) { + assert(rdfa); + remapAnchoredReports(*rdfa, build); + } + } +} + +/** + * \brief Replace the reports (which are literal final_ids) in the given + * raw_dfa with program offsets. + */ +static +void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { + for (dstate &ds : rdfa.states) { + assert(ds.reports_eod.empty()); // Not used in anchored matcher. 
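
/*
 * Minimal sketch of the remapping pattern used here: remapAnchoredReports()
 * rewrites each DFA state's reports from Rose literal ids to final_ids, and
 * remapIdsToPrograms() later rewrites those final_ids to literal program
 * offsets via the litPrograms table. std::set<unsigned> stands in for
 * flat_set<ReportID>, and remapReports() is a hypothetical helper written
 * only for this example.
 */
#include <cassert>
#include <set>
#include <utility>
#include <vector>

using ReportSet = std::set<unsigned>;

// Rewrite a report set through a lookup table indexed by the old id, the way
// remapIdsToPrograms() uses the litPrograms vector (indexed by final_id).
static void remapReports(ReportSet &reports,
                         const std::vector<unsigned> &table) {
    ReportSet out;
    for (unsigned id : reports) {
        assert(id < table.size());
        out.insert(table[id]);
    }
    reports = std::move(out);
}

int main() {
    std::vector<unsigned> litPrograms = {120, 184, 240}; // offset per final_id
    ReportSet stateReports = {0, 2};
    remapReports(stateReports, litPrograms);
    assert(stateReports == ReportSet({120, 240}));
    return 0;
}
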
+ if (ds.reports.empty()) { + continue; + } + + flat_set new_reports; + for (auto id : ds.reports) { + assert(id < litPrograms.size()); + new_reports.insert(litPrograms.at(id)); + } + ds.reports = move(new_reports); } } @@ -476,7 +496,7 @@ NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) { } if (lit_verts.empty()) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } bool nocase = false; @@ -488,7 +508,7 @@ NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) { if (cr.isAlpha()) { bool cr_nocase = cr.count() != 1; if (case_set && cr_nocase != nocase) { - return NFAGraph::null_vertex(); + return NGHolder::null_vertex(); } case_set = true; @@ -511,7 +531,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, DEBUG_PRINTF("looking for simple case\n"); NFAVertex lit_head = extractLiteral(h, lit); - if (lit_head == NFAGraph::null_vertex()) { + if (lit_head == NGHolder::null_vertex()) { DEBUG_PRINTF("no literal found\n"); return false; } @@ -826,7 +846,7 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - size_t *asize) { + const vector &litPrograms, size_t *asize) { const CompileContext &cc = build.cc; if (dfas.empty()) { @@ -835,6 +855,10 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, return nullptr; } + for (auto &rdfa : dfas) { + remapIdsToPrograms(rdfa, litPrograms); + } + vector> nfas; vector start_offset; // start offset for each dfa (dots removed) size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index a5317f89..ef06fcbb 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -39,13 +39,10 @@ #include struct anchored_matcher_info; -struct RoseEngine; namespace ue2 { -class NGHolder; class RoseBuildImpl; -struct Grey; struct raw_dfa; /** @@ -56,10 +53,13 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); /** * \brief Construct an anchored_matcher_info runtime structure from the given * set of DFAs. + * + * Remap the literal final_ids used for raw_dfa reports to the program offsets + * given in litPrograms. 
*/ aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - size_t *asize); + const std::vector &litPrograms, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 758dd442..56591de8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,6 +33,8 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_exclusive.h" +#include "rose_build_groups.h" #include "rose_build_infix.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" @@ -48,7 +50,11 @@ #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" +#include "nfa/shengcompile.h" #include "nfa/shufticompile.h" +#include "nfa/tamaramacompile.h" +#include "nfa/tamarama_internal.h" +#include "nfagraph/ng_execute.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_lbr.h" #include "nfagraph/ng_limex.h" @@ -59,6 +65,7 @@ #include "nfagraph/ng_stop.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" +#include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -69,8 +76,10 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/make_unique.h" #include "util/multibit_build.h" #include "util/order_check.h" +#include "util/popcount.h" #include "util/queue_index_factory.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -191,10 +200,13 @@ public: case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; + case ROSE_INSTR_CHECK_MASK: return &u.checkMask; + case ROSE_INSTR_CHECK_BYTE: return &u.checkByte; case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; + case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored; case ROSE_INSTR_CATCH_UP: return &u.catchUp; case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; @@ -222,6 +234,9 @@ public: case ROSE_INSTR_CHECK_STATE: return &u.checkState; case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; + case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; + case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod; + case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod; case ROSE_INSTR_END: return &u.end; } assert(0); @@ -237,10 +252,13 @@ public: case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); + case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask); + case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte); case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); + case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored); case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); case 
ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); @@ -268,6 +286,9 @@ public: case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); + case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); + case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod); + case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod); case ROSE_INSTR_END: return sizeof(u.end); } assert(0); @@ -282,10 +303,13 @@ public: ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; + ROSE_STRUCT_CHECK_MASK checkMask; + ROSE_STRUCT_CHECK_BYTE checkByte; ROSE_STRUCT_CHECK_INFIX checkInfix; ROSE_STRUCT_CHECK_PREFIX checkPrefix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; + ROSE_STRUCT_RECORD_ANCHORED recordAnchored; ROSE_STRUCT_CATCH_UP catchUp; ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; ROSE_STRUCT_SOM_ADJUST somAdjust; @@ -313,6 +337,9 @@ public: ROSE_STRUCT_CHECK_STATE checkState; ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; + ROSE_STRUCT_ENGINES_EOD enginesEod; + ROSE_STRUCT_SUFFIXES_EOD suffixesEod; + ROSE_STRUCT_MATCHER_EOD matcherEod; ROSE_STRUCT_END end; } u; @@ -349,6 +376,7 @@ struct RoseResources { bool has_lit_delay = false; bool has_lit_mask = false; bool has_anchored = false; + bool has_eod = false; }; struct build_context : boost::noncopyable { @@ -391,6 +419,10 @@ struct build_context : boost::noncopyable { * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; + /** \brief Literal programs, indexed by final_id, after they have been + * written to the engine_blob. */ + vector litPrograms; + /** \brief Minimum offset of a match from the floating table. */ u32 floatingMinLiteralMatchOffset = 0; @@ -408,6 +440,13 @@ struct build_context : boost::noncopyable { /** \brief Resources in use (tracked as programs are added). */ RoseResources resources; + /** \brief Mapping from every vertex to the groups that must be on for that + * vertex to be reached. */ + ue2::unordered_map vertex_group_map; + + /** \brief Global bitmap of groups that can be squashed. */ + rose_group squashable_groups = 0; + /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; @@ -460,7 +499,7 @@ u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { template static u32 add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename Iter::value_type; + using value_type = typename std::iterator_traits::value_type; static_assert(is_pod::value, "should be pod"); if (b == e) { @@ -567,6 +606,11 @@ bool isPureFloating(const RoseResources &resources) { return false; } + if (resources.has_eod) { + DEBUG_PRINTF("has eod work to do\n"); + return false; + } + if (resources.has_states) { DEBUG_PRINTF("has states\n"); return false; @@ -622,6 +666,7 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); DEBUG_PRINTF("has_lit_mask=%d\n", bc.resources.has_lit_mask); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); + DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); if (isPureFloating(bc.resources)) { return ROSE_RUNTIME_PURE_LITERAL; @@ -819,13 +864,18 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, aligned_unique_ptr nfa_impl) { assert(nfa_impl); assert(dfa_impl); - assert(isMcClellanType(dfa_impl->type)); + assert(isDfaType(dfa_impl->type)); // If our NFA is an LBR, it always wins. if (isLbrType(nfa_impl->type)) { return nfa_impl; } + // if our DFA is an accelerated Sheng, it always wins. + if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) { + return dfa_impl; + } + bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); bool d_big = dfa_impl->type == MCCLELLAN_NFA_16; @@ -878,6 +928,18 @@ buildRepeatEngine(const CastleProto &proto, return castle_nfa; } +static +aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm) { + // Unleash the Sheng!! + auto dfa = shengCompile(rdfa, cc, rm); + if (!dfa) { + // Sheng wasn't successful, so unleash McClellan! + dfa = mcclellanCompile(rdfa, cc, rm); + } + return dfa; +} + /* builds suffix nfas */ static aligned_unique_ptr @@ -898,7 +960,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.dfa()) { - auto d = mcclellanCompile(*suff.dfa(), cc, rm); + auto d = getDfa(*suff.dfa(), cc, rm); assert(d); return d; } @@ -927,7 +989,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -1022,8 +1084,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, // streaming mode. const bool compress_state = !is_transient; - assert(!left.graph() - || left.graph()->kind == (is_prefix ? NFA_PREFIX : NFA_INFIX)); + assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX); + assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX + || left.graph()->kind == NFA_EAGER_PREFIX); // Holder should be implementable as an NFA at the very least. 
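
/*
 * Minimal sketch of the first-choice/fallback shape used by getDfa() above:
 * the shuffle-based Sheng engine is tried first and McClellan is only used
 * when shengCompile() declines the DFA (returns nullptr). The stand-in
 * builders below, including the 16-state cut-off, are invented for this
 * example; only the fallback structure mirrors the real code.
 */
#include <cassert>
#include <cstring>
#include <memory>

struct Engine { const char *kind; };

// Stand-in for shengCompile(): succeeds only for small DFAs.
static std::unique_ptr<Engine> buildSpecialised(unsigned nStates) {
    if (nStates > 16) {
        return nullptr; // too big for the specialised engine
    }
    return std::unique_ptr<Engine>(new Engine{"sheng"});
}

// Stand-in for mcclellanCompile(): the general-purpose fallback.
static std::unique_ptr<Engine> buildGeneral(unsigned) {
    return std::unique_ptr<Engine>(new Engine{"mcclellan"});
}

static std::unique_ptr<Engine> getEngine(unsigned nStates) {
    auto e = buildSpecialised(nStates); // preferred implementation first
    if (!e) {
        e = buildGeneral(nStates);      // fall back, as getDfa() does
    }
    return e;
}

int main() {
    assert(!strcmp(getEngine(8)->kind, "sheng"));
    assert(!strcmp(getEngine(200)->kind, "mcclellan"));
    return 0;
}
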
if (!left.dfa() && left.graph()) { @@ -1046,12 +1109,13 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = mcclellanCompile(*left.dfa(), cc, rm); + n = getDfa(*left.dfa(), cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = mcclellanCompile(*rdfa, cc, rm); + n = getDfa(*rdfa, cc, rm); + assert(n); } } @@ -1065,7 +1129,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, if (!n && left.graph()) { map>> triggers; - findTriggerSequences(tbi, infixTriggers.at(left), &triggers); + if (left.graph()->kind == NFA_INFIX) { + findTriggerSequences(tbi, infixTriggers.at(left), &triggers); + } n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers, compress_state, cc); } @@ -1075,7 +1141,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1102,23 +1168,612 @@ void setLeftNfaProperties(NFA &n, const left_id &left) { } static -bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, - QueueIndexFactory &qif, set *no_retrigger_queues, - bool do_prefix) { - const RoseGraph &g = tbi.g; - const CompileContext &cc = tbi.cc; - const ReportManager &rm = tbi.rm; +void appendTailToHolder(NGHolder &h, const flat_set &reports, + const vector &starts, + const vector &tail) { + assert(!tail.empty()); + NFAVertex curr = add_vertex(h); + for (NFAVertex v : starts) { + assert(!edge(v, h.acceptEod, h).second); + assert(h[v].reports == reports); + h[v].reports.clear(); + remove_edge(v, h.accept, h); + add_edge(v, curr, h); + } + auto it = tail.begin(); + h[curr].char_reach = *it; + ++it; + while (it != tail.end()) { + NFAVertex old = curr; + curr = add_vertex(h); + add_edge(old, curr, h); + assert(!it->none()); + h[curr].char_reach = *it; + ++it; + } - ue2::unordered_map seen; // already built queue indices + h[curr].reports = reports; + add_edge(curr, h.accept, h); +} - map > infixTriggers; - findInfixTriggers(tbi, &infixTriggers); +static +void appendTailToHolder(NGHolder &h, const vector &tail) { + assert(in_degree(h.acceptEod, h) == 1); + assert(!tail.empty()); + map, vector > reporters; + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + reporters[h[v].reports].push_back(v); + } + + for (const auto &e : reporters) { + appendTailToHolder(h, e.first, e.second, tail); + } + + h.renumberEdges(); +} + +static +u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, + const vector &succs) { + const RoseGraph &rg = build.g; + static const size_t MAX_RESTORE_LEN = 5; + + vector restored(MAX_RESTORE_LEN); + for (RoseVertex v : succs) { + u32 lag = rg[v].left.lag; + for (u32 lit_id : rg[v].literals) { + u32 delay = build.literals.right.at(lit_id).delay; + const ue2_literal &literal = build.literals.right.at(lit_id).s; + assert(lag <= literal.length() + delay); + size_t base = literal.length() + delay - lag; + if (base >= literal.length()) { + return 0; + } + size_t len = literal.length() - base; + len = MIN(len, restored.size()); + restored.resize(len); + auto lit_it = literal.begin() + base; + for (u32 i = 0; i < len; i++) { + assert(lit_it != literal.end()); + restored[i] |= *lit_it; + ++lit_it; + } + } + } + + assert(!restored.empty()); + + 
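
/*
 * Minimal sketch of the arithmetic used by decreaseLag() just above. For a
 * successor with lag `lag` whose literal of length `len` is delivered with
 * delay `delay`, the literal characters from base = len + delay - lag onwards
 * have not yet been consumed by the prefix, so they can be appended to the
 * prefix graph and the lag reduced accordingly (the real code additionally
 * caps the restored tail at MAX_RESTORE_LEN). tailLength() is a hypothetical
 * helper written only for this example.
 */
#include <cassert>
#include <cstddef>

// Number of literal characters that could be folded back into the prefix.
static size_t tailLength(size_t lit_len, size_t delay, size_t lag) {
    assert(lag <= lit_len + delay);
    size_t base = lit_len + delay - lag;
    if (base >= lit_len) {
        return 0; // the lag does not reach back into the literal at all
    }
    return lit_len - base;
}

int main() {
    // Literal "abcdef" (len 6), no delay, lag 2: the final two characters can
    // be appended to the prefix, bringing the lag down to zero.
    assert(tailLength(6, 0, 2) == 2);
    // With delay 3 and lag 2 the lag lies entirely within the delay region.
    assert(tailLength(6, 3, 2) == 0);
    return 0;
}
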
appendTailToHolder(h, restored); + + return restored.size(); +} + +#define EAGER_DIE_BEFORE_LIMIT 10 + +struct eager_info { + shared_ptr new_graph; + u32 lag_adjust = 0; +}; + +static +bool checkSuitableForEager(bool is_prefix, const left_id &left, + const RoseBuildImpl &build, + const vector &succs, + rose_group squash_mask, rose_group initial_groups, + eager_info &ei, const CompileContext &cc) { + DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask); + + const RoseGraph &rg = build.g; + + if (!is_prefix) { + DEBUG_PRINTF("not prefix\n"); + return false; /* only prefixes (for now...) */ + } + + if ((initial_groups & squash_mask) == initial_groups) { + DEBUG_PRINTF("no squash -- useless\n"); + return false; + } + + for (RoseVertex s : succs) { + if (build.isInETable(s) + || contains(rg[s].literals, build.eod_event_literal_id)) { + return false; /* Ignore EOD related prefixes */ + } + } + + if (left.dfa()) { + const raw_dfa &dfa = *left.dfa(); + if (dfa.start_floating != DEAD_STATE) { + return false; /* not purely anchored */ + } + if (!dfa.states[dfa.start_anchored].reports.empty()) { + return false; /* vacuous (todo: handle?) */ + } + + if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) { + return false; + } + ei.new_graph = rg[succs[0]].left.graph; + } else if (left.graph()) { + const NGHolder &g = *left.graph(); + if (proper_out_degree(g.startDs, g)) { + return false; /* not purely anchored */ + } + + ei.new_graph = cloneHolder(*left.graph()); + auto gg = ei.new_graph; + gg->kind = NFA_EAGER_PREFIX; + + ei.lag_adjust = decreaseLag(build, *gg, succs); + + if (is_match_vertex(gg->start, *gg)) { + return false; /* should not still be vacuous as lag decreased */ + } + + if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) { + DEBUG_PRINTF("not eager as stuck alive\n"); + return false; + } + + /* We need to ensure that adding in the literals does not cause us to no + * longer be able to build an nfa. */ + bool ok = isImplementableNFA(*gg, nullptr, cc); + if (!ok) { + return false; + } + } else { + DEBUG_PRINTF("unable to determine if good for eager running\n"); + return false; + } + + DEBUG_PRINTF("eager prefix\n"); + return true; +} + +static +left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, + const vector &succs) { + u32 lag_adjust = ei.lag_adjust; + auto gg = ei.new_graph; + for (RoseVertex v : succs) { + g[v].left.graph = gg; + assert(g[v].left.lag >= lag_adjust); + g[v].left.lag -= lag_adjust; + DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust, + g[v].left.lag); + } + left_id leftfix = g[succs[0]].left; + + if (leftfix.graph()) { + assert(leftfix.graph()->kind == NFA_PREFIX + || leftfix.graph()->kind == NFA_EAGER_PREFIX); + leftfix.graph()->kind = NFA_EAGER_PREFIX; + } + if (leftfix.dfa()) { + assert(leftfix.dfa()->kind == NFA_PREFIX); + leftfix.dfa()->kind = NFA_EAGER_PREFIX; + } + + return leftfix; +} + +static +bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, + const map > &infixTriggers, + set *no_retrigger_queues, set *eager_queues, + const map &eager, + const vector &succs, left_id leftfix) { + RoseGraph &g = build.g; + const CompileContext &cc = build.cc; + const ReportManager &rm = build.rm; + + bool is_transient = contains(build.transient, leftfix); + rose_group squash_mask = build.rose_squash_masks.at(leftfix); + + DEBUG_PRINTF("making %sleftfix\n", is_transient ? 
"transient " : ""); + + if (contains(eager, leftfix)) { + eager_queues->insert(qi); + leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); + } + + aligned_unique_ptr nfa; + // Need to build NFA, which is either predestined to be a Haig (in SOM mode) + // or could be all manner of things. + if (leftfix.haig()) { + nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm); + } else { + nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers, + cc); + } + + if (!nfa) { + assert(!"failed to build leftfix"); + return false; + } + + setLeftNfaProperties(*nfa, leftfix); + + build.leftfix_queue_map.emplace(leftfix, qi); + nfa->queueIndex = qi; + + if (!prefix && !leftfix.haig() && leftfix.graph() + && nfaStuckOn(*leftfix.graph())) { + DEBUG_PRINTF("%u sticks on\n", qi); + no_retrigger_queues->insert(qi); + } + + DEBUG_PRINTF("built leftfix, qi=%u\n", qi); + add_nfa_to_blob(bc, *nfa); + + // Leftfixes can have stop alphabets. + vector stop(N_CHARS, 0); + /* haigs track som information - need more care */ + som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE; + if (leftfix.graph()) { + stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som); + } else if (leftfix.castle()) { + stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som); + } + + // Infix NFAs can have bounds on their queue lengths. + u32 max_queuelen = UINT32_MAX; + if (!prefix) { + set lits; + for (RoseVertex v : succs) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (u32 lit_id : g[u].literals) { + lits.insert(build.literals.right.at(lit_id).s); + } + } + } + DEBUG_PRINTF("%zu literals\n", lits.size()); + max_queuelen = findMaxInfixMatches(leftfix, lits); + if (max_queuelen < UINT32_MAX) { + max_queuelen++; + } + } + + u32 max_width; + if (is_transient) { + depth d = findMaxWidth(leftfix); + assert(d.is_finite()); + max_width = d; + } else { + max_width = 0; + } + + u8 cm_count = 0; + CharReach cm_cr; + if (cc.grey.allowCountingMiracles) { + findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); + } + + for (RoseVertex v : succs) { + bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width, + squash_mask, stop, + max_queuelen, cm_count, + cm_cr)); + } + + return true; +} + +static +unique_ptr constructTamaInfo(const RoseGraph &g, + const vector &subengines, + const bool is_suffix) { + unique_ptr tamaInfo = ue2::make_unique(); + for (const auto &sub : subengines) { + const auto &rose_vertices = sub.vertices; + NFA *nfa = sub.nfa.get(); + set tops; + for (const auto &v : rose_vertices) { + if (is_suffix) { + tops.insert(g[v].suffix.top); + } else { + for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + tamaInfo->add(nfa, tops); + } + + return tamaInfo; +} + +static +void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, + TamaProto &tamaProto, + const vector &subengines, + const map, u32> &out_top_remap, + const bool is_suffix) { + u32 i = 0; + for (const auto &n : tamaInfo.subengines) { + for (const auto &v : subengines[i].vertices) { + if (is_suffix) { + tamaProto.add(n, g[v].idx, g[v].suffix.top, + out_top_remap); + } else { + for (const auto &e : in_edges_range(v, g)) { + tamaProto.add(n, g[v].idx, g[e].rose_top, + out_top_remap); + } + } + } + i++; + } +} + +static +shared_ptr constructContainerEngine(const RoseGraph &g, + build_context &bc, + const ExclusiveInfo &info, + const u32 queue, + const bool is_suffix) { + const auto &subengines = info.subengines; + auto tamaInfo = + constructTamaInfo(g, subengines, is_suffix); 
+ + map, u32> out_top_remap; + auto n = buildTamarama(*tamaInfo, queue, out_top_remap); + add_nfa_to_blob(bc, *n); + + DEBUG_PRINTF("queue id:%u\n", queue); + shared_ptr tamaProto = make_shared(); + tamaProto->reports = info.reports; + updateTops(g, *tamaInfo, *tamaProto, subengines, + out_top_remap, is_suffix); + return tamaProto; +} + +static +void buildInfixContainer(RoseGraph &g, build_context &bc, + const vector &exclusive_info) { + // Build tamarama engine + for (const auto &info : exclusive_info) { + const u32 queue = info.queue; + const auto &subengines = info.subengines; + auto tamaProto = + constructContainerEngine(g, bc, info, queue, false); + + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + g[v].left.tamarama = tamaProto; + } + } + } +} + +static +void buildSuffixContainer(RoseGraph &g, build_context &bc, + const vector &exclusive_info) { + // Build tamarama engine + for (const auto &info : exclusive_info) { + const u32 queue = info.queue; + const auto &subengines = info.subengines; + auto tamaProto = + constructContainerEngine(g, bc, info, queue, true); + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + DEBUG_PRINTF("vert id:%lu\n", g[v].idx); + g[v].suffix.tamarama = tamaProto; + } + const auto &v = verts[0]; + suffix_id newSuffix(g[v].suffix); + bc.suffixes.emplace(newSuffix, queue); + } + } +} + +static +void updateExclusiveInfixProperties(const RoseBuildImpl &build, + build_context &bc, + const vector &exclusive_info, + set *no_retrigger_queues) { + const RoseGraph &g = build.g; + for (const auto &info : exclusive_info) { + // Set leftfix optimisations, disabled for tamarama subengines + rose_group squash_mask = ~rose_group{0}; + // Leftfixes can have stop alphabets. + vector stop(N_CHARS, 0); + // Infix NFAs can have bounds on their queue lengths. 
+ u32 max_queuelen = 0; + u32 max_width = 0; + u8 cm_count = 0; + CharReach cm_cr; + + const auto &qi = info.queue; + const auto &subengines = info.subengines; + bool no_retrigger = true; + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + const auto &v_first = verts[0]; + left_id leftfix(g[v_first].left); + if (leftfix.haig() || !leftfix.graph() || + !nfaStuckOn(*leftfix.graph())) { + no_retrigger = false; + } + + for (const auto &v : verts) { + set lits; + for (auto u : inv_adjacent_vertices_range(v, build.g)) { + for (u32 lit_id : build.g[u].literals) { + lits.insert(build.literals.right.at(lit_id).s); + } + } + DEBUG_PRINTF("%zu literals\n", lits.size()); + + u32 queuelen = findMaxInfixMatches(leftfix, lits); + if (queuelen < UINT32_MAX) { + queuelen++; + } + max_queuelen = max(max_queuelen, queuelen); + } + } + + if (no_retrigger) { + no_retrigger_queues->insert(qi); + } + + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + u32 lag = g[v].left.lag; + bc.leftfix_info.emplace( + v, left_build_info(qi, lag, max_width, squash_mask, stop, + max_queuelen, cm_count, cm_cr)); + } + } + } +} + +static +void updateExclusiveSuffixProperties(const RoseBuildImpl &build, + const vector &exclusive_info, + set *no_retrigger_queues) { + const RoseGraph &g = build.g; + for (auto &info : exclusive_info) { + const auto &qi = info.queue; + const auto &subengines = info.subengines; + bool no_retrigger = true; + for (const auto &sub : subengines) { + const auto &v_first = sub.vertices[0]; + suffix_id suffix(g[v_first].suffix); + if (!suffix.graph() || !nfaStuckOn(*suffix.graph())) { + no_retrigger = false; + break; + } + } + + if (no_retrigger) { + no_retrigger_queues->insert(qi); + } + } +} + +static +void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + const map> &infixTriggers, + const map> &vertex_map, + const vector> &groups, + set *no_retrigger_queues) { + RoseGraph &g = build.g; + const CompileContext &cc = build.cc; + + vector exclusive_info; + for (const auto &gp : groups) { + ExclusiveInfo info; + for (const auto &id : gp) { + const auto &verts = vertex_map.at(id); + left_id leftfix(g[verts[0]].left); + + bool is_transient = false; + auto n = makeLeftNfa(build, leftfix, false, is_transient, + infixTriggers, cc); + assert(n); + + setLeftNfaProperties(*n, leftfix); + + ExclusiveSubengine engine; + engine.nfa = move(n); + engine.vertices = verts; + info.subengines.push_back(move(engine)); + } + info.queue = qif.get_queue(); + exclusive_info.push_back(move(info)); + } + updateExclusiveInfixProperties(build, bc, exclusive_info, + no_retrigger_queues); + buildInfixContainer(g, bc, exclusive_info); +} + +static +void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + const map> &infixTriggers, + set *no_retrigger_queues) { + const RoseGraph &g = build.g; + + set> roleInfoSet; + map> vertex_map; + + u32 role_id = 0; + map leftfixes; for (auto v : vertices_range(g)) { - if (!g[v].left) { + if (!g[v].left || build.isRootSuccessor(v)) { continue; } + left_id leftfix(g[v].left); + + // Sanity check: our NFA should contain each of the tops mentioned on + // our in-edges. + assert(roseHasTops(g, v)); + + if (contains(leftfixes, leftfix)) { + // NFA already built. 
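
/*
 * Minimal sketch of the grouping step performed by findExclusiveInfixes()
 * here: vertices that share an identical leftfix are gathered under one role
 * id, so the exclusivity analysis operates on engines rather than on
 * individual vertices. Plain ints stand in for left_id and RoseVertex;
 * groupByEngine() is a hypothetical helper written only for this example.
 */
#include <cassert>
#include <map>
#include <utility>
#include <vector>

// Map each vertex's engine key to a role id and gather vertices per role.
static std::map<unsigned, std::vector<int>>
groupByEngine(const std::vector<std::pair<int, int>> &vertexEngine) {
    std::map<int, unsigned> roleOf;                 // engine -> role id
    std::map<unsigned, std::vector<int>> vertexMap; // role id -> vertices
    unsigned nextRole = 0;
    for (const auto &ve : vertexEngine) {
        int vertex = ve.first;
        int engine = ve.second;
        auto it = roleOf.find(engine);
        if (it == roleOf.end()) {
            it = roleOf.emplace(engine, nextRole++).first;
        }
        vertexMap[it->second].push_back(vertex);
    }
    return vertexMap;
}

int main() {
    // Vertices 10 and 12 share engine 7; vertex 11 has its own engine 9.
    auto groups = groupByEngine({{10, 7}, {11, 9}, {12, 7}});
    assert(groups.size() == 2);
    assert(groups.at(0) == (std::vector<int>{10, 12}));
    return 0;
}
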
+ u32 id = leftfixes[leftfix]; + if (contains(vertex_map, id)) { + vertex_map[id].push_back(v); + } + DEBUG_PRINTF("sharing leftfix, id=%u\n", id); + continue; + } + + if (leftfix.graph() || leftfix.castle()) { + leftfixes.emplace(leftfix, role_id); + vertex_map[role_id].push_back(v); + + map>> triggers; + findTriggerSequences(build, infixTriggers.at(leftfix), &triggers); + RoleInfo info(leftfix, role_id); + if (setTriggerLiteralsInfix(info, triggers)) { + roleInfoSet.insert(info); + } + role_id++; + } + } + + if (leftfixes.size() > 1) { + DEBUG_PRINTF("leftfix size:%lu\n", leftfixes.size()); + vector> groups; + exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups); + buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map, + groups, no_retrigger_queues); + } +} + +static +bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, set *no_retrigger_queues, + set *eager_queues, bool do_prefix) { + RoseGraph &g = tbi.g; + const CompileContext &cc = tbi.cc; + + map > infixTriggers; + vector order; + unordered_map > succs; + findInfixTriggers(tbi, &infixTriggers); + + if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { + findExclusiveInfixes(tbi, bc, qif, infixTriggers, + no_retrigger_queues); + } + + for (auto v : vertices_range(g)) { + if (!g[v].left || g[v].left.tamarama) { + continue; + } + + assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); bool is_prefix = tbi.isRootSuccessor(v); if (do_prefix != is_prefix) { @@ -1132,11 +1787,13 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, // our in-edges. assert(roseHasTops(g, v)); - u32 qi; // queue index, set below. - u32 lag = g[v].left.lag; bool is_transient = contains(tbi.transient, leftfix); - if (is_transient && tbi.cc.grey.roseLookaroundMasks) { + // Transient leftfixes can sometimes be implemented solely with + // lookarounds, in which case we don't need to build an engine. + // TODO: Handle SOM-tracking cases as well. + if (cc.grey.roseLookaroundMasks && is_transient && + !g[v].left.tracksSom()) { vector lookaround; if (makeLeftfixLookaround(tbi, v, lookaround)) { DEBUG_PRINTF("implementing as lookaround!\n"); @@ -1145,94 +1802,42 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, } } - if (contains(seen, leftfix)) { - // NFA already built. - qi = seen[leftfix]; - assert(contains(bc.engineOffsets, qi)); - DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi); - } else { - DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : ""); - - aligned_unique_ptr nfa; - - // Need to build NFA, which is either predestined to be a Haig (in - // SOM mode) or could be all manner of things. 
- if (leftfix.haig()) { - nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc, - rm); - } else { - assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); - nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient, - infixTriggers, cc); - } - - if (!nfa) { - assert(!"failed to build leftfix"); - return false; - } - - setLeftNfaProperties(*nfa, leftfix); - - qi = qif.get_queue(); - nfa->queueIndex = qi; - - if (!is_prefix && !leftfix.haig() && leftfix.graph() && - nfaStuckOn(*leftfix.graph())) { - DEBUG_PRINTF("%u sticks on\n", qi); - no_retrigger_queues->insert(qi); - } - - DEBUG_PRINTF("built leftfix, qi=%u\n", qi); - add_nfa_to_blob(bc, *nfa); - seen.emplace(leftfix, qi); + if (!contains(succs, leftfix)) { + order.push_back(leftfix); } + succs[leftfix].push_back(v); + } + + rose_group initial_groups = tbi.getInitialGroups(); + rose_group combined_eager_squashed_mask = ~0ULL; + + map eager; + + for (const left_id &leftfix : order) { + const auto &left_succs = succs[leftfix]; + rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); + eager_info ei; - // Leftfixes can have stop alphabets. - vector stop(N_CHARS, 0); - /* haigs track som information - need more care */ - som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE; - if (leftfix.graph()) { - stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som); - } else if (leftfix.castle()) { - stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som); + if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs, + squash_mask, initial_groups, ei, cc)) { + eager[leftfix] = ei; + combined_eager_squashed_mask &= squash_mask; + DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask); } + } - // Infix NFAs can have bounds on their queue lengths. - u32 max_queuelen = UINT32_MAX; - if (!is_prefix) { - set lits; - for (auto u : inv_adjacent_vertices_range(v, tbi.g)) { - for (u32 lit_id : tbi.g[u].literals) { - lits.insert(tbi.literals.right.at(lit_id).s); - } - } - DEBUG_PRINTF("%zu literals\n", lits.size()); - max_queuelen = findMaxInfixMatches(leftfix, lits); - if (max_queuelen < UINT32_MAX) { - max_queuelen++; - } - } + if (do_prefix && combined_eager_squashed_mask & initial_groups) { + DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n"); + eager_queues->clear(); + eager.clear(); + } - u32 max_width; - if (is_transient) { - depth d = findMaxWidth(leftfix); - assert(d.is_finite()); - max_width = d; - } else { - max_width = 0; - } - - u8 cm_count = 0; - CharReach cm_cr; - if (cc.grey.allowCountingMiracles) { - findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); - } - - bc.leftfix_info.emplace( - v, left_build_info(qi, lag, max_width, squash_mask, stop, - max_queuelen, cm_count, cm_cr)); + for (const left_id &leftfix : order) { + buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers, + no_retrigger_queues, eager_queues, eager, succs[leftfix], + leftfix); } return true; @@ -1271,8 +1876,8 @@ public: }; aligned_unique_ptr operator()(unique_ptr &rdfa) const { - // Unleash the McClellan! - return mcclellanCompile(*rdfa, build.cc, build.rm); + // Unleash the mighty DFA! 
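
/*
 * Minimal sketch of the group-mask test applied to the candidate eager
 * prefixes above. Each eager prefix squashes some literal groups when it dies
 * (a cleared bit in its squash mask); the prefixes are only run eagerly if,
 * between them, they can squash every initially-enabled group, i.e. if the
 * AND of their squash masks leaves no initial-group bit set. Otherwise the
 * literal matcher could never be switched off and the prefixes stay lazy. The
 * mask values below are invented for illustration.
 */
#include <cassert>
#include <cstdint>

typedef uint64_t rose_group;

// Mirror of the check in buildLeftfixes(): if any initially-on group survives
// the combined squash mask, fall back to lazy prefixes.
static bool keepEagerPrefixes(rose_group combined_squashed,
                              rose_group initial_groups) {
    return (combined_squashed & initial_groups) == 0;
}

int main() {
    const rose_group initial = 0x3;        // groups 0 and 1 start enabled
    rose_group combined = ~rose_group{0};  // AND identity
    combined &= ~rose_group{1};            // prefix A squashes group 0
    assert(!keepEagerPrefixes(combined, initial)); // group 1 still on: lazy
    combined &= ~rose_group{2};            // prefix B squashes group 1
    assert(keepEagerPrefixes(combined, initial));  // all initial groups covered
    return 0;
}
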
+ return getDfa(*rdfa, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { @@ -1300,7 +1905,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc, rm); + auto d = getDfa(*rdfa, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } @@ -1458,6 +2063,7 @@ void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); bc.suffixes.emplace(s, queue); + build.suffix_queue_map.emplace(s, queue); } } @@ -1481,11 +2087,111 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, } static -bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, - set *no_retrigger_queues) { - map > suffixTriggers; - findSuffixTriggers(tbi, &suffixTriggers); +void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + map> &suffixTriggers, + const map> &vertex_map, + const vector> &groups, + set *no_retrigger_queues) { + RoseGraph &g = build.g; + vector exclusive_info; + for (const auto &gp : groups) { + ExclusiveInfo info; + for (const auto &id : gp) { + const auto &verts = vertex_map.at(id); + suffix_id s(g[verts[0]].suffix); + + const set &s_triggers = suffixTriggers.at(s); + + map fixed_depth_tops; + findFixedDepthTops(g, s_triggers, &fixed_depth_tops); + + map>> triggers; + findTriggerSequences(build, s_triggers, &triggers); + + auto n = buildSuffix(build.rm, build.ssm, fixed_depth_tops, + triggers, s, build.cc); + assert(n); + + setSuffixProperties(*n, s, build.rm); + + ExclusiveSubengine engine; + engine.nfa = move(n); + engine.vertices = verts; + info.subengines.push_back(move(engine)); + + const auto &reports = all_reports(s); + info.reports.insert(reports.begin(), reports.end()); + } + info.queue = qif.get_queue(); + exclusive_info.push_back(move(info)); + } + updateExclusiveSuffixProperties(build, exclusive_info, + no_retrigger_queues); + buildSuffixContainer(g, bc, exclusive_info); +} + +static +void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, + map> &suffixTriggers, + set *no_retrigger_queues) { + const RoseGraph &g = tbi.g; + + map suffixes; + set> roleInfoSet; + map> vertex_map; + u32 role_id = 0; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + + const suffix_id s(g[v].suffix); + + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].idx, s.graph()); + + // We may have already built this NFA. 
+ if (contains(suffixes, s)) { + u32 id = suffixes[s]; + if (!tbi.isInETable(v)) { + vertex_map[id].push_back(v); + } + continue; + } + + // Currently disable eod suffixes for exclusive analysis + if (!tbi.isInETable(v) && (s.graph() || s.castle())) { + DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id); + suffixes.emplace(s, role_id); + + vertex_map[role_id].push_back(v); + const set &s_triggers = suffixTriggers.at(s); + map>> triggers; + findTriggerSequences(tbi, s_triggers, &triggers); + + RoleInfo info(s, role_id); + if (setTriggerLiteralsSuffix(info, triggers)) { + roleInfoSet.insert(info); + } + role_id++; + } + } + + if (suffixes.size() > 1) { + DEBUG_PRINTF("suffix size:%lu\n", suffixes.size()); + vector> groups; + exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups); + buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map, + groups, no_retrigger_queues); + } +} + +static +bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, + set *no_retrigger_queues, + const map> &suffixTriggers) { // To ensure compile determinism, build suffix engines in order of their // (unique) queue indices, so that we call add_nfa_to_blob in the same // order. @@ -1498,6 +2204,11 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, for (const auto &e : ordered) { const u32 queue = e.first; const suffix_id &s = e.second; + + if (s.tamarama()) { + continue; + } + const set &s_triggers = suffixTriggers.at(s); map fixed_depth_tops; @@ -1583,22 +2294,35 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { } } +/* Note: buildNfas may reduce the lag for vertices that have prefixes */ static bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, - set *no_retrigger_queues, u32 *leftfixBeginQueue) { + set *no_retrigger_queues, set *eager_queues, + u32 *leftfixBeginQueue) { + map> suffixTriggers; + findSuffixTriggers(tbi, &suffixTriggers); + + if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) { + findExclusiveSuffixes(tbi, bc, qif, suffixTriggers, + no_retrigger_queues); + } + assignSuffixQueues(tbi, bc); - if (!buildSuffixes(tbi, bc, no_retrigger_queues)) { + if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) { return false; } + suffixTriggers.clear(); *leftfixBeginQueue = qif.allocated_count(); - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) { + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, + true)) { return false; } - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, false)) { + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, + false)) { return false; } @@ -1642,10 +2366,10 @@ static void findTransientQueues(const map &leftfix_info, set *out) { DEBUG_PRINTF("curating transient queues\n"); - for (const auto &rbi : leftfix_info | map_values) { - if (rbi.transient) { - DEBUG_PRINTF("q %u is transient\n", rbi.queue); - out->insert(rbi.queue); + for (const auto &build : leftfix_info | map_values) { + if (build.transient) { + DEBUG_PRINTF("q %u is transient\n", build.queue); + out->insert(build.queue); } } } @@ -1750,24 +2474,18 @@ u32 addIteratorToTable(build_context &bc, return offset; } -static -bool hasLastByteHistoryOutEdge(const RoseGraph &g, RoseVertex v) { - for (const auto &e : out_edges_range(v, g)) { - if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) { - return true; - } - } - return false; -} - static u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { vector lb_roles; for (auto v : vertices_range(g)) { - if 
(hasLastByteHistoryOutEdge(g, v)) { - assert(contains(bc.roleStateIndices, v)); - lb_roles.push_back(bc.roleStateIndices.at(v)); + if (!hasLastByteHistorySucc(g, v)) { + continue; + } + // Eager EOD reporters won't have state indices. + auto it = bc.roleStateIndices.find(v); + if (it != end(bc.roleStateIndices)) { + lb_roles.push_back(it->second); } } @@ -1933,16 +2651,6 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { return false; } -static -bool hasInternalReport(const set &reports, const ReportManager &rm) { - for (ReportID r : reports) { - if (!isExternalReport(rm.getReport(r))) { - return true; - } - } - return false; -} - static void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, const vector &outfixes, @@ -1960,24 +2668,10 @@ void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; } - // Mark outfixes that only trigger external reports. + // Mark outfixes that are in the small block matcher. for (const auto &out : outfixes) { const u32 qi = out.get_queue(); - infos[qi].in_sbmatcher = out.in_sbmatcher; - if (!hasInternalReport(all_reports(out), build.rm)) { - infos[qi].only_external = 1; - } - } - - // Mark suffixes that only trigger external reports. - for (const auto &e : bc.suffixes) { - const suffix_id &s = e.first; - u32 qi = e.second; - - if (!hasInternalReport(all_reports(s), build.rm)) { - infos[qi].only_external = 1; - } } // Mark suffixes triggered by EOD table literals. @@ -2150,6 +2844,12 @@ flattenProgram(const vector> &programs) { case ROSE_INSTR_CHECK_LOOKAROUND: ri.u.checkLookaround.fail_jump = jump_val; break; + case ROSE_INSTR_CHECK_MASK: + ri.u.checkMask.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_BYTE: + ri.u.checkByte.fail_jump = jump_val; + break; case ROSE_INSTR_CHECK_INFIX: ri.u.checkInfix.fail_jump = jump_val; break; @@ -2263,6 +2963,18 @@ void recordResources(RoseResources &resources, break; } } + + const auto &g = build.g; + for (const auto &v : vertices_range(g)) { + if (g[v].eod_accept) { + resources.has_eod = true; + break; + } + if (g[v].suffix && has_eod_accepts(g[v].suffix)) { + resources.has_eod = true; + break; + } + } } static @@ -2328,7 +3040,37 @@ void buildActiveLeftIter(const vector &leftTable, } static -bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { + const auto &g = build.g; + const auto v = target(e, g); + + if (!build.g[v].eod_accept) { + return false; + } + + // If there's a graph between us and EOD, we shouldn't be eager. + if (build.g[v].left) { + return false; + } + + // Must be exactly at EOD. + if (g[e].minBound != 0 || g[e].maxBound != 0) { + return false; + } + + // In streaming mode, we can only eagerly report EOD for literals in the + // EOD-anchored table, as that's the only time we actually know where EOD + // is. In block mode, we always have this information. 
+ const auto u = source(e, g); + if (build.cc.streaming && !build.isInETable(u)) { + return false; + } + + return true; +} + +static +bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, u32 outfixEndQueue) { for (u32 i = 0; i < outfixEndQueue; i++) { if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { @@ -2337,16 +3079,18 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, } } - if (tbi.eod_event_literal_id != MO_INVALID_IDX) { + if (build.eod_event_literal_id != MO_INVALID_IDX) { DEBUG_PRINTF("eod is an event to be celebrated\n"); return true; } - for (auto v : vertices_range(tbi.g)) { - if (tbi.g[v].eod_accept) { + + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].eod_accept) { DEBUG_PRINTF("literally report eod\n"); return true; } - if (tbi.g[v].suffix && has_eod_accepts(tbi.g[v].suffix)) { + if (g[v].suffix && has_eod_accepts(g[v].suffix)) { DEBUG_PRINTF("eod suffix\n"); return true; } @@ -2432,6 +3176,122 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { return true; } +static +u32 addLookaround(build_context &bc, const vector &look) { + // Check the cache. + auto it = bc.lookaround_cache.find(look); + if (it != bc.lookaround_cache.end()) { + DEBUG_PRINTF("reusing look at idx %zu\n", it->second); + return verify_u32(it->second); + } + + // Linear scan for sequence. + auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look), + end(look)); + if (seq_it != end(bc.lookaround)) { + size_t idx = distance(begin(bc.lookaround), seq_it); + DEBUG_PRINTF("linear scan found look at idx %zu\n", idx); + bc.lookaround_cache.emplace(look, idx); + return verify_u32(idx); + } + + // New sequence. + size_t idx = bc.lookaround.size(); + bc.lookaround_cache.emplace(look, idx); + insert(&bc.lookaround, bc.lookaround.end(), look); + DEBUG_PRINTF("adding look at idx %zu\n", idx); + return verify_u32(idx); +} + +static +bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { + size_t reach_size = cr.count(); + assert(reach_size > 0); + // check whether entry_size is some power of 2. 
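addLookaround above reuses lookaround data three ways: an exact-sequence cache, a linear std::search for an existing contiguous run, and only then an append. A self-contained sketch of the same idea over plain integers (LookTable and its fields are illustrative, not the real structures):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

// Toy stand-in for a lookaround sequence; the real entries pair an offset
// with a character reachability set.
using Look = std::vector<int>;

struct LookTable {
    std::vector<int> flat;        // all lookaround entries, concatenated
    std::map<Look, size_t> cache; // sequence -> start index in 'flat'

    size_t add(const Look &look) {
        // 1. Exact sequence already added: cache hit.
        auto it = cache.find(look);
        if (it != cache.end()) {
            return it->second;
        }
        // 2. The sequence may already exist as a contiguous run inside 'flat'.
        auto seq = std::search(flat.begin(), flat.end(), look.begin(), look.end());
        if (seq != flat.end()) {
            size_t idx = static_cast<size_t>(seq - flat.begin());
            cache.emplace(look, idx);
            return idx;
        }
        // 3. New sequence: append and remember where it starts.
        size_t idx = flat.size();
        cache.emplace(look, idx);
        flat.insert(flat.end(), look.begin(), look.end());
        return idx;
    }
};

int main() {
    LookTable t;
    std::cout << t.add({1, 2, 3}) << "\n"; // 0: appended
    std::cout << t.add({2, 3}) << "\n";    // 1: found by linear scan
    std::cout << t.add({1, 2, 3}) << "\n"; // 0: cache hit
}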
+ if ((reach_size - 1) & reach_size) { + return false; + } + make_and_cmp_mask(cr, &andmask, &cmpmask); + if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { + return false; + } + return true; +} + +static +bool checkReachWithFlip(const CharReach &cr, u8 &andmask, + u8 &cmpmask, u8 &flip) { + if (checkReachMask(cr, andmask, cmpmask)) { + flip = 0; + return true; + } + if (checkReachMask(~cr, andmask, cmpmask)) { + flip = 1; + return true; + } + return false; +} + +static +bool makeRoleByte(const vector &look, + vector &program) { + if (look.size() == 1) { + const auto &entry = look[0]; + u8 andmask_u8, cmpmask_u8; + u8 flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { + return false; + } + s32 checkbyte_offset = verify_s32(entry.offset); + DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BYTE, + JumpTarget::NEXT_BLOCK); + ri.u.checkByte.and_mask = andmask_u8; + ri.u.checkByte.cmp_mask = cmpmask_u8; + ri.u.checkByte.negation = flip; + ri.u.checkByte.offset = checkbyte_offset; + program.push_back(ri); + return true; + } + return false; +} + +static +bool makeRoleMask(const vector &look, + vector &program) { + if (look.back().offset < look.front().offset + 8) { + s32 base_offset = verify_s32(look.front().offset); + u64a and_mask = 0; + u64a cmp_mask = 0; + u64a neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + DEBUG_PRINTF("entry offset %d\n", entry.offset); + u32 shift = (entry.offset - base_offset) << 3; + and_mask |= (u64a)andmask_u8 << shift; + cmp_mask |= (u64a)cmpmask_u8 << shift; + if (flip) { + neg_mask |= 0xffLLU << shift; + } + } + DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", + and_mask, cmp_mask); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK, + JumpTarget::NEXT_BLOCK); + ri.u.checkMask.and_mask = and_mask; + ri.u.checkMask.cmp_mask = cmp_mask; + ri.u.checkMask.neg_mask = neg_mask; + ri.u.checkMask.offset = base_offset; + program.push_back(ri); + return true; + } + return false; +} + static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { @@ -2457,19 +3317,16 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - DEBUG_PRINTF("role has lookaround\n"); - u32 look_idx; - auto it = bc.lookaround_cache.find(look); - if (it != bc.lookaround_cache.end()) { - DEBUG_PRINTF("reusing look at idx %zu\n", it->second); - look_idx = verify_u32(it->second); - } else { - size_t idx = bc.lookaround.size(); - bc.lookaround_cache.emplace(look, idx); - insert(&bc.lookaround, bc.lookaround.end(), look); - DEBUG_PRINTF("adding look at idx %zu\n", idx); - look_idx = verify_u32(idx); + if (makeRoleByte(look, program)) { + return; } + + if (makeRoleMask(look, program)) { + return; + } + + DEBUG_PRINTF("role has lookaround\n"); + u32 look_idx = addLookaround(bc, look); u32 look_count = verify_u32(look.size()); auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, @@ -2875,7 +3732,15 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(contains(bc.engineOffsets, qi)); const NFA *nfa = get_nfa_from_blob(bc, qi); u32 suffixEvent; - if (isMultiTopType(nfa->type)) { + if (isContainerType(nfa->type)) { + auto tamaProto = g[v].suffix.tamarama.get(); + assert(tamaProto); + u32 top = (u32)MQE_TOP_FIRST + + tamaProto->top_remap.at(make_pair(g[v].idx, + g[v].suffix.top)); + 
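makeRoleByte/makeRoleMask above replace short lookarounds with single AND-and-compare checks: a byte class whose size is a power of two and whose members differ only in a few free bit positions can be tested as (c & and_mask) == cmp_mask, with a negation flag used when only the complement of the class is mask-friendly. A sketch of the mask derivation (this reimplements the idea for illustration; it is not the library's make_and_cmp_mask):

#include <cstdint>
#include <iostream>
#include <set>

// Derive masks such that (c & and_mask) == cmp_mask holds exactly for the
// bytes in 'reach', provided the class is mask-friendly: its size is a power
// of two and its members differ only in the bits cleared in and_mask.
static bool makeAndCmpMask(const std::set<uint8_t> &reach, uint8_t *and_mask,
                           uint8_t *cmp_mask) {
    size_t count = reach.size();
    if (count == 0 || (count & (count - 1))) {
        return false; // size must be a power of two
    }
    uint8_t varying = 0;
    uint8_t first = *reach.begin();
    for (uint8_t c : reach) {
        varying |= static_cast<uint8_t>(c ^ first); // bits that differ
    }
    // Each free (varying) bit doubles the set of accepted bytes; the mask
    // covers the class exactly only if 2^popcount(varying) == count.
    if ((size_t{1} << __builtin_popcount(varying)) != count) {
        return false;
    }
    *and_mask = static_cast<uint8_t>(~varying);
    *cmp_mask = static_cast<uint8_t>(first & *and_mask);
    return true;
}

int main() {
    std::set<uint8_t> az = {'a', 'A'}; // differ only in the 0x20 bit
    uint8_t am, cm;
    if (makeAndCmpMask(az, &am, &cm)) {
        // 'a' and 'A' both satisfy (c & 0xdf) == 0x41; 'b' does not.
        std::cout << std::hex << "and=0x" << +am << " cmp=0x" << +cm << "\n";
    }
}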
assert(top < MQE_INVALID); + suffixEvent = top; + } else if (isMultiTopType(nfa->type)) { assert(!g[v].suffix.haig); u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; assert(top < MQE_INVALID); @@ -2893,11 +3758,38 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleGroups(const rose_group &groups, +void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { + const auto &g = build.g; + rose_group groups = g[v].groups; if (!groups) { return; } + + // The set of "already on" groups as we process this vertex is the + // intersection of the groups set by our predecessors. + assert(in_degree(v, g) > 0); + rose_group already_on = ~rose_group{0}; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + already_on &= bc.vertex_group_map.at(u); + } + + DEBUG_PRINTF("already_on=0x%llx\n", already_on); + DEBUG_PRINTF("squashable=0x%llx\n", bc.squashable_groups); + DEBUG_PRINTF("groups=0x%llx\n", groups); + + already_on &= ~bc.squashable_groups; + DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); + + // We don't *have* to mask off the groups that we know are already on, but + // this will make bugs more apparent. + groups &= ~already_on; + + if (!groups) { + DEBUG_PRINTF("no new groups to set, skipping\n"); + return; + } + auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); ri.u.setGroups.groups = groups; program.push_back(ri); @@ -2926,7 +3818,13 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, // DFAs have no TOP_N support, so they get a classic MQE_TOP event. u32 top; - if (!isMultiTopType(nfa->type)) { + if (isContainerType(nfa->type)) { + auto tamaProto = g[v].left.tamarama.get(); + assert(tamaProto); + top = MQE_TOP_FIRST + tamaProto->top_remap.at( + make_pair(g[v].idx, g[e].rose_top)); + assert(top < MQE_INVALID); + } else if (!isMultiTopType(nfa->type)) { assert(num_tops(g[v].left) == 1); top = MQE_TOP; } else { @@ -2989,7 +3887,7 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, : g[e].maxBound + lit_length; if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].max_offset != ROSE_BOUND_INF); + assert(g[u].fixedOffset()); // Make offsets absolute. min_bound += g[u].max_offset; if (max_bound != ROSE_BOUND_INF) { @@ -3006,6 +3904,10 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, max_bound = MAX_OFFSET; } + // This instruction should be doing _something_ -- bounds should be tighter + // than just {length, inf}. + assert(min_bound > lit_length || max_bound < MAX_OFFSET); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); ri.u.checkBounds.min_bound = min_bound; ri.u.checkBounds.max_bound = max_bound; @@ -3032,6 +3934,30 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, program.push_back(move(ri)); } +static +void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, + RoseVertex v, vector &program) { + vector eod_program; + + for (const auto &e : out_edges_range(v, build.g)) { + if (canEagerlyReportAtEod(build, e)) { + makeRoleReports(build, bc, target(e, build.g), eod_program); + } + } + + if (eod_program.empty()) { + return; + } + + if (!onlyAtEod(build, v)) { + // The rest of our program wasn't EOD anchored, so we need to guard + // these reports with a check. 
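makeRoleGroups above only emits a SET_GROUPS for groups that are not provably on already: the guaranteed-on set is the intersection of the predecessors' groups, minus anything squashable. A toy version of that mask arithmetic (the group numbering and values are invented for the example):

#include <cstdint>
#include <cstdio>
#include <vector>

using rose_group = uint64_t;

// Returns the groups this role still needs to set: its own groups minus those
// guaranteed to be on already (the intersection over all predecessors),
// ignoring groups that may have been squashed back off at runtime.
static rose_group groupsToSet(rose_group own,
                              const std::vector<rose_group> &pred_groups,
                              rose_group squashable) {
    rose_group already_on = ~rose_group{0};
    for (rose_group g : pred_groups) {
        already_on &= g;
    }
    already_on &= ~squashable; // squashed groups cannot be assumed on
    return own & ~already_on;
}

int main() {
    // Both preds set group 0; only one sets group 1; group 2 is squashable.
    std::vector<rose_group> preds = {0x7, 0x5};
    rose_group need = groupsToSet(/*own=*/0x7, preds, /*squashable=*/0x4);
    std::printf("groups to set: 0x%llx\n", (unsigned long long)need);
    // prints 0x6: group 0 is provably on, groups 1 and 2 must still be set.
}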
+ program.emplace_back(ROSE_INSTR_CHECK_ONLY_EOD, JumpTarget::NEXT_BLOCK); + } + + program.insert(end(program), begin(eod_program), end(eod_program)); +} + static vector makeProgram(RoseBuildImpl &build, build_context &bc, const RoseEdge &e) { @@ -3068,15 +3994,21 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, // Next, we can add program instructions that have effects. makeRoleReports(build, bc, v, program); + makeRoleInfixTriggers(build, bc, v, program); // Note: SET_GROUPS instruction must be after infix triggers, as an infix // going dead may switch off groups. - makeRoleGroups(g[v].groups, program); + makeRoleGroups(build, bc, v, program); makeRoleSuffix(build, bc, v, program); + makeRoleSetState(bc, v, program); + // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if + // the program doesn't have one already). + makeRoleEagerEodReports(build, bc, v, program); + return program; } @@ -3135,10 +4067,21 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { if (build.isVirtualVertex(v)) { continue; } - // Leaf nodes don't need state indices, as they don't have successors. - if (isLeafNode(v, g)) { + + // We only need a state index if we have successors that are not + // eagerly-reported EOD vertices. + bool needs_state_index = false; + for (const auto &e : out_edges_range(v, g)) { + if (!canEagerlyReportAtEod(build, e)) { + needs_state_index = true; + break; + } + } + + if (!needs_state_index) { continue; } + /* TODO: also don't need a state index if all edges are nfa based */ bc.roleStateIndices.emplace(v, state++); } @@ -3149,9 +4092,9 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { } static -bool hasUsefulStops(const left_build_info &rbi) { +bool hasUsefulStops(const left_build_info &build) { for (u32 i = 0; i < N_CHARS; i++) { - if (rbi.stopAlphabet[i]) { + if (build.stopAlphabet[i]) { return true; } } @@ -3160,6 +4103,7 @@ bool hasUsefulStops(const left_build_info &rbi) { static void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, + const set &eager_queues, u32 leftfixBeginQueue, u32 leftfixCount, vector &leftTable, u32 *laggedRoseCount, size_t *history) { @@ -3219,6 +4163,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, DEBUG_PRINTF("mw = %u\n", lbi.transient); left.transient = verify_u8(lbi.transient); left.infix = tbi.isNonRootSuccessor(v); + left.eager = contains(eager_queues, lbi.queue); // A rose has a lagIndex if it's non-transient and we are // streaming. @@ -3249,7 +4194,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } static -u32 addPredBlocksSingle( +void addPredBlocksSingle( map>> &predProgramLists, vector &program) { @@ -3257,6 +4202,7 @@ u32 addPredBlocksSingle( for (const auto &m : predProgramLists) { const u32 &pred_state = m.first; + assert(!m.second.empty()); auto subprog = flattenProgram(m.second); // Check our pred state. @@ -3271,7 +4217,6 @@ u32 addPredBlocksSingle( auto prog = flattenProgram(prog_blocks); program.insert(end(program), begin(prog), end(prog)); - return 0; // No iterator. 
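makeRoleEagerEodReports above gathers report instructions for successors that can only fire exactly at EOD and, when the rest of the role program is not already EOD-anchored, guards them with a CHECK_ONLY_EOD. A toy assembly of that pattern over string "instructions" (the helper itself is illustrative, not the real program builder):

#include <iostream>
#include <string>
#include <vector>

using Program = std::vector<std::string>;

// Append eagerly-reported EOD work to an existing role program, guarding it
// with CHECK_ONLY_EOD unless the program already runs only at EOD.
static void appendEagerEodReports(Program &program, const Program &eod_reports,
                                  bool only_at_eod) {
    if (eod_reports.empty()) {
        return; // nothing fires exactly at EOD from this role
    }
    if (!only_at_eod) {
        program.push_back("CHECK_ONLY_EOD"); // skip the reports mid-stream
    }
    program.insert(program.end(), eod_reports.begin(), eod_reports.end());
}

int main() {
    Program prog = {"CHECK_BOUNDS", "REPORT"};
    appendEagerEodReports(prog, {"REPORT_EOD"}, /*only_at_eod=*/false);
    for (const auto &i : prog) {
        std::cout << i << "\n";
    }
}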
} static @@ -3284,7 +4229,7 @@ u32 programLength(const vector &program) { } static -u32 addPredBlocksMulti(build_context &bc, +void addPredBlocksMulti(build_context &bc, map>> &predProgramLists, vector &program) { assert(!predProgramLists.empty()); @@ -3314,6 +4259,7 @@ u32 addPredBlocksMulti(build_context &bc, DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), curr_offset); jump_table.push_back(curr_offset); + assert(!e.second.empty()); auto subprog = flattenProgram(e.second); if (e.first != keys.back()) { @@ -3359,31 +4305,31 @@ u32 addPredBlocksMulti(build_context &bc, } program.insert(end(program), begin(sparse_program), end(sparse_program)); - - return iter_offset; } static -u32 addPredBlocks(build_context &bc, - map>> &predProgramLists, - vector &program, - bool force_sparse_iter) { +void addPredBlocks(build_context &bc, + map>> &predProgramLists, + vector &program) { const size_t num_preds = predProgramLists.size(); if (num_preds == 0) { program = flattenProgram({program}); - return 0; // No iterator. - } else if (!force_sparse_iter && num_preds == 1) { - return addPredBlocksSingle(predProgramLists, program); - } else { - return addPredBlocksMulti(bc, predProgramLists, program); + return; } + + if (num_preds == 1) { + addPredBlocksSingle(predProgramLists, program); + return; + } + + addPredBlocksMulti(bc, predProgramLists, program); } /** * Returns the pair (program offset, sparse iter offset). */ static -pair makeSparseIterProgram(build_context &bc, +vector makeSparseIterProgram(build_context &bc, map>> &predProgramLists, const vector &root_program, const vector &pre_program) { @@ -3399,7 +4345,7 @@ pair makeSparseIterProgram(build_context &bc, // Add blocks to deal with non-root edges (triggered by sparse iterator or // mmbit_isset checks). This operation will flatten the program up to this // point. - u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false); + addPredBlocks(bc, predProgramLists, program); // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. 
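addPredBlocks above picks the cheapest gating for per-predecessor sub-programs: nothing when there are none, a single state check for one predecessor, and a sparse-iterator jump table for several. A schematic sketch (CHECK_STATE and SPARSE_ITER here are simplified stand-ins for the real Rose instructions and mmbit iterator):

#include <iostream>
#include <map>
#include <string>
#include <vector>

using Instr = std::string;
using Block = std::vector<Instr>;

// Gate each predecessor's block on its state bit. With one predecessor a
// plain state check suffices; with several, a sparse iterator walks only the
// states that are actually on and jumps to the matching block.
static Block gateOnPredStates(const std::map<unsigned, Block> &pred_blocks) {
    Block out;
    if (pred_blocks.empty()) {
        return out;
    }
    if (pred_blocks.size() == 1) {
        const auto &only = *pred_blocks.begin();
        out.push_back("CHECK_STATE " + std::to_string(only.first));
        out.insert(out.end(), only.second.begin(), only.second.end());
        return out;
    }
    out.push_back("SPARSE_ITER_BEGIN"); // jump table over live pred states
    for (const auto &e : pred_blocks) {
        out.push_back("CASE state=" + std::to_string(e.first));
        out.insert(out.end(), e.second.begin(), e.second.end());
    }
    out.push_back("SPARSE_ITER_END");
    return out;
}

int main() {
    std::map<unsigned, Block> blocks = {{3, {"REPORT A"}}, {7, {"REPORT B"}}};
    for (const auto &i : gateOnPredStates(blocks)) {
        std::cout << i << "\n";
    }
}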
@@ -3410,8 +4356,7 @@ pair makeSparseIterProgram(build_context &bc, program.insert(end(program), begin(root_program), end(root_program)); } - applyFinalSpecialisation(program); - return {writeProgram(bc, program), iter_offset}; + return program; } static @@ -3439,8 +4384,7 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, } static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, - vector &program) { +rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) { assert(contains(build.final_id_to_literal, final_id)); const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); @@ -3448,7 +4392,13 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, for (const auto &li : lit_infos) { groups |= li->group_mask; } + return groups; +} +static +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, + vector &program) { + rose_group groups = getFinalIdGroupsUnion(build, final_id); if (!groups) { return; } @@ -3497,11 +4447,7 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, return; } - rose_group groups = 0; - for (const auto &li : lit_infos) { - groups |= li->group_mask; - } - + rose_group groups = getFinalIdGroupsUnion(build, final_id); if (!groups) { return; } @@ -3513,6 +4459,62 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, program.push_back(move(ri)); } +static +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +static +void makeRecordAnchoredInstruction(const RoseBuildImpl &build, + build_context &bc, u32 final_id, + vector &program) { + assert(contains(build.final_id_to_literal, final_id)); + const auto &lit_ids = build.final_id_to_literal.at(final_id); + + // Must be anchored. + assert(!lit_ids.empty()); + if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) { + return; + } + + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. 
+ u32 max_offset = 0; + for (u32 lit_id : lit_ids) { + assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); + max_offset = max(max_offset, findMaxOffset(build, lit_id)); + } + + if (max_offset <= bc.floatingMinLiteralMatchOffset) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED); + ri.u.recordAnchored.id = final_id; + program.push_back(move(ri)); +} + +static +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 min_offset = UINT32_MAX; + for (const auto &v : lit_vertices) { + min_offset = min(min_offset, build.g[v].min_offset); + } + + return min_offset; +} + static void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, u32 final_id, @@ -3536,22 +4538,36 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, return; } - size_t min_offset = SIZE_MAX; + size_t min_len = SIZE_MAX; + u32 min_offset = UINT32_MAX; for (u32 lit_id : lit_ids) { const auto &lit = build.literals.right.at(lit_id); - min_offset = min(min_offset, lit.elength()); + size_t lit_min_len = lit.elength(); + u32 lit_min_offset = findMinOffset(build, lit_id); + DEBUG_PRINTF("lit_id=%u has min_len=%zu, min_offset=%u\n", lit_id, + lit_min_len, lit_min_offset); + min_len = min(min_len, lit_min_len); + min_offset = min(min_offset, lit_min_offset); } - DEBUG_PRINTF("%zu lits, min_offset=%zu\n", lit_ids.size(), min_offset); + DEBUG_PRINTF("final_id=%u has min_len=%zu, min_offset=%u, " + "global min is %u\n", final_id, min_len, min_offset, + bc.floatingMinLiteralMatchOffset); // If we can't match before the min offset, we don't need the check. - if (min_offset >= bc.floatingMinLiteralMatchOffset) { + if (min_len >= bc.floatingMinLiteralMatchOffset) { DEBUG_PRINTF("no need for check, min is %u\n", - bc.floatingMinLiteralMatchOffset); + bc.floatingMinLiteralMatchOffset); return; } - program.push_back(RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY)); + assert(min_offset >= bc.floatingMinLiteralMatchOffset); + assert(min_offset < UINT32_MAX); + + DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY); + ri.u.checkLitEarly.min_offset = min_offset; + program.push_back(move(ri)); } static @@ -3601,8 +4617,9 @@ vector buildLitInitialProgram(RoseBuildImpl &build, } static -u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, - const vector &lit_edges) { +vector buildLiteralProgram(RoseBuildImpl &build, + build_context &bc, u32 final_id, + const vector &lit_edges) { const auto &g = build.g; DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); @@ -3621,6 +4638,9 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); auto program = makeProgram(build, bc, e); + if (program.empty()) { + continue; + } predProgramLists[pred_state].push_back(program); } @@ -3639,10 +4659,18 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, root_programs.push_back(role_prog); } - // Literal may squash groups. if (final_id != MO_INVALID_IDX) { - root_programs.push_back({}); - makeGroupSquashInstruction(build, final_id, root_programs.back()); + vector prog; + + // Literal may squash groups. + makeGroupSquashInstruction(build, final_id, prog); + + // Literal may be anchored and need to be recorded. 
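Both helpers above compare a literal's possible match offsets with floatingMinLiteralMatchOffset: an anchored literal only needs a RECORD_ANCHORED if it can match beyond that offset, and a CHECK_LIT_EARLY is only emitted when a literal can match before it. A small arithmetic sketch (the offset values are invented):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// An anchored literal must be recorded only if some vertex using it can match
// past the floating matcher's minimum match offset.
static bool needsRecordAnchored(const std::vector<uint32_t> &vertex_max_offsets,
                                uint32_t floating_min_lit_match_offset) {
    uint32_t max_offset = 0;
    for (uint32_t o : vertex_max_offsets) {
        max_offset = std::max(max_offset, o);
    }
    return max_offset > floating_min_lit_match_offset;
}

// A floating literal needs an early-match check only if its shortest possible
// match end lies before the global floating minimum.
static bool needsCheckLitEarly(size_t min_lit_len,
                               uint32_t floating_min_lit_match_offset) {
    return min_lit_len < floating_min_lit_match_offset;
}

int main() {
    std::printf("%d %d\n",
                needsRecordAnchored({4, 9}, 6), // 1: can match at offset 9 > 6
                needsCheckLitEarly(8, 6));      // 0: cannot match before offset 6
}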
+ makeRecordAnchoredInstruction(build, bc, final_id, prog); + + if (!prog.empty()) { + root_programs.push_back(move(prog)); + } } vector root_program; @@ -3654,7 +4682,19 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, // Put it all together. return makeSparseIterProgram(bc, predProgramLists, root_program, - pre_program).first; + pre_program); +} + +static +u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, + const vector &lit_edges) { + auto program = buildLiteralProgram(build, bc, final_id, lit_edges); + if (program.empty()) { + return 0; + } + // Note: already flattened. + applyFinalSpecialisation(program); + return writeProgram(bc, program); } static @@ -3720,53 +4760,84 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { const u32 num_literals = build.final_id_to_literal.size(); auto lit_edge_map = findEdgesByLiteral(build); - vector litPrograms(num_literals); + bc.litPrograms.resize(num_literals); vector delayRebuildPrograms(num_literals); for (u32 finalId = 0; finalId != num_literals; ++finalId) { const auto &lit_edges = lit_edge_map[finalId]; - litPrograms[finalId] = - buildLiteralProgram(build, bc, finalId, lit_edges); + bc.litPrograms[finalId] = + writeLiteralProgram(build, bc, finalId, lit_edges); delayRebuildPrograms[finalId] = buildDelayRebuildProgram(build, bc, finalId); } u32 litProgramsOffset = - add_to_engine_blob(bc, begin(litPrograms), end(litPrograms)); + add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms)); u32 delayRebuildProgramsOffset = add_to_engine_blob( bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); return {litProgramsOffset, delayRebuildProgramsOffset}; } +/** + * \brief Returns all reports used by output-exposed engines, for which we need + * to generate programs. + */ static -u32 buildReportPrograms(RoseBuildImpl &build, build_context &bc) { - const auto &rm = build.rm; - const u32 numReports = verify_u32(rm.numReports()); - vector programs(numReports); +set findEngineReports(const RoseBuildImpl &build) { + set reports; + + // The small write engine uses these engine report programs. 
+ insert(&reports, build.smwr.all_reports()); + + for (const auto &outfix : build.outfixes) { + insert(&reports, all_reports(outfix)); + } + + const auto &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + insert(&reports, all_reports(g[v].suffix)); + } + } + + DEBUG_PRINTF("%zu engine reports (of %zu)\n", reports.size(), + build.rm.numReports()); + return reports; +} + +static +pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { + const auto reports = findEngineReports(build); + vector programs; + programs.reserve(reports.size()); vector program; - for (ReportID id = 0; id < numReports; id++) { + for (ReportID id : reports) { program.clear(); const bool has_som = false; makeCatchupMpv(build, bc, id, program); makeReport(build, id, has_som, program); program = flattenProgram({program}); applyFinalSpecialisation(program); - programs[id] = writeProgram(bc, program); - build.rm.setProgramOffset(id, programs[id]); + u32 offset = writeProgram(bc, program); + programs.push_back(offset); + build.rm.setProgramOffset(id, offset); DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, programs.back(), program.size()); } - return add_to_engine_blob(bc, begin(programs), end(programs)); + u32 offset = add_to_engine_blob(bc, begin(programs), end(programs)); + u32 count = verify_u32(programs.size()); + return {offset, count}; } static vector makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { + const RoseEdge &e, + const bool multiple_preds) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); @@ -3776,7 +4847,7 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeRoleCheckBounds(build, v, e, program); } - if (hasGreaterInDegree(1, v, g)) { + if (multiple_preds) { // Only necessary when there is more than one pred. makeRoleCheckNotHandled(bc, v, program); } @@ -3792,11 +4863,34 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, return program; } -/** - * Returns the pair (program offset, sparse iter offset). - */ static -pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { +bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].suffix && build.isInETable(v)) { + DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n", + g[v].idx); + return true; + } + } + return false; +} + +static +bool hasEodMatcher(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (build.isInETable(v)) { + DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].idx); + return true; + } + } + return false; +} + +static +void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, + bool in_etable, vector &program) { const RoseGraph &g = build.g; // pred state id -> list of programs @@ -3810,37 +4904,50 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].idx, in_degree(v, g)); + vector edge_list; for (const auto &e : in_edges_range(v, g)) { RoseVertex u = source(e, g); + if (build.isInETable(u) != in_etable) { + DEBUG_PRINTF("pred %zu %s in etable\n", g[u].idx, + in_etable ? 
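The EOD-time program is assembled from optional blocks (engine catch-up, the EOD-anchored literal matcher, suffix handling, plus the anchor programs built here); as the following hunk shows, each appender pops the trailing END sentinel before adding its own block and re-terminating, so the result stays one well-formed program, and a program containing only END is treated as empty. A toy version of that append pattern:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

using Program = std::vector<std::string>;

// Append a block to a program that may already be END-terminated: drop the
// old END, add the block, and re-terminate. Empty blocks are skipped.
static void appendBlock(Program &program, const Program &block) {
    if (block.empty()) {
        return;
    }
    if (!program.empty()) {
        assert(program.back() == "END");
        program.pop_back();
    }
    program.insert(program.end(), block.begin(), block.end());
    program.push_back("END");
}

int main() {
    Program eod;
    appendBlock(eod, {});              // no EOD event literals
    appendBlock(eod, {"ENGINES_EOD"}); // outfixes/suffixes to catch up
    appendBlock(eod, {"MATCHER_EOD"}); // EOD-anchored literal table
    appendBlock(eod, {"SUFFIXES_EOD"});
    for (const auto &i : eod) {
        std::cout << i << "\n";
    }
}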
"is not" : "is"); + continue; + } + if (canEagerlyReportAtEod(build, e)) { + DEBUG_PRINTF("already done report for vertex %zu\n", g[u].idx); + continue; + } + edge_list.push_back(e); + } + const bool multiple_preds = edge_list.size() > 1; + for (const auto &e : edge_list) { + RoseVertex u = source(e, g); assert(contains(bc.roleStateIndices, u)); u32 predStateIdx = bc.roleStateIndices.at(u); - auto program = makeEodAnchorProgram(build, bc, e); - predProgramLists[predStateIdx].push_back(program); + auto prog = makeEodAnchorProgram(build, bc, e, multiple_preds); + if (prog.empty()) { + continue; + } + predProgramLists[predStateIdx].push_back(prog); } } if (predProgramLists.empty()) { - DEBUG_PRINTF("no eod anchored roles\n"); - return {0, 0}; + return; } - - vector program; - - // Note: we force the use of a sparse iterator for the EOD program so we - // can easily guard EOD execution at runtime. - u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true); - - assert(program.size() > 1); - applyFinalSpecialisation(program); - return {writeProgram(bc, program), iter_offset}; + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + addPredBlocks(bc, predProgramLists, program); } static -u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { +void addEodEventProgram(RoseBuildImpl &build, build_context &bc, + vector &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { - return 0; + return; } const RoseGraph &g = build.g; @@ -3864,7 +4971,80 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { tie(g[source(b, g)].idx, g[target(b, g)].idx); }); - return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); + auto prog = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); + program.insert(end(program), begin(prog), end(prog)); +} + +static +void addEnginesEodProgram(u32 eodNfaIterOffset, + vector &program) { + if (!eodNfaIterOffset) { + return; + } + + auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD); + ri.u.enginesEod.iter_offset = eodNfaIterOffset; + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.push_back(move(ri)); + program.emplace_back(ROSE_INSTR_END); +} + +static +void addSuffixesEodProgram(const RoseBuildImpl &build, + vector &program) { + if (!hasEodAnchoredSuffix(build)) { + return; + } + + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.emplace_back(ROSE_INSTR_SUFFIXES_EOD); + program.emplace_back(ROSE_INSTR_END); +} + +static +void addMatcherEodProgram(const RoseBuildImpl &build, + vector &program) { + if (!hasEodMatcher(build)) { + return; + } + + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.emplace_back(ROSE_INSTR_MATCHER_EOD); + program.emplace_back(ROSE_INSTR_END); +} + +static +u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, + u32 eodNfaIterOffset) { + vector program; + + addEodEventProgram(build, bc, program); + addEnginesEodProgram(eodNfaIterOffset, program); + addEodAnchorProgram(build, bc, false, program); + addMatcherEodProgram(build, program); + addEodAnchorProgram(build, bc, true, program); + addSuffixesEodProgram(build, program); + + if (program.size() == 1) { + assert(program.back().code() == ROSE_INSTR_END); + return 0; + } + + if (program.empty()) { + return 0; + } + + applyFinalSpecialisation(program); + return writeProgram(bc, program); } static @@ -3900,6 +5080,9 
@@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { u32 max_d = g[v].max_offset; u32 min_d = g[v].min_offset; + DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id, + key.elength_including_mask(), min_d, max_d); + if (build.literal_info[lit_id].undelayed_id != lit_id) { /* this is a delayed match; need to update delay properties */ /* TODO: can delayed literals ever be in another table ? */ @@ -3919,9 +5102,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { switch (key.table) { case ROSE_FLOATING: ENSURE_AT_LEAST(&engine->floatingDistance, max_d); - if (min_d >= key.elength()) { + if (min_d >= key.elength_including_mask()) { LIMIT_TO_AT_MOST(&engine->floatingMinDistance, - min_d - (u32)key.elength()); + min_d - (u32)key.elength_including_mask()); } else { /* overlapped literals from rose + anchored table can * cause us to underflow due to sloppiness in @@ -3965,6 +5148,60 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { } } +static +u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, + u32 queue_count, + build_context &bc) { + if (eager.empty()) { + return 0; + } + + vector vec; + for (u32 q : eager) { + assert(q >= leftfixBeginQueue); + vec.push_back(q - leftfixBeginQueue); + } + + vector iter; + mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue); + return addIteratorToTable(bc, iter); +} + +static +aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, + aligned_unique_ptr rose) { + assert(rose); + + if (roseIsPureLiteral(rose.get())) { + DEBUG_PRINTF("pure literal case, not adding smwr\n"); + return rose; + } + + u32 qual = roseQuality(rose.get()); + auto smwr_engine = build.smwr.build(qual); + if (!smwr_engine) { + DEBUG_PRINTF("no smwr built\n"); + return rose; + } + + const size_t mainSize = roseSize(rose.get()); + const size_t smallWriteSize = smwrSize(smwr_engine.get()); + DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); + + const size_t smwrOffset = ROUNDUP_CL(mainSize); + const size_t newSize = smwrOffset + smallWriteSize; + + auto rose2 = aligned_zmalloc_unique(newSize); + char *ptr = (char *)rose2.get(); + memcpy(ptr, rose.get(), mainSize); + memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize); + + rose2->smallWriteOffset = verify_u32(smwrOffset); + rose2->size = verify_u32(newSize); + + return rose2; +} + aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DerivedBoundaryReports dboundary(boundary); @@ -3981,10 +5218,15 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_anchored = true; } bc.needs_mpv_catchup = needsMpvCatchup(*this); + bc.vertex_group_map = getVertexGroupMap(*this); + bc.squashable_groups = getSquashableGroups(*this); auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); - u32 reportProgramOffset = buildReportPrograms(*this, bc); + u32 reportProgramOffset; + u32 reportProgramCount; + tie(reportProgramOffset, reportProgramCount) = + buildReportPrograms(*this, bc); // Build NFAs set no_retrigger_queues; @@ -3997,7 +5239,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 outfixEndQueue = qif.allocated_count(); u32 leftfixBeginQueue = outfixEndQueue; - if (!buildNfas(*this, bc, qif, &no_retrigger_queues, + set eager_queues; + + /* Note: buildNfas may reduce the lag for vertices that have prefixes */ + if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, &leftfixBeginQueue)) { return nullptr; } @@ 
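addSmallWriteEngine above appends the small-write engine to the Rose bytecode at a cacheline-aligned offset and patches the header's offset and size fields. A minimal layout sketch (EngineHeader and the 64-byte constant stand in for the real RoseEngine header and ROUNDUP_CL):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

struct EngineHeader {
    uint32_t size = 0;
    uint32_t smallWriteOffset = 0; // 0 means "no small-write engine"
};

static size_t roundUpCacheline(size_t n) {
    return (n + 63) & ~size_t{63}; // align up to a 64-byte cacheline
}

// Produce a new buffer containing the main engine followed by the small-write
// engine at a cacheline-aligned offset, with the header updated to match.
static std::vector<char> appendSmallWrite(const std::vector<char> &main_engine,
                                          const std::vector<char> &smwr) {
    assert(main_engine.size() >= sizeof(EngineHeader));
    size_t smwr_offset = roundUpCacheline(main_engine.size());
    std::vector<char> out(smwr_offset + smwr.size(), 0);

    std::memcpy(out.data(), main_engine.data(), main_engine.size());
    std::memcpy(out.data() + smwr_offset, smwr.data(), smwr.size());

    auto *hdr = reinterpret_cast<EngineHeader *>(out.data());
    hdr->smallWriteOffset = static_cast<uint32_t>(smwr_offset);
    hdr->size = static_cast<uint32_t>(out.size());
    return out;
}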
-4017,7 +5262,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 laggedRoseCount = 0; vector leftInfoTable; - buildLeftInfoTable(*this, bc, leftfixBeginQueue, + buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue, queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); @@ -4026,15 +5271,14 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { tie(litProgramOffset, litDelayRebuildProgramOffset) = buildLiteralPrograms(*this, bc); - u32 eodProgramOffset = writeEodProgram(*this, bc); - u32 eodIterProgramOffset; - u32 eodIterOffset; - tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); + u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); u32 lastByteOffset = buildLastByteIter(g, bc); + u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue, + queue_count, bc); // Enforce role table resource limit. if (num_vertices(g) > cc.grey.limitRoseRoleCount) { @@ -4057,7 +5301,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build anchored matcher. size_t asize = 0; u32 amatcherOffset = 0; - auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); + auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms, + &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; @@ -4065,9 +5310,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } // Build floating HWLM matcher. + rose_group fgroups = 0; size_t fsize = 0; size_t floatingStreamStateRequired = 0; - auto ftable = buildFloatingMatcher(*this, &fsize, &historyRequired, + auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired, &floatingStreamStateRequired); u32 fmatcherOffset = 0; if (ftable) { @@ -4200,17 +5446,16 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->litProgramOffset = litProgramOffset; engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; engine->reportProgramOffset = reportProgramOffset; - engine->reportProgramCount = verify_u32(rm.reports().size()); + engine->reportProgramCount = reportProgramCount; engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); engine->activeArrayCount = activeArrayCount; engine->activeLeftCount = activeLeftCount; engine->queueCount = queue_count; + engine->eagerIterOffset = eagerIterOffset; engine->handledKeyCount = bc.handledKeys.size(); - engine->group_weak_end = group_weak_end; - engine->rolesWithStateCount = bc.numStates; engine->leftOffset = leftOffset; @@ -4226,9 +5471,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->nfaInfoOffset = nfaInfoOffset; engine->eodProgramOffset = eodProgramOffset; - engine->eodIterProgramOffset = eodIterProgramOffset; - engine->eodIterOffset = eodIterOffset; - engine->eodNfaIterOffset = eodNfaIterOffset; engine->lastByteHistoryIterOffset = lastByteOffset; @@ -4282,6 +5524,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillMatcherDistances(*this, engine.get()); engine->initialGroups = getInitialGroups(); + engine->floating_group_mask = fgroups; engine->totalNumLiterals = verify_u32(literal_info.size()); engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; @@ -4315,6 +5558,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // after 
we copied it into the engine bytecode. assert(byte_length(bc.engine_blob) == engineBlobSize); + // Add a small write engine if appropriate. + engine = addSmallWriteEngine(*this, move(engine)); + DEBUG_PRINTF("rose done %p\n", engine.get()); return engine; } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 12500599..3f82a9cc 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -34,6 +34,8 @@ #include "rose_build_castle.h" #include "rose_build_convert.h" #include "rose_build_dump.h" +#include "rose_build_groups.h" +#include "rose_build_matchers.h" #include "rose_build_merge.h" #include "rose_build_role_aliasing.h" #include "rose_build_util.h" @@ -68,7 +70,6 @@ #include #include #include -#include #include #include #include @@ -77,65 +78,16 @@ #include using namespace std; -using boost::adaptors::map_keys; using boost::adaptors::map_values; namespace ue2 { -#define ROSE_LONG_LITERAL_LEN 8 - #define ANCHORED_REHOME_MIN_FLOATING 800 #define ANCHORED_REHOME_MIN_FLOATING_SHORT 50 #define ANCHORED_REHOME_ALLOW_SHORT 20 #define ANCHORED_REHOME_DEEP 25 #define ANCHORED_REHOME_SHORT_LEN 3 -static -bool superStrong(const rose_literal_id &lit) { - if (lit.s.length() < ROSE_LONG_LITERAL_LEN) { - return false; - } - - const u32 EXPECTED_FDR_BUCKET_LENGTH = 8; - - assert(lit.s.length() >= EXPECTED_FDR_BUCKET_LENGTH); - size_t len = lit.s.length(); - const string &s = lit.s.get_string(); - - for (size_t i = 1; i < EXPECTED_FDR_BUCKET_LENGTH; i++) { - if (s[len - 1 - i] != s[len - 1]) { - return true; /* we have at least some variation in the tail */ - } - } - DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n", - escapeString(s).c_str()); - return false; -} - -rose_group RoseBuildImpl::getGroups(RoseVertex v) const { - rose_group groups = 0; - - for (u32 id : g[v].literals) { - u32 lit_id = literal_info.at(id).undelayed_id; - - rose_group mygroups = literal_info[lit_id].group_mask; - groups |= mygroups; - } - - return groups; -} - -/** \brief Get the groups of the successor literals of a given vertex. */ -rose_group RoseBuildImpl::getSuccGroups(RoseVertex start) const { - rose_group initialGroups = 0; - - for (auto v : adjacent_vertices_range(start, g)) { - initialGroups |= getGroups(v); - } - - return initialGroups; -} - #ifdef DEBUG static UNUSED void printLitInfo(const rose_literal_info &li, u32 id) { @@ -481,6 +433,9 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { // If the bounds are {0,0}, this role can only match precisely at EOD. if (minBound == 0 && maxBound == 0) { + /* last byte history will squash the state byte so cannot have other + * succ */ + assert(out_degree(u, g) == 1); return ROSE_ROLE_HISTORY_LAST_BYTE; } @@ -501,7 +456,8 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { return ROSE_ROLE_HISTORY_NONE; } - if (g[u].fixedOffset()) { + if (g[u].fixedOffset() && + (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF)) { DEBUG_PRINTF("fixed offset -> anch\n"); return ROSE_ROLE_HISTORY_ANCH; } @@ -555,8 +511,8 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } // Use the program to handle cases that aren't external reports. 
- for (const ReportID &id : g[v].reports) { - if (!isExternalReport(rm.getReport(id))) { + for (const ReportID &rid : g[v].reports) { + if (!isExternalReport(rm.getReport(rid))) { return false; } } @@ -585,6 +541,45 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { return true; } + +/* If we have prefixes that can squash all the floating roots, we can have a + * somewhat-conditional floating table. As we can't yet look at squash_masks, we + * have to make some guess as to if we are in this case but the win for not + * running a floating table over a large portion of the stream is significantly + * larger than avoiding running an eod table over the last N bytes. */ +static +bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) { + for (auto v : vertices_range(tbi.g)) { + if (!tbi.isRootSuccessor(v)) { + continue; + } + + if (!tbi.isFloating(v)) { + continue; + } + + if (!tbi.g[v].left) { + DEBUG_PRINTF("unguarded floating root\n"); + return false; + } + + if (tbi.g[v].left.graph) { + const NGHolder &h = *tbi.g[v].left.graph; + if (proper_out_degree(h.startDs, h)) { + DEBUG_PRINTF("floating nfa prefix, won't die\n"); + return false; + } + } else if (tbi.g[v].left.dfa) { + if (tbi.g[v].left.dfa->start_floating != DEAD_STATE) { + DEBUG_PRINTF("floating dfa prefix, won't die\n"); + return false; + } + } + } + + return true; +} + static bool checkEodStealFloating(const RoseBuildImpl &tbi, const vector &eodLiteralsForFloating, @@ -606,6 +601,11 @@ bool checkEodStealFloating(const RoseBuildImpl &tbi, return false; } + if (checkFloatingKillableByPrefixes(tbi)) { + DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n"); + return false; + } + DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n", eodLiteralsForFloating.size(), numFloatingLiterals, shortestFloatingLen); @@ -862,274 +862,6 @@ bool RoseBuildImpl::hasFinalId(u32 id) const { return literal_info.at(id).final_id != MO_INVALID_IDX; } -static -bool eligibleForAlwaysOnGroup(const RoseBuildImpl &tbi, u32 id) { - /* returns true if it or any of its delay versions have root role */ - for (auto v : tbi.literal_info[id].vertices) { - if (tbi.isRootSuccessor(v)) { - NGHolder *h = tbi.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } - } - - for (u32 delayed_id : tbi.literal_info[id].delayed_ids) { - for (auto v : tbi.literal_info[delayed_id].vertices) { - if (tbi.isRootSuccessor(v)) { - NGHolder *h = tbi.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } - } - } - - return false; -} - -static -bool requires_group_assignment(const rose_literal_id &lit, - const rose_literal_info &info) { - if (lit.delay) { /* we will check the shadow's master */ - return false; - } - - if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) { - return false; - } - - // If we already have a group applied, skip. - if (info.group_mask) { - return false; - } - - if (info.vertices.empty() && info.delayed_ids.empty()) { - DEBUG_PRINTF("literal is good for nothing\n"); - return false; - } - - return true; -} - -static -rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g, - const deque &literal_info, - const bool small_literal_count) { - rose_group local_group = 0; - - for (auto u : inv_adjacent_vertices_range(v, g)) { - /* In small cases, ensure that siblings have the same rose parentage to - * allow rose squashing. In larger cases, don't do this as groups are - * probably too scarce. 
*/ - for (auto w : adjacent_vertices_range(u, g)) { - if (!small_literal_count || g[v].left == g[w].left) { - for (u32 lit_id : g[w].literals) { - local_group |= literal_info[lit_id].group_mask; - } - } else { - DEBUG_PRINTF("not sibling different mother %zu %zu\n", - g[v].idx, g[w].idx); - } - } - } - - return local_group; -} - -/* group constants */ -#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below - * this */ - -static -flat_set getAssociatedVertices(const RoseBuildImpl &build, u32 id) { - flat_set out; - const auto &info = build.literal_info[id]; - insert(&out, info.vertices); - for (const auto &delayed : info.delayed_ids) { - insert(&out, build.literal_info[delayed].vertices); - } - return out; -} - -static -u32 next_available_group(u32 counter, u32 min_start_group) { - counter++; - if (counter == ROSE_GROUPS_MAX) { - DEBUG_PRINTF("resetting groups\n"); - counter = min_start_group; - } - - return counter; -} - -// Assigns groups to literals in the general case, when we have more literals -// than available groups. -void RoseBuildImpl::assignGroupsToLiterals() { - bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE; - - map groupCount; /* group index to number of members */ - - u32 counter = 0; - u32 group_always_on = 0; - - // First pass: handle always on literals. - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; - rose_literal_info &info = literal_info[id]; - - if (!requires_group_assignment(lit, info)) { - continue; - } - - // If this literal has a root role, we always have to search for it - // anyway, so it goes in the always-on group. - /* We could end up squashing it if it is followed by a .* */ - if (eligibleForAlwaysOnGroup(*this, id)) { - info.group_mask = 1ULL << group_always_on; - groupCount[group_always_on]++; - continue; - } - } - - u32 group_long_lit; - if (groupCount[group_always_on]) { - DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]); - group_long_lit = group_always_on; - counter++; - } else { - group_long_lit = counter; - counter++; - } - - u32 min_start_group = counter; - priority_queue, u32> > pq; - - // Second pass: the other literals. 
- for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; - rose_literal_info &info = literal_info[id]; - - if (!requires_group_assignment(lit, info)) { - continue; - } - - assert(!eligibleForAlwaysOnGroup(*this, id)); - pq.push(make_pair(make_pair(-(s32)literal_info[id].vertices.size(), - -(s32)lit.s.length()), id)); - } - - vector long_lits; - while (!pq.empty()) { - u32 id = pq.top().second; - pq.pop(); - UNUSED const rose_literal_id &lit = literals.right.at(id); - DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, - literal_info[id].vertices.size(), lit.s.length()); - - u8 group_id = 0; - rose_group group = ~0ULL; - for (auto v : getAssociatedVertices(*this, id)) { - rose_group local_group = calcLocalGroup(v, g, literal_info, - small_literal_count); - group &= local_group; - if (!group) { - break; - } - } - - if (group == ~0ULL) { - goto boring; - } - - group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the - * always_on groups */ - if (!group) { - goto boring; - } - - group_id = ctz64(group); - - /* TODO: fairness */ - DEBUG_PRINTF("picking sibling group %hhd\n", group_id); - literal_info[id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - - continue; - - boring: - /* long literals will either be stuck in a mega group or spread around - * depending on availability */ - if (superStrong(lit)) { - long_lits.push_back(id); - continue; - } - - // Other literals are assigned to our remaining groups round-robin. - group_id = counter; - - DEBUG_PRINTF("picking boring group %hhd\n", group_id); - literal_info[id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - counter = next_available_group(counter, min_start_group); - } - - /* spread long literals out amongst unused groups if any, otherwise stick - * them in the always on the group */ - - if (groupCount[counter]) { - DEBUG_PRINTF("sticking long literals in the image of the always on\n"); - for (u32 lit_id : long_lits) { - literal_info[lit_id].group_mask = 1ULL << group_long_lit; - groupCount[group_long_lit]++; - } - } else { - u32 min_long_counter = counter; - DEBUG_PRINTF("base long lit group = %u\n", min_long_counter); - for (u32 lit_id : long_lits) { - u8 group_id = counter; - literal_info[lit_id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - counter = next_available_group(counter, min_long_counter); - } - } - - /* assign delayed literals to the same group as their parent */ - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; - - if (!lit.delay) { - continue; - } - - u32 parent = literal_info[id].undelayed_id; - DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent); - assert(literal_info[parent].undelayed_id == parent); - assert(literal_info[parent].group_mask); - literal_info[id].group_mask = literal_info[parent].group_mask; - /* don't increment the group count - these don't really exist */ - } - - DEBUG_PRINTF("populate group to literal mapping\n"); - for (const u32 id : literals.right | map_keys) { - rose_group groups = literal_info[id].group_mask; - while (groups) { - u32 group_id = findAndClearLSB_64(&groups); - group_to_literal[group_id].insert(id); - } - } - - /* find how many groups we allocated */ - for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) { - if (groupCount[i]) { - group_end = MAX(group_end, i + 1); - } - } -} - bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { for (u32 lit_id : g[v].literals) { if (literals.right.at(lit_id).delay) { @@ -1160,213 
+892,6 @@ bool RoseBuildImpl::hasAnchoredTablePred(RoseVertex v) const { return false; } -/* returns true if every vertex associated with a groups also belongs to - lit_info */ -static -bool coversGroup(const RoseBuildImpl &tbi, const rose_literal_info &lit_info) { - if (lit_info.vertices.empty()) { - DEBUG_PRINTF("no vertices - does not cover\n"); - return false; - } - - if (!lit_info.group_mask) { - DEBUG_PRINTF("no group - does not cover\n"); - return false; /* no group (not a floating lit?) */ - } - - assert(popcount64(lit_info.group_mask) == 1); - - /* for each lit in group, ensure that vertices are a subset of lit_info's */ - rose_group groups = lit_info.group_mask; - while (groups) { - u32 group_id = findAndClearLSB_64(&groups); - for (u32 id : tbi.group_to_literal.at(group_id)) { - DEBUG_PRINTF(" checking against friend %u\n", id); - if (!is_subset_of(tbi.literal_info[id].vertices, - lit_info.vertices)) { - DEBUG_PRINTF("fail\n"); - return false; - } - } - } - - DEBUG_PRINTF("ok\n"); - return true; -} - -static -bool isGroupSquasher(const RoseBuildImpl &tbi, const u32 id /* literal id */, - rose_group forbidden_squash_group) { - const RoseGraph &g = tbi.g; - - const rose_literal_info &lit_info = tbi.literal_info.at(id); - - DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, - dumpString(tbi.literals.right.at(id).s).c_str(), - lit_info.group_mask); - - if (tbi.literals.right.at(id).table == ROSE_EVENT) { - DEBUG_PRINTF("event literal, has no groups to squash\n"); - return false; - } - - if (!coversGroup(tbi, lit_info)) { - DEBUG_PRINTF("does not cover group\n"); - return false; - } - - if (lit_info.group_mask & forbidden_squash_group) { - /* probably a delayed lit */ - DEBUG_PRINTF("skipping as involves a forbidden group\n"); - return false; - } - - // Single-vertex, less constrained case than the multiple-vertex one below. - if (lit_info.vertices.size() == 1) { - const RoseVertex &v = *lit_info.vertices.begin(); - - if (tbi.hasDelayPred(v)) { /* due to rebuild issues */ - return false; - } - - /* there are two ways to be a group squasher: - * 1) only care about the first accepted match - * 2) can only match once after a pred match - * - * (2) requires analysis of the infix before v and is not implemented, - * TODO - */ - - /* Case 1 */ - - // Can't squash cases with accepts - if (!g[v].reports.empty()) { - return false; - } - - /* Can't squash cases with a suffix without analysis of the suffix. - * TODO: look at suffixes */ - if (g[v].suffix) { - return false; - } - - // Out-edges must have inf max bound, + no other shenanigans */ - for (const auto &e : out_edges_range(v, g)) { - if (g[e].maxBound != ROSE_BOUND_INF) { - return false; - } - - if (g[target(e, g)].left) { - return false; /* is an infix rose trigger, TODO: analysis */ - } - } - - DEBUG_PRINTF("%u is a path 1 group squasher\n", id); - return true; - - /* note: we could also squash the groups of its preds (if nobody else is - * using them. TODO. */ - } - - // Multiple-vertex case - for (auto v : lit_info.vertices) { - assert(!tbi.isAnyStart(v)); - - // Can't squash cases with accepts - if (!g[v].reports.empty()) { - return false; - } - - // Suffixes and leftfixes are out too as first literal may not match - // for everyone. 
- if (!g[v].isBoring()) { - return false; - } - - /* TODO: checks are solid but we should explain */ - if (tbi.hasDelayPred(v) || tbi.hasAnchoredTablePred(v)) { - return false; - } - - // Out-edges must have inf max bound and not directly lead to another - // vertex with this group, e.g. 'foobar.*foobar'. - for (const auto &e : out_edges_range(v, g)) { - if (g[e].maxBound != ROSE_BOUND_INF) { - return false; - } - RoseVertex t = target(e, g); - - if (g[t].left) { - return false; /* is an infix rose trigger */ - } - - for (u32 lit_id : g[t].literals) { - if (tbi.literal_info[lit_id].group_mask & lit_info.group_mask) { - return false; - } - } - } - - // In-edges must all be dot-stars with no overlap at all, as overlap - // also causes history to be used. - /* Different tables are already forbidden by previous checks */ - for (const auto &e : in_edges_range(v, g)) { - if (!(g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF)) { - return false; - } - - // Check overlap, if source was a literal. - RoseVertex u = source(e, g); - if (tbi.maxLiteralOverlap(u, v)) { - return false; - } - } - } - - DEBUG_PRINTF("literal %u is a multi-vertex group squasher\n", id); - return true; -} - -static -void findGroupSquashers(RoseBuildImpl &tbi) { - rose_group forbidden_squash_group = 0; - for (const auto &e : tbi.literals.right) { - if (e.second.delay) { - forbidden_squash_group |= tbi.literal_info[e.first].group_mask; - } - } - - for (u32 id = 0; id < tbi.literal_info.size(); id++) { - if (isGroupSquasher(tbi, id, forbidden_squash_group)) { - tbi.literal_info[id].squash_group = true; - } - } -} - -/** - * The groups that a role sets are determined by the union of its successor - * literals. Requires the literals already have had groups assigned. - */ -void RoseBuildImpl::assignGroupsToRoles() { - /* Note: if there is a succ literal in the sidematcher, its successors - * literals must be added instead */ - for (auto v : vertices_range(g)) { - if (isAnyStart(v)) { - continue; - } - - const rose_group succ_groups = getSuccGroups(v); - g[v].groups |= succ_groups; - - if (ghost.find(v) != ghost.end()) { - /* delayed roles need to supply their groups to the ghost role */ - g[ghost[v]].groups |= succ_groups; - } - - DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].idx, g[v].groups); - } -} - void RoseBuildImpl::findTransientLeftfixes(void) { for (auto v : vertices_range(g)) { if (!g[v].left) { @@ -1393,19 +918,32 @@ void RoseBuildImpl::findTransientLeftfixes(void) { continue; } - u32 his = g[v].left.lag + max_width; + if (cc.streaming) { + /* STREAMING: transient prefixes must be able to run using history + * rather than storing state. */ + u32 his = g[v].left.lag + max_width; - // If this vertex has an event literal, we need to add one to cope - // with it. - if (hasLiteralInTable(v, ROSE_EVENT)) { - his++; - } + // If this vertex has an event literal, we need to add one to cope + // with it. 
+ if (hasLiteralInTable(v, ROSE_EVENT)) { + his++; + } - /* +1 as trigger must appear in main buffer and no byte is needed to - * decompress the state */ - if (his <= cc.grey.maxHistoryAvailable + 1) { - transient.insert(left); - DEBUG_PRINTF("a transient leftfix has been spotted his=%u\n", his); + /* +1 as trigger must appear in main buffer and no byte is needed to + * decompress the state */ + if (his <= cc.grey.maxHistoryAvailable + 1) { + transient.insert(left); + DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his); + } + } else { + /* BLOCK: transientness is less important and more fuzzy, ideally + * it should be quick to calculate the state. No need to worry about + * history (and hence lag). */ + if (max_width < depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH)) { + transient.insert(left); + DEBUG_PRINTF("a transient block leftfix spotted [%u]\n", + (u32)max_width); + } } } } @@ -1718,7 +1256,8 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, assert(old_id < tbi.literal_info.size()); const rose_literal_info &li = tbi.literal_info[old_id]; - // For compile determinism, operate over literal vertices in index order. + // For compile determinism, operate over literal vertices in index + // order. vector lit_verts(begin(li.vertices), end(li.vertices)); sort(begin(lit_verts), end(lit_verts), VertexIndexComp(g)); @@ -1732,40 +1271,9 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, g[v].max_offset = sai.max_bound + sai.literal.length(); lit_info.vertices.insert(v); - assert(!g[v].reports.empty()); - - bool doDirectReports = true; - for (ReportID report_id : g[v].reports) { - const Report &old_rep = tbi.rm.getReport(report_id); - if (!isExternalReport(old_rep) || old_rep.hasBounds()) { - doDirectReports = false; - break; - } - } - - if (doDirectReports) { - flat_set dr_reports; - for (ReportID report_id : g[v].reports) { - // These new literal roles can be made direct reports, with - // their bounds handled by the bounds on their Report - // structures. - Report rep(tbi.rm.getReport(report_id)); // copy - assert(!rep.hasBounds()); - rep.minOffset = sai.literal.length() + sai.min_bound; - rep.maxOffset = sai.literal.length() + sai.max_bound; - dr_reports.insert(tbi.rm.getInternalId(rep)); - } - g[v].reports = dr_reports; - RoseEdge e = add_edge(tbi.root, v, g).first; - g[e].minBound = 0; // handled by internal_report - g[e].maxBound = ROSE_BOUND_INF; // handled by internal_report - } else { - // If we have a complex internal report, these must become - // anchored literals with their own roles. 
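The streaming/block split above decides transientness differently: in streaming mode the prefix must be re-runnable from history, so lag plus max width (plus one for an event literal) has to fit within the available history plus one, since the trigger itself sits in the main buffer; block mode just applies a width threshold. A small sketch of that arithmetic (the helpers are illustrative, the parameter names follow the diff):

#include <cstdint>

// Streaming: the leftfix can be rerun from history instead of storing stream
// state if its whole window fits in the bytes we keep.
static bool transientStreaming(uint32_t lag, uint32_t max_width,
                               bool has_event_literal,
                               uint32_t max_history_available) {
    uint32_t history_needed = lag + max_width;
    if (has_event_literal) {
        history_needed++; // event literal needs one extra byte of history
    }
    // +1: the trigger must appear in the main buffer, and no byte is needed
    // to decompress the state.
    return history_needed <= max_history_available + 1;
}

// Block mode: no history concerns; just keep quick-to-recompute (narrow)
// leftfixes transient.
static bool transientBlock(uint32_t max_width,
                           uint32_t block_transient_max_width) {
    return max_width < block_transient_max_width;
}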
- RoseEdge e = add_edge(anchored_root, v, g).first; - g[e].minBound = sai.min_bound; - g[e].maxBound = sai.max_bound; - } + RoseEdge e = add_edge(anchored_root, v, g).first; + g[e].minBound = sai.min_bound; + g[e].maxBound = sai.max_bound; } } } @@ -2181,8 +1689,10 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { assert(!danglingVertexRef(*this)); - assignGroupsToLiterals(); - assignGroupsToRoles(); + findMoreLiteralMasks(*this); + + assignGroupsToLiterals(*this); + assignGroupsToRoles(*this); findGroupSquashers(*this); /* final prep work */ diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index f5e99c23..1578dda1 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -651,6 +651,26 @@ CharReach getReachOfNormalVertex(const NGHolder &g) { return CharReach(); } +/** + * \brief Set the edge bounds and appropriate history on the given edge in the + * Rose graph. + */ +static +void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound, + u32 max_bound) { + assert(min_bound <= max_bound); + assert(max_bound <= ROSE_BOUND_INF); + + g[e].minBound = min_bound; + g[e].maxBound = max_bound; + + if (min_bound || max_bound < ROSE_BOUND_INF) { + g[e].history = ROSE_ROLE_HISTORY_ANCH; + } else { + g[e].history = ROSE_ROLE_HISTORY_NONE; + } +} + static bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, const RoseEdge &e_old, RoseVertex ar, @@ -686,18 +706,13 @@ bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, if (source(e_old, g) == ar) { assert(g[e_old].minBound <= bound_min); assert(g[e_old].maxBound >= bound_max); - g[e_old].minBound = bound_min; - g[e_old].maxBound = bound_max; - g[e_old].history = ROSE_ROLE_HISTORY_ANCH; + setEdgeBounds(g, e_old, bound_min, bound_max); } else { RoseEdge e_new; UNUSED bool added; tie(e_new, added) = add_edge(ar, v, g); assert(added); - g[e_new].minBound = bound_min; - g[e_new].maxBound = bound_max; - g[e_new].history = ROSE_ROLE_HISTORY_ANCH; - + setEdgeBounds(g, e_new, bound_min, bound_max); to_delete->push_back(e_old); } @@ -751,9 +766,7 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, /* update bounds on edge */ assert(g[e].minBound <= repeatCount); - g[e].minBound = repeatCount; - g[e].maxBound = ROSE_BOUND_INF; - g[e].history = ROSE_ROLE_HISTORY_ANCH; + setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF); g[v].left.reset(); /* clear the prefix info */ @@ -893,26 +906,19 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, } if (source(e_old, g) == ar) { - g[e_old].minBound = ri.repeatMin + width; - g[e_old].maxBound = ri.repeatMax + width; - g[e_old].history = ROSE_ROLE_HISTORY_ANCH; + setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); } else { RoseEdge e_new; UNUSED bool added; tie(e_new, added) = add_edge(ar, v, g); assert(added); - g[e_new].minBound = ri.repeatMin + width; - g[e_new].maxBound = ri.repeatMax + width; - g[e_new].history = ROSE_ROLE_HISTORY_ANCH; - + setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); to_delete->push_back(e_old); } } else { assert(g[e_old].minBound <= ri.repeatMin + width); - g[e_old].minBound = ri.repeatMin + width; - g[e_old].maxBound = ROSE_BOUND_INF; - 
g[e_old].history = ROSE_ROLE_HISTORY_ANCH; + setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF); } g[v].left.dfa.reset(); @@ -1110,19 +1116,9 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { bounds.min -= delay_adj; } bounds.max -= delay_adj; - - g[e].minBound = bounds.min; - g[e].maxBound = - bounds.max.is_finite() ? (u32)bounds.max : ROSE_BOUND_INF; - - // It's possible that a (0,inf) case might sneak through here, in which - // case we don't need ANCH history at all. - if (g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF) { - g[e].history = ROSE_ROLE_HISTORY_NONE; - } else { - g[e].history = ROSE_ROLE_HISTORY_ANCH; - } - + setEdgeBounds(g, e, bounds.min, bounds.max.is_finite() + ? (u32)bounds.max + : ROSE_BOUND_INF); g[v].left.reset(); } } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 079dd556..5fb27c55 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -30,12 +30,13 @@ #include "rose_build_dump.h" -#include "hwlm/hwlm_build.h" #include "rose_build_impl.h" #include "rose_build_matchers.h" #include "rose/rose_dump.h" #include "rose_internal.h" #include "ue2common.h" +#include "hwlm/hwlm_build.h" +#include "nfa/castlecompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" #include "som/slot_manager_dump.h" @@ -60,24 +61,22 @@ using namespace std; namespace ue2 { -static -string to_string(nfa_kind k) { - switch (k) { - case NFA_PREFIX: - return "p"; - case NFA_INFIX: - return "i"; - case NFA_SUFFIX: - return "s"; - case NFA_OUTFIX: - return "o"; - case NFA_REV_PREFIX: - return "r"; - case NFA_OUTFIX_RAW: - return "O"; +/** \brief Return the kind of a left_id or a suffix_id. */ +template +string render_kind(const Graph &g) { + if (g.graph()) { + return to_string(g.graph()->kind); } - assert(0); - return "?"; + if (g.dfa()) { + return to_string(g.dfa()->kind); + } + if (g.haig()) { + return to_string(g.haig()->kind); + } + if (g.castle()) { + return to_string(g.castle()->kind); + } + return "UNKNOWN"; } namespace { @@ -130,22 +129,12 @@ public: } if (g[v].suffix) { - os << "\\nSUFFIX (TOP " << g[v].suffix.top; - // Can't dump the queue number, but we can identify the suffix. - if (g[v].suffix.graph) { - os << ", graph=" << g[v].suffix.graph.get() << " " - << to_string(g[v].suffix.graph->kind); + suffix_id suff(g[v].suffix); + os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top; + auto it = build.suffix_queue_map.find(suff); + if (it != end(build.suffix_queue_map)) { + os << ", queue " << it->second; } - if (g[v].suffix.castle) { - os << ", castle=" << g[v].suffix.castle.get(); - } - if (g[v].suffix.rdfa) { - os << ", dfa=" << g[v].suffix.rdfa.get(); - } - if (g[v].suffix.haig) { - os << ", haig=" << g[v].suffix.haig.get(); - } - os << ")"; } @@ -154,15 +143,15 @@ public: } if (g[v].left) { - const char *roseKind = - build.isRootSuccessor(v) ? 
"PREFIX" : "INFIX"; - os << "\\nROSE " << roseKind; - os << " ("; - os << "report " << g[v].left.leftfix_report << ")"; - - if (g[v].left.graph) { - os << " " << to_string(g[v].left.graph->kind); + left_id left(g[v].left); + os << "\\n" << render_kind(left) << " (queue "; + auto it = build.leftfix_queue_map.find(left); + if (it != end(build.leftfix_queue_map)) { + os << it->second; + } else { + os << "??"; } + os << ", report " << g[v].left.leftfix_report << ")"; } os << "\""; @@ -262,14 +251,18 @@ void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t, const RoseBuildImpl &build = dynamic_cast(build_base); const Grey &grey = build.cc.grey; - if (!grey.dumpFlags) { + + /* "early" rose graphs should only be dumped if we are dumping intermediate + * graphs. Early graphs can be identified by the lack of a RoseEngine. */ + u32 flag_test = t ? Grey::DUMP_IMPL : Grey::DUMP_INT_GRAPH; + + if (!(grey.dumpFlags & flag_test)) { return; } stringstream ss; ss << grey.dumpPath << filename; - DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str()); ofstream os(ss.str()); @@ -447,18 +440,6 @@ void dumpTestLiterals(const string &filename, const vector &lits) { of.close(); } -namespace { -struct LongerThanLimit { - explicit LongerThanLimit(size_t len) : max_len(len) {} - bool operator()(const hwlmLiteral &lit) const { - return lit.s.length() > max_len; - } - - private: - size_t max_len; -}; -} - static void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED); @@ -470,13 +451,14 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED); dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING); - auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK); - lits.insert(end(lits), begin(lits2), end(lits2)); - lits.erase(remove_if(lits.begin(), lits.end(), - LongerThanLimit(ROSE_SMALL_BLOCK_LEN)), - lits.end()); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); + if (!build.cc.streaming) { + lits = fillHamsterLiteralList(build, ROSE_FLOATING, + ROSE_SMALL_BLOCK_LEN); + auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, + ROSE_SMALL_BLOCK_LEN); + lits.insert(end(lits), begin(lits2), end(lits2)); + dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); + } } void dumpRose(const RoseBuild &build_base, const RoseEngine *t, diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp new file mode 100644 index 00000000..c9e8d215 --- /dev/null +++ b/src/rose/rose_build_exclusive.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ue2common.h" + +#include "rose_build_exclusive.h" +#include "rose_build_merge.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_execute.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_util.h" +#include "util/clique.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/make_unique.h" + +using namespace std; + +namespace ue2 { + +template +struct RoleChunk { + vector> roles; +}; + +static +CharReach getReachability(const NGHolder &h) { + CharReach cr; + for (const auto &v : vertices_range(h)) { + if (!is_special(v, h)) { + cr |= h[v].char_reach; + } + } + return cr; +} + +template +static +vector> divideIntoChunks(const RoseBuildImpl &build, + set> &roleInfoSet) { + u32 chunkSize = build.cc.grey.tamaChunkSize; + u32 cnt = 1; + vector> chunks; + RoleChunk roleChunk; + for (const auto &roleInfo : roleInfoSet) { + if (cnt == chunkSize) { + cnt -= chunkSize; + chunks.push_back(roleChunk); + roleChunk.roles.clear(); + } + roleChunk.roles.push_back(roleInfo); + cnt++; + } + + if (cnt > 1) { + chunks.push_back(roleChunk); + } + + return chunks; +} + +/* add prefix literals to engine graph */ +static +bool addPrefixLiterals(NGHolder &h, ue2::unordered_set &tailId, + const vector> &triggers) { + DEBUG_PRINTF("add literals to graph\n"); + + NFAVertex start = h.start; + vector heads; + vector tails; + for (const auto &lit : triggers) { + NFAVertex last = start; + if (lit.empty()) { + return false; + } + u32 i = 0; + for (const auto &c : lit) { + DEBUG_PRINTF("lit:%s \n", c.to_string().c_str()); + NFAVertex u = add_vertex(h); + h[u].char_reach = c; + if (!i++) { + heads.push_back(u); + last = u; + continue; + } + add_edge(last, u, h); + last = u; + } + tails.push_back(last); + tailId.insert(h[last].index); + } + + for (auto v : adjacent_vertices_range(start, h)) { + if (v != h.startDs) { + for (auto &t : tails) { + add_edge(t, v, h); + } + } + } + + clear_out_edges(start, h); + add_edge(h.start, h.start, h); + for (auto &t : heads) { + add_edge(start, t, h); + } + + DEBUG_PRINTF("literals addition done\n"); + return true; +} + +/* check if one literal is suffix of another */ +static +bool isSuffix(const vector> &triggers1, + const vector> &triggers2) { + // literal suffix test + for (const auto &lit1 : triggers1) { + for (const auto &lit2 : triggers2) { + const size_t len = min(lit1.size(), lit2.size()); + if (equal(lit1.rbegin(), lit1.rbegin() + len, + lit2.rbegin(), overlaps)) { + return true; + } + } + } + return false; +} + +/* prepare initial infix or suffix graph used for exclusive analysis */ +template +static +u32 prepareRoleGraph(NGHolder &h, const role_id &s1) { + u32 num = 0; + if (s1.castle()) { + num = 
num_vertices(h); + NFAVertex u = add_vertex(h); + h[u].char_reach = s1.castle()->reach(); + add_edge(h.startDs, u, h); + // add self loop to repeat characters + add_edge(u, u, h); + } else if (s1.graph()) { + const NGHolder &g = *s1.graph(); + cloneHolder(h, g); + num = num_vertices(h); + } else { + // only infixes and suffixes with graph properties are possible + // candidates, already filtered out other cases before + // exclusive analysis + assert(0); + } + + return num; +} + +/* get a subset of literal if reset character is found */ +static +vector findStartPos(const CharReach &cr1, + const vector &lit) { + auto it = lit.rbegin(), ite = lit.rend(); + u32 pos = lit.size(); + for (; it != ite; it++) { + if (!overlaps(cr1, *it)) { + break; + } + pos--; + } + + return vector (lit.begin() + pos, lit.end()); +} + +template +static +bool isExclusive(const NGHolder &h, + const u32 num, ue2::unordered_set &tailId, + map> &skipList, + const RoleInfo &role1, + const RoleInfo &role2) { + const u32 id1 = role1.id; + const u32 id2 = role2.id; + + if (contains(skipList, id1) && contains(skipList[id1], id2)) { + return false; + } + + const auto &triggers1 = role1.literals; + const auto &triggers2 = role2.literals; + if (isSuffix(triggers1, triggers2)) { + skipList[id2].insert(id1); + return false; + } + + DEBUG_PRINTF("role id2:%u\n", id2); + const auto &cr1 = role1.cr; + if (overlaps(cr1, role2.last_cr)) { + CharReach cr = cr1 | role1.prefix_cr; + for (const auto &lit : triggers2) { + auto lit1 = findStartPos(cr, lit); + if (lit1.empty()) { + continue; + } + u32 lower_bound = 0; + if (lit1.size() < lit.size()) { + lower_bound = ~0U; + } + + ue2::flat_set states; + for (const auto &v : vertices_range(h)) { + if (h[v].index >= lower_bound || h[v].index < 2) { + states.insert(v); + } + } + + auto activeStates = execute_graph(h, lit1, states); + // Check if has only literal states are on + for (const auto &s : activeStates) { + u32 stateId = h[s].index; + if ((stateId > 1 && stateId <= num) || + contains(tailId, stateId)) { + skipList[id2].insert(id1); + return false; + } + } + } + } + + return true; +} + +template +static +ue2::unordered_set checkExclusivity(const NGHolder &h, + const u32 num, ue2::unordered_set &tailId, + map> &skipList, + const RoleInfo &role1, + const RoleChunk &roleChunk) { + ue2::unordered_set info; + const u32 id1 = role1.id; + for (const auto &role2 : roleChunk.roles) { + const u32 id2 = role2.id; + if (id1 != id2 && isExclusive(h, num, tailId, skipList, + role1, role2)) { + info.insert(id2); + } + } + + return info; +} + +static +void findCliques(const map> &exclusiveGroups, + vector> &exclusive_roles) { + if (exclusiveGroups.empty()) { + return; + } + // Construct the exclusivity graph + map vertex_map; + unique_ptr cg = make_unique(); + + // Add vertices representing infixes/suffixes + for (const auto &e : exclusiveGroups) { + const u32 id = e.first; + CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg); + vertex_map[id] = v1; + } + + // Wire exclusive pairs + for (const auto &e1 : exclusiveGroups) { + const u32 literalId1 = e1.first; + CliqueVertex lv = vertex_map[literalId1]; + const set &exclusiveSet = e1.second; + for (const auto &e2 : exclusiveGroups) { + const u32 literalId2 = e2.first; + if (literalId1 < literalId2 && + contains(exclusiveSet, literalId2)) { + add_edge(lv, vertex_map[literalId2], *cg); + DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2); + } + } + } + + // Find clique groups + const auto &clique = removeClique(*cg); + for (const auto &i : clique) 
{ + DEBUG_PRINTF("cliq:%lu\n", i.size()); + if (i.size() > 1) { + exclusive_roles.push_back(i); + } + } + DEBUG_PRINTF("Clique graph size:%lu\n", exclusive_roles.size()); +} + +static +map> findExclusiveGroups(const RoseBuildImpl &build, + const map> &exclusiveInfo, + const map> &vertex_map, + const bool is_infix) { + map> exclusiveGroups; + for (const auto &e : exclusiveInfo) { + u32 i = e.first; + const auto &s = e.second; + set group; + set q1(vertex_map.at(i).begin(), + vertex_map.at(i).end()); + DEBUG_PRINTF("vertex set:%lu\n", q1.size()); + for (const auto &val : s) { + set q2(vertex_map.at(val).begin(), + vertex_map.at(val).end()); + if (contains(exclusiveInfo.at(val), i) && + (!is_infix || mergeableRoseVertices(build, q1, q2))) { + group.insert(val); + } + } + if (!group.empty()) { + exclusiveGroups[i] = group; + } + } + + return exclusiveGroups; +} + +template +static +bool setTriggerLiterals(RoleInfo &roleInfo, + const map>> &triggers) { + u32 minLiteralLen = ~0U; + for (const auto &tr : triggers) { + for (const auto &lit : tr.second) { + if (lit.empty()) { + return false; + } + minLiteralLen = min(minLiteralLen, (u32)lit.size()); + roleInfo.last_cr |= lit.back(); + for (const auto &c : lit) { + roleInfo.prefix_cr |= c; + } + roleInfo.literals.push_back(lit); + } + } + + if (roleInfo.role.graph()) { + const NGHolder &g = *roleInfo.role.graph(); + roleInfo.cr = getReachability(g); + } else if (roleInfo.role.castle()) { + roleInfo.cr = roleInfo.role.castle()->reach(); + } + + // test the score of this engine + roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen; + if (roleInfo.score < 20) { + return false; + } + + return true; +} + +bool setTriggerLiteralsInfix(RoleInfo &roleInfo, + const map>> &triggers) { + return setTriggerLiterals(roleInfo, triggers); +} + +bool setTriggerLiteralsSuffix(RoleInfo &roleInfo, + const map>> &triggers) { + return setTriggerLiterals(roleInfo, triggers); +} + +template +static +void exclusiveAnalysis(const RoseBuildImpl &build, + const map> &vertex_map, + set> &roleInfoSet, + vector> &exclusive_roles, const bool is_infix) { + const auto &chunks = divideIntoChunks(build, roleInfoSet); + DEBUG_PRINTF("Exclusivity analysis entry\n"); + map> exclusiveInfo; + + for (const auto &roleChunk : chunks) { + map> skipList; + for (const auto &role1 : roleChunk.roles) { + const u32 id1 = role1.id; + const role_id &s1 = role1.role; + const auto &triggers1 = role1.literals; + + NGHolder h; + u32 num = prepareRoleGraph(h, s1); + DEBUG_PRINTF("role id1:%u\n", id1); + unordered_set tailId; + if (!addPrefixLiterals(h, tailId, triggers1)) { + continue; + } + + exclusiveInfo[id1] = checkExclusivity(h, num, tailId, + skipList, role1, roleChunk); + } + } + + // Create final candidate exclusive groups + const auto exclusiveGroups = + findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix); + exclusiveInfo.clear(); + + // Find cliques for each exclusive groups + findCliques(exclusiveGroups, exclusive_roles); +} + +void exclusiveAnalysisInfix(const RoseBuildImpl &build, + const map> &vertex_map, + set> &roleInfoSet, + vector> &exclusive_roles) { + exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, + true); +} + +void exclusiveAnalysisSuffix(const RoseBuildImpl &build, + const map> &vertex_map, + set> &roleInfoSet, + vector> &exclusive_roles) { + exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, + false); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_exclusive.h b/src/rose/rose_build_exclusive.h new file mode 100644 
index 00000000..9cabb1d2 --- /dev/null +++ b/src/rose/rose_build_exclusive.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief exclusive analysis for infix and suffix engines. + * Two engines are considered as exclusive if they can never be alive + * at the same time. This analysis takes advantage of the property of + * triggering literal + engine graph. If the triggering literals of + * two engines can make all the states dead in each other's graph, + * then they are exclusive. + */ +#ifndef ROSE_BUILD_EXCLUSIVE_H +#define ROSE_BUILD_EXCLUSIVE_H + +#include "ue2common.h" + +#include "rose_build_impl.h" +#include "util/alloc.h" +#include "util/charreach.h" + +#include +#include +#include + +namespace ue2 { + +/** brief subengine info including built engine and + * corresponding triggering rose vertices */ +struct ExclusiveSubengine { + aligned_unique_ptr nfa; + std::vector vertices; +}; + +/** \brief exclusive info to build tamarama */ +struct ExclusiveInfo { + // subengine info + std::vector subengines; + // all the report in tamarama + std::set reports; + // assigned queue id + u32 queue; +}; + +/** \brief role info structure for exclusive analysis */ +template +struct RoleInfo { + RoleInfo(role_id role_in, u32 id_in) : role(role_in), id(id_in) {} + bool operator==(const RoleInfo &b) const { + return id == b.id; + } + bool operator!=(const RoleInfo &b) const { return !(*this == b); } + bool operator<(const RoleInfo &b) const { + const RoleInfo &a = *this; + if (a.score != b.score) { + return a.score > b.score; + } + ORDER_CHECK(id); + return false; + } + + std::vector> literals; // prefix literals + CharReach prefix_cr; // reach of prefix literals + CharReach last_cr; // reach of the last character of literals + CharReach cr; // reach of engine graph + const role_id role; // infix or suffix info + const u32 id; // infix or suffix id + u32 score = ~0U; // score for exclusive analysis +}; + +/** + * \brief add triggering literals to infix info. 
+ */ +bool setTriggerLiteralsInfix(RoleInfo &roleInfo, + const std::map>> &triggers); + +/** + * \brief add triggering literals to suffix info. + */ +bool setTriggerLiteralsSuffix(RoleInfo &roleInfo, + const std::map>> &triggers); + +/** + * Exclusive analysis for infix engines. + * + * @param build rose build info mainly used to set exclusive chunk size here + * @param vertex_map mapping between engine id and rose vertices + * related to this engine + * @param roleInfoSet structure contains role properties including infix info, + * triggering literals and literal reachabilities. + * Used for exclusive analysis. + * @param exclusive_roles output mapping between engine id and its exclusive + * group id + */ +void exclusiveAnalysisInfix(const RoseBuildImpl &build, + const std::map> &vertex_map, + std::set> &roleInfoSet, + std::vector> &exclusive_roles); + +/** + * Exclusive analysis for suffix engines. + * + * @param build rose build info mainly used to set exclusive chunk size here + * @param vertex_map mapping between engine id and rose vertices + * related to this engine + * @param roleInfoSet structure contains role properties including suffix info, + * triggering literals and literal reachabilities. + * Used for exclusive analysis. + * @param exclusive_roles output mapping between engine id and its exclusive + * group id + */ +void exclusiveAnalysisSuffix(const RoseBuildImpl &build, + const std::map> &vertex_map, + std::set> &roleInfoSet, + std::vector> &exclusive_roles); + +} // namespace ue2 + +#endif //ROSE_BUILD_EXCLUSIVE_H + diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp new file mode 100644 index 00000000..5e477e3b --- /dev/null +++ b/src/rose/rose_build_groups.cpp @@ -0,0 +1,646 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for analysing literal groups. 
+ */ + +#include "rose_build_groups.h" + +#include +#include + +#include +#include +#include + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +#define ROSE_LONG_LITERAL_LEN 8 + +static +bool superStrong(const rose_literal_id &lit) { + if (lit.s.length() < ROSE_LONG_LITERAL_LEN) { + return false; + } + + const u32 EXPECTED_FDR_BUCKET_LENGTH = 8; + + assert(lit.s.length() >= EXPECTED_FDR_BUCKET_LENGTH); + size_t len = lit.s.length(); + const string &s = lit.s.get_string(); + + for (size_t i = 1; i < EXPECTED_FDR_BUCKET_LENGTH; i++) { + if (s[len - 1 - i] != s[len - 1]) { + return true; /* we have at least some variation in the tail */ + } + } + DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n", + escapeString(s).c_str()); + return false; +} + +static +bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { + /* returns true if it or any of its delay versions have root role */ + for (auto v : build.literal_info[id].vertices) { + if (build.isRootSuccessor(v)) { + NGHolder *h = build.g[v].left.graph.get(); + if (!h || proper_out_degree(h->startDs, *h)) { + return true; + } + } + } + + for (u32 delayed_id : build.literal_info[id].delayed_ids) { + for (auto v : build.literal_info[delayed_id].vertices) { + if (build.isRootSuccessor(v)) { + NGHolder *h = build.g[v].left.graph.get(); + if (!h || proper_out_degree(h->startDs, *h)) { + return true; + } + } + } + } + + return false; +} + +static +bool requires_group_assignment(const rose_literal_id &lit, + const rose_literal_info &info) { + if (lit.delay) { /* we will check the shadow's master */ + return false; + } + + if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) { + return false; + } + + // If we already have a group applied, skip. + if (info.group_mask) { + return false; + } + + if (info.vertices.empty() && info.delayed_ids.empty()) { + DEBUG_PRINTF("literal is good for nothing\n"); + return false; + } + + return true; +} + +static +rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g, + const deque &literal_info, + const bool small_literal_count) { + rose_group local_group = 0; + + for (auto u : inv_adjacent_vertices_range(v, g)) { + /* In small cases, ensure that siblings have the same rose parentage to + * allow rose squashing. In larger cases, don't do this as groups are + * probably too scarce. 
*/ + for (auto w : adjacent_vertices_range(u, g)) { + if (!small_literal_count || g[v].left == g[w].left) { + for (u32 lit_id : g[w].literals) { + local_group |= literal_info[lit_id].group_mask; + } + } else { + DEBUG_PRINTF("not sibling different mother %zu %zu\n", + g[v].idx, g[w].idx); + } + } + } + + return local_group; +} + +/* group constants */ +#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below + * this */ + +static +flat_set getAssociatedVertices(const RoseBuildImpl &build, u32 id) { + flat_set out; + const auto &info = build.literal_info[id]; + insert(&out, info.vertices); + for (const auto &delayed : info.delayed_ids) { + insert(&out, build.literal_info[delayed].vertices); + } + return out; +} + +static +u32 next_available_group(u32 counter, u32 min_start_group) { + counter++; + if (counter == ROSE_GROUPS_MAX) { + DEBUG_PRINTF("resetting groups\n"); + counter = min_start_group; + } + + return counter; +} + +void assignGroupsToLiterals(RoseBuildImpl &build) { + auto &literals = build.literals; + auto &literal_info = build.literal_info; + + bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE; + + map groupCount; /* group index to number of members */ + + u32 counter = 0; + u32 group_always_on = 0; + + // First pass: handle always on literals. + for (const auto &e : literals.right) { + u32 id = e.first; + const rose_literal_id &lit = e.second; + rose_literal_info &info = literal_info[id]; + + if (!requires_group_assignment(lit, info)) { + continue; + } + + // If this literal has a root role, we always have to search for it + // anyway, so it goes in the always-on group. + /* We could end up squashing it if it is followed by a .* */ + if (eligibleForAlwaysOnGroup(build, id)) { + info.group_mask = 1ULL << group_always_on; + groupCount[group_always_on]++; + continue; + } + } + + u32 group_long_lit; + if (groupCount[group_always_on]) { + DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]); + group_long_lit = group_always_on; + counter++; + } else { + group_long_lit = counter; + counter++; + } + + u32 min_start_group = counter; + priority_queue> pq; + + // Second pass: the other literals. 
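
Group bits below min_start_group stay reserved for the always-on (and long-literal) groups, so the round-robin counter driven by next_available_group() wraps back to min_start_group rather than to zero. A minimal standalone sketch of that wrap-around follows; kRoseGroupsMax is assumed to be 64 (one bit per group in the 64-bit group mask) and the helper names are invented for illustration.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Assumed value: one bit per group in the 64-bit group mask.
    constexpr uint32_t kRoseGroupsMax = 64;

    // Advance the round-robin counter, wrapping back to the first
    // non-reserved group rather than to zero.
    uint32_t nextAvailableGroup(uint32_t counter, uint32_t min_start_group) {
        counter++;
        if (counter == kRoseGroupsMax) {
            counter = min_start_group;
        }
        return counter;
    }

    // Hand out one group bit per literal, cycling over the non-reserved
    // groups.
    std::vector<uint64_t> assignRoundRobin(std::size_t num_literals,
                                           uint32_t min_start_group) {
        std::vector<uint64_t> masks(num_literals);
        uint32_t counter = min_start_group;
        for (std::size_t i = 0; i < num_literals; i++) {
            masks[i] = 1ULL << counter;
            counter = nextAvailableGroup(counter, min_start_group);
        }
        return masks;
    }
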
+ for (const auto &e : literals.right) { + u32 id = e.first; + const rose_literal_id &lit = e.second; + rose_literal_info &info = literal_info[id]; + + if (!requires_group_assignment(lit, info)) { + continue; + } + + assert(!eligibleForAlwaysOnGroup(build, id)); + pq.emplace(-(s32)info.vertices.size(), -(s32)lit.s.length(), id); + } + vector long_lits; + while (!pq.empty()) { + u32 id = get<2>(pq.top()); + pq.pop(); + UNUSED const rose_literal_id &lit = literals.right.at(id); + DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, + literal_info[id].vertices.size(), lit.s.length()); + + u8 group_id = 0; + rose_group group = ~0ULL; + for (auto v : getAssociatedVertices(build, id)) { + rose_group local_group = calcLocalGroup(v, build.g, literal_info, + small_literal_count); + group &= local_group; + if (!group) { + break; + } + } + + if (group == ~0ULL) { + goto boring; + } + + group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the + * always_on groups */ + if (!group) { + goto boring; + } + + group_id = ctz64(group); + + /* TODO: fairness */ + DEBUG_PRINTF("picking sibling group %hhd\n", group_id); + literal_info[id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + + continue; + + boring: + /* long literals will either be stuck in a mega group or spread around + * depending on availability */ + if (superStrong(lit)) { + long_lits.push_back(id); + continue; + } + + // Other literals are assigned to our remaining groups round-robin. + group_id = counter; + + DEBUG_PRINTF("picking boring group %hhd\n", group_id); + literal_info[id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + counter = next_available_group(counter, min_start_group); + } + + /* spread long literals out amongst unused groups if any, otherwise stick + * them in the always on the group */ + + if (groupCount[counter]) { + DEBUG_PRINTF("sticking long literals in the image of the always on\n"); + for (u32 lit_id : long_lits) { + literal_info[lit_id].group_mask = 1ULL << group_long_lit; + groupCount[group_long_lit]++; + } + } else { + u32 min_long_counter = counter; + DEBUG_PRINTF("base long lit group = %u\n", min_long_counter); + for (u32 lit_id : long_lits) { + u8 group_id = counter; + literal_info[lit_id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + counter = next_available_group(counter, min_long_counter); + } + } + /* assign delayed literals to the same group as their parent */ + for (const auto &e : literals.right) { + u32 id = e.first; + const rose_literal_id &lit = e.second; + + if (!lit.delay) { + continue; + } + + u32 parent = literal_info[id].undelayed_id; + DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent); + assert(literal_info[parent].undelayed_id == parent); + assert(literal_info[parent].group_mask); + literal_info[id].group_mask = literal_info[parent].group_mask; + /* don't increment the group count - these don't really exist */ + } + + DEBUG_PRINTF("populate group to literal mapping\n"); + for (const u32 id : literals.right | map_keys) { + rose_group groups = literal_info[id].group_mask; + while (groups) { + u32 group_id = findAndClearLSB_64(&groups); + build.group_to_literal[group_id].insert(id); + } + } + + /* find how many groups we allocated */ + for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) { + if (groupCount[i]) { + build.group_end = max(build.group_end, i + 1); + } + } +} + +rose_group RoseBuildImpl::getGroups(RoseVertex v) const { + rose_group groups = 0; + + for (u32 id : g[v].literals) { + u32 lit_id = 
literal_info.at(id).undelayed_id; + + rose_group mygroups = literal_info[lit_id].group_mask; + groups |= mygroups; + } + + return groups; +} + +/** \brief Get the groups of the successor literals of a given vertex. */ +rose_group RoseBuildImpl::getSuccGroups(RoseVertex start) const { + rose_group initialGroups = 0; + + for (auto v : adjacent_vertices_range(start, g)) { + initialGroups |= getGroups(v); + } + + return initialGroups; +} + +/** + * The groups that a role sets are determined by the union of its successor + * literals. Requires the literals already have had groups assigned. + */ +void assignGroupsToRoles(RoseBuildImpl &build) { + auto &g = build.g; + + /* Note: if there is a succ literal in the sidematcher, its successors + * literals must be added instead */ + for (auto v : vertices_range(g)) { + if (build.isAnyStart(v)) { + continue; + } + + const rose_group succ_groups = build.getSuccGroups(v); + g[v].groups |= succ_groups; + + auto ghost_it = build.ghost.find(v); + if (ghost_it != end(build.ghost)) { + /* delayed roles need to supply their groups to the ghost role */ + g[ghost_it->second].groups |= succ_groups; + } + + DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].idx, g[v].groups); + } +} + +/** + * \brief Returns a mapping from each graph vertex v to the intersection of the + * groups switched on by all of the paths leading up to (and including) v from + * the start vertexes. + */ +unordered_map +getVertexGroupMap(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + vector v_order; + v_order.reserve(num_vertices(g)); + + boost::topological_sort(g, back_inserter(v_order), + vertex_index_map(get(&RoseVertexProps::idx, g))); + + unordered_map vertex_group_map; + vertex_group_map.reserve(num_vertices(g)); + + const rose_group initial_groups = build.getInitialGroups(); + + for (const auto &v : boost::adaptors::reverse(v_order)) { + DEBUG_PRINTF("vertex %zu\n", g[v].idx); + + if (build.isAnyStart(v)) { + DEBUG_PRINTF("start vertex, groups=0x%llx\n", initial_groups); + vertex_group_map.emplace(v, initial_groups); + continue; + } + + // To get to this vertex, we must have come through a predecessor, and + // everyone who isn't a start vertex has one. + assert(in_degree(v, g) > 0); + rose_group pred_groups = ~rose_group{0}; + for (auto u : inv_adjacent_vertices_range(v, g)) { + DEBUG_PRINTF("pred %zu\n", g[u].idx); + assert(contains(vertex_group_map, u)); + pred_groups &= vertex_group_map.at(u); + } + + DEBUG_PRINTF("pred_groups=0x%llx\n", pred_groups); + DEBUG_PRINTF("g[v].groups=0x%llx\n", g[v].groups); + + rose_group v_groups = pred_groups | g[v].groups; + DEBUG_PRINTF("v_groups=0x%llx\n", v_groups); + + vertex_group_map.emplace(v, v_groups); + } + + return vertex_group_map; +} + +/** + * \brief Find the set of groups that can be squashed anywhere in the graph, + * either by a literal or by a leftfix. + */ +rose_group getSquashableGroups(const RoseBuildImpl &build) { + rose_group squashable_groups = 0; + for (const auto &info : build.literal_info) { + if (info.squash_group) { + DEBUG_PRINTF("lit squash mask 0x%llx\n", info.group_mask); + squashable_groups |= info.group_mask; + } + } + for (const auto &m : build.rose_squash_masks) { + DEBUG_PRINTF("left squash mask 0x%llx\n", ~m.second); + squashable_groups |= ~m.second; + } + + DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); + return squashable_groups; +} + +/** + * \brief True if every vertex associated with a group also belongs to + * lit_info. 
+ */ +static +bool coversGroup(const RoseBuildImpl &build, + const rose_literal_info &lit_info) { + if (lit_info.vertices.empty()) { + DEBUG_PRINTF("no vertices - does not cover\n"); + return false; + } + + if (!lit_info.group_mask) { + DEBUG_PRINTF("no group - does not cover\n"); + return false; /* no group (not a floating lit?) */ + } + + assert(popcount64(lit_info.group_mask) == 1); + + /* for each lit in group, ensure that vertices are a subset of lit_info's */ + rose_group groups = lit_info.group_mask; + while (groups) { + u32 group_id = findAndClearLSB_64(&groups); + for (u32 id : build.group_to_literal.at(group_id)) { + DEBUG_PRINTF(" checking against friend %u\n", id); + if (!is_subset_of(build.literal_info[id].vertices, + lit_info.vertices)) { + DEBUG_PRINTF("fail\n"); + return false; + } + } + } + + DEBUG_PRINTF("ok\n"); + return true; +} + +static +bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, + rose_group forbidden_squash_group) { + const RoseGraph &g = build.g; + + const rose_literal_info &lit_info = build.literal_info.at(id); + + DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, + dumpString(build.literals.right.at(id).s).c_str(), + lit_info.group_mask); + + if (build.literals.right.at(id).table == ROSE_EVENT) { + DEBUG_PRINTF("event literal, has no groups to squash\n"); + return false; + } + + if (!coversGroup(build, lit_info)) { + DEBUG_PRINTF("does not cover group\n"); + return false; + } + + if (lit_info.group_mask & forbidden_squash_group) { + /* probably a delayed lit */ + DEBUG_PRINTF("skipping as involves a forbidden group\n"); + return false; + } + + // Single-vertex, less constrained case than the multiple-vertex one below. + if (lit_info.vertices.size() == 1) { + const RoseVertex &v = *lit_info.vertices.begin(); + + if (build.hasDelayPred(v)) { /* due to rebuild issues */ + return false; + } + + /* there are two ways to be a group squasher: + * 1) only care about the first accepted match + * 2) can only match once after a pred match + * + * (2) requires analysis of the infix before v and is not implemented, + * TODO + */ + + /* Case 1 */ + + // Can't squash cases with accepts + if (!g[v].reports.empty()) { + return false; + } + + /* Can't squash cases with a suffix without analysis of the suffix. + * TODO: look at suffixes */ + if (g[v].suffix) { + return false; + } + + // Out-edges must have inf max bound, + no other shenanigans */ + for (const auto &e : out_edges_range(v, g)) { + if (g[e].maxBound != ROSE_BOUND_INF) { + return false; + } + + if (g[target(e, g)].left) { + return false; /* is an infix rose trigger, TODO: analysis */ + } + } + + DEBUG_PRINTF("%u is a path 1 group squasher\n", id); + return true; + + /* note: we could also squash the groups of its preds (if nobody else is + * using them. TODO. */ + } + + // Multiple-vertex case + for (auto v : lit_info.vertices) { + assert(!build.isAnyStart(v)); + + // Can't squash cases with accepts + if (!g[v].reports.empty()) { + return false; + } + + // Suffixes and leftfixes are out too as first literal may not match + // for everyone. + if (!g[v].isBoring()) { + return false; + } + + /* TODO: checks are solid but we should explain */ + if (build.hasDelayPred(v) || build.hasAnchoredTablePred(v)) { + return false; + } + + // Out-edges must have inf max bound and not directly lead to another + // vertex with this group, e.g. 'foobar.*foobar'. 
+ for (const auto &e : out_edges_range(v, g)) { + if (g[e].maxBound != ROSE_BOUND_INF) { + return false; + } + RoseVertex t = target(e, g); + + if (g[t].left) { + return false; /* is an infix rose trigger */ + } + + for (u32 lit_id : g[t].literals) { + if (build.literal_info[lit_id].group_mask & + lit_info.group_mask) { + return false; + } + } + } + + // In-edges must all be dot-stars with no overlap at all, as overlap + // also causes history to be used. + /* Different tables are already forbidden by previous checks */ + for (const auto &e : in_edges_range(v, g)) { + if (!(g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF)) { + return false; + } + + // Check overlap, if source was a literal. + RoseVertex u = source(e, g); + if (build.maxLiteralOverlap(u, v)) { + return false; + } + } + } + + DEBUG_PRINTF("literal %u is a multi-vertex group squasher\n", id); + return true; +} + +void findGroupSquashers(RoseBuildImpl &build) { + rose_group forbidden_squash_group = 0; + for (const auto &e : build.literals.right) { + if (e.second.delay) { + forbidden_squash_group |= build.literal_info[e.first].group_mask; + } + } + + for (u32 id = 0; id < build.literal_info.size(); id++) { + if (isGroupSquasher(build, id, forbidden_squash_group)) { + build.literal_info[id].squash_group = true; + } + } +} + +} // namespace ue2 diff --git a/src/rose/rose_build_groups.h b/src/rose/rose_build_groups.h new file mode 100644 index 00000000..3ab5eb78 --- /dev/null +++ b/src/rose/rose_build_groups.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for analysing literal groups. 
+ */ + +#ifndef ROSE_BUILD_GROUPS_H +#define ROSE_BUILD_GROUPS_H + +#include "rose_build_impl.h" +#include "util/ue2_containers.h" + +namespace ue2 { + +unordered_map +getVertexGroupMap(const RoseBuildImpl &build); + +rose_group getSquashableGroups(const RoseBuildImpl &build); + +void assignGroupsToLiterals(RoseBuildImpl &build); + +void assignGroupsToRoles(RoseBuildImpl &build); + +void findGroupSquashers(RoseBuildImpl &build); + +} // namespace ue2 + +#endif // ROSE_BUILD_GROUPS_H + diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 4122e0bd..d239a698 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -60,17 +60,19 @@ struct BoundaryReports; struct CastleProto; struct CompileContext; class ReportManager; +class SmallWriteBuild; class SomSlotManager; struct suffix_id { suffix_id(const RoseSuffixInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), - h(in.haig.get()), dfa_min_width(in.dfa_min_width), + h(in.haig.get()), t(in.tamarama.get()), + dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { assert(!g || g->kind == NFA_SUFFIX); } bool operator==(const suffix_id &b) const { - bool rv = g == b.g && c == b.c && h == b.h && d == b.d; + bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; assert(!rv || dfa_min_width == b.dfa_min_width); assert(!rv || dfa_max_width == b.dfa_max_width); return rv; @@ -82,6 +84,7 @@ struct suffix_id { ORDER_CHECK(c); ORDER_CHECK(d); ORDER_CHECK(h); + ORDER_CHECK(t); return false; } @@ -113,6 +116,22 @@ struct suffix_id { } return c; } + TamaProto *tamarama() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return t; + } + const TamaProto *tamarama() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return t; + } + + raw_som_dfa *haig() { return h; } const raw_som_dfa *haig() const { return h; } raw_dfa *dfa() { return d; } @@ -125,6 +144,7 @@ private: CastleProto *c; raw_dfa *d; raw_som_dfa *h; + TamaProto *t; depth dfa_min_width; depth dfa_max_width; @@ -150,7 +170,7 @@ struct left_id { : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), h(in.haig.get()), dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { - assert(!g || !generates_callbacks(*g)); + assert(!g || !has_managed_reports(*g)); } bool operator==(const left_id &b) const { bool rv = g == b.g && c == b.c && h == b.h && d == b.d; @@ -257,6 +277,17 @@ struct rose_literal_id { u32 distinctiveness; size_t elength(void) const { return s.length() + delay; } + size_t elength_including_mask(void) const { + size_t mask_len = msk.size(); + for (u8 c : msk) { + if (!c) { + mask_len--; + } else { + break; + } + } + return MAX(mask_len, s.length()) + delay; + } }; static inline @@ -307,7 +338,7 @@ struct OutfixInfo { template explicit OutfixInfo(std::unique_ptr x) : proto(std::move(x)) {} - explicit OutfixInfo(MpvProto mpv) : proto(std::move(mpv)) {} + explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} u32 get_queue(QueueIndexFactory &qif); @@ -317,14 +348,14 @@ struct OutfixInfo { } bool is_nonempty_mpv() const { - auto *mpv = boost::get(&proto); - return mpv && !mpv->empty(); + auto *m = boost::get(&proto); + return m && !m->empty(); } bool is_dead() const { - auto *mpv = boost::get(&proto); - if (mpv) { - return mpv->empty(); + auto *m = boost::get(&proto); + if (m) { + return m->empty(); } return boost::get(&proto) != nullptr; } @@ -396,7 +427,7 @@ std::set 
all_reports(const OutfixInfo &outfix); // Concrete impl class class RoseBuildImpl : public RoseBuild { public: - RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, + RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, const CompileContext &cc, const BoundaryReports &boundary); ~RoseBuildImpl() override; @@ -439,10 +470,6 @@ public: // Find the maximum bound on the edges to this vertex's successors. u32 calcSuccMaxBound(RoseVertex u) const; - // Assign roles to groups, writing the groups bitset into each role in the - // graph. - void assignGroupsToRoles(); - /* Returns the ID of the given literal in the literal map, adding it if * necessary. */ u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); @@ -474,8 +501,6 @@ public: bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; bool hasAnchoredTablePred(RoseVertex v) const; - void assignGroupsToLiterals(void); - // Is the given vertex a successor of either root or anchored_root? bool isRootSuccessor(const RoseVertex &v) const; /* Is the given vertex a successor of something other than root or @@ -534,13 +559,18 @@ public: std::map>> anchored_nfas; std::map> anchored_simple; std::map > group_to_literal; - u32 group_weak_end; u32 group_end; u32 anchored_base_id; u32 ematcher_region_size; /**< number of bytes the eod table runs over */ + /** \brief Mapping from leftfix to queue ID (used in dump code). */ + unordered_map leftfix_queue_map; + + /** \brief Mapping from suffix to queue ID (used in dump code). */ + unordered_map suffix_queue_map; + /** \brief Mapping from anchored literal ID to the original literal suffix * present when the literal was added to the literal matcher. Used for * overlap calculation in history assignment. */ @@ -566,6 +596,7 @@ public: QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; + SmallWriteBuild &smwr; const BoundaryReports &boundary; private: diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index 54c01e08..ba77b402 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -538,6 +538,36 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, } } +static +bool hasSingleFloatingStart(const NGHolder &g) { + NFAVertex initial = NGHolder::null_vertex(); + for (auto v : adjacent_vertices_range(g.startDs, g)) { + if (v == g.startDs) { + continue; + } + if (initial != NGHolder::null_vertex()) { + DEBUG_PRINTF("more than one start\n"); + return false; + } + initial = v; + } + + if (initial == NGHolder::null_vertex()) { + DEBUG_PRINTF("no floating starts\n"); + return false; + } + + // Anchored start must have no successors other than startDs and initial. + for (auto v : adjacent_vertices_range(g.start, g)) { + if (v != initial && v != g.startDs) { + DEBUG_PRINTF("anchored start\n"); + return false; + } + } + + return true; +} + static bool getTransientPrefixReach(const NGHolder &g, u32 lag, map &look) { @@ -546,15 +576,9 @@ bool getTransientPrefixReach(const NGHolder &g, u32 lag, return false; } - // Currently we don't handle anchored prefixes, as we would need to be able - // to represent the bounds from the anchor as well. 
- if (out_degree(g.start, g) != 1) { - DEBUG_PRINTF("anchored\n"); - return false; - } - - if (out_degree(g.startDs, g) != 2) { - DEBUG_PRINTF("more than one start\n"); + // Must be a floating chain wired to startDs. + if (!hasSingleFloatingStart(g)) { + DEBUG_PRINTF("not a single floating start\n"); return false; } @@ -569,12 +593,28 @@ bool getTransientPrefixReach(const NGHolder &g, u32 lag, look[0 - i] = g[v].char_reach; - if (in_degree(v, g) != 1) { + NFAVertex next = NGHolder::null_vertex(); + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + continue; // Benign, checked by hasSingleFloatingStart + } + if (next == NGHolder::null_vertex()) { + next = u; + continue; + } DEBUG_PRINTF("branch\n"); return false; } - v = *(inv_adjacent_vertices(v, g).first); + if (next == NGHolder::null_vertex() || next == v) { + DEBUG_PRINTF("no predecessor or only self-loop\n"); + // This graph is malformed -- all vertices in a graph that makes it + // to this analysis should have predecessors. + assert(0); + return false; + } + + v = next; i++; } @@ -644,6 +684,10 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, lookaround.reserve(look.size()); for (const auto &m : look) { + if (m.first < -128 || m.first > 127) { + DEBUG_PRINTF("range too big\n"); + return false; + } s8 offset = verify_s8(m.first); lookaround.emplace_back(offset, m.second); } diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 83c49556..2eb70f60 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -38,12 +38,14 @@ #include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" +#include "nfa/nfa_api_queue.h" #include "util/charreach_util.h" #include "util/compile_context.h" #include "util/compile_error.h" #include "util/dump_charclass.h" #include "util/report.h" #include "util/report_manager.h" +#include "util/verify_types.h" #include "ue2common.h" #include @@ -333,6 +335,80 @@ bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, return true; } +void findMoreLiteralMasks(RoseBuildImpl &build) { + if (!build.cc.grey.roseHamsterMasks) { + return; + } + + vector candidates; + for (const auto &e : build.literals.right) { + const u32 id = e.first; + const auto &lit = e.second; + + // This pass takes place before final IDs are assigned to literals. + assert(!build.hasFinalId(id)); + + if (lit.delay || build.isDelayed(id)) { + continue; + } + + // Literal masks are only allowed for literals that will end up in an + // HWLM table. + switch (lit.table) { + case ROSE_FLOATING: + case ROSE_EOD_ANCHORED: + case ROSE_ANCHORED_SMALL_BLOCK: + break; + default: + continue; + } + + if (!lit.msk.empty()) { + continue; + } + + const auto &lit_info = build.literal_info.at(id); + if (lit_info.requires_benefits) { + continue; + } + candidates.push_back(id); + } + + for (const u32 &id : candidates) { + const auto &lit = build.literals.right.at(id); + auto &lit_info = build.literal_info.at(id); + + vector msk, cmp; + if (!findHamsterMask(build, lit, lit_info, msk, cmp)) { + continue; + } + assert(!msk.empty()); + DEBUG_PRINTF("found advisory mask for lit_id=%u (%s)\n", id, + dumpString(lit.s).c_str()); + u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); + assert(new_id != id); + DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id); + + // Note that our new literal may already exist and have vertices, etc. 
+ // We assume that this transform is happening prior to group assignment. + assert(lit_info.group_mask == 0); + auto &new_info = build.literal_info.at(new_id); + + // Move the vertices across. + new_info.vertices.insert(begin(lit_info.vertices), + end(lit_info.vertices)); + for (auto v : lit_info.vertices) { + build.g[v].literals.erase(id); + build.g[v].literals.insert(new_id); + } + lit_info.vertices.clear(); + + // Preserve other properties. + new_info.requires_explode = lit_info.requires_explode; + new_info.requires_benefits = lit_info.requires_benefits; + } +} + static bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, const rose_literal_info &info) { @@ -340,8 +416,8 @@ bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, return false; } - auto is_simple_exhaustible = [&build](ReportID id) { - const Report &report = build.rm.getReport(id); + auto is_simple_exhaustible = [&build](ReportID rid) { + const Report &report = build.rm.getReport(rid); return isSimpleExhaustible(report); }; @@ -359,7 +435,7 @@ bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, // Called by isNoRunsLiteral below. static -bool isNoRunsVertex(const RoseBuildImpl &build, NFAVertex u) { +bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { const RoseGraph &g = build.g; if (!g[u].isBoring()) { DEBUG_PRINTF("u=%zu is not boring\n", g[u].idx); @@ -445,8 +521,111 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return true; } +static +const raw_puff &getChainedPuff(const RoseBuildImpl &build, + const Report &report) { + DEBUG_PRINTF("chained report, event %u\n", report.onmatch); + + // MPV has already been moved to the outfixes vector. + assert(!build.mpv_outfix); + + auto mpv_outfix_it = find_if( + begin(build.outfixes), end(build.outfixes), + [](const OutfixInfo &outfix) { return outfix.is_nonempty_mpv(); }); + assert(mpv_outfix_it != end(build.outfixes)); + const auto *mpv = mpv_outfix_it->mpv(); + + u32 puff_index = report.onmatch - MQE_TOP_FIRST; + assert(puff_index < mpv->triggered_puffettes.size()); + return mpv->triggered_puffettes.at(puff_index); +} + +/** + * \brief Returns a conservative estimate of the minimum offset at which the + * given literal can lead to a report. + * + * TODO: This could be made more precise by calculating a "distance to accept" + * for every vertex in the graph; right now we're only accurate for leaf nodes. + */ +static +u64a literalMinReportOffset(const RoseBuildImpl &build, + const rose_literal_id &lit, + const rose_literal_info &info) { + const auto &g = build.g; + + const u32 lit_len = verify_u32(lit.elength()); + + u64a lit_min_offset = UINT64_MAX; + + for (const auto &v : info.vertices) { + DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].idx, g[v].min_offset); + + u64a vert_offset = g[v].min_offset; + + if (vert_offset >= lit_min_offset) { + continue; + } + + u64a min_offset = UINT64_MAX; + + for (const auto &id : g[v].reports) { + const Report &report = build.rm.getReport(id); + DEBUG_PRINTF("report id %u, min offset=%llu\n", id, + report.minOffset); + if (report.type == INTERNAL_ROSE_CHAIN) { + // This vertex triggers an MPV, which will fire reports after + // repeating for a while. + assert(report.minOffset == 0); // Should not have bounds. 
+ const auto &puff = getChainedPuff(build, report); + DEBUG_PRINTF("chained puff repeats=%u\n", puff.repeats); + const Report &puff_report = build.rm.getReport(puff.report); + DEBUG_PRINTF("puff report %u, min offset=%llu\n", puff.report, + puff_report.minOffset); + min_offset = min(min_offset, max(vert_offset + puff.repeats, + puff_report.minOffset)); + } else { + DEBUG_PRINTF("report min offset=%llu\n", report.minOffset); + min_offset = min(min_offset, max(vert_offset, + report.minOffset)); + } + } + + if (g[v].suffix) { + depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); + assert(suffix_width.is_reachable()); + DEBUG_PRINTF("suffix with width %s\n", suffix_width.str().c_str()); + min_offset = min(min_offset, vert_offset + suffix_width); + } + + if (!isLeafNode(v, g) || min_offset == UINT64_MAX) { + min_offset = vert_offset; + } + + lit_min_offset = min(lit_min_offset, min_offset); + } + + // If this literal is the undelayed literal corresponding to some delayed + // literals, we must take their minimum offsets into account. + for (const u32 &delayed_id : info.delayed_ids) { + const auto &delayed_lit = build.literals.right.at(delayed_id); + const auto &delayed_info = build.literal_info.at(delayed_id); + u64a delayed_min_offset = literalMinReportOffset(build, delayed_lit, + delayed_info); + DEBUG_PRINTF("delayed_id=%u, min_offset = %llu\n", delayed_id, + delayed_min_offset); + lit_min_offset = min(lit_min_offset, delayed_min_offset); + } + + // If we share a vertex with a shorter literal, our min offset might dip + // below the length of this one. + lit_min_offset = max(lit_min_offset, u64a{lit_len}); + + return lit_min_offset; +} + vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table) { + rose_literal_table table, + u32 max_offset) { vector lits; for (const auto &e : build.literals.right) { @@ -472,33 +651,40 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); - vector msk = e.second.msk; // copy - vector cmp = e.second.cmp; // copy - - if (msk.empty()) { - // Try and pick up an advisory mask.
- if (!findHamsterMask(build, e.second, info, msk, cmp)) { - msk.clear(); cmp.clear(); - } else { - DEBUG_PRINTF("picked up late mask %zu\n", msk.size()); + if (max_offset != ROSE_BOUND_INF) { + u64a min_report = literalMinReportOffset(build, e.second, info); + if (min_report > max_offset) { + DEBUG_PRINTF("min report offset=%llu exceeds max_offset=%u\n", + min_report, max_offset); + continue; } } + const vector &msk = e.second.msk; + const vector &cmp = e.second.cmp; + bool noruns = isNoRunsLiteral(build, id, info); if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); - const vector empty_msk; // msk/cmp will be empty case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); for (; cit != cite; ++cit) { + string s = *cit; + bool nocase = false; + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, " "cmp=%s (exploded)\n", - final_id, escapeString(lit.get_string()).c_str(), - 0, noruns, dumpMask(msk).c_str(), - dumpMask(cmp).c_str()); - lits.emplace_back(*cit, false, noruns, final_id, groups, - empty_msk, empty_msk); + final_id, escapeString(s).c_str(), nocase, noruns, + dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + lits.emplace_back(move(s), nocase, noruns, final_id, groups, + msk, cmp); } } else { const std::string &s = lit.get_string(); @@ -514,8 +700,7 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, continue; } - lits.emplace_back(lit.get_string(), lit.any_nocase(), noruns, - final_id, groups, msk, cmp); + lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp); } } @@ -523,10 +708,12 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + rose_group *fgroups, size_t *fsize, size_t *historyRequired, size_t *streamStateRequired) { *fsize = 0; + *fgroups = 0; auto fl = fillHamsterLiteralList(build, ROSE_FLOATING); if (fl.empty()) { @@ -534,6 +721,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, return nullptr; } + for (const hwlmLiteral &hlit : fl) { + *fgroups |= hlit.groups; + } + hwlmStreamingControl ctl; hwlmStreamingControl *ctlp; if (build.cc.streaming) { @@ -587,7 +778,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList(build, ROSE_FLOATING); + auto lits = fillHamsterLiteralList(build, ROSE_FLOATING, + ROSE_SMALL_BLOCK_LEN); if (lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; @@ -596,8 +788,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto anchored_lits = - fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK); + auto anchored_lits = fillHamsterLiteralList(build, + ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN); if (anchored_lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -605,15 +797,10 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); - // Remove literals that are longer than our small block length, as they can - // never match. TODO: improve by removing literals that have a min match - // offset greater than ROSE_SMALL_BLOCK_LEN, which will catch anchored cases - // with preceding dots that put them over the limit. 
- auto longer_than_limit = [](const hwlmLiteral &lit) { - return lit.s.length() > ROSE_SMALL_BLOCK_LEN; - }; - lits.erase(remove_if(lits.begin(), lits.end(), longer_than_limit), - lits.end()); + // None of our literals should be longer than the small block limit. + assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) { + return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; + })); if (lits.empty()) { DEBUG_PRINTF("no literals shorter than small block len\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 9781f514..2a225bf5 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -44,10 +44,17 @@ namespace ue2 { struct hwlmLiteral; +/** + * \brief Build up a vector of literals for the given table. + * + * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can + * only lead to a pattern match after max_offset may be excluded. + */ std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table); + rose_literal_table table, u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + rose_group *fgroups, size_t *fsize, size_t *historyRequired, size_t *streamStateRequired); @@ -58,6 +65,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize); +void findMoreLiteralMasks(RoseBuildImpl &build); + } // namespace ue2 #endif // ROSE_BUILD_MATCHERS_H diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index a10bc86e..759e0dbe 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -338,7 +338,7 @@ void findUncalcLeavesCandidates(RoseBuildImpl &tbi, deque &ordered) { const RoseGraph &g = tbi.g; - vector suffix_vertices; // vertices with suffix graphs + vector suffix_vertices; // vertices with suffix graphs ue2::unordered_map fcount; // ref count per graph for (auto v : vertices_range(g)) { diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index b16e3a69..c2f9f580 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -34,6 +34,7 @@ #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api.h" #include "nfa/rdfa.h" +#include "nfa/tamaramacompile.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_reports.h" @@ -66,7 +67,9 @@ namespace ue2 { // just to get it out of the header RoseBuild::~RoseBuild() { } -RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, +RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, + SomSlotManager &ssm_in, + SmallWriteBuild &smwr_in, const CompileContext &cc_in, const BoundaryReports &boundary_in) : cc(cc_in), @@ -75,7 +78,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, vertexIndex(0), delay_base_id(MO_INVALID_IDX), hasSom(false), - group_weak_end(0), group_end(0), anchored_base_id(MO_INVALID_IDX), ematcher_region_size(0), @@ -83,6 +85,7 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, SomSlotManager &ssm_in, max_rose_anchored_floating_overlap(0), rm(rm_in), ssm(ssm_in), + smwr(smwr_in), boundary(boundary_in), next_nfa_report(0) { // add root vertices to graph @@ -233,10 +236,12 @@ size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const { } // RoseBuild factory -unique_ptr makeRoseBuilder(ReportManager &rm, SomSlotManager &ssm, +unique_ptr makeRoseBuilder(ReportManager &rm, + SomSlotManager &ssm, + SmallWriteBuild &smwr, const 
CompileContext &cc, const BoundaryReports &boundary) { - return ue2::make_unique(rm, ssm, cc, boundary); + return ue2::make_unique(rm, ssm, smwr, cc, boundary); } size_t roseSize(const RoseEngine *t) { @@ -538,11 +543,11 @@ static bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, const Grey &grey) { /* TODO: tighten */ - NFAVertex seen_vert = NFAGraph::null_vertex(); + NFAVertex seen_vert = NGHolder::null_vertex(); for (auto v : inv_adjacent_vertices_range(h.accept, h)) { if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NFAGraph::null_vertex()) { + if (seen_vert != NGHolder::null_vertex()) { return true; } seen_vert = v; @@ -551,7 +556,7 @@ bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NFAGraph::null_vertex()) { + if (seen_vert != NGHolder::null_vertex()) { return true; } seen_vert = v; @@ -581,8 +586,12 @@ public: bool requiresDedupeSupport( const ue2::flat_set &reports) const override; +private: + bool hasSafeMultiReports(const ue2::flat_set &reports) const; + const RoseBuildImpl &tbi; - map> vert_map; + map> vert_map; //!< ordinary literals + map> sb_vert_map; //!< small block literals map> suffix_map; map> outfix_map; map> puff_map; @@ -602,10 +611,14 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) set suffixes; for (auto v : vertices_range(g)) { - // Literals in the small block table don't count as dupes: although - // they have copies in the anchored table, the two are never run in the - // same runtime invocation. All other literals count, though. - if (!tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { + // Literals in the small block table are "shadow" copies of literals in + // the other tables that do not run in the same runtime invocation. + // Dedupe key assignment will be taken care of by the real literals. + if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { + for (const auto &report_id : g[v].reports) { + sb_vert_map[report_id].insert(v); + } + } else { for (const auto &report_id : g[v].reports) { vert_map[report_id].insert(v); } @@ -673,19 +686,54 @@ bool literalsCouldRace(const rose_literal_id &lit1, return r.first == smaller->rend(); } +bool RoseDedupeAuxImpl::hasSafeMultiReports( + const flat_set &reports) const { + if (reports.size() <= 1) { + return true; + } + + /* We have more than one ReportID corresponding to the external ID that is + * presented to the user. These may differ in offset adjustment, bounds + * checks, etc. */ + + /* TODO: work out if these differences will actually cause problems */ + + /* One common case where we know we don't have a problem is if there are + * precisely two reports, one for the main Rose path and one for the + * "small block matcher" path. 
*/ + if (reports.size() == 2) { + ReportID id1 = *reports.begin(); + ReportID id2 = *reports.rbegin(); + + bool has_verts_1 = contains(vert_map, id1); + bool has_verts_2 = contains(vert_map, id2); + bool has_sb_verts_1 = contains(sb_vert_map, id1); + bool has_sb_verts_2 = contains(sb_vert_map, id2); + + if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { + DEBUG_PRINTF("two reports, one full and one small block: ok\n"); + return true; + } + } + + DEBUG_PRINTF("more than one report\n"); + return false; +} + bool RoseDedupeAuxImpl::requiresDedupeSupport( const ue2::flat_set &reports) const { /* TODO: this could be expanded to check for offset or character constraints */ + DEBUG_PRINTF("reports: %s\n", as_string_list(reports).c_str()); + const RoseGraph &g = tbi.g; bool has_suffix = false; bool has_outfix = false; - if (reports.size() > 1) { - /* may have offset adjust */ - /* TODO: work out if the offset adjust will actually cause problems */ + if (!hasSafeMultiReports(reports)) { + DEBUG_PRINTF("multiple reports not safe\n"); return true; } @@ -697,7 +745,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( if (contains(vert_map, r)) { insert(&roles, vert_map.at(r)); } - if (contains(suffix_map, r)) { insert(&suffixes, suffix_map.at(r)); } @@ -880,7 +927,7 @@ namespace { class OutfixAllReports : public boost::static_visitor> { public: set operator()(const boost::blank &) const { - return {}; + return set(); } template @@ -909,7 +956,7 @@ set all_reports(const OutfixInfo &outfix) { bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const { return top == b.top && graph == b.graph && castle == b.castle && - rdfa == b.rdfa && haig == b.haig; + rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama; } bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const { @@ -919,6 +966,7 @@ bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const { ORDER_CHECK(castle); ORDER_CHECK(haig); ORDER_CHECK(rdfa); + ORDER_CHECK(tamarama); assert(a.dfa_min_width == b.dfa_min_width); assert(a.dfa_max_width == b.dfa_max_width); return false; @@ -931,13 +979,16 @@ void RoseSuffixInfo::reset(void) { castle.reset(); rdfa.reset(); haig.reset(); + tamarama.reset(); dfa_min_width = 0; dfa_max_width = depth::infinity(); } std::set all_reports(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); - if (s.graph()) { + if (s.tamarama()) { + return all_reports(*s.tamarama()); + } else if (s.graph()) { return all_reports(*s.graph()); } else if (s.castle()) { return all_reports(*s.castle()); @@ -1149,6 +1200,7 @@ void LeftEngInfo::reset(void) { castle.reset(); dfa.reset(); haig.reset(); + tamarama.reset(); lag = 0; leftfix_report = MO_INVALID_IDX; dfa_min_width = 0; @@ -1187,6 +1239,11 @@ u32 roseQuality(const RoseEngine *t) { always_run++; } + if (t->eagerIterOffset) { + /* eager prefixes are always run */ + always_run++; + } + const HWLM *ftable = getFLiteralMatcher(t); if (ftable) { /* TODO: ignore conditional ftables, or ftables beyond smwr region */ @@ -1227,30 +1284,6 @@ u32 roseQuality(const RoseEngine *t) { return 1; } -/** \brief Add a SMWR engine to the given RoseEngine. 
*/ -aligned_unique_ptr roseAddSmallWrite(const RoseEngine *t, - const SmallWriteEngine *smwr) { - assert(t); - assert(smwr); - - const u32 mainSize = roseSize(t); - const u32 smallWriteSize = smwrSize(smwr); - - u32 smwrOffset = ROUNDUP_CL(mainSize); - u32 newSize = smwrOffset + smallWriteSize; - - aligned_unique_ptr t2 = - aligned_zmalloc_unique(newSize); - char *ptr = (char *)t2.get(); - memcpy(ptr, t, mainSize); - memcpy(ptr + smwrOffset, smwr, smallWriteSize); - - t2->smallWriteOffset = smwrOffset; - t2->size = newSize; - - return t2; -} - #ifndef NDEBUG /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose * graph are implementable. */ diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 1f873403..c2366f0e 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -156,22 +156,31 @@ private: ue2::unordered_set hash_cont; /* member checks */ }; -/** - * \brief Mapping from a particular rose engine to a set of associated - * vertices. - */ -typedef ue2::unordered_map > revRoseMap; +struct RoseAliasingInfo { + RoseAliasingInfo(const RoseBuildImpl &build) { + const auto &g = build.g; -} // namespace + // Populate reverse leftfix map. + for (auto v : vertices_range(g)) { + if (g[v].left) { + rev_leftfix[g[v].left].insert(v); + } + } -static -void populateRevRoseMap(const RoseGraph &g, revRoseMap *out) { - for (auto v : vertices_range(g)) { - if (g[v].left) { - (*out)[g[v].left].insert(v); + // Populate reverse ghost vertex map. + for (const auto &m : build.ghost) { + rev_ghost[m.second].insert(m.first); } } -} + + /** \brief Mapping from leftfix to vertices. */ + ue2::unordered_map> rev_leftfix; + + /** \brief Mapping from undelayed ghost to delayed vertices. */ + ue2::unordered_map> rev_ghost; +}; + +} // namespace // Check successor set: must lead to the same vertices via edges with the // same properties. @@ -262,7 +271,8 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) { } static -bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { +bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, + const RoseGraph &g) { for (const auto &e_a : out_edges_range(a, g)) { bool exists; RoseEdge e; @@ -283,7 +293,8 @@ bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) } static -bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) { +bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, + const RoseGraph &g) { for (const auto &e_a : in_edges_range(a, g)) { bool exists; RoseEdge e; @@ -309,23 +320,24 @@ bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, const RoseGraph &g) } static -bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &tbi) { - const auto &lits_a = tbi.g[a].literals; - const auto &lits_b = tbi.g[b].literals; +bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) { + const auto &lits_a = build.g[a].literals; + const auto &lits_b = build.g[b].literals; assert(!lits_a.empty() && !lits_b.empty()); // If both vertices have only pseudo-dotstar in-edges, we can merge // literals of different lengths and can avoid the check below. - if (tbi.hasOnlyPseudoStarInEdges(a) && tbi.hasOnlyPseudoStarInEdges(b)) { + if (build.hasOnlyPseudoStarInEdges(a) && + build.hasOnlyPseudoStarInEdges(b)) { DEBUG_PRINTF("both have pseudo-dotstar in-edges\n"); return true; } // Otherwise, all the literals involved must have the same length. 
for (u32 a_id : lits_a) { - const rose_literal_id &la = tbi.literals.right.at(a_id); + const rose_literal_id &la = build.literals.right.at(a_id); for (u32 b_id : lits_b) { - const rose_literal_id &lb = tbi.literals.right.at(b_id); + const rose_literal_id &lb = build.literals.right.at(b_id); if (la.elength() != lb.elength()) { DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(), @@ -339,8 +351,8 @@ bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &tbi) { } static -bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &tbi) { - const RoseVertexProps &props = tbi.g[v]; +bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &build) { + const RoseVertexProps &props = build.g[v]; // Must have literals. if (props.literals.empty()) { @@ -348,14 +360,43 @@ bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &tbi) { } assert(*props.literals.begin() != MO_INVALID_IDX); + return true; +} - // Any vertex involved in a "ghost" relationship has already been disallowed +static +bool sameGhostProperties(const RoseBuildImpl &build, + const RoseAliasingInfo &rai, RoseVertex a, + RoseVertex b) { + // If these are ghost mapping keys, then they must map to the same vertex. + if (contains(build.ghost, a) || contains(build.ghost, b)) { + DEBUG_PRINTF("checking ghost key compat\n"); + if (!contains(build.ghost, a) || !contains(build.ghost, b)) { + DEBUG_PRINTF("missing ghost mapping\n"); + return false; + } + if (build.ghost.at(a) != build.ghost.at(b)) { + DEBUG_PRINTF("diff ghost mapping\n"); + return false; + } + DEBUG_PRINTF("ghost mappings ok\n"); + return true; + } + + // If they are ghost vertices, then they must have the same literals. + if (contains(rai.rev_ghost, a) || contains(rai.rev_ghost, b)) { + if (!contains(rai.rev_ghost, a) || !contains(rai.rev_ghost, b)) { + DEBUG_PRINTF("missing ghost reverse mapping\n"); + return false; + } + return build.g[a].literals == build.g[b].literals; + } return true; } static -bool sameRoleProperties(const RoseBuildImpl &build, RoseVertex a, RoseVertex b) { +bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai, + RoseVertex a, RoseVertex b) { const RoseGraph &g = build.g; const RoseVertexProps &aprops = g[a], &bprops = g[b]; @@ -380,13 +421,17 @@ bool sameRoleProperties(const RoseBuildImpl &build, RoseVertex a, RoseVertex b) return false; } + if (!sameGhostProperties(build, rai, a, b)) { + return false; + } + /* "roses are mergeable" check are handled elsewhere */ return true; } -/* Checks compatibility of role properties if we require that two roles are right - * equiv. */ +/* Checks compatibility of role properties if we require that two roles are + * right equiv. */ static bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a, RoseVertex b) { @@ -448,15 +493,6 @@ size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) { return val; } -static -void removeVertexFromMaps(RoseVertex v, RoseBuildImpl &build, revRoseMap &rrm) { - if (build.g[v].left) { - const left_id left(build.g[v].left); - assert(contains(rrm[left], v)); - rrm[left].erase(v); - } -} - static void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge, const RoseEdge *to_edge, RoseGraph &g) { @@ -485,7 +521,7 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { // Cache b's in-edges so we can look them up by source quickly. 
for (const auto &e : in_edges_range(b, g)) { RoseVertex u = source(e, g); - b_edges.insert(make_pair(u, e)); + b_edges.emplace(u, e); } // Add a's in-edges to b, merging them in where b already has the new edge. @@ -504,7 +540,7 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { b_edges.clear(); for (const auto &e : out_edges_range(b, g)) { RoseVertex v = target(e, g); - b_edges.insert(make_pair(v, e)); + b_edges.emplace(v, e); } // Add a's out-edges to b, merging them in where b already has the new edge. @@ -524,11 +560,11 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { } static -void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; +void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) { + RoseGraph &g = build.g; const auto &a_literals = g[a].literals; for (u32 lit_id : a_literals) { - auto &lit_vertices = tbi.literal_info[lit_id].vertices; + auto &lit_vertices = build.literal_info[lit_id].vertices; lit_vertices.erase(a); lit_vertices.insert(b); } @@ -536,37 +572,91 @@ void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi) { insert(&g[b].literals, a_literals); } -// Merge role 'a' into 'b'. static -void mergeVertices(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi, - revRoseMap &rrm) { - RoseGraph &g = tbi.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); +void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai, + RoseVertex a, RoseVertex b) { + if (build.g[a].left) { + const left_id left(build.g[a].left); + assert(contains(rai.rev_leftfix[left], a)); + rai.rev_leftfix[left].erase(a); + } + if (contains(build.ghost, a)) { + auto ghost = build.ghost.at(a); + assert(contains(build.ghost, b) && ghost == build.ghost.at(b)); + build.ghost.erase(a); + rai.rev_ghost[ghost].erase(a); + } + + if (contains(rai.rev_ghost, a)) { + for (const auto &v : rai.rev_ghost[a]) { + build.ghost[v] = b; + rai.rev_ghost[b].insert(v); + } + rai.rev_ghost.erase(a); + } +} + +/** \brief Common role merge code used by variants below. */ +static +void mergeCommon(RoseBuildImpl &build, RoseAliasingInfo &rai, RoseVertex a, + RoseVertex b) { + RoseGraph &g = build.g; - // Merge role properties. assert(g[a].eod_accept == g[b].eod_accept); assert(g[a].left == g[b].left); - - insert(&g[b].reports, g[a].reports); + assert(!g[a].suffix || g[a].suffix == g[b].suffix); // In some situations (ghost roles etc), we can have different groups. assert(!g[a].groups && !g[b].groups); /* current structure means groups * haven't been assigned yet */ g[b].groups |= g[a].groups; - g[b].min_offset = min(g[a].min_offset, g[b].min_offset); - g[b].max_offset = max(g[a].max_offset, g[b].max_offset); + mergeLiteralSets(a, b, build); + updateAliasingInfo(build, rai, a, b); - mergeLiteralSets(a, b, tbi); + // Our min and max_offsets should be sane. + assert(g[b].min_offset <= g[b].max_offset); + + // Safety check: we should not have created through a merge a vertex that + // has an out-edge with ANCH history but is not fixed-offset. + assert(!hasAnchHistorySucc(g, b) || g[b].fixedOffset()); +} + +/** \brief Merge role 'a' into 'b', left merge path. */ +static +void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + + insert(&g[b].reports, g[a].reports); + + // Since it is a left merge (identical LHS) we should pick the tighter + // bound. 
+ g[b].min_offset = max(g[a].min_offset, g[b].min_offset); + g[b].max_offset = min(g[a].max_offset, g[b].max_offset); if (!g[b].suffix) { g[b].suffix = g[a].suffix; - } else { - assert(!g[a].suffix || g[b].suffix == g[a].suffix); } mergeEdges(a, b, g); - removeVertexFromMaps(a, tbi, rrm); + mergeCommon(build, rai, a, b); +} + +/** \brief Merge role 'a' into 'b', right merge path. */ +static +void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); + + insert(&g[b].reports, g[a].reports); + g[b].min_offset = min(g[a].min_offset, g[b].min_offset); + g[b].max_offset = max(g[a].max_offset, g[b].max_offset); + + mergeEdges(a, b, g); + mergeCommon(build, rai, a, b); } /** @@ -574,57 +664,35 @@ void mergeVertices(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi, * that the in- and out-edge sets, reports and suffixes are identical. */ static -void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &tbi, - revRoseMap &rrm) { - RoseGraph &g = tbi.g; +void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].idx, g[b].idx); - // Merge role properties. For a diamond merge, most properties are already - // the same (with the notable exception of the literal set). - assert(g[a].eod_accept == g[b].eod_accept); - assert(g[a].left == g[b].left); + // For a diamond merge, most properties are already the same (with the + // notable exception of the literal set). assert(g[a].reports == g[b].reports); assert(g[a].suffix == g[b].suffix); - // In some situations (ghost roles etc), we can have different groups. - assert(!g[a].groups && !g[b].groups); /* current structure means groups - * haven't been assigned yet */ - g[b].groups |= g[a].groups; - g[b].min_offset = min(g[a].min_offset, g[b].min_offset); g[b].max_offset = max(g[a].max_offset, g[b].max_offset); - mergeLiteralSets(a, b, tbi); - removeVertexFromMaps(a, tbi, rrm); + mergeCommon(build, rai, a, b); } static never_inline -void findCandidates(const RoseBuildImpl &tbi, CandidateSet *candidates) { - ue2::unordered_set disallowed; - - // We currently deny candidature to any vertex involved in a "ghost" - // relationship. - for (const auto &m : tbi.ghost) { - disallowed.insert(m.first); - disallowed.insert(m.second); - } - - for (auto v : vertices_range(tbi.g)) { - // Ignore ghost relationships. 
- if (contains(disallowed, v)) { - continue; - } - - if (isAliasingCandidate(v, tbi)) { - DEBUG_PRINTF("candidate %zu\n", tbi.g[v].idx); - DEBUG_PRINTF("lits: %u\n", *tbi.g[v].literals.begin()); +void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) { + for (auto v : vertices_range(build.g)) { + if (isAliasingCandidate(v, build)) { + DEBUG_PRINTF("candidate %zu\n", build.g[v].idx); + DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin()); candidates->insert(v); } } - assert(candidates->size() <= num_vertices(tbi.g)); + assert(candidates->size() <= num_vertices(build.g)); DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(), - num_vertices(tbi.g)); + num_vertices(build.g)); } static @@ -639,7 +707,7 @@ RoseVertex pickSucc(const RoseVertex v, const RoseGraph &g) { static RoseVertex pickPred(const RoseVertex v, const RoseGraph &g, - const RoseBuildImpl &tbi) { + const RoseBuildImpl &build) { RoseGraph::in_edge_iterator ei, ee; tie(ei, ee) = in_edges(v, g); if (ei == ee) { @@ -650,7 +718,7 @@ RoseVertex pickPred(const RoseVertex v, const RoseGraph &g, // Avoid roots if we have other options, since it doesn't matter to the // merge pass which predecessor we pick. RoseVertex u = source(*ei, g); - while (tbi.isAnyStart(u) && ++ei != ee) { + while (build.isAnyStart(u) && ++ei != ee) { u = source(*ei, g); } return u; @@ -700,12 +768,13 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b, } static -void pruneReportIfUnused(const RoseBuildImpl &tbi, shared_ptr h, +void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr h, const set &verts, ReportID report) { DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(), verts.size()); for (RoseVertex v : verts) { - if (tbi.g[v].left.graph == h && tbi.g[v].left.leftfix_report == report) { + if (build.g[v].left.graph == h && + build.g[v].left.leftfix_report == report) { DEBUG_PRINTF("report %u still in use\n", report); return; } @@ -717,12 +786,12 @@ void pruneReportIfUnused(const RoseBuildImpl &tbi, shared_ptr h, // unimplementable. DEBUG_PRINTF("report %u has been merged away, pruning\n", report); - assert(h->kind == tbi.isRootSuccessor(*verts.begin()) ? NFA_PREFIX - : NFA_INFIX); + assert(h->kind == build.isRootSuccessor(*verts.begin()) ? 
NFA_PREFIX + : NFA_INFIX); unique_ptr h_new = cloneHolder(*h); pruneReport(*h_new, report); - if (isImplementableNFA(*h_new, nullptr, tbi.cc)) { + if (isImplementableNFA(*h_new, nullptr, build.cc)) { clear_graph(*h); cloneHolder(*h, *h_new); } else { @@ -828,9 +897,9 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, } static -bool mergeSameCastle(RoseBuildImpl &tbi, RoseVertex a, RoseVertex b, - revRoseMap &rrm) { - RoseGraph &g = tbi.g; +bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; CastleProto &castle = *a_left.castle; @@ -853,7 +922,7 @@ bool mergeSameCastle(RoseBuildImpl &tbi, RoseVertex a, RoseVertex b, return false; } - const ReportID new_report = tbi.getNewNfaReport(); + const ReportID new_report = build.getNewNfaReport(); map a_top_map, b_top_map; for (const auto &c : castle.repeats) { @@ -875,9 +944,9 @@ bool mergeSameCastle(RoseBuildImpl &tbi, RoseVertex a, RoseVertex b, } } - assert(contains(rrm[b_left], b)); - rrm[b_left].erase(b); - rrm[a_left].insert(b); + assert(contains(rai.rev_leftfix[b_left], b)); + rai.rev_leftfix[b_left].erase(b); + rai.rev_leftfix[a_left].insert(b); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; @@ -886,15 +955,15 @@ bool mergeSameCastle(RoseBuildImpl &tbi, RoseVertex a, RoseVertex b, updateEdgeTops(g, a, a_top_map); updateEdgeTops(g, b, b_top_map); - pruneUnusedTops(castle, g, rrm[a_left]); + pruneUnusedTops(castle, g, rai.rev_leftfix[a_left]); return true; } static -bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, +bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, RoseVertex b, bool trivialCasesOnly, - revRoseMap &rrm) { - RoseGraph &g = tbi.g; + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; left_id a_left_id(a_left); @@ -912,28 +981,28 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, if (&a_castle == &b_castle) { DEBUG_PRINTF("castles are the same\n"); - return mergeSameCastle(tbi, a, b, rrm); + return mergeSameCastle(build, a, b, rai); } if (is_equal(a_castle, a_left.leftfix_report, b_castle, b_left.leftfix_report)) { DEBUG_PRINTF("castles are equiv with respect to reports\n"); - if (rrm[a_left_id].size() == 1) { + if (rai.rev_leftfix[a_left_id].size() == 1) { /* nobody else is using a_castle */ - rrm[b_left_id].erase(b); - rrm[a_left_id].insert(b); - pruneUnusedTops(b_castle, g, rrm[b_left_id]); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); + pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); b_left.castle = a_left.castle; b_left.leftfix_report = a_left.leftfix_report; DEBUG_PRINTF("OK -> only user of a_castle\n"); return true; } - if (rrm[b_left_id].size() == 1) { + if (rai.rev_leftfix[b_left_id].size() == 1) { /* nobody else is using b_castle */ - rrm[a_left_id].erase(a); - rrm[b_left_id].insert(a); - pruneUnusedTops(a_castle, g, rrm[a_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); + pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); a_left.castle = b_left.castle; a_left.leftfix_report = b_left.leftfix_report; DEBUG_PRINTF("OK -> only user of b_castle\n"); @@ -942,32 +1011,32 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, if (preds_same) { /* preds are the same anyway in diamond/left 
merges just need to - * check that all the literals in rrm[b_h] can handle a_h */ - for (auto v : rrm[b_left_id]) { - if (!mergeableRoseVertices(tbi, a, v)) { + * check that all the literals in rev_leftfix[b_h] can handle a_h */ + for (auto v : rai.rev_leftfix[b_left_id]) { + if (!mergeableRoseVertices(build, a, v)) { goto literal_mismatch_1; } } - rrm[a_left_id].erase(a); - rrm[b_left_id].insert(a); - pruneUnusedTops(a_castle, g, rrm[a_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); + pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); a_left.castle = b_left.castle; a_left.leftfix_report = b_left.leftfix_report; DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_1: /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rrm[a_h] can handle b_h */ - for (auto v : rrm[a_left_id]) { - if (!mergeableRoseVertices(tbi, v, b)) { + * check that all the literals in rev_leftfix[a_h] can handle b_h */ + for (auto v : rai.rev_leftfix[a_left_id]) { + if (!mergeableRoseVertices(build, v, b)) { goto literal_mismatch_2; } } - rrm[b_left_id].erase(b); - rrm[a_left_id].insert(b); - pruneUnusedTops(b_castle, g, rrm[b_left_id]); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); + pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); b_left.castle = a_left.castle; b_left.leftfix_report = a_left.leftfix_report; DEBUG_PRINTF("OK -> same preds ???\n"); @@ -978,15 +1047,15 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, /* we need to create a new graph as there may be other people * using b_left and it would be bad if a's preds started triggering it */ - ReportID new_report = tbi.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr new_castle = make_shared(a_castle); pruneCastle(*new_castle, a_left.leftfix_report); setReports(*new_castle, new_report); - rrm[a_left_id].erase(a); - rrm[b_left_id].erase(b); - pruneUnusedTops(*a_left.castle, g, rrm[a_left_id]); - pruneUnusedTops(*b_left.castle, g, rrm[b_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; @@ -994,9 +1063,9 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, b_left.castle = new_castle; assert(a_left == b_left); - rrm[a_left].insert(a); - rrm[a_left].insert(b); - pruneUnusedTops(*new_castle, g, rrm[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]); return true; } @@ -1008,27 +1077,27 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, // Only infixes. Prefixes require special care when doing non-trivial // merges. 
- if (!tbi.isNonRootSuccessor(a) || !tbi.isNonRootSuccessor(b)) { + if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { return false; } - set &b_verts = rrm[b_left_id]; + set &b_verts = rai.rev_leftfix[b_left_id]; set aa; aa.insert(a); - if (!mergeableRoseVertices(tbi, aa, b_verts)) { + if (!mergeableRoseVertices(build, aa, b_verts)) { DEBUG_PRINTF("vertices not mergeable\n"); return false; } - if (!tbi.cc.grey.roseMultiTopRoses || !tbi.cc.grey.allowCastle) { + if (!build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) { return false; } DEBUG_PRINTF("merging into new castle\n"); // Clone new castle with a's repeats in it, set to a new report. - ReportID new_report = tbi.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr m_castle = make_shared(a_castle); pruneCastle(*m_castle, a_left.leftfix_report); setReports(*m_castle, new_report); @@ -1070,10 +1139,10 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, DEBUG_PRINTF("merged into castle containing %zu repeats\n", m_castle->repeats.size()); - rrm[a_left_id].erase(a); - rrm[b_left_id].erase(b); - pruneUnusedTops(*a_left.castle, g, rrm[a_left_id]); - pruneUnusedTops(*b_left.castle, g, rrm[b_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); a_left.castle = m_castle; a_left.leftfix_report = new_report; @@ -1081,17 +1150,17 @@ bool attemptRoseCastleMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, b_left.leftfix_report = new_report; assert(a_left == b_left); - rrm[a_left].insert(a); - rrm[a_left].insert(b); - pruneUnusedTops(*m_castle, g, rrm[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]); return true; } static -bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, +bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, RoseVertex b, bool trivialCasesOnly, - revRoseMap &rrm) { - RoseGraph &g = tbi.g; + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; left_id a_left_id(a_left); @@ -1108,72 +1177,74 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, DEBUG_PRINTF("OK -> same actual holder\n"); ReportID a_oldreport = a_left.leftfix_report; ReportID b_oldreport = b_left.leftfix_report; - ReportID new_report = tbi.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); duplicateReport(*a_h, a_left.leftfix_report, new_report); duplicateReport(*b_h, b_left.leftfix_report, new_report); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; - pruneReportIfUnused(tbi, b_h, rrm[b_left_id], a_oldreport); - pruneReportIfUnused(tbi, b_h, rrm[b_left_id], b_oldreport); - pruneUnusedTops(*b_h, g, rrm[b_left_id]); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], + a_oldreport); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], + b_oldreport); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); assert(a_left == b_left); return true; } /* if it is the same graph, it is also fairly easy */ if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) { - if (rrm[a_left_id].size() == 1) { + if (rai.rev_leftfix[a_left_id].size() == 1) { /* nobody else is using a_h */ - rrm[b_left_id].erase(b); - 
rrm[a_left_id].insert(b); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); b_left.graph = a_h; b_left.leftfix_report = a_left.leftfix_report; - pruneUnusedTops(*b_h, g, rrm[b_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); DEBUG_PRINTF("OK -> only user of a_h\n"); return true; } - if (rrm[b_left_id].size() == 1) { + if (rai.rev_leftfix[b_left_id].size() == 1) { /* nobody else is using b_h */ - rrm[a_left_id].erase(a); - rrm[b_left_id].insert(a); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); a_left.graph = b_h; a_left.leftfix_report = b_left.leftfix_report; - pruneUnusedTops(*a_h, g, rrm[a_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); DEBUG_PRINTF("OK -> only user of b_h\n"); return true; } if (preds_same) { /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rrm[b_h] can handle a_h */ - for (auto v : rrm[b_left_id]) { - if (!mergeableRoseVertices(tbi, a, v)) { + * check that all the literals in rev_leftfix[b_h] can handle a_h */ + for (auto v : rai.rev_leftfix[b_left_id]) { + if (!mergeableRoseVertices(build, a, v)) { goto literal_mismatch_1; } } - rrm[a_left_id].erase(a); - rrm[b_left_id].insert(a); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); a_left.graph = b_h; a_left.leftfix_report = b_left.leftfix_report; - pruneUnusedTops(*a_h, g, rrm[a_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_1: /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rrm[a_h] can handle b_h */ - for (auto v : rrm[a_left_id]) { - if (!mergeableRoseVertices(tbi, v, b)) { + * check that all the literals in rev_leftfix[a_h] can handle b_h */ + for (auto v : rai.rev_leftfix[a_left_id]) { + if (!mergeableRoseVertices(build, v, b)) { goto literal_mismatch_2; } } - rrm[b_left_id].erase(b); - rrm[a_left_id].insert(b); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); b_left.graph = a_h; b_left.leftfix_report = a_left.leftfix_report; - pruneUnusedTops(*b_h, g, rrm[b_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_2:; @@ -1182,25 +1253,24 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, /* we need to create a new graph as there may be other people * using b_left and it would be bad if a's preds started triggering it */ - ReportID new_report = tbi.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr new_graph = cloneHolder(*b_h); duplicateReport(*new_graph, b_left.leftfix_report, new_report); - pruneReportIfUnused(tbi, new_graph, set(), - b_left.leftfix_report); + pruneAllOtherReports(*new_graph, new_report); - rrm[a_left_id].erase(a); - rrm[b_left_id].erase(b); - pruneUnusedTops(*a_h, g, rrm[a_left_id]); - pruneUnusedTops(*b_h, g, rrm[b_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; a_left.graph = new_graph; b_left.graph = new_graph; - rrm[a_left].insert(a); - rrm[a_left].insert(b); - pruneUnusedTops(*new_graph, g, rrm[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + 
pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]); return true; } @@ -1212,23 +1282,23 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, // Only infixes. Prefixes require special care when doing non-trivial // merges. - if (!tbi.isNonRootSuccessor(a) || !tbi.isNonRootSuccessor(b)) { + if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { return false; } DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n", g[a].idx, g[b].idx); - set &b_verts = rrm[b_left]; + set &b_verts = rai.rev_leftfix[b_left]; set aa; aa.insert(a); - if (!mergeableRoseVertices(tbi, aa, b_verts)) { + if (!mergeableRoseVertices(build, aa, b_verts)) { DEBUG_PRINTF("vertices not mergeable\n"); return false; } - if (!tbi.cc.grey.roseMultiTopRoses) { + if (!build.cc.grey.roseMultiTopRoses) { return false; } @@ -1238,10 +1308,10 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, /* We need to allocate a new report id because */ ReportID a_oldreport = a_left.leftfix_report; ReportID b_oldreport = b_left.leftfix_report; - ReportID new_report = tbi.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); duplicateReport(*b_h, b_left.leftfix_report, new_report); b_left.leftfix_report = new_report; - pruneReportIfUnused(tbi, b_h, rrm[b_left_id], b_oldreport); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport); NGHolder victim; cloneHolder(victim, *a_h); @@ -1265,7 +1335,7 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, assert(victim.kind == b_h->kind); assert(!generates_callbacks(*b_h)); - if (!mergeNfaPair(victim, *b_h, nullptr, tbi.cc)) { + if (!mergeNfaPair(victim, *b_h, nullptr, build.cc)) { DEBUG_PRINTF("merge failed\n"); // Restore in-edge properties. for (const auto &e : in_edges_range(a, g)) { @@ -1282,22 +1352,22 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, a_left.graph = b_h; a_left.leftfix_report = new_report; - assert(contains(rrm[a_left_id], a)); - assert(contains(rrm[b_left_id], b)); - rrm[a_left_id].erase(a); - rrm[b_left_id].insert(a); + assert(contains(rai.rev_leftfix[a_left_id], a)); + assert(contains(rai.rev_leftfix[b_left_id], b)); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); - pruneUnusedTops(*a_h, g, rrm[a_left_id]); - pruneUnusedTops(*b_h, g, rrm[b_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); // Prune A's report from its old prefix if it was only used by A. - pruneReportIfUnused(tbi, a_h, rrm[a_left_id], a_oldreport); + pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport); - reduceImplementableGraph(*b_h, SOM_NONE, nullptr, tbi.cc); + reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc); assert(roseHasTops(g, a)); assert(roseHasTops(g, b)); - assert(isImplementableNFA(*b_h, nullptr, tbi.cc)); + assert(isImplementableNFA(*b_h, nullptr, build.cc)); return true; } @@ -1305,13 +1375,14 @@ bool attemptRoseGraphMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, // the two LeftEngInfo structures to be the same. Returns false if the merge // is not possible. 
static -bool attemptRoseMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, - RoseVertex b, bool trivialCasesOnly, revRoseMap &rrm) { +bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, + RoseVertex b, bool trivialCasesOnly, + RoseAliasingInfo &rai) { DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n", - tbi.g[a].idx, tbi.g[b].idx); + build.g[a].idx, build.g[b].idx); assert(a != b); - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; @@ -1335,8 +1406,8 @@ bool attemptRoseMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, } // Only non-transients for the moment. - if (contains(tbi.transient, a_left_id) || - contains(tbi.transient, b_left_id)) { + if (contains(build.transient, a_left_id) || + contains(build.transient, b_left_id)) { return false; } @@ -1350,13 +1421,13 @@ bool attemptRoseMerge(RoseBuildImpl &tbi, bool preds_same, RoseVertex a, assert(roseHasTops(g, b)); if (a_left_id.graph() && b_left_id.graph()) { - return attemptRoseGraphMerge(tbi, preds_same, a, b, trivialCasesOnly, - rrm); + return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly, + rai); } if (a_left_id.castle() && b_left_id.castle()) { - return attemptRoseCastleMerge(tbi, preds_same, a, b, trivialCasesOnly, - rrm); + return attemptRoseCastleMerge(build, preds_same, a, b, trivialCasesOnly, + rai); } return false; @@ -1481,8 +1552,8 @@ void splitByNeighbour(const RoseGraph &g, vector> &buckets, } static -vector> splitDiamondMergeBuckets(CandidateSet &candidates, - const RoseBuildImpl &build) { +vector> +splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) { const RoseGraph &g = build.g; vector> buckets(1); @@ -1499,19 +1570,20 @@ vector> splitDiamondMergeBuckets(CandidateSet &candidates, return buckets; } + static never_inline -void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, +void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector *dead, bool mergeRoses, - revRoseMap &rrm) { + RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; if (candidates.empty()) { return; } /* Vertices may only be diamond merged with others in the same bucket */ - auto cand_buckets = splitDiamondMergeBuckets(candidates, tbi); + auto cand_buckets = splitDiamondMergeBuckets(candidates, build); for (const vector &siblings : cand_buckets) { for (auto it = siblings.begin(); it != siblings.end();) { @@ -1525,7 +1597,7 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, RoseVertex b = *jt; assert(contains(candidates, b)); - if (!sameRoleProperties(tbi, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { DEBUG_PRINTF("diff role prop\n"); continue; } @@ -1536,23 +1608,23 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, * so we still have to checks successors and predecessors. 
*/ if (!sameSuccessors(a, b, g) - || !sameRightRoleProperties(tbi, a, b) + || !sameRightRoleProperties(build, a, b) || !samePredecessors(a, b, g)) { DEBUG_PRINTF("not diamond\n"); continue; } - if (!canMergeLiterals(a, b, tbi)) { + if (!canMergeLiterals(a, b, build)) { DEBUG_PRINTF("incompatible lits\n"); continue; } - if (!attemptRoseMerge(tbi, true, a, b, !mergeRoses, rrm)) { + if (!attemptRoseMerge(build, true, a, b, !mergeRoses, rai)) { DEBUG_PRINTF("rose fail\n"); continue; } - mergeVerticesDiamond(a, b, tbi, rrm); + mergeVerticesDiamond(a, b, build, rai); dead->push_back(a); candidates.erase(a); break; // next a @@ -1568,6 +1640,7 @@ vector::iterator findLeftMergeSibling( vector::iterator it, const vector::iterator &end, const RoseVertex a, const RoseBuildImpl &build, + const RoseAliasingInfo &rai, const CandidateSet &candidates) { const RoseGraph &g = build.g; @@ -1581,7 +1654,7 @@ vector::iterator findLeftMergeSibling( continue; } - if (!sameRoleProperties(build, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { continue; } @@ -1611,10 +1684,10 @@ vector::iterator findLeftMergeSibling( } static never_inline -void leftMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, - vector *dead, revRoseMap &rrm) { +void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, + vector *dead, RoseAliasingInfo &rai) { DEBUG_PRINTF("begin (%zu)\n", candidates.size()); - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; vector siblings; CandidateSet::iterator it = candidates.begin(); @@ -1629,11 +1702,11 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, assert(!g[a].literals.empty()); u32 lit_id = *g[a].literals.begin(); - const auto &verts = tbi.literal_info.at(lit_id).vertices; - RoseVertex pred = pickPred(a, g, tbi); + const auto &verts = build.literal_info.at(lit_id).vertices; + RoseVertex pred = pickPred(a, g, build); siblings.clear(); - if (pred == RoseGraph::null_vertex() || tbi.isAnyStart(pred) || + if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || hasGreaterOutDegree(verts.size(), pred, g)) { // Select sibling from amongst the vertices that share a literal. 
siblings.insert(siblings.end(), verts.begin(), verts.end()); @@ -1645,20 +1718,20 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, sort(siblings.begin(), siblings.end(), VertexIndexComp(g)); - auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a, tbi, - candidates); + auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a, + build, rai, candidates); if (jt == siblings.end()) { continue; } RoseVertex b = *jt; - if (!attemptRoseMerge(tbi, true, a, b, 0, rrm)) { + if (!attemptRoseMerge(build, true, a, b, 0, rai)) { DEBUG_PRINTF("rose fail\n"); continue; } - mergeVertices(a, b, tbi, rrm); + mergeVerticesLeft(a, b, build, rai); dead->push_back(a); candidates.erase(ait); } @@ -1693,6 +1766,7 @@ vector::const_iterator findRightMergeSibling( vector::const_iterator it, const vector::const_iterator &end, const RoseVertex a, const RoseBuildImpl &build, + const RoseAliasingInfo &rai, const CandidateSet &candidates) { const RoseGraph &g = build.g; @@ -1706,7 +1780,7 @@ vector::const_iterator findRightMergeSibling( continue; } - if (!sameRoleProperties(build, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { continue; } @@ -1764,10 +1838,10 @@ void split(map &keys, size_t *next_key, Iter it, } static never_inline -void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &tbi, - map > &sibling_cache, +void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, + map> &sibling_cache, map &keys_ext) { - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; size_t next_key = 1; map keys; @@ -1783,7 +1857,7 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &tbi, assert(!g[a].literals.empty()); u32 lit_id = *g[a].literals.begin(); RoseVertex succ = pickSucc(a, g); - const auto &verts = tbi.literal_info.at(lit_id).vertices; + const auto &verts = build.literal_info.at(lit_id).vertices; if (succ != RoseGraph::null_vertex() && !hasGreaterInDegree(verts.size(), succ, g)) { if (!done_succ.insert(succ).second) { @@ -1818,28 +1892,28 @@ void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &tbi, } for (auto &siblings : sibling_cache | map_values) { - sort(siblings.begin(), siblings.end(), VertexIndexComp(tbi.g)); + sort(siblings.begin(), siblings.end(), VertexIndexComp(build.g)); } } static const vector &getCandidateRightSiblings( - const map > &sibling_cache, + const map> &sibling_cache, map &keys, RoseVertex a) { size_t key = keys.at(a); return sibling_cache.at(key); } static never_inline -void rightMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, +void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector *dead, bool mergeRoses, - revRoseMap &rrm) { + RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - map > sibling_cache; + map> sibling_cache; map keys; - buildCandidateRightSiblings(candidates, tbi, sibling_cache, keys); + buildCandidateRightSiblings(candidates, build, sibling_cache, keys); CandidateSet::iterator it = candidates.begin(); while (it != candidates.end()) { @@ -1856,11 +1930,12 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, auto jt = siblings.begin(); while (jt != siblings.end()) { - jt = findRightMergeSibling(jt, siblings.end(), a, tbi, candidates); + jt = findRightMergeSibling(jt, siblings.end(), a, build, rai, + candidates); if (jt == siblings.end()) { break; } - if (attemptRoseMerge(tbi, false, a, *jt, !mergeRoses, rrm)) { + if (attemptRoseMerge(build, false, a, *jt, !mergeRoses, rai)) { break; } ++jt; @@ -1871,7 +1946,7 @@ 
void rightMergePass(CandidateSet &candidates, RoseBuildImpl &tbi, } RoseVertex b = *jt; - mergeVertices(a, b, tbi, rrm); + mergeVerticesRight(a, b, build, rai); dead->push_back(a); candidates.erase(ait); } @@ -1947,10 +2022,9 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { return; } - revRoseMap rrm; - DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses); - populateRevRoseMap(g, &rrm); + + RoseAliasingInfo rai(build); mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing; @@ -1963,8 +2037,8 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { size_t old_dead_size = 0; do { old_dead_size = dead.size(); - leftMergePass(candidates, build, &dead, rrm); - rightMergePass(candidates, build, &dead, mergeRoses, rrm); + leftMergePass(candidates, build, &dead, rai); + rightMergePass(candidates, build, &dead, mergeRoses, rai); } while (old_dead_size != dead.size()); /* Diamond merge passes cannot create extra merges as they require the same @@ -1972,7 +2046,7 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { * to a merge to different pred/succ before a diamond merge, it will still * be afterwards. */ filterDiamondCandidates(g, candidates); - diamondMergePass(candidates, build, &dead, mergeRoses, rrm); + diamondMergePass(candidates, build, &dead, mergeRoses, rai); DEBUG_PRINTF("killed %zu vertices\n", dead.size()); build.removeVertices(dead); diff --git a/src/rose/rose_build_util.h b/src/rose/rose_build_util.h index 536b031a..85cfc010 100644 --- a/src/rose/rose_build_util.h +++ b/src/rose/rose_build_util.h @@ -36,6 +36,9 @@ namespace ue2 { +/** Max allowed width for transient graphs in block mode */ +#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U + // Comparator for vertices using their index property. struct VertexIndexComp { VertexIndexComp(const RoseGraph &gg) : g(gg) {} diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index f6badd1b..a3d00943 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -40,6 +40,7 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" +#include "nfa/nfa_kind.h" #include "util/dump_charclass.h" #include "util/multibit_internal.h" #include "util/multibit.h" @@ -253,7 +254,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(CHECK_LIT_EARLY) {} + PROGRAM_CASE(CHECK_LIT_EARLY) { + os << " min_offset " << ri->min_offset << endl; + } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_GROUPS) { @@ -288,6 +291,31 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_MASK) { + os << " and_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->and_mask << std::dec << endl; + os << " cmp_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->cmp_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BYTE) { + os << " and_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->and_mask} << std::dec + << endl; + os << " cmp_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->cmp_mask} << std::dec + << endl; + os << " negation " << u32{ri->negation} << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } 
+ PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_INFIX) { os << " queue " << ri->queue << endl; os << " lag " << ri->lag << endl; @@ -310,6 +338,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(RECORD_ANCHORED) { + os << " id " << ri->id << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) {} PROGRAM_NEXT_INSTRUCTION @@ -474,6 +507,17 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { + os << " iter_offset " << ri->iter_offset << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION @@ -529,7 +573,7 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); const char *base = (const char *)t; - os << "Unconditional EOD Program:" << endl; + os << "EOD Program:" << endl; if (t->eodProgramOffset) { dumpProgram(os, t, base + t->eodProgramOffset); @@ -538,14 +582,6 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { os << "" << endl; } - os << "Sparse Iter EOD Program:" << endl; - - if (t->eodIterProgramOffset) { - dumpProgram(os, t, base + t->eodIterProgramOffset); - } else { - os << "" << endl; - } - os.close(); } @@ -600,6 +636,9 @@ void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { } const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex); + if (left->eager) { + fout << "eager "; + } if (left->transient) { fout << "transient " << (u32)left->transient << " "; } @@ -659,6 +698,76 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { } } + +static +void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { + FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); + + fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," + "Kind,Notes\n"); + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + nfa_kind kind; + stringstream notes; + + if (i < t->outfixBeginQueue) { + notes << "chained;"; + } + + if (nfa_info->eod) { + notes << "eod;"; + } + + if (i < t->outfixEndQueue) { + kind = NFA_OUTFIX; + } else if (i < t->leftfixBeginQueue) { + kind = NFA_SUFFIX; + } else { + const LeftNfaInfo *left = getLeftInfoByQueue(t, i); + if (left->eager) { + notes << "eager;"; + } + if (left->transient) { + notes << "transient " << (u32)left->transient << ";"; + } + if (left->infix) { + kind = NFA_INFIX; + u32 maxQueueLen = left->maxQueueLen; + if (maxQueueLen != (u32)(-1)) { + notes << "maxqlen=" << maxQueueLen << ";"; + } + } else { + kind = NFA_PREFIX; + } + notes << "maxlag=" << left->maxLag << ";"; + if (left->stopTable) { + notes << "miracles;"; + } + if (left->countingMiracleOffset) { + auto cm = (const RoseCountingMiracle *) + ((const char *)t + left->countingMiracleOffset); + notes << "counting_miracle:" << (int)cm->count + << (cm->shufti ? 
"s" : "v") << ";"; + } + if (nfaSupportsZombie(n)) { + notes << " zombie;"; + } + if (left->eod_check) { + notes << "left_eod;"; + } + } + + fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, + (const char *)n - (const char *)t, describe(*n).c_str(), + n->nPositions, n->streamStateSize, n->length, + to_string(kind).c_str(), notes.str().c_str()); + } + fclose(f); +} + + static void dumpExhaust(const RoseEngine *t, const string &base) { stringstream sstxt; @@ -710,7 +819,7 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { FILE *f; f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f); + nfaDumpDot(n, f, base); fclose(f); f = fopen(sstxt.str().c_str(), "w"); @@ -770,7 +879,7 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { FILE *f; f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f); + nfaDumpDot(n, f, base); fclose(f); f = fopen(sstxt.str().c_str(), "w"); @@ -801,7 +910,7 @@ void dumpAnchored(const RoseEngine *t, const string &base) { FILE *f; f = fopen(ssdot.str().c_str(), "w"); - nfaDumpDot(n, f); + nfaDumpDot(n, f, base); fclose(f); f = fopen(sstxt.str().c_str(), "w"); @@ -906,8 +1015,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { t->lookaroundTableOffset - t->lookaroundReachOffset); fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); - fprintf(f, " - history buffer : %u bytes (+1 for len)\n", - t->historyRequired); + fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState); @@ -925,6 +1033,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\n"); fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); + fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); fprintf(f, "handled key count : %u\n", t->handledKeyCount); fprintf(f, "\n"); @@ -1012,15 +1121,13 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); + DUMP_U32(t, eagerIterOffset); DUMP_U32(t, handledKeyCount); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, eodProgramOffset); - DUMP_U32(t, eodIterProgramOffset); - DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodNfaIterOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -1033,6 +1140,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, floatingMinLiteralMatchOffset); DUMP_U32(t, nfaInfoOffset); DUMP_U64(t, initialGroups); + DUMP_U64(t, floating_group_mask); DUMP_U32(t, size); DUMP_U32(t, delay_count); DUMP_U32(t, delay_base_id); @@ -1068,7 +1176,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, group_weak_end); DUMP_U32(t, floatingStreamState); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); @@ -1077,6 +1184,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { void roseDumpComponents(const RoseEngine *t, bool dump_raw, const string &base) { dumpComponentInfo(t, base); + dumpComponentInfoCsv(t, base); dumpNfas(t, dump_raw, base); dumpAnchored(t, base); dumpRevComponentInfo(t, base); diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index 
b0ac8d11..6abe629b 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,6 +55,7 @@ namespace ue2 { struct CastleProto; struct raw_dfa; struct raw_som_dfa; +struct TamaProto; /** \brief Table type for a literal. */ enum rose_literal_table { @@ -82,6 +83,7 @@ struct LeftEngInfo { std::shared_ptr castle; std::shared_ptr dfa; std::shared_ptr haig; + std::shared_ptr tamarama; u32 lag = 0U; ReportID leftfix_report = MO_INVALID_IDX; depth dfa_min_width = 0; @@ -92,6 +94,7 @@ struct LeftEngInfo { && other.castle == castle && other.dfa == dfa && other.haig == haig + && other.tamarama == tamarama && other.lag == lag && other.leftfix_report == leftfix_report; } @@ -104,6 +107,7 @@ struct LeftEngInfo { ORDER_CHECK(castle); ORDER_CHECK(dfa); ORDER_CHECK(haig); + ORDER_CHECK(tamarama); ORDER_CHECK(lag); ORDER_CHECK(leftfix_report); return false; @@ -121,6 +125,7 @@ struct RoseSuffixInfo { std::shared_ptr castle; std::shared_ptr haig; std::shared_ptr rdfa; + std::shared_ptr tamarama; depth dfa_min_width = 0; depth dfa_max_width = depth::infinity(); @@ -128,7 +133,7 @@ struct RoseSuffixInfo { bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); } bool operator<(const RoseSuffixInfo &b) const; void reset(void); - operator bool() const { return graph || castle || haig || rdfa; } + operator bool() const { return graph || castle || haig || rdfa || tamarama; } }; /** \brief Properties attached to each Rose graph vertex. */ diff --git a/src/rose/rose_in_dump.cpp b/src/rose/rose_in_dump.cpp index 899e50c4..fbd6858b 100644 --- a/src/rose/rose_in_dump.cpp +++ b/src/rose/rose_in_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,7 +51,7 @@ namespace ue2 { void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey, const char *filename) { - if (!grey.dumpFlags) { + if (!(grey.dumpFlags & Grey::DUMP_INT_GRAPH)) { return; } @@ -107,7 +107,8 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey, size_t id = graph_ids.size(); graph_ids[&*ig[e].graph] = id; } - fprintf(f, "graph %zu", graph_ids[&*ig[e].graph]); + fprintf(f, "graph %zu\n%s", graph_ids[&*ig[e].graph], + to_string(ig[e].graph->kind).c_str()); } if (ig[e].haig) { fprintf(f, "haig "); diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index 2c00a418..14d4d9b2 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,6 +106,12 @@ public: ROSE_BOUND_INF); } + /* for when there is a suffix graph which handles the reports */ + static RoseInVertexProps makeAcceptEod() { + return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0, + ROSE_BOUND_INF); + } + static RoseInVertexProps makeStart(bool anchored) { DEBUG_PRINTF("making %s\n", anchored ? 
"anchored start" : "start"); if (anchored) { diff --git a/src/rose/rose_in_util.h b/src/rose/rose_in_util.h index 7c74554a..1f3c4ef7 100644 --- a/src/rose/rose_in_util.h +++ b/src/rose/rose_in_util.h @@ -46,6 +46,11 @@ void calcVertexOffsets(RoseInGraph &ig); enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e); void pruneUseless(RoseInGraph &g); +inline +bool is_any_accept(RoseInVertex v, const RoseInGraph &g) { + return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD; +} + } #endif diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index bbe0b1b6..51913984 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -144,6 +144,7 @@ struct LeftNfaInfo { u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID u8 transient; /**< 0 if not transient, else max width of transient prefix */ char infix; /* TODO: make flags */ + char eager; /**< nfa should be run eagerly to first match or death */ char eod_check; /**< nfa is used by the event eod literal */ u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */ rose_group squash_mask; /* & mask applied when rose nfa dies */ @@ -155,8 +156,6 @@ struct NfaInfo { u32 fullStateOffset; /* offset in scratch, relative to ??? */ u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */ u8 no_retrigger; /* TODO */ - u8 only_external; /**< does not raise any som internal events or chained - * rose events */ u8 in_sbmatcher; /**< this outfix should not be run in small-block * execution, as it will be handled by the sbmatcher * HWLM table. */ @@ -348,10 +347,15 @@ struct RoseEngine { * literals. */ u32 litDelayRebuildProgramOffset; - /** \brief Offset of u32 array of program offsets for internal reports. */ + /** + * \brief Offset of u32 array of program offsets for reports used by + * output-exposed engines. + */ u32 reportProgramOffset; - /** \brief Number of programs for internal reports. */ + /** + * \brief Number of programs for reports used by output-exposed engines. + */ u32 reportProgramCount; /** @@ -366,6 +370,9 @@ struct RoseEngine { u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ + u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if + * none */ + /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role * programs. Used to size the handled_roles fatbit in scratch. */ u32 handledKeyCount; @@ -376,12 +383,7 @@ struct RoseEngine { u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 * bytes each) */ - u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0. - u32 eodIterProgramOffset; // or 0 if no eod iterator program - u32 eodIterOffset; // offset to EOD sparse iter or 0 if none - - /** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */ - u32 eodNfaIterOffset; + u32 eodProgramOffset; //!< EOD program, otherwise 0. u32 lastByteHistoryIterOffset; // if non-zero @@ -406,6 +408,7 @@ struct RoseEngine { * table */ u32 nfaInfoOffset; /* offset to the nfa info offset array */ rose_group initialGroups; + rose_group floating_group_mask; /* groups that are used by the ftable */ u32 size; // (bytes) u32 delay_count; /* number of delayed literal ids. */ u32 delay_base_id; /* literal id of the first delayed literal. 
@@ -431,7 +434,6 @@ struct RoseEngine { u32 ematcherRegionSize; /* max region size to pass to ematcher */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 group_weak_end; /* end of weak groups, debugging only */ u32 floatingStreamState; // size in bytes struct scatter_full_plan state_init; @@ -468,17 +470,6 @@ const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) { return (const struct HWLM *)lt; } -static really_inline -const void *getELiteralMatcher(const struct RoseEngine *t) { - if (!t->ematcherOffset) { - return NULL; - } - - const char *et = (const char *)t + t->ematcherOffset; - assert(ISALIGNED_N(et, 8)); - return et; -} - static really_inline const void *getSBLiteralMatcher(const struct RoseEngine *t) { if (!t->sbmatcherOffset) { diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 01572dbd..545e190f 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -50,9 +50,12 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. + ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match. ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. @@ -96,6 +99,17 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. + + /** \brief Check outfixes and suffixes for EOD and fire reports if so. */ + ROSE_INSTR_ENGINES_EOD, + + /** \brief Catch up and check active suffixes for EOD and fire reports if + * so. */ + ROSE_INSTR_SUFFIXES_EOD, + + /** \brief Run the EOD-anchored HWLM literal matcher. */ + ROSE_INSTR_MATCHER_EOD, + ROSE_INSTR_END //!< End of program. }; @@ -120,6 +134,7 @@ struct ROSE_STRUCT_CHECK_LIT_MASK { /** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LIT_EARLY { u8 code; //!< From enum RoseInstructionCode. + u32 min_offset; //!< Minimum offset for this literal. }; /** Note: check failure will halt program. */ @@ -153,6 +168,24 @@ struct ROSE_STRUCT_CHECK_LOOKAROUND { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_CHECK_MASK { + u8 code; //!< From enum roseInstructionCode. + u64a and_mask; //!< 64-bits and mask. + u64a cmp_mask; //!< 64-bits cmp mask. + u64a neg_mask; //!< 64-bits negation mask. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_BYTE { + u8 code; //!< From enum RoseInstructionCode. + u8 and_mask; //!< 8-bits and mask. + u8 cmp_mask; //!< 8-bits cmp mask. + u8 negation; //!< Flag about negation. + s32 offset; //!< The relative offset. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_INFIX { u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue of leftfix to check. 
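/* Editor's note: illustrative sketch, not part of the patch. ROSE_STRUCT_CHECK_MASK
 * above packs 64-bit and/cmp/neg masks covering eight input bytes; the
 * branchless implementation lives in validate_mask.h later in this patch. A
 * byte-at-a-time reference version of the same check, assuming neg_mask
 * selects whole bytes whose comparison sense is inverted, would be: */
static int check_mask_reference(const unsigned char data[8],
                                const unsigned char and_mask[8],
                                const unsigned char cmp_mask[8],
                                const unsigned char neg_mask[8]) {
    for (int i = 0; i < 8; i++) {
        int byte_matches = (data[i] & and_mask[i]) == cmp_mask[i];
        /* negated bytes must NOT match; all other bytes must match */
        if (neg_mask[i] ? byte_matches : !byte_matches) {
            return 0; /* check failed: the program takes fail_jump */
        }
    }
    return 1;
}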
@@ -175,6 +208,11 @@ struct ROSE_STRUCT_PUSH_DELAYED { u32 index; // Delay literal index (relative to first delay lit). }; +struct ROSE_STRUCT_RECORD_ANCHORED { + u8 code; //!< From enum RoseInstructionCode. + u32 id; //!< Literal ID. +}; + struct ROSE_STRUCT_CATCH_UP { u8 code; //!< From enum RoseInstructionCode. }; @@ -351,6 +389,19 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_ENGINES_EOD { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. +}; + +struct ROSE_STRUCT_SUFFIXES_EOD { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_MATCHER_EOD { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_END { u8 code; //!< From enum RoseInstructionCode. }; diff --git a/src/rose/runtime.h b/src/rose/runtime.h index d4309bfb..60c7d34b 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -35,7 +35,6 @@ #include "rose_internal.h" #include "scratch.h" -#include "util/exhaust.h" // for isExhausted #include "util/partial_store.h" /* @@ -56,6 +55,11 @@ #define rose_inline really_inline +/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager + * prefixes are always run in exactly the same way as normal prefixes. */ +#define EAGER_STOP_OFFSET 64 + + static really_inline const void *getByOffset(const struct RoseEngine *t, u32 offset) { assert(offset < t->size); @@ -108,39 +112,6 @@ const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, return (const u8 *)(state + t->stateOffsets.leftfixLagTable); } -static rose_inline -char roseSuffixInfoIsExhausted(const struct RoseEngine *t, - const struct NfaInfo *info, - const char *exhausted) { - if (!info->ekeyListOffset) { - return 0; - } - - DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset); - - /* INVALID_EKEY terminated list */ - const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset); - while (*ekeys != INVALID_EKEY) { - DEBUG_PRINTF("check %u\n", *ekeys); - if (!isExhausted(t, exhausted, *ekeys)) { - DEBUG_PRINTF("not exhausted -> alive\n"); - return 0; - } - ++ekeys; - } - - DEBUG_PRINTF("all ekeys exhausted -> dead\n"); - return 1; -} - -static really_inline -char roseSuffixIsExhausted(const struct RoseEngine *t, u32 qi, - const char *exhausted) { - DEBUG_PRINTF("check queue %u\n", qi); - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - return roseSuffixInfoIsExhausted(t, info, exhausted); -} - static really_inline u32 has_chained_nfas(const struct RoseEngine *t) { return t->outfixBeginQueue; diff --git a/src/rose/stream.c b/src/rose/stream.c index b08fe04d..b934f98f 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -31,13 +31,14 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "program_runtime.h" +#include "rose.h" #include "hwlm/hwlm.h" #include "nfa/mcclellan.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_internal.h" #include "util/fatbit.h" -#include "rose.h" static rose_inline void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, @@ -422,8 +423,95 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, assert(!can_stop_matching(scratch)); } +static rose_inline +void runEagerPrefixesStream(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (!t->eagerIterOffset + || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) { + return; + } + + char *state = scratch->core_info.state; + u8 
*ara = getActiveLeftArray(t, state); /* indexed by offsets into + * left_table */ + const u32 arCount = t->activeLeftCount; + const u32 qCount = t->queueCount; + const struct LeftNfaInfo *left_table = getLeftTable(t); + const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + u32 idx = 0; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { + const struct LeftNfaInfo *left = left_table + ri; + u32 qi = ri + t->leftfixBeginQueue; + DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount, left->maxLag); + + assert(!fatbit_isset(scratch->aqa, qCount, qi)); + assert(left->eager); + assert(!left->infix); + + struct mq *q = scratch->queues + qi; + const struct NFA *nfa = getNfaByQueue(t, qi); + s64a loc = MIN(scratch->core_info.len, + EAGER_STOP_OFFSET - scratch->core_info.buf_offset); + + fatbit_set(scratch->aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + + if (scratch->core_info.buf_offset) { + s64a sp = left->transient ? -(s64a)scratch->core_info.hlen + : -(s64a)loadRoseDelay(t, state, left); + pushQueueAt(q, 0, MQE_START, sp); + if (scratch->core_info.buf_offset + sp > 0) { + loadStreamState(nfa, q, sp); + /* if the leftfix fix is currently in a match state, we cannot + * advance it. */ + if (nfaInAnyAcceptState(nfa, q)) { + continue; + } + pushQueueAt(q, 1, MQE_END, loc); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(q->nfa, q); + } + } else { + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(nfa, q); + } + + char alive = nfaQueueExecToMatch(q->nfa, q, loc); + + if (!alive) { + DEBUG_PRINTF("queue %u dead, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else if (q->cur == q->end) { + assert(alive != MO_MATCHES_PENDING); + /* unlike in block mode we cannot squash groups if there is no match + * in this block as we need the groups on for later stream writes */ + /* TODO: investigate possibility of a method to suppress groups for + * a single stream block. 
*/ + DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + assert(alive == MO_MATCHES_PENDING); + DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); + q->end--; /* remove end item */ + } + } +} + void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { - DEBUG_PRINTF("OH HAI\n"); + DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset, + scratch->core_info.buf_offset + (u64a)scratch->core_info.len); assert(t); assert(scratch->core_info.hbuf); assert(scratch->core_info.buf); @@ -460,8 +548,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->minMatchOffset = offset; tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; - DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n", - scratch->core_info.hlen, scratch->core_info.len); + DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", + scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); fatbit_clear(scratch->aqa); scratch->al_log_sum = 0; @@ -471,6 +559,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { streamInitSufPQ(t, state, scratch); } + runEagerPrefixesStream(t, scratch); + u32 alen = t->anchoredDistance > offset ? MIN(length + offset, t->anchoredDistance) - offset : 0; @@ -539,8 +629,9 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { } DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - hwlmExecStreaming(ftable, scratch, flen, start, roseCallback, scratch, - tctxt->groups, stream_state); + hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback, + scratch, tctxt->groups & t->floating_group_mask, + stream_state); } flush_delay_and_exit: @@ -558,3 +649,67 @@ exit: scratch->core_info.status); return; } + +static rose_inline +void roseStreamInitEod(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; + /* TODO: diff groups for eod */ + tctxt->groups = loadGroups(t, scratch->core_info.state); + tctxt->lit_offset_adjust = scratch->core_info.buf_offset + - scratch->core_info.hlen + + 1; // index after last byte + tctxt->delayLastEndOffset = offset; + tctxt->lastEndOffset = offset; + tctxt->filledDelayedSlots = 0; + tctxt->lastMatchOffset = 0; + tctxt->minMatchOffset = offset; + tctxt->minNonMpvMatchOffset = offset; + tctxt->next_mpv_offset = offset; + + scratch->catchup_pq.qm_size = 0; + scratch->al_log_sum = 0; /* clear the anchored logs */ + + fatbit_clear(scratch->aqa); +} + +void roseStreamEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + assert(scratch); + assert(t->requiresEodCheck); + DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, + scratch->core_info.len, scratch->core_info.hbuf, + scratch->core_info.hlen); + + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + + if (t->maxBiAnchoredWidth != ROSE_BOUND_INF + && offset > t->maxBiAnchoredWidth) { + DEBUG_PRINTF("bailing, we are beyond max width\n"); + /* also some of the history/state may be stale */ + return; + } + + if (!t->eodProgramOffset) { + DEBUG_PRINTF("no eod program\n"); + return; + } + + roseStreamInitEod(t, offset, scratch); + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. 
+ assert(!scratch->tctxt.filledDelayedSlots); + + const u64a som = 0; + const size_t match_len = 0; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + // Note: we ignore the result, as this is the last thing to ever happen on + // a scan. + roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, + flags); +} diff --git a/src/rose/validate_mask.h b/src/rose/validate_mask.h new file mode 100644 index 00000000..b2c2f5d6 --- /dev/null +++ b/src/rose/validate_mask.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ue2common.h" + +// check positive bytes in cmp_result. +// return one if the check passed, zero otherwise. +static really_inline +int posValidateMask(const u64a cmp_result, const u64a pos_mask) { + return !(cmp_result & pos_mask); +} + +/* + * check negative bytes in cmp_result. + * return one if any byte in cmp_result is not 0, zero otherwise. + * check lowest 7 bits and highest bit of every byte respectively. + */ +static really_inline +int negValidateMask(const u64a cmp_result, const u64a neg_mask) { + const u64a count_mask = 0x7f7f7f7f7f7f7f7f; + // check lowest 7 bits of every byte. + // the highest bit should be 1 if check passed. + u64a check_low = (cmp_result & count_mask) + count_mask; + // check the highest bit of every byte. + // combine the highest bit and 0x7f to 0xff if check passes. + // flip all 0xff to 0x00 and 0x7f to 0x80. + u64a check_all = ~(check_low | cmp_result | count_mask); + return !(check_all & neg_mask); +} + +static really_inline +int validateMask(u64a data, u64a valid_data_mask, u64a and_mask, + u64a cmp_mask, u64a neg_mask) { + // skip some byte where valid_data_mask is 0x00 there. 
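    /* Editor's note (worked example, not part of the patch): negValidateMask
     * above reduces "is this byte of cmp_result non-zero?" to bit arithmetic.
     * For one byte b, with count_mask = 0x7f:
     *     check_low = (b & 0x7f) + 0x7f      -> bit 7 set iff any low 7 bit of b is set
     *     check_all = ~(check_low | b | 0x7f) -> 0x80 if b == 0, else 0x00
     * e.g. b = 0x00: check_low = 0x7f, check_all = ~(0x7f|0x00|0x7f) = 0x80
     *      b = 0x01: check_low = 0x80, check_all = ~(0x80|0x01|0x7f) = 0x00
     *      b = 0x80: check_low = 0x7f, check_all = ~(0x7f|0x80|0x7f) = 0x00
     * so (check_all & neg_mask) is non-zero exactly when some negated byte of
     * cmp_result is zero, i.e. a byte that was required to differ compared
     * equal, and negValidateMask correctly reports failure. */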
+ and_mask &= valid_data_mask; + cmp_mask &= valid_data_mask; + neg_mask &= valid_data_mask; + u64a cmp_result = (data & and_mask) ^ cmp_mask; + /* do the positive check first since it's cheaper */ + if (posValidateMask(cmp_result, ~neg_mask) + && negValidateMask(cmp_result, neg_mask)) { + return 1; + } else { + DEBUG_PRINTF("data %llx valid_data_mask(vdm) %llx\n", + data, valid_data_mask); + DEBUG_PRINTF("and_mask & vdm %llx cmp_mask & vdm %llx\n", and_mask, + cmp_mask); + DEBUG_PRINTF("cmp_result %llx neg_mask & vdm %llx\n", + cmp_result, neg_mask); + return 0; + } +} diff --git a/src/runtime.c b/src/runtime.c index 95f21d84..35a11634 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -43,6 +43,7 @@ #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" #include "nfa/nfa_rev_api.h" +#include "nfa/sheng.h" #include "smallwrite/smallwrite_internal.h" #include "rose/rose.h" #include "rose/runtime.h" @@ -198,7 +199,11 @@ void pureLiteralBlockExec(const struct RoseEngine *rose, size_t length = scratch->core_info.len; DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl); - hwlmExec(ftable, buffer, length, 0, rosePureLiteralCallback, scratch, + // RoseContext values that need to be set for use by roseCallback. + scratch->tctxt.groups = rose->initialGroups; + scratch->tctxt.lit_offset_adjust = 1; + + hwlmExec(ftable, buffer, length, 0, roseCallback, scratch, rose->initialGroups); } @@ -217,7 +222,6 @@ void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t, q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; q->cb = roseReportAdaptor; - q->som_cb = roseReportSomAdaptor; q->context = scratch; q->report_current = 0; @@ -257,8 +261,8 @@ void soleOutfixBlockExec(const struct RoseEngine *t, char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len); if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) { - nfaCheckFinalState(nfa, q->state, q->streamState, q->length, - q->cb, q->som_cb, scratch); + nfaCheckFinalState(nfa, q->state, q->streamState, q->length, q->cb, + scratch); } } @@ -283,13 +287,16 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, size_t local_alen = length - smwr->start_offset; const u8 *local_buffer = buffer + smwr->start_offset; - assert(isMcClellanType(nfa->type)); + assert(isDfaType(nfa->type)); if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); - } else { + } else if (nfa->type == MCCLELLAN_NFA_16){ nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer, local_alen, roseReportAdaptor, scratch); + } else { + nfaExecSheng0_B(nfa, smwr->start_offset, local_buffer, + local_alen, roseReportAdaptor, scratch); } } @@ -532,7 +539,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { return; } - roseEodExec(rose, id->offset, scratch); + roseStreamEodExec(rose, id->offset, scratch); } static never_inline @@ -568,7 +575,7 @@ void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { assert(nfaAcceptsEod(nfa)); nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb, - q->som_cb, scratch); + scratch); } static really_inline @@ -743,11 +750,15 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n", stream_state->offset, scratch->core_info.len); + // RoseContext values that need to be set for use by roseCallback. 
+ scratch->tctxt.groups = loadGroups(rose, scratch->core_info.state); + scratch->tctxt.lit_offset_adjust = scratch->core_info.buf_offset + 1; + // Pure literal cases don't have floatingMinDistance set, so we always // start the match region at zero. const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, rosePureLiteralCallback, + hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, rose->initialGroups, hwlm_stream_state); if (!told_to_stop_matching(scratch) && diff --git a/src/scratch.c b/src/scratch.c index d8742e7d..dae2c672 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -227,6 +227,11 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { // Don't get too big for your boots assert((size_t)(current - (char *)s) <= alloc_size); + // Init q->scratch ptr for every queue. + for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) { + qi->scratch = s; + } + return HS_SUCCESS; } diff --git a/src/scratch.h b/src/scratch.h index f8e322f8..a2f02503 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -45,7 +45,7 @@ extern "C" #endif UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259; -#define FDR_TEMP_BUF_SIZE 200 +#define FDR_TEMP_BUF_SIZE 220 struct fatbit; struct hs_scratch; @@ -141,7 +141,6 @@ struct match_deduper { struct ALIGN_CL_DIRECTIVE hs_scratch { u32 magic; u8 in_use; /**< non-zero when being used by an API call. */ - char *scratch_alloc; /* user allocated scratch object */ u32 queueCount; u32 bStateSize; /**< sizeof block mode states */ u32 tStateSize; /**< sizeof transient rose states */ @@ -161,10 +160,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { struct match_deduper deduper; u32 anchored_literal_region_len; u32 anchored_literal_count; - u32 delay_count; - u32 scratchSize; - u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; - u32 handledKeyCount; struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already * handled by this literal */ u64a *som_store; /**< array of som locations */ @@ -176,6 +171,11 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { * location had been writable */ u64a som_set_now_offset; /**< offset at which som_set_now represents */ u32 som_store_count; + u32 handledKeyCount; + u32 delay_count; + u32 scratchSize; + char *scratch_alloc; /* user allocated scratch object */ + u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; }; /* array of fatbit ptr; TODO: why not an array of fatbits? */ diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 792a3d5b..90770ba5 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -34,6 +34,7 @@ #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa_merge.h" +#include "nfa/shengcompile.h" #include "nfagraph/ng.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_mcclellan.h" @@ -65,7 +66,8 @@ namespace { // unnamed // Concrete impl class class SmallWriteBuildImpl : public SmallWriteBuild { public: - SmallWriteBuildImpl(const ReportManager &rm, const CompileContext &cc); + SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm, + const CompileContext &cc); // Construct a runtime implementation. 
aligned_unique_ptr build(u32 roseQuality) override; @@ -73,6 +75,8 @@ public: void add(const NGWrapper &w) override; void add(const ue2_literal &literal, ReportID r) override; + set all_reports() const override; + bool determiniseLiterals(); const ReportManager &rm; @@ -87,11 +91,14 @@ public: SmallWriteBuild::~SmallWriteBuild() { } -SmallWriteBuildImpl::SmallWriteBuildImpl(const ReportManager &rm_in, +SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns, + const ReportManager &rm_in, const CompileContext &cc_in) : rm(rm_in), cc(cc_in), /* small write is block mode only */ - poisoned(!cc.grey.allowSmallWrite || cc.streaming) { + poisoned(!cc.grey.allowSmallWrite + || cc.streaming + || num_patterns > cc.grey.smallWriteMaxPatterns) { } void SmallWriteBuildImpl::add(const NGWrapper &w) { @@ -163,6 +170,10 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) { } cand_literals.push_back(make_pair(literal, r)); + + if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) { + poisoned = true; + } } static @@ -181,6 +192,7 @@ void lit_to_graph(NGHolder *h, const ue2_literal &literal, ReportID r) { bool SmallWriteBuildImpl::determiniseLiterals() { DEBUG_PRINTF("handling literals\n"); assert(!poisoned); + assert(cand_literals.size() <= cc.grey.smallWriteMaxLiterals); if (cand_literals.empty()) { return true; /* nothing to do */ @@ -301,6 +313,20 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, return true; } +static +aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm, + set &accel_states) { + aligned_unique_ptr dfa = nullptr; + if (cc.grey.allowSmallWriteSheng) { + dfa = shengCompile(rdfa, cc, rm, &accel_states); + } + if (!dfa) { + dfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + } + return dfa; +} + static aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, const CompileContext &cc, @@ -311,9 +337,9 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, // Unleash the McClellan! set accel_states; - auto nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + auto nfa = getDfa(rdfa, cc, rm, accel_states); if (!nfa) { - DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); + DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; } @@ -329,9 +355,9 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + nfa = getDfa(rdfa, cc, rm, accel_states); if (!nfa) { - DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); + DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ return nullptr; } @@ -340,7 +366,7 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, *small_region = cc.grey.smallWriteLargestBuffer; } - assert(isMcClellanType(nfa->type)); + assert(isDfaType(nfa->type)); if (nfa->length > cc.grey.limitSmallWriteOutfixSize || nfa->length > cc.grey.limitDFASize) { DEBUG_PRINTF("smallwrite outfix size too large\n"); @@ -352,9 +378,10 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, } // SmallWriteBuild factory -unique_ptr makeSmallWriteBuilder(const ReportManager &rm, +unique_ptr makeSmallWriteBuilder(size_t num_patterns, + const ReportManager &rm, const CompileContext &cc) { - return ue2::make_unique(rm, cc); + return ue2::make_unique(num_patterns, rm, cc); } aligned_unique_ptr @@ -403,6 +430,20 @@ SmallWriteBuildImpl::build(u32 roseQuality) { return smwr; } +set SmallWriteBuildImpl::all_reports() const { + set reports; + if (poisoned) { + return reports; + } + if (rdfa) { + insert(&reports, ::ue2::all_reports(*rdfa)); + } + for (const auto &cand : cand_literals) { + reports.insert(cand.second); + } + return reports; +} + size_t smwrSize(const SmallWriteEngine *smwr) { assert(smwr); return smwr->size; diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 9c3de9d3..84c6df3a 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,8 @@ #include "ue2common.h" #include "util/alloc.h" +#include + #include struct SmallWriteEngine; @@ -61,10 +63,13 @@ public: virtual void add(const NGWrapper &w) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0; + + virtual std::set all_reports() const = 0; }; // Construct a usable SmallWrite builder. -std::unique_ptr makeSmallWriteBuilder(const ReportManager &rm, +std::unique_ptr makeSmallWriteBuilder(size_t num_patterns, + const ReportManager &rm, const CompileContext &cc); size_t smwrSize(const SmallWriteEngine *t); diff --git a/src/smallwrite/smallwrite_dump.cpp b/src/smallwrite/smallwrite_dump.cpp index 8987e8b3..0db97df5 100644 --- a/src/smallwrite/smallwrite_dump.cpp +++ b/src/smallwrite/smallwrite_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,7 +73,7 @@ void smwrDumpNFA(const SmallWriteEngine *smwr, bool dump_raw, FILE *f; f = fopen((base + "smallwrite_nfa.dot").c_str(), "w"); - nfaDumpDot(n, f); + nfaDumpDot(n, f, base); fclose(f); f = fopen((base + "smallwrite_nfa.txt").c_str(), "w"); diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index 9d0a1390..1a868efc 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -87,14 +87,14 @@ char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now, } static -int somRevCallback(u64a offset, ReportID id, void *ctx) { - DEBUG_PRINTF("offset=%llu, id=%u\n", offset, id); +int somRevCallback(UNUSED u64a start, u64a end, ReportID id, void *ctx) { + DEBUG_PRINTF("offset=%llu, id=%u\n", end, id); // We use the id to store the offset adjustment (for assertions like a // leading \b or multiline mode). 
assert(id <= 1); u64a *from_offset = ctx; - LIMIT_TO_AT_MOST(from_offset, offset + id); + LIMIT_TO_AT_MOST(from_offset, end + id); return 1; // continue matching. } diff --git a/src/ue2common.h b/src/ue2common.h index 2de60753..e1f03f72 100644 --- a/src/ue2common.h +++ b/src/ue2common.h @@ -52,6 +52,9 @@ #define ALIGN_ATTR(x) __attribute__((aligned((x)))) #endif +#define ALIGN_DIRECTIVE ALIGN_ATTR(16) +#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32) +#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64) typedef signed char s8; typedef unsigned char u8; @@ -82,10 +85,6 @@ typedef u32 ReportID; #define HS_PUBLIC_API #endif -#define ALIGN_DIRECTIVE ALIGN_ATTR(16) -#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32) -#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64) - #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) /** \brief Shorthand for the attribute to shut gcc about unused parameters */ diff --git a/src/util/bitutils.h b/src/util/bitutils.h index c863fba9..6f1bcd09 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -454,4 +454,20 @@ void bf64_unset(u64a *bitfield, u32 i) { *bitfield &= ~(1ULL << i); } +static really_inline +u32 rank_in_mask32(u32 mask, u32 bit) { + assert(bit < sizeof(u32) * 8); + assert(mask & (u32)(1U << bit)); + mask &= (u32)(1U << bit) - 1; + return popcount32(mask); +} + +static really_inline +u32 rank_in_mask64(u64a mask, u32 bit) { + assert(bit < sizeof(u64a) * 8); + assert(mask & (u64a)(1ULL << bit)); + mask &= (u64a)(1ULL << bit) - 1; + return popcount64(mask); +} + #endif // BITUTILS_H diff --git a/src/util/clique.cpp b/src/util/clique.cpp new file mode 100644 index 00000000..ea22779c --- /dev/null +++ b/src/util/clique.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief An algorithm to find cliques. 
+ */ + +#include "clique.h" +#include "container.h" +#include "graph_range.h" +#include "make_unique.h" +#include "ue2_containers.h" + +#include +#include +#include + +using namespace std; + +namespace ue2 { + +static +vector getNeighborInfo(const CliqueGraph &g, + const CliqueVertex &cv, const set &group) { + u32 id = g[cv].stateId; + vector neighbor; + // find neighbors for cv + for (const auto &v : adjacent_vertices_range(cv, g)) { + if (g[v].stateId != id && contains(group, g[v].stateId)){ + neighbor.push_back(g[v].stateId); + DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); + } + } + + return neighbor; +} + +static +vector findCliqueGroup(CliqueGraph &cg) { + stack> gStack; + + // Create mapping between vertex and id + map vertexMap; + vector init; + for (const auto &v : vertices_range(cg)) { + vertexMap[cg[v].stateId] = v; + init.push_back(cg[v].stateId); + } + gStack.push(init); + + // Get the vertex to start from + vector clique; + while (!gStack.empty()) { + vector g = move(gStack.top()); + gStack.pop(); + + // Choose a vertex from the graph + u32 id = g[0]; + CliqueVertex &n = vertexMap.at(id); + clique.push_back(id); + // Corresponding vertex in the original graph + set subgraphId(g.begin(), g.end()); + auto neighbor = getNeighborInfo(cg, n, subgraphId); + // Get graph consisting of neighbors for left branch + if (!neighbor.empty()) { + gStack.push(neighbor); + } + } + + return clique; +} + +template +bool graph_empty(const Graph &g) { + typename Graph::vertex_iterator vi, ve; + tie(vi, ve) = vertices(g); + return vi == ve; +} + +vector> removeClique(CliqueGraph &cg) { + DEBUG_PRINTF("graph size:%lu\n", num_vertices(cg)); + vector> cliquesVec = {findCliqueGroup(cg)}; + while (!graph_empty(cg)) { + const vector &c = cliquesVec.back(); + vector dead; + for (const auto &v : vertices_range(cg)) { + u32 id = cg[v].stateId; + if (find(c.begin(), c.end(), id) != c.end()) { + dead.push_back(v); + } + } + for (const auto &v : dead) { + clear_vertex(v, cg); + remove_vertex(v, cg); + } + if (graph_empty(cg)) { + break; + } + auto clique = findCliqueGroup(cg); + cliquesVec.push_back(clique); + } + + return cliquesVec; +} + +} // namespace ue2 diff --git a/src/nfa/limex_simd512a.c b/src/util/clique.h similarity index 61% rename from src/nfa/limex_simd512a.c rename to src/util/clique.h index 1c4a0fb9..89c6d4ed 100644 --- a/src/nfa/limex_simd512a.c +++ b/src/util/clique.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,48 +27,34 @@ */ /** \file - * \brief LimEx NFA: 512-bit SIMD runtime implementations. + * \brief An algorithm to find cliques. 
*/ -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS +#ifndef CLIQUE_H +#define CLIQUE_H -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" #include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" -// Common code -#include "limex_runtime.h" +#include -#define SIZE 512 -#define STATE_T m512 -#include "limex_exceptional.h" +#include -#define SIZE 512 -#define STATE_T m512 -#include "limex_state_impl.h" +namespace ue2 { -#define SIZE 512 -#define STATE_T m512 -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" +struct CliqueVertexProps { + CliqueVertexProps() {} + explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {} -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 2 -#include "limex_runtime_impl.h" + u32 stateId = ~0U; +}; -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 1 -#include "limex_runtime_impl.h" +typedef boost::adjacency_list CliqueGraph; +typedef CliqueGraph::vertex_descriptor CliqueVertex; -#define SIZE 512 -#define STATE_T m512 -#define SHIFT 3 -#include "limex_runtime_impl.h" +/** \brief Returns a vector of cliques found in a graph. */ +std::vector> removeClique(CliqueGraph &cg); + +} // namespace ue2 + +#endif diff --git a/src/util/exhaust.h b/src/util/exhaust.h index b55c52d7..d6f2ac06 100644 --- a/src/util/exhaust.h +++ b/src/util/exhaust.h @@ -33,47 +33,9 @@ #ifndef EXHAUST_H #define EXHAUST_H -#include "rose/rose_internal.h" -#include "util/multibit.h" #include "ue2common.h" /** Index meaning a given exhaustion key is invalid. */ #define INVALID_EKEY (~(u32)0) -/** \brief Test whether the given key (\a ekey) is set in the exhaustion vector - * \a evec. */ -static really_inline -int isExhausted(const struct RoseEngine *t, const char *evec, u32 ekey) { - DEBUG_PRINTF("checking exhaustion %p %u\n", evec, ekey); - assert(ekey != INVALID_EKEY); - assert(ekey < t->ekeyCount); - return mmbit_isset((const u8 *)evec, t->ekeyCount, ekey); -} - -/** \brief Returns 1 if all exhaustion keys in the bitvector are on. */ -static really_inline -int isAllExhausted(const struct RoseEngine *t, const char *evec) { - if (!t->canExhaust) { - return 0; /* pattern set is inexhaustible */ - } - - return mmbit_all((const u8 *)evec, t->ekeyCount); -} - -/** \brief Mark key \a ekey on in the exhaustion vector. */ -static really_inline -void markAsMatched(const struct RoseEngine *t, char *evec, u32 ekey) { - DEBUG_PRINTF("marking as exhausted key %u\n", ekey); - assert(ekey != INVALID_EKEY); - assert(ekey < t->ekeyCount); - mmbit_set((u8 *)evec, t->ekeyCount, ekey); -} - -/** \brief Clear all keys in the exhaustion vector. 
*/ -static really_inline -void clearEvec(const struct RoseEngine *t, char *evec) { - DEBUG_PRINTF("clearing evec %p %u\n", evec, t->ekeyCount); - mmbit_clear((u8 *)evec, t->ekeyCount); -} - #endif diff --git a/src/util/masked_move.h b/src/util/masked_move.h index 93c79e75..09276e80 100644 --- a/src/util/masked_move.h +++ b/src/util/masked_move.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,7 +33,6 @@ #include "unaligned.h" #include "simd_utils.h" -#include "simd_utils_ssse3.h" #ifdef __cplusplus extern "C" { diff --git a/src/util/shuffle_ssse3.h b/src/util/shuffle_ssse3.h deleted file mode 100644 index d295839b..00000000 --- a/src/util/shuffle_ssse3.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef SHUFFLE_SSSE3_H -#define SHUFFLE_SSSE3_H - -#include "simd_utils_ssse3.h" - -#ifdef DEBUG -#include "compare.h" -static really_inline void shufDumpMsk(m128 msk) { - u8 * mskAsU8 = (u8 *)&msk; - for (int i = 0; i < 16; i++) { - u8 c = mskAsU8[i]; - for (int j = 0; j < 8; j++) { - if ((c >> (7-j)) & 0x1) - printf("1"); - else - printf("0"); - } - printf(" "); - } -} - -static really_inline void shufDumpMskAsChars(m128 msk) { - u8 * mskAsU8 = (u8 *)&msk; - for (int i = 0; i < 16; i++) { - u8 c = mskAsU8[i]; - if (ourisprint(c)) - printf("%c",c); - else - printf("."); - } -} -#endif - -#if !defined(NO_SSSE3) -static really_inline -u32 shufflePshufb128(m128 s, const m128 permute, const m128 compare) { - m128 shuffled = pshufb(s, permute); - m128 compared = and128(shuffled, compare); -#ifdef DEBUG - printf("State: "); shufDumpMsk(s); printf("\n"); - printf("Permute: "); shufDumpMsk(permute); printf("\n"); - printf("Compare: "); shufDumpMsk(compare); printf("\n"); - printf("Shuffled: "); shufDumpMsk(shuffled); printf("\n"); - printf("Compared: "); shufDumpMsk(compared); printf("\n"); -#endif - u16 rv = ~cmpmsk8(compared, shuffled); - return (u32)rv; -} -#endif // NO_SSSE3 - -#endif // SHUFFLE_SSSE3_H diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 63311b10..e4541411 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,7 +65,7 @@ typedef __m128i m128; #if defined(__AVX2__) typedef __m256i m256; #else -typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; +typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256; #endif // these should align to 16 and 32 respectively diff --git a/src/util/simd_utils.c b/src/util/simd_utils.c new file mode 100644 index 00000000..a86c568d --- /dev/null +++ b/src/util/simd_utils.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Lookup tables to support SIMD operations. + */ + +#include "simd_utils.h" + +const char vbs_mask_data[] ALIGN_CL_DIRECTIVE = { + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, +}; + +#define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0 +#define ZEROES_31 ZEROES_8, ZEROES_8, ZEROES_8, 0, 0, 0, 0, 0, 0, 0 +#define ZEROES_32 ZEROES_8, ZEROES_8, ZEROES_8, ZEROES_8 + +/** \brief LUT for the mask1bit functions. */ +const u8 simd_onebit_masks[] ALIGN_CL_DIRECTIVE = { + ZEROES_31, 0x01, ZEROES_32, + ZEROES_31, 0x02, ZEROES_32, + ZEROES_31, 0x04, ZEROES_32, + ZEROES_31, 0x08, ZEROES_32, + ZEROES_31, 0x10, ZEROES_32, + ZEROES_31, 0x20, ZEROES_32, + ZEROES_31, 0x40, ZEROES_32, + ZEROES_31, 0x80, ZEROES_32, +}; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 99ad7ce5..3544629f 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -33,6 +33,10 @@ #ifndef SIMD_UTILS #define SIMD_UTILS +#if !defined(_WIN32) && !defined(__SSSE3__) +#error SSSE3 instructions must be enabled +#endif + #include "config.h" #include // for memcpy @@ -68,10 +72,6 @@ #include "ue2common.h" #include "simd_types.h" -#if defined(__GNUC__) -#define USE_GCC_COMPOUND_STATEMENTS -#endif - // Define a common assume_aligned using an appropriate compiler built-in, if // it's available. Note that we need to handle C or C++ compilation. #ifdef __cplusplus @@ -84,23 +84,26 @@ # endif #endif -#ifdef _WIN32 -#define NO_ASM -#endif - // Fallback to identity case. #ifndef assume_aligned #define assume_aligned(x, y) (x) #endif +#ifdef __cplusplus +extern "C" { +#endif +extern const char vbs_mask_data[]; +#ifdef __cplusplus +} +#endif + static really_inline m128 ones128(void) { -#if !defined(NO_ASM) - // trick from Intel's optimization guide to generate all-ones. We have to - // use inline asm, as there's no intrinsic for this yet. - m128 ret; - __asm__ ("pcmpeqb %0,%0" : "=x"(ret)); - return ret; +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + /* gcc gets this right */ + return _mm_set1_epi8(0xFF); #else + /* trick from Intel's optimization guide to generate all-ones. 
+ * ICC converts this to the single cmpeq instruction */ return _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128()); #endif } @@ -146,34 +149,13 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) { #endif } -// forward decl -static really_inline m128 xor128(m128 a, m128 b); - -/** \brief Return msb mask of packet 8 bit compare equal */ -static really_inline unsigned short cmpmsk8(m128 a, m128 b) { - m128 tmp = _mm_cmpeq_epi8(a, b); - return _mm_movemask_epi8(tmp); -} - -#define shift2x64(a, b) _mm_slli_epi64((a), (b)) -#define rshift2x64(a, b) _mm_srli_epi64((a), (b)) +#define lshift64_m128(a, b) _mm_slli_epi64((a), (b)) +#define rshift64_m128(a, b) _mm_srli_epi64((a), (b)) #define eq128(a, b) _mm_cmpeq_epi8((a), (b)) #define movemask128(a) ((u32)_mm_movemask_epi8((a))) - -// We found that this generated better code with gcc-4.1 and with the default -// tuning settings on gcc-4.4 than just using the _mm_set1_epi8() instrinsic. static really_inline m128 set16x8(u8 c) { -#if !defined(__AVX2__) - m128 a = _mm_cvtsi32_si128((int)c); - a = _mm_unpacklo_epi8(a, a); - a = _mm_unpacklo_epi8(a, a); - a = _mm_shuffle_epi32(a, 0); - return a; -#else - // uses a broadcast for much win return _mm_set1_epi8(c); -#endif } static really_inline u32 movd(const m128 in) { @@ -190,16 +172,8 @@ static really_inline u64a movq(const m128 in) { #endif } -static really_inline m128 shiftRight8Bits(m128 a) { - return _mm_srli_si128(a,1); -} - -static really_inline m128 shiftLeft8Bits(m128 a) { - return _mm_slli_si128(a,1); -} - -#define byteShiftRight128(a, count_immed) _mm_srli_si128(a, count_immed) -#define byteShiftLeft128(a, count_immed) _mm_slli_si128(a, count_immed) +#define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) +#define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) #if !defined(__AVX2__) // TODO: this entire file needs restructuring - this carveout is awful @@ -209,8 +183,8 @@ static really_inline m128 shiftLeft8Bits(m128 a) { #define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4) #define extract64from256(a, imm) _mm_extract_epi64((imm >> 2) ? a.hi : a.lo, imm % 2) #else -#define extract32from256(a, imm) movd(byteShiftRight128((imm >> 2) ? a.hi : a.lo, (imm % 4) * 8)) -#define extract64from256(a, imm) movq(byteShiftRight128((imm >> 2) ? a.hi : a.lo, (imm % 2) * 8)) +#define extract32from256(a, imm) movd(_mm_srli_si128((imm >> 2) ? a.hi : a.lo, (imm % 4) * 8)) +#define extract64from256(a, imm) movq(_mm_srli_si128((imm >> 2) ? a.hi : a.lo, (imm % 2) * 8)) #endif #endif // !AVX2 @@ -231,10 +205,6 @@ static really_inline m128 andnot128(m128 a, m128 b) { return _mm_andnot_si128(a, b); } -// The shift amount is an immediate, so we define these operations as macros on -// Intel SIMD. -#define shift128(a, b) _mm_slli_epi64((a), (b)) - // aligned load static really_inline m128 load128(const void *ptr) { assert(ISALIGNED_N(ptr, alignof(m128))); @@ -275,70 +245,85 @@ m128 loadbytes128(const void *ptr, unsigned int n) { return a; } +extern const u8 simd_onebit_masks[]; + +static really_inline +m128 mask1bit128(unsigned int n) { + assert(n < sizeof(m128) * 8); + u32 mask_idx = ((n % 8) * 64) + 31; + mask_idx -= n / 8; + return loadu128(&simd_onebit_masks[mask_idx]); +} + // switches on bit N in the given vector. static really_inline void setbit128(m128 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - // We should be able to figure out a better way than this. 
- union { - m128 simd; - u8 bytes[sizeof(m128)]; - } x; - x.simd = *ptr; - - u8 *b = &x.bytes[n / 8]; - *b |= 1U << (n % 8); - - *ptr = x.simd; + *ptr = or128(mask1bit128(n), *ptr); } // switches off bit N in the given vector. static really_inline void clearbit128(m128 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - // We should be able to figure out a better way than this. - union { - m128 simd; - u8 bytes[sizeof(m128)]; - } x; - x.simd = *ptr; - - u8 *b = &x.bytes[n / 8]; - *b &= ~(1U << (n % 8)); - - *ptr = x.simd; + *ptr = andnot128(mask1bit128(n), *ptr); } // tests bit N in the given vector. static really_inline char testbit128(const m128 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - // We should be able to figure out a better way than this. - const char *bytes = (const char *)ptr; - return !!(bytes[n / 8] & (1 << (n % 8))); + const m128 mask = mask1bit128(n); +#if defined(__SSE4_1__) + return !_mm_testz_si128(mask, *ptr); +#else + return isnonzero128(and128(mask, *ptr)); +#endif } +// offset must be an immediate +#define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset) + +static really_inline +m128 pshufb(m128 a, m128 b) { + m128 result; + result = _mm_shuffle_epi8(a, b); + return result; +} + +static really_inline +m256 vpshufb(m256 a, m256 b) { +#if defined(__AVX2__) + return _mm256_shuffle_epi8(a, b); +#else + m256 rv; + rv.lo = pshufb(a.lo, b.lo); + rv.hi = pshufb(a.hi, b.hi); + return rv; +#endif +} + +static really_inline +m128 variable_byte_shift_m128(m128 in, s32 amount) { + assert(amount >= -16 && amount <= 16); + m128 shift_mask = loadu128(vbs_mask_data + 16 - amount); + return pshufb(in, shift_mask); +} + + /**** **** 256-bit Primitives ****/ #if defined(__AVX2__) -#define shift4x64(a, b) _mm256_slli_epi64((a), (b)) -#define rshift4x64(a, b) _mm256_srli_epi64((a), (b)) +#define lshift64_m256(a, b) _mm256_slli_epi64((a), (b)) +#define rshift64_m256(a, b) _mm256_srli_epi64((a), (b)) static really_inline m256 set32x8(u32 in) { - m128 a = _mm_cvtsi32_si128(in); - return _mm256_broadcastb_epi8(a); + return _mm256_set1_epi8(in); } #define eq256(a, b) _mm256_cmpeq_epi8((a), (b)) #define movemask256(a) ((u32)_mm256_movemask_epi8((a))) -static really_inline u32 cmpmsk16(m256 a, m256 b) { - m256 tmp = _mm256_cmpeq_epi8(a, b); - return _mm256_movemask_epi8(tmp); -} static really_inline m256 set2x128(m128 a) { return _mm256_broadcastsi128_si256(a); @@ -347,18 +332,18 @@ m256 set2x128(m128 a) { #else static really_inline -m256 shift4x64(m256 a, int b) { +m256 lshift64_m256(m256 a, int b) { m256 rv = a; - rv.lo = shift2x64(rv.lo, b); - rv.hi = shift2x64(rv.hi, b); + rv.lo = lshift64_m128(rv.lo, b); + rv.hi = lshift64_m128(rv.hi, b); return rv; } static really_inline -m256 rshift4x64(m256 a, int b) { +m256 rshift64_m256(m256 a, int b) { m256 rv = a; - rv.lo = rshift2x64(rv.lo, b); - rv.hi = rshift2x64(rv.hi, b); + rv.lo = rshift64_m128(rv.lo, b); + rv.hi = rshift64_m128(rv.hi, b); return rv; } static really_inline @@ -382,12 +367,7 @@ static really_inline m256 zeroes256(void) { static really_inline m256 ones256(void) { #if defined(__AVX2__) - m256 rv; -#if defined(NO_ASM) - rv = eq256(zeroes256(), zeroes256()); -#else - __asm__ ("vpcmpeqb %0,%0,%0" : "=x"(rv)); -#endif + m256 rv = _mm256_set1_epi8(0xFF); #else m256 rv = {ones128(), ones128()}; #endif @@ -398,13 +378,6 @@ static really_inline m256 ones256(void) { static really_inline m256 and256(m256 a, m256 b) { return _mm256_and_si256(a, b); } -#elif defined(USE_GCC_COMPOUND_STATEMENTS) -#define and256(a, b) ({ 
\ - m256 rv_and256; \ - rv_and256.lo = and128((a).lo, (b).lo); \ - rv_and256.hi = and128((a).hi, (b).hi); \ - rv_and256; \ -}) #else static really_inline m256 and256(m256 a, m256 b) { m256 rv; @@ -418,13 +391,6 @@ static really_inline m256 and256(m256 a, m256 b) { static really_inline m256 or256(m256 a, m256 b) { return _mm256_or_si256(a, b); } -#elif defined(USE_GCC_COMPOUND_STATEMENTS) -#define or256(a, b) ({ \ - m256 rv_or256; \ - rv_or256.lo = or128((a).lo, (b).lo); \ - rv_or256.hi = or128((a).hi, (b).hi); \ - rv_or256; \ -}) #else static really_inline m256 or256(m256 a, m256 b) { m256 rv; @@ -438,13 +404,6 @@ static really_inline m256 or256(m256 a, m256 b) { static really_inline m256 xor256(m256 a, m256 b) { return _mm256_xor_si256(a, b); } -#elif defined(USE_GCC_COMPOUND_STATEMENTS) -#define xor256(a, b) ({ \ - m256 rv_xor256; \ - rv_xor256.lo = xor128((a).lo, (b).lo); \ - rv_xor256.hi = xor128((a).hi, (b).hi); \ - rv_xor256; \ -}) #else static really_inline m256 xor256(m256 a, m256 b) { m256 rv; @@ -458,13 +417,6 @@ static really_inline m256 xor256(m256 a, m256 b) { static really_inline m256 not256(m256 a) { return _mm256_xor_si256(a, ones256()); } -#elif defined(USE_GCC_COMPOUND_STATEMENTS) -#define not256(a) ({ \ - m256 rv_not256; \ - rv_not256.lo = not128((a).lo); \ - rv_not256.hi = not128((a).hi); \ - rv_not256; \ -}) #else static really_inline m256 not256(m256 a) { m256 rv; @@ -478,13 +430,6 @@ static really_inline m256 not256(m256 a) { static really_inline m256 andnot256(m256 a, m256 b) { return _mm256_andnot_si256(a, b); } -#elif defined(USE_GCC_COMPOUND_STATEMENTS) -#define andnot256(a, b) ({ \ - m256 rv_andnot256; \ - rv_andnot256.lo = andnot128((a).lo, (b).lo); \ - rv_andnot256.hi = andnot128((a).hi, (b).hi); \ - rv_andnot256; \ -}) #else static really_inline m256 andnot256(m256 a, m256 b) { m256 rv; @@ -494,26 +439,6 @@ static really_inline m256 andnot256(m256 a, m256 b) { } #endif -// The shift amount is an immediate, so we define these operations as macros on -// Intel SIMD (using a GNU C extension). 
-#if defined(__AVX2__) -#define shift256(a, b) _mm256_slli_epi64((a), (b)) -#elif defined(__GNUC__) -#define shift256(a, b) ({ \ - m256 rv_shift256; \ - rv_shift256.lo = shift128(a.lo, b); \ - rv_shift256.hi = shift128(a.hi, b); \ - rv_shift256; \ -}) -#else -static really_inline m256 shift256(m256 a, unsigned b) { - m256 rv; - rv.lo = shift128(a.lo, b); - rv.hi = shift128(a.hi, b); - return rv; -} -#endif - static really_inline int diff256(m256 a, m256 b) { #if defined(__AVX2__) return !!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b)) ^ (int)-1); @@ -558,11 +483,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) { // aligned load static really_inline m256 load256(const void *ptr) { -#if defined(__AVX2__) assert(ISALIGNED_N(ptr, alignof(m256))); +#if defined(__AVX2__) return _mm256_load_si256((const m256 *)ptr); #else - assert(ISALIGNED_N(ptr, alignof(m128))); m256 rv = { load128(ptr), load128((const char *)ptr + 16) }; return rv; #endif @@ -582,11 +506,10 @@ static really_inline m256 load2x128(const void *ptr) { // aligned store static really_inline void store256(void *ptr, m256 a) { -#if defined(__AVX2__) assert(ISALIGNED_N(ptr, alignof(m256))); +#if defined(__AVX2__) _mm256_store_si256((m256 *)ptr, a); #else - assert(ISALIGNED_16(ptr)); ptr = assume_aligned(ptr, 16); *(m256 *)ptr = a; #endif @@ -618,6 +541,14 @@ m256 loadbytes256(const void *ptr, unsigned int n) { return a; } +static really_inline +m256 mask1bit256(unsigned int n) { + assert(n < sizeof(m256) * 8); + u32 mask_idx = ((n % 8) * 64) + 31; + mask_idx -= n / 8; + return loadu256(&simd_onebit_masks[mask_idx]); +} + #if !defined(__AVX2__) // switches on bit N in the given vector. static really_inline @@ -666,42 +597,19 @@ char testbit256(const m256 *ptr, unsigned int n) { // switches on bit N in the given vector. static really_inline void setbit256(m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - // We should be able to figure out a better way than this. - union { - m256 simd; - u8 bytes[sizeof(m256)]; - } x; - x.simd = *ptr; - - u8 *b = &x.bytes[n / 8]; - *b |= 1U << (n % 8); - - *ptr = x.simd; + *ptr = or256(mask1bit256(n), *ptr); } -// TODO: can we do this better in avx-land? static really_inline void clearbit256(m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - union { - m256 simd; - u8 bytes[sizeof(m256)]; - } x; - x.simd = *ptr; - - u8 *b = &x.bytes[n / 8]; - *b &= ~(1U << (n % 8)); - - *ptr = x.simd; + *ptr = andnot256(mask1bit256(n), *ptr); } // tests bit N in the given vector. 
static really_inline char testbit256(const m256 *ptr, unsigned int n) { - assert(n < sizeof(*ptr) * 8); - const char *bytes = (const char *)ptr; - return !!(bytes[n / 8] & (1 << (n % 8))); + const m256 mask = mask1bit256(n); + return !_mm256_testz_si256(mask, *ptr); } static really_really_inline @@ -714,27 +622,19 @@ m128 movdq_lo(m256 x) { return _mm256_extracti128_si256(x, 0); } -static really_inline -m256 shift256Right8Bits(m256 a) { - return _mm256_srli_si256(a, 1); -} - -static really_inline -m256 shift256Left8Bits(m256 a) { - return _mm256_slli_si256(a, 1); -} #define cast256to128(a) _mm256_castsi256_si128(a) #define cast128to256(a) _mm256_castsi128_si256(a) #define swap128in256(a) _mm256_permute4x64_epi64(a, 0x4E) #define insert128to256(a, b, imm) _mm256_inserti128_si256(a, b, imm) -#define byteShiftRight256(a, count_immed) _mm256_srli_si256(a, count_immed) -#define byteShiftLeft256(a, count_immed) _mm256_slli_si256(a, count_immed) +#define rshift128_m256(a, count_immed) _mm256_srli_si256(a, count_immed) +#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed) #define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2) #define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4) #define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a)) #define extractlow32from256(a) movd(cast256to128(a)) #define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b); #define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b); +#define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) #endif //AVX2 @@ -742,15 +642,6 @@ m256 shift256Left8Bits(m256 a) { **** 384-bit Primitives ****/ -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define and384(a, b) ({ \ - m384 rv_and384; \ - rv_and384.lo = and128((a).lo, (b).lo); \ - rv_and384.mid = and128((a).mid, (b).mid); \ - rv_and384.hi = and128((a).hi, (b).hi); \ - rv_and384; \ -}) -#else static really_inline m384 and384(m384 a, m384 b) { m384 rv; rv.lo = and128(a.lo, b.lo); @@ -758,17 +649,7 @@ static really_inline m384 and384(m384 a, m384 b) { rv.hi = and128(a.hi, b.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define or384(a, b) ({ \ - m384 rv_or384; \ - rv_or384.lo = or128((a).lo, (b).lo); \ - rv_or384.mid = or128((a).mid, (b).mid); \ - rv_or384.hi = or128((a).hi, (b).hi); \ - rv_or384; \ -}) -#else static really_inline m384 or384(m384 a, m384 b) { m384 rv; rv.lo = or128(a.lo, b.lo); @@ -776,17 +657,7 @@ static really_inline m384 or384(m384 a, m384 b) { rv.hi = or128(a.hi, b.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define xor384(a, b) ({ \ - m384 rv_xor384; \ - rv_xor384.lo = xor128((a).lo, (b).lo); \ - rv_xor384.mid = xor128((a).mid, (b).mid); \ - rv_xor384.hi = xor128((a).hi, (b).hi); \ - rv_xor384; \ -}) -#else static really_inline m384 xor384(m384 a, m384 b) { m384 rv; rv.lo = xor128(a.lo, b.lo); @@ -794,17 +665,6 @@ static really_inline m384 xor384(m384 a, m384 b) { rv.hi = xor128(a.hi, b.hi); return rv; } -#endif - -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define not384(a) ({ \ - m384 rv_not384; \ - rv_not384.lo = not128((a).lo); \ - rv_not384.mid = not128((a).mid); \ - rv_not384.hi = not128((a).hi); \ - rv_not384; \ -}) -#else static really_inline m384 not384(m384 a) { m384 rv; rv.lo = not128(a.lo); @@ -812,17 +672,6 @@ static really_inline m384 not384(m384 a) { rv.hi = not128(a.hi); return rv; } -#endif - -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define andnot384(a, b) ({ \ - m384 rv_andnot384; \ - 
rv_andnot384.lo = andnot128((a).lo, (b).lo); \ - rv_andnot384.mid = andnot128((a).mid, (b).mid); \ - rv_andnot384.hi = andnot128((a).hi, (b).hi); \ - rv_andnot384; \ -}) -#else static really_inline m384 andnot384(m384 a, m384 b) { m384 rv; rv.lo = andnot128(a.lo, b.lo); @@ -830,27 +679,16 @@ static really_inline m384 andnot384(m384 a, m384 b) { rv.hi = andnot128(a.hi, b.hi); return rv; } -#endif -// The shift amount is an immediate, so we define these operations as macros on -// Intel SIMD (using a GNU C extension). -#if defined(__GNUC__) -#define shift384(a, b) ({ \ - m384 rv; \ - rv.lo = shift128(a.lo, b); \ - rv.mid = shift128(a.mid, b); \ - rv.hi = shift128(a.hi, b); \ - rv; \ -}) -#else -static really_inline m384 shift384(m384 a, unsigned b) { +// The shift amount is an immediate +static really_really_inline +m384 lshift64_m384(m384 a, unsigned b) { m384 rv; - rv.lo = shift128(a.lo, b); - rv.mid = shift128(a.mid, b); - rv.hi = shift128(a.hi, b); + rv.lo = lshift64_m128(a.lo, b); + rv.mid = lshift64_m128(a.mid, b); + rv.hi = lshift64_m128(a.hi, b); return rv; } -#endif static really_inline m384 zeroes384(void) { m384 rv = {zeroes128(), zeroes128(), zeroes128()}; @@ -980,103 +818,49 @@ char testbit384(const m384 *ptr, unsigned int n) { **** 512-bit Primitives ****/ -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define and512(a, b) ({ \ - m512 rv_and512; \ - rv_and512.lo = and256((a).lo, (b).lo); \ - rv_and512.hi = and256((a).hi, (b).hi); \ - rv_and512; \ -}) -#else static really_inline m512 and512(m512 a, m512 b) { m512 rv; rv.lo = and256(a.lo, b.lo); rv.hi = and256(a.hi, b.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define or512(a, b) ({ \ - m512 rv_or512; \ - rv_or512.lo = or256((a).lo, (b).lo); \ - rv_or512.hi = or256((a).hi, (b).hi); \ - rv_or512; \ -}) -#else static really_inline m512 or512(m512 a, m512 b) { m512 rv; rv.lo = or256(a.lo, b.lo); rv.hi = or256(a.hi, b.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define xor512(a, b) ({ \ - m512 rv_xor512; \ - rv_xor512.lo = xor256((a).lo, (b).lo); \ - rv_xor512.hi = xor256((a).hi, (b).hi); \ - rv_xor512; \ -}) -#else static really_inline m512 xor512(m512 a, m512 b) { m512 rv; rv.lo = xor256(a.lo, b.lo); rv.hi = xor256(a.hi, b.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define not512(a) ({ \ - m512 rv_not512; \ - rv_not512.lo = not256((a).lo); \ - rv_not512.hi = not256((a).hi); \ - rv_not512; \ -}) -#else static really_inline m512 not512(m512 a) { m512 rv; rv.lo = not256(a.lo); rv.hi = not256(a.hi); return rv; } -#endif -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define andnot512(a, b) ({ \ - m512 rv_andnot512; \ - rv_andnot512.lo = andnot256((a).lo, (b).lo); \ - rv_andnot512.hi = andnot256((a).hi, (b).hi); \ - rv_andnot512; \ -}) -#else static really_inline m512 andnot512(m512 a, m512 b) { m512 rv; rv.lo = andnot256(a.lo, b.lo); rv.hi = andnot256(a.hi, b.hi); return rv; } -#endif -// The shift amount is an immediate, so we define these operations as macros on -// Intel SIMD (using a GNU C extension). 
-#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define shift512(a, b) ({ \ - m512 rv_shift512; \ - rv_shift512.lo = shift256(a.lo, b); \ - rv_shift512.hi = shift256(a.hi, b); \ - rv_shift512; \ -}) -#else -static really_inline m512 shift512(m512 a, unsigned b) { +// The shift amount is an immediate +static really_really_inline +m512 lshift64_m512(m512 a, unsigned b) { m512 rv; - rv.lo = shift256(a.lo, b); - rv.hi = shift256(a.hi, b); + rv.lo = lshift64_m256(a.lo, b); + rv.hi = lshift64_m256(a.hi, b); return rv; } -#endif static really_inline m512 zeroes512(void) { m512 rv = {zeroes256(), zeroes256()}; @@ -1132,19 +916,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) { // aligned load static really_inline m512 load512(const void *ptr) { - assert(ISALIGNED_16(ptr)); + assert(ISALIGNED_N(ptr, alignof(m256))); m512 rv = { load256(ptr), load256((const char *)ptr + 32) }; return rv; } // aligned store static really_inline void store512(void *ptr, m512 a) { + assert(ISALIGNED_N(ptr, alignof(m256))); #if defined(__AVX2__) m512 *x = (m512 *)ptr; store256(&x->lo, a.lo); store256(&x->hi, a.hi); #else - assert(ISALIGNED_16(ptr)); ptr = assume_aligned(ptr, 16); *(m512 *)ptr = a; #endif diff --git a/src/util/simd_utils_ssse3.h b/src/util/simd_utils_ssse3.h deleted file mode 100644 index 6854ade3..00000000 --- a/src/util/simd_utils_ssse3.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief SIMD primitives specifically for Intel SSSE3 platforms. 
- */ - -#ifndef SIMD_UTILS_SSSE3_H_E27DF795C9AA02 -#define SIMD_UTILS_SSSE3_H_E27DF795C9AA02 - -#if !defined(_WIN32) && !defined(__SSSE3__) -#error SSSE3 instructions must be enabled -#endif - -#include "simd_utils.h" -#include "ue2common.h" - -// we may already have x86intrin.h -#if !defined(USE_X86INTRIN_H) -#if defined(HAVE_C_INTRIN_H) -#include -#elif defined(HAVE_TMMINTRIN_H) -#include // SSSE3 intrinsics -#else -#define I_HAVE_BROKEN_INTRINSICS -#endif -#endif - - -#if !defined(I_HAVE_BROKEN_INTRINSICS) -// newish compilers get this right -#define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset) -#else -// must be inline, even in weak-sauce debug builds. -// oldish compilers either don't have the intrinsic, or force one arg through memory -static really_really_inline -m128 palignr(m128 r, m128 l, const int offset) { - __asm__ ("palignr %2,%1,%0" : "+x"(r) : "x"(l), "i"(offset)); - return r; -} -#endif - - -static really_inline -m128 pshufb(m128 a, m128 b) { - m128 result; -#if !defined(I_HAVE_BROKEN_INTRINSICS) - result = _mm_shuffle_epi8(a, b); -#else - __asm__("pshufb\t%1,%0" : "=x"(result) : "xm"(b), "0"(a)); -#endif - return result; -} - -#ifdef __cplusplus -extern "C" { -#endif -extern const char vbs_mask_data[]; -#ifdef __cplusplus -} -#endif - -static really_inline -m128 variable_byte_shift_m128(m128 in, s32 amount) { - assert(amount >= -16 && amount <= 16); - m128 shift_mask = loadu128(vbs_mask_data + 16 - amount); - return pshufb(in, shift_mask); -} - -#if defined(__AVX2__) - -static really_inline -m256 vpshufb(m256 a, m256 b) { - return _mm256_shuffle_epi8(a, b); -} - -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define vpalignr(r, l, offset) ({ \ - m256 res = _mm256_alignr_epi8(r, l, offset); \ - res; \ -}) -#else -#define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) -#endif - -#else // not __AVX2__ - -static really_inline -m256 vpshufb(m256 a, m256 b) { - m256 rv; - rv.lo = pshufb(a.lo, b.lo); - rv.hi = pshufb(a.hi, b.hi); - return rv; -} - -/* palignr requires the offset to be an immediate, which we can do with a - * compound macro, otherwise we have to enumerate the offsets and hope the - * compiler can throw the rest away. 
*/ -#if defined(USE_GCC_COMPOUND_STATEMENTS) -#define vpalignr(r, l, offset) ({ \ - m256 res; \ - res.lo = palignr(r.lo, l.lo, offset); \ - res.hi = palignr(r.hi, l.hi, offset); \ - res; \ -}) -#else -#define VPALIGN_CASE(N) case N: \ - res.lo = palignr(r.lo, l.lo, N); \ - res.hi = palignr(r.hi, l.hi, N); \ - return res; -static really_inline -m256 vpalignr(m256 r, m256 l, const int offset) { - m256 res; - switch (offset) { - VPALIGN_CASE(0) - VPALIGN_CASE(1) - VPALIGN_CASE(2) - VPALIGN_CASE(3) - VPALIGN_CASE(4) - VPALIGN_CASE(5) - VPALIGN_CASE(6) - VPALIGN_CASE(7) - VPALIGN_CASE(8) - VPALIGN_CASE(9) - VPALIGN_CASE(10) - VPALIGN_CASE(11) - VPALIGN_CASE(12) - VPALIGN_CASE(13) - VPALIGN_CASE(14) - VPALIGN_CASE(15) - default: - assert(0); - return zeroes256(); - } -} -#undef VPALIGN_CASE -#endif -#endif // __AVX2__ - -#endif /* SIMD_UTILS_SSSE3_H_E27DF795C9AA02 */ - diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index e3b01363..217d08ea 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -82,7 +82,7 @@ private: void increment() { ++it; } void decrement() { --it; } void advance(size_t n) { it += n; } - typename WrappedIter::difference_type + typename std::iterator_traits::difference_type distance_to(const iter_wrapper &other) const { return other.it - it; } diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 88695ea9..3c7be473 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,8 +55,6 @@ size_t maxStringSelfOverlap(const std::string &a, bool nocase); /// Compares two strings, returns non-zero if they're different. u32 cmp(const char *a, const char *b, size_t len, bool nocase); -class CharReach; - struct ue2_literal { public: /// Single element proxy, pointed to by our const_iterator. 
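The ue2_containers.h change above replaces the iterator wrapper's nested `difference_type` lookup with `std::iterator_traits`. As a brief aside (not part of this diff, just standard-library behaviour): raw pointers are valid random-access iterators but carry no nested typedefs, so only the `iterator_traits` form resolves for them, while class-type iterators work either way. A minimal illustration:

```cpp
#include <cstddef>
#include <iterator>
#include <type_traits>
#include <vector>

// iterator_traits yields a difference_type for both raw pointers and
// class-type iterators; a nested ::difference_type exists only for the latter.
static_assert(
    std::is_same<std::iterator_traits<int *>::difference_type,
                 std::ptrdiff_t>::value,
    "raw pointers get their difference_type via iterator_traits");
static_assert(
    std::is_same<
        std::iterator_traits<std::vector<int>::iterator>::difference_type,
        std::ptrdiff_t>::value,
    "class-type iterators resolve the same way");
```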
@@ -124,6 +122,13 @@ public: ue2_literal &operator=(const ue2_literal &) = default; ue2_literal &operator=(ue2_literal &&) = default; + template + ue2_literal(InputIt b, InputIt e) { + for (; b != e; ++b) { + push_back(*b); + } + } + size_type length() const { return s.length(); } bool empty() const { return s.empty(); } ue2_literal substr(size_type pos, size_type n = std::string::npos) const; diff --git a/src/util/uniform_ops.h b/src/util/uniform_ops.h index 45ea4108..0619c7e4 100644 --- a/src/util/uniform_ops.h +++ b/src/util/uniform_ops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -125,12 +125,12 @@ #define andnot_m384(a, b) (andnot384(a, b)) #define andnot_m512(a, b) (andnot512(a, b)) -#define shift_u32(a, b) ((a) << (b)) -#define shift_u64a(a, b) ((a) << (b)) -#define shift_m128(a, b) (shift128(a, b)) -#define shift_m256(a, b) (shift256(a, b)) -#define shift_m384(a, b) (shift384(a, b)) -#define shift_m512(a, b) (shift512(a, b)) +#define lshift_u32(a, b) ((a) << (b)) +#define lshift_u64a(a, b) ((a) << (b)) +#define lshift_m128(a, b) (lshift64_m128(a, b)) +#define lshift_m256(a, b) (lshift64_m256(a, b)) +#define lshift_m384(a, b) (lshift64_m384(a, b)) +#define lshift_m512(a, b) (lshift64_m512(a, b)) #define isZero_u8(a) ((a) == 0) #define isZero_u32(a) ((a) == 0) diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index a893d3d5..63f3a9ac 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -25,6 +25,11 @@ if(CXX_WUNUSED_VARIABLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") endif() +if(CMAKE_COMPILER_IS_GNUCC) + # spurious warnings? 
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds") +endif() + add_library(gtest STATIC ${gtest_SOURCES}) add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) @@ -65,6 +70,7 @@ set(unit_internal_SOURCES internal/pqueue.cpp internal/repeat.cpp internal/rose_build_merge.cpp + internal/rose_mask.cpp internal/rvermicelli.cpp internal/simd_utils.cpp internal/shuffle.cpp diff --git a/unit/hyperscan/allocators.cpp b/unit/hyperscan/allocators.cpp index 66c456ee..40c45072 100644 --- a/unit/hyperscan/allocators.cpp +++ b/unit/hyperscan/allocators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,9 @@ #include "test_util.h" #include +#include + +using std::string; static void *null_malloc(size_t) { return nullptr; } @@ -83,6 +86,22 @@ TEST(CustomAllocator, TwoAlignedCompile) { hs_set_database_allocator(nullptr, nullptr); } +TEST(CustomAllocator, TwoAlignedCompileError) { + hs_set_misc_allocator(two_aligned_malloc, two_aligned_free); + + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + const hs_platform_info_t *platform = nullptr; + hs_error_t err = + hs_compile("\\1", 0, HS_MODE_BLOCK, platform, &db, &compile_err); + ASSERT_EQ(HS_COMPILER_ERROR, err); + ASSERT_EQ(nullptr, db); + ASSERT_NE(nullptr, compile_err); + EXPECT_STREQ("Allocator returned misaligned memory.", compile_err->message); + hs_free_compile_error(compile_err); + hs_set_database_allocator(nullptr, nullptr); +} + TEST(CustomAllocator, TwoAlignedDatabaseInfo) { hs_database_t *db = buildDB("foobar", 0, 0, HS_MODE_BLOCK); ASSERT_TRUE(db != nullptr); @@ -149,3 +168,30 @@ TEST(CustomAllocator, TwoAlignedAllocScratch) { hs_set_scratch_allocator(nullptr, nullptr); hs_free_database(db); } + +TEST(CustomAllocator, NullMallocExpressionInfo) { + hs_set_allocator(null_malloc, nullptr); + + string pattern = "foobar"; + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = hs_expression_info(pattern.c_str(), 0, &info, &c_err); + ASSERT_EQ(HS_COMPILER_ERROR, err); + ASSERT_NE(nullptr, c_err); + hs_free_compile_error(c_err); + hs_set_allocator(nullptr, nullptr); +} + +TEST(CustomAllocator, TwoAlignedExpressionInfo) { + hs_set_misc_allocator(two_aligned_malloc, two_aligned_free); + + string pattern = "\\1"; + hs_expr_info_t *info = nullptr; + hs_compile_error_t *c_err = nullptr; + hs_error_t err = hs_expression_info(pattern.c_str(), 0, &info, &c_err); + ASSERT_EQ(HS_COMPILER_ERROR, err); + ASSERT_NE(nullptr, c_err); + EXPECT_STREQ("Allocator returned misaligned memory.", c_err->message); + hs_free_compile_error(c_err); + hs_set_allocator(nullptr, nullptr); +} diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 9fc3a413..1a33210d 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -32,7 +32,6 @@ 31:/\B/W #\B unsupported in UCP mode at index 0. 32:/foo(?{print "Hello world\n";})bar/ #Embedded code is not supported at index 3. 33:/the (\S+)(?{ $color = $^N }) (\S+)(?{ $animal = $^N })/i #Embedded code is not supported at index 9. -34:/foobar\E/s #Unmatched \E at index 6. 35:/\X/8 #\X unsupported at index 0. 36:/\B+/ #Invalid repeat at index 2. 37:/\B?/ #Invalid repeat at index 2. 
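Removing the `/foobar\E/s` entry from bad_patterns.txt above implies that a stray `\E` (one with no preceding `\Q`) is no longer treated as a compile error. The sketch below is not part of this change; it only uses the public `hs_compile()` API (as the unit tests in this diff do) to check that behaviour from application code, assuming the pattern now compiles cleanly:

```cpp
#include <cstdio>
#include <hs.h>

int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;
    // "/foobar\E/s" from bad_patterns.txt: the 's' flag maps to HS_FLAG_DOTALL,
    // and the stray \E is written as "\\E" in a C++ string literal.
    hs_error_t err = hs_compile("foobar\\E", HS_FLAG_DOTALL, HS_MODE_BLOCK,
                                nullptr, &db, &compile_err);
    if (err != HS_SUCCESS) {
        // This path was previously taken with "Unmatched \E at index 6."
        std::printf("compile failed: %s\n", compile_err->message);
        hs_free_compile_error(compile_err);
        return 1;
    }
    hs_free_database(db);
    return 0;
}
```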
diff --git a/unit/hyperscan/serialize.cpp b/unit/hyperscan/serialize.cpp index e13d27b2..7e0fcb7c 100644 --- a/unit/hyperscan/serialize.cpp +++ b/unit/hyperscan/serialize.cpp @@ -483,4 +483,71 @@ TEST(Serialize, DeserializeUnalignedMalloc) { free(bytes); } +TEST(Serialize, DeserializeGarbage) { + hs_database_t *db; + hs_compile_error_t *c_err; + static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + + hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + // determine database size for subsequent hs_deserialize_database_at + size_t db_len; + err = hs_database_size(db, &db_len); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_NE(0, db_len); + + // serialize + char *bytes = nullptr; + size_t bytes_len = 0; + + err = hs_serialize_database(db, &bytes, &bytes_len); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_NE(0, bytes_len); + + hs_free_database(db); + + // append '\0' byte to the serialized string to spoil it + bytes = (char *)realloc(bytes, bytes_len + 1); + ASSERT_NE(nullptr, bytes); + bytes[bytes_len] = '\0'; + + // create set of invalid serializations + struct Arg { + char *start; + size_t len; + }; + + const Arg invalid_args[] = { + {bytes + 1, bytes_len}, + {bytes + 1, bytes_len - 1}, + {bytes, bytes_len - 1}, + {bytes, bytes_len + 1}, + }; + + for (const Arg &arg : invalid_args) { + hs_database_t *a_db; + err = hs_deserialize_database(arg.start, arg.len, &a_db); + ASSERT_NE(HS_SUCCESS, err); + + char *new_db = (char *)malloc(db_len); + ASSERT_NE(nullptr, new_db); + err = hs_deserialize_database_at(arg.start, arg.len, + (hs_database_t *)(new_db)); + ASSERT_NE(HS_SUCCESS, err); + free(new_db); + + char *info; + err = hs_serialized_database_info(arg.start, arg.len, &info); + ASSERT_NE(HS_SUCCESS, err); + + size_t ser_len; + err = hs_serialized_database_size(arg.start, arg.len, &ser_len); + ASSERT_NE(HS_SUCCESS, err); + } + + free(bytes); +} + } diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index e13270dc..4d476932 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -412,3 +412,27 @@ TEST(BitUtils, bf_it_1) { ASSERT_EQ(~0U, bf64_iterate(1ULL << 63, 63)); } +TEST(BitUtils, rank_in_mask32) { + for (u32 i = 0; i < 32; i++) { + ASSERT_EQ(i, rank_in_mask32(0xffffffff, i)); + ASSERT_EQ(0, rank_in_mask32(1U << i, i)); + } + ASSERT_EQ(0, rank_in_mask32(0xf0f0f0f0, 4)); + ASSERT_EQ(1, rank_in_mask32(0xf0f0f0f0, 5)); + ASSERT_EQ(3, rank_in_mask32(0xf0f0f0f0, 7)); + ASSERT_EQ(7, rank_in_mask32(0xf0f0f0f0, 15)); + ASSERT_EQ(15, rank_in_mask32(0xf0f0f0f0, 31)); +} + +TEST(BitUtils, rank_in_mask64) { + for (u32 i = 0; i < 64; i++) { + ASSERT_EQ(i, rank_in_mask64(0xffffffffffffffffULL, i)); + ASSERT_EQ(0, rank_in_mask64(1ULL << i, i)); + } + ASSERT_EQ(0, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 4)); + ASSERT_EQ(1, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 5)); + ASSERT_EQ(3, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 7)); + ASSERT_EQ(7, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 15)); + ASSERT_EQ(15, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 31)); + ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63)); +} diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index 2bb359df..e40bda02 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -36,6 +36,7 @@ #include "nfa/nfa_internal.h" #include "nfa/nfa_api_util.h" #include "nfagraph/ng_lbr.h" +#include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/compile_context.h" #include "grey.h" @@ -71,7 +72,7 @@ struct 
LbrTestParams { }; static -int onMatch(u64a, ReportID, void *ctx) { +int onMatch(u64a, u64a, ReportID, void *ctx) { unsigned *matches = (unsigned *)ctx; (*matches)++; return MO_CONTINUE_MATCHING; @@ -97,6 +98,7 @@ protected: ParsedExpression parsed(0, pattern.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); ASSERT_TRUE(isLBR(*g, grey)); @@ -122,9 +124,9 @@ protected: q.length = 0; // filled in by test q.history = nullptr; q.hlength = 0; + q.scratch = nullptr; // not needed by LBR q.report_current = 0; q.cb = onMatch; - q.som_cb = nullptr; // only used by Haig q.context = &matches; } diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 91ab09db..6bb4fcb9 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -31,14 +31,15 @@ #include "grey.h" #include "compiler/compiler.h" -#include "nfagraph/ng.h" -#include "nfagraph/ng_limex.h" -#include "nfagraph/ng_restructuring.h" #include "nfa/limex_context.h" #include "nfa/limex_internal.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_util.h" #include "nfa/nfa_internal.h" +#include "nfagraph/ng.h" +#include "nfagraph/ng_limex.h" +#include "nfagraph/ng_restructuring.h" +#include "nfagraph/ng_util.h" #include "util/alloc.h" #include "util/target_info.h" @@ -51,7 +52,7 @@ static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^" static const u32 MATCH_REPORT = 1024; static -int onMatch(u64a, ReportID, void *ctx) { +int onMatch(u64a, u64a, ReportID, void *ctx) { unsigned *matches = (unsigned *)ctx; (*matches)++; return MO_CONTINUE_MATCHING; @@ -76,6 +77,7 @@ protected: ParsedExpression parsed(0, expr.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); rm.setProgramOffset(0, MATCH_REPORT); @@ -102,9 +104,9 @@ protected: q.length = SCAN_DATA.size(); q.history = nullptr; q.hlength = 0; + q.scratch = nullptr; /* limex does not use scratch */ q.report_current = 0; q.cb = onMatch; - q.som_cb = nullptr; // only used by Haig q.context = &matches; } @@ -129,7 +131,7 @@ protected: INSTANTIATE_TEST_CASE_P( LimEx, LimExModelTest, - Range((int)LIMEX_NFA_32_1, (int)LIMEX_NFA_512_7)); + Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExModelTest, StateSize) { ASSERT_TRUE(nfa != nullptr); @@ -292,8 +294,7 @@ TEST_P(LimExModelTest, CheckFinalState) { // Check for EOD matches. char rv = nfaCheckFinalState(nfa.get(), full_state.get(), - stream_state.get(), end, onMatch, nullptr, - &matches); + stream_state.get(), end, onMatch, &matches); ASSERT_EQ(MO_CONTINUE_MATCHING, rv); } @@ -311,14 +312,14 @@ protected: ParsedExpression parsed(0, expr.c_str(), flags, 0); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); // Reverse the graph and add some reports on the accept vertices. 
NGHolder g_rev(NFA_REV_PREFIX); reverseHolder(*g, g_rev); - NFAGraph::inv_adjacency_iterator ai, ae; - for (tie(ai, ae) = inv_adjacent_vertices(g_rev.accept, g_rev); ai != ae; - ++ai) { - g_rev[*ai].reports.insert(0); + clearReports(g_rev); + for (NFAVertex v : inv_adjacent_vertices_range(g_rev.accept, g_rev)) { + g_rev[v].reports.insert(0); } nfa = constructReversedNFA(g_rev, type, cc); @@ -336,7 +337,7 @@ protected: }; INSTANTIATE_TEST_CASE_P(LimExReverse, LimExReverseTest, - Range((int)LIMEX_NFA_32_1, (int)LIMEX_NFA_512_7)); + Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExReverseTest, BlockExecReverse) { ASSERT_TRUE(nfa != nullptr); @@ -370,6 +371,7 @@ protected: ReportManager rm(cc.grey); unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + clearReports(*g); rm.setProgramOffset(0, MATCH_REPORT); @@ -396,9 +398,9 @@ protected: q.length = ZOMBIE_SCAN_DATA.length(); q.history = nullptr; q.hlength = 0; + q.scratch = nullptr; /* limex does not use scratch */ q.report_current = 0; q.cb = onMatch; - q.som_cb = nullptr; // only used by Haig q.context = &matches; } @@ -422,7 +424,7 @@ protected: }; INSTANTIATE_TEST_CASE_P(LimExZombie, LimExZombieTest, - Range((int)LIMEX_NFA_32_1, (int)LIMEX_NFA_512_7)); + Range((int)LIMEX_NFA_32, (int)LIMEX_NFA_512)); TEST_P(LimExZombieTest, GetZombieStatus) { ASSERT_TRUE(nfa != nullptr); diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp index 45a24f46..bdf56ff9 100644 --- a/unit/internal/multiaccel_matcher.cpp +++ b/unit/internal/multiaccel_matcher.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,47 +43,16 @@ extern "C" { #include "util/alloc.h" #include "util/charreach.h" +#include +#include +#include #include #include -#include -#include -#include using namespace ue2; using namespace std; using namespace testing; -/* - * Static functions needed for this test's wellbeing - */ - -// char generator -static inline -char getChar(const CharReach &cr, bool match) { - char result; - do { - result = rand() % CharReach::npos; - } while (cr.test(result) != match); - return result; -} - -// appends a string with matches/unmatches according to input match pattern -static -void getMatch(u8 *result, u32 start, const string &pattern, - const CharReach &cr) { - for (const auto &c : pattern) { - result[start++] = getChar(cr, c == '1'); - } -} - -// appends non-matching noise of certain lengths -static -void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { - for (unsigned i = 0; i < len; i++) { - result[start + i] = getChar(cr, false); - } -} - // test parameters structure struct MultiaccelTestParam { string match_pattern; @@ -126,6 +95,34 @@ protected: test_all_offsets = p.test_all_offsets; } + char getChar(const CharReach &cr) { + assert(cr.count() > 0); + auto dist = uniform_int_distribution(0, cr.count() - 1); + size_t result = cr.find_nth(dist(prng)); + assert(result != CharReach::npos); + return (char)result; + } + + // char generator + char getChar(const CharReach &cr, bool match) { + return getChar(match ? 
cr : ~cr); + } + + // appends a string with matches/unmatches according to input match pattern + void getMatch(u8 *result, u32 start, const string &pattern, + const CharReach &cr) { + for (const auto &c : pattern) { + result[start++] = getChar(cr, c == '1'); + } + } + + // appends non-matching noise of certain lengths + void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { + for (unsigned i = 0; i < len; i++) { + result[start + i] = getChar(cr, false); + } + } + // deferred buffer generation, as we don't know CharReach before we run the test void GenerateBuffer(const CharReach &cr) { const MultiaccelTestParam &p = GetParam(); @@ -167,6 +164,10 @@ protected: aligned_free(buffer); } + // We want our tests to be deterministic, so we use a PRNG in the test + // fixture. + mt19937 prng; + u32 match_idx; u8 *buffer; bool test_all_offsets; diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 3677e1d2..3ca1923f 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,8 @@ */ /** - * Unit tests for checking the removeGraphEquivalences code in nfagraph/ng_equivalence.cpp. + * Unit tests for checking the removeGraphEquivalences code in + * nfagraph/ng_equivalence.cpp. */ #include "config.h" @@ -71,10 +72,9 @@ TEST(NFAGraph, RemoveEquivalence1) { ASSERT_EQ(2U, in_degree(g.accept, g)); // Find a vertex that goes right after startDs - NFAVertex a = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.startDs, g); ai != ae; ++ai) { - a = *ai; + NFAVertex a = NGHolder::null_vertex(); + for (NFAVertex v : adjacent_vertices_range(g.startDs, g)) { + a = v; if (a == g.startDs) { continue; } @@ -87,8 +87,8 @@ TEST(NFAGraph, RemoveEquivalence1) { ASSERT_TRUE(a != nullptr); // There should be two edges from v to nodes with reachability 'b' and 'c' - NFAVertex b = NFAGraph::null_vertex(); - NFAVertex c = NFAGraph::null_vertex(); + NFAVertex b = NGHolder::null_vertex(); + NFAVertex c = NGHolder::null_vertex(); for (NFAVertex tmp : adjacent_vertices_range(a, g)) { const CharReach &tmpcr = g[tmp].char_reach; ASSERT_EQ(1U, tmpcr.count()); @@ -133,11 +133,9 @@ TEST(NFAGraph, RemoveEquivalence2) { ASSERT_EQ(1U, in_degree(g.accept, g)); // Find a vertex leading to accept - NFAVertex a = NFAGraph::null_vertex(); - NFAGraph::inv_adjacency_iterator ai, ae; - for (tie(ai, ae) = inv_adjacent_vertices(g.accept, g); ai != ae; - ++ai) { - a = *ai; + NFAVertex a = NGHolder::null_vertex(); + for (NFAVertex v : inv_adjacent_vertices_range(g.accept, g)) { + a = v; if (a == g.accept) { continue; } @@ -150,8 +148,8 @@ TEST(NFAGraph, RemoveEquivalence2) { ASSERT_TRUE(a != nullptr); // There should be two edges from v to nodes with reachability 'b' and 'c' - NFAVertex b = NFAGraph::null_vertex(); - NFAVertex c = NFAGraph::null_vertex(); + NFAVertex b = NGHolder::null_vertex(); + NFAVertex c = NGHolder::null_vertex(); for (NFAVertex tmp : inv_adjacent_vertices_range(a, g)) { const CharReach &tmpcr = g[tmp].char_reach; ASSERT_EQ(1U, tmpcr.count()); @@ -197,10 +195,9 @@ TEST(NFAGraph, RemoveEquivalence3) { ASSERT_EQ(2U, in_degree(g.accept, g)); // Find a vertex 'a' that goes right after startDs - NFAVertex a = 
NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.startDs, g); ai != ae; ++ai) { - a = *ai; + NFAVertex a = NGHolder::null_vertex(); + for (NFAVertex v : adjacent_vertices_range(g.startDs, g)) { + a = v; if (a == g.startDs) { continue; } @@ -234,10 +231,9 @@ TEST(NFAGraph, RemoveEquivalence3) { ASSERT_TRUE(edge(dot2, dot1, g).second); // now, let's find X and Y nodes - NFAVertex X = NFAGraph::null_vertex(); - NFAVertex Y = NFAGraph::null_vertex(); - for (tie(ai, ae) = adjacent_vertices(dot2, g); ai != ae; ++ai) { - NFAVertex tmp = *ai; + NFAVertex X = NGHolder::null_vertex(); + NFAVertex Y = NGHolder::null_vertex(); + for (NFAVertex tmp : adjacent_vertices_range(dot2, g)) { // we already know about dot1, so skip it if (tmp == dot1) { @@ -290,12 +286,9 @@ TEST(NFAGraph, RemoveEquivalence4) { ASSERT_EQ(1U, in_degree(g.accept, g)); // Find X and Y nodes that are connected to startDs - NFAVertex X = NFAGraph::null_vertex(); - NFAVertex Y = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.startDs, g); ai != ae; ++ai) { - NFAVertex tmp = *ai; - + NFAVertex X = NGHolder::null_vertex(); + NFAVertex Y = NGHolder::null_vertex(); + for (NFAVertex tmp : adjacent_vertices_range(g.startDs, g)) { // skip startDs if (tmp == g.startDs) { continue; @@ -341,10 +334,8 @@ TEST(NFAGraph, RemoveEquivalence4) { ASSERT_TRUE(edge(dot2, dot1, g).second); // now find 'a' - NFAVertex a = NFAGraph::null_vertex(); - for (tie(ai, ae) = adjacent_vertices(dot2, g); ai != ae; ++ai) { - NFAVertex tmp = *ai; - + NFAVertex a = NGHolder::null_vertex(); + for (NFAVertex tmp : adjacent_vertices_range(dot2, g)) { // skip dot1 if (tmp == dot1) { continue; @@ -392,10 +383,9 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_EQ(1U, in_degree(g.accept, g)); // find first vertex and ensure it has a self loop - NFAVertex v = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.startDs, g); ai != ae; ++ai) { - v = *ai; + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(g.startDs, g)) { + v = t; if (v == g.startDs) { continue; } @@ -409,15 +399,13 @@ TEST(NFAGraph, RemoveEquivalence5) { ASSERT_TRUE(v != nullptr); // now, find the vertex leading to accept - NFAVertex v2 = NFAGraph::null_vertex(); - for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { - NFAVertex tmp = *ai; - + NFAVertex v2 = NGHolder::null_vertex(); + for (NFAVertex tmp : adjacent_vertices_range(v, g)) { // skip self-loop if (tmp == v) { continue; } - v2 = *ai; + v2 = tmp; // get char reach const CharReach tmpcr = g[tmp].char_reach; @@ -450,10 +438,9 @@ TEST(NFAGraph, RemoveEquivalence6) { ASSERT_EQ(1U, in_degree(g.accept, g)); // find that vertex and ensure it has no self loops and an edge to accept - NFAVertex v = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.startDs, g); ai != ae; ++ai) { - v = *ai; + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(g.startDs, g)) { + v = t; if (v == g.startDs) { continue; } @@ -492,13 +479,12 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_EQ(1U, in_degree(g.accept, g)); // find that vertex and ensure it's a dot self loop and has one outgoing edge - NFAVertex v = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(g.start, g); ai != ae; ++ai) { - if (*ai == g.startDs) { + NFAVertex v = 
NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(g.start, g)) { + if (t == g.startDs) { continue; } - v = *ai; + v = t; // check if it has the right char reach const CharReach &tmpcr = g[v].char_reach; ASSERT_TRUE(tmpcr.all()); @@ -509,13 +495,13 @@ TEST(NFAGraph, RemoveEquivalence7) { ASSERT_TRUE(v != nullptr); // find the next vertex and ensure it has an edge to accept - NFAVertex v2 = NFAGraph::null_vertex(); - for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) { + NFAVertex v2 = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(v, g)) { // skip self loop - if (*ai == v) { + if (t == v) { continue; } - v2 = *ai; + v2 = t; // check if it has the right char reach const CharReach &tmpcr = g[v2].char_reach; ASSERT_EQ(1U, tmpcr.count()); diff --git a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index 16266453..acb3cc7b 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,8 @@ */ /** - * Unit tests for checking the removeRedundancy code in nfagraph/ng_redundancy.cpp. + * Unit tests for checking the removeRedundancy code in + * nfagraph/ng_redundancy.cpp. */ #include "config.h" @@ -62,15 +63,17 @@ TEST(NFAGraph, RemoveRedundancy1) { // Our graph should only have two non-special nodes ASSERT_EQ((size_t)N_SPECIALS + 2, num_vertices(*graph)); - // Dot-star start state should be connected to itself and a single other vertex + // Dot-star start state should be connected to itself and a single other + // vertex ASSERT_EQ(2U, out_degree(graph->startDs, g)); // That single vertex should have reachability [ab] - NFAVertex v = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(graph->startDs, g); ai != ae; ++ai) { - v = *ai; - if (v != graph->startDs) break; + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(graph->startDs, g)) { + v = t; + if (v != graph->startDs) { + break; + } } const CharReach &cr = g[v].char_reach; ASSERT_EQ(2U, cr.count()); @@ -103,35 +106,39 @@ TEST(NFAGraph, RemoveRedundancy2) { // Our graph should now have only 3 non-special vertices ASSERT_EQ((size_t)N_SPECIALS + 3, num_vertices(*graph)); - // Dot-star start state should be connected to itself and a single other vertex + // Dot-star start state should be connected to itself and a single other + // vertex ASSERT_EQ(2U, out_degree(graph->startDs, g)); // That single vertex should have reachability [a] - NFAVertex v = NFAGraph::null_vertex(); - NFAGraph::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(graph->startDs, g); ai != ae; ++ai) { - v = *ai; - if (v != graph->startDs) break; + NFAVertex v = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(graph->startDs, g)) { + v = t; + if (v != graph->startDs) { + break; + } } const CharReach &cr = g[v].char_reach; ASSERT_EQ(1U, cr.count()); ASSERT_TRUE(cr.test('a')); - // 'a' should have two out edges: one to a dot with a cycle (.*) and one to 'c' + // 'a' should have two out edges: one to a dot with a cycle (.*) and one to + // 'c' ASSERT_EQ(2U, out_degree(v, g)); - NFAVertex dotstar = NFAGraph::null_vertex(), vc = NFAGraph::null_vertex(); - for (tie(ai, ae) = 
adjacent_vertices(v, g); ai != ae; ++ai) { - const CharReach &cr2 = g[*ai].char_reach; + NFAVertex dotstar = NGHolder::null_vertex(); + NFAVertex vc = NGHolder::null_vertex(); + for (NFAVertex t : adjacent_vertices_range(v, g)) { + const CharReach &cr2 = g[t].char_reach; if (cr2.count() == 1 && cr2.test('c')) { - vc = *ai; + vc = t; } else if (cr2.all()) { - dotstar = *ai; + dotstar = t; } else { FAIL(); } } - ASSERT_TRUE(vc != NFAGraph::null_vertex()); - ASSERT_TRUE(dotstar != NFAGraph::null_vertex()); + ASSERT_TRUE(vc != NGHolder::null_vertex()); + ASSERT_TRUE(dotstar != NGHolder::null_vertex()); // Dot-star node should have a self-loop and an edge to vertex 'c' ASSERT_EQ(2U, out_degree(dotstar, g)); diff --git a/unit/internal/nfagraph_repeat.cpp b/unit/internal/nfagraph_repeat.cpp index 2473d755..b34d1271 100644 --- a/unit/internal/nfagraph_repeat.cpp +++ b/unit/internal/nfagraph_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,7 @@ #include "gtest/gtest.h" #include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_util.h" #include "util/depth.h" #include "hs_compile.h" @@ -89,12 +90,15 @@ static const PureRepeatTest pureRepeatTests[] = { { "^..?..?..?..?..?", 5, 10 } }; -INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, ValuesIn(pureRepeatTests)); +INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, + ValuesIn(pureRepeatTests)); TEST_P(NFAPureRepeatTest, Check) { const PureRepeatTest &t = GetParam(); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); - unique_ptr w(constructGraph(t.pattern, HS_FLAG_ALLOWEMPTY)); + auto w = constructGraph(t.pattern, HS_FLAG_ALLOWEMPTY); + ASSERT_TRUE(w != nullptr); + clearReports(*w); PureRepeat repeat; bool result = isPureRepeat(*w, repeat); diff --git a/unit/internal/nfagraph_util.cpp b/unit/internal/nfagraph_util.cpp index 81dfd682..135276dd 100644 --- a/unit/internal/nfagraph_util.cpp +++ b/unit/internal/nfagraph_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,24 +85,23 @@ TEST(NFAGraph, split1) { splitGraph(src, pivot, &lhs, &lhs_map, &rhs, &rhs_map); ASSERT_EQ(3U + N_SPECIALS, num_vertices(lhs)); - NFAGraph::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(lhs); vi != ve; ++vi) { - if (is_special(*vi, lhs)) { + for (NFAVertex v : vertices_range(lhs)) { + if (is_special(v, lhs)) { continue; } - u32 cr = lhs[*vi].char_reach.find_first(); + u32 cr = lhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE((cr >= 'a' && cr <= 'c')); } ASSERT_EQ(8U + N_SPECIALS, num_vertices(rhs) ); - for (tie(vi, ve) = vertices(rhs); vi != ve; ++vi) { - if (is_special(*vi, rhs)) { + for (NFAVertex v : vertices_range(rhs)) { + if (is_special(v, rhs)) { continue; } - u32 cr = rhs[*vi].char_reach.find_first(); + u32 cr = rhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE(cr >= 'b' && cr <= 'i'); } @@ -137,24 +136,23 @@ TEST(NFAGraph, split2) { splitGraph(src, pivot, &lhs, &lhs_map, &rhs, &rhs_map); ASSERT_EQ(3U + N_SPECIALS, num_vertices(lhs)); - NFAGraph::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(lhs); vi != ve; ++vi) { - if (is_special(*vi, lhs)) { + for (NFAVertex v : 
vertices_range(lhs)) { + if (is_special(v, lhs)) { continue; } - u32 cr = lhs[*vi].char_reach.find_first(); + u32 cr = lhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE(cr >= 'a' && cr <= 'c'); } ASSERT_EQ(3U + N_SPECIALS, num_vertices(rhs) ); - for (tie(vi, ve) = vertices(rhs); vi != ve; ++vi) { - if (is_special(*vi, rhs)) { + for (NFAVertex v : vertices_range(rhs)) { + if (is_special(v, rhs)) { continue; } - u32 cr = rhs[*vi].char_reach.find_first(); + u32 cr = rhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE(cr >= 'b' && cr <= 'd'); } @@ -211,24 +209,23 @@ TEST(NFAGraph, split3) { splitGraph(src, pivots, &lhs, &lhs_map, &rhs, &rhs_map); ASSERT_EQ(7U + N_SPECIALS, num_vertices(lhs)); - NFAGraph::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(lhs); vi != ve; ++vi) { - if (is_special(*vi, lhs)) { + for (NFAVertex v : vertices_range(lhs)) { + if (is_special(v, lhs)) { continue; } - u32 cr = lhs[*vi].char_reach.find_first(); + u32 cr = lhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE((cr >= 'a' && cr <= 'g')); } ASSERT_EQ(2U + N_SPECIALS, num_vertices(rhs) ); - for (tie(vi, ve) = vertices(rhs); vi != ve; ++vi) { - if (is_special(*vi, rhs)) { + for (NFAVertex v : vertices_range(rhs)) { + if (is_special(v, rhs)) { continue; } - u32 cr = rhs[*vi].char_reach.find_first(); + u32 cr = rhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE(cr >= 'h' && cr <= 'i'); } @@ -289,13 +286,12 @@ TEST(NFAGraph, split4) { splitGraph(src, pivots, &lhs, &lhs_map, &rhs, &rhs_map); ASSERT_EQ(7U + N_SPECIALS, num_vertices(lhs)); - NFAGraph::vertex_iterator vi, ve; - for (tie(vi, ve) = vertices(lhs); vi != ve; ++vi) { - if (is_special(*vi, lhs)) { + for (NFAVertex v : vertices_range(lhs)) { + if (is_special(v, lhs)) { continue; } - u32 cr = lhs[*vi].char_reach.find_first(); + u32 cr = lhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE((cr >= 'a' && cr <= 'g')); } @@ -304,12 +300,12 @@ TEST(NFAGraph, split4) { ASSERT_TRUE(edge(lhs_map[d], lhs_map[d], lhs).second); ASSERT_EQ(2U + N_SPECIALS, num_vertices(rhs) ); - for (tie(vi, ve) = vertices(rhs); vi != ve; ++vi) { - if (is_special(*vi, rhs)) { + for (NFAVertex v : vertices_range(rhs)) { + if (is_special(v, rhs)) { continue; } - u32 cr = rhs[*vi].char_reach.find_first(); + u32 cr = rhs[v].char_reach.find_first(); SCOPED_TRACE(cr); ASSERT_TRUE(cr >= 'h' && cr <= 'i'); } diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp index ad6b0176..3f5a8382 100644 --- a/unit/internal/rose_build_merge.cpp +++ b/unit/internal/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,20 +39,12 @@ #include "util/compile_context.h" #include "util/graph_range.h" #include "util/make_unique.h" +#include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" using std::vector; using namespace ue2; -static -std::unique_ptr constructBuilder(const Grey &grey) { - CompileContext cc(true, false, get_current_target(), grey); - ReportManager rm(cc.grey); - SomSlotManager ssm(8); // som precision - BoundaryReports boundary; - return makeRoseBuilder(rm, ssm, cc, boundary); -} - static std::unique_ptr makeSuffixGraph(ReportID report) { auto h = ue2::make_unique(NFA_SUFFIX); @@ -100,7 +92,12 @@ size_t numUniqueSuffixGraphs(const RoseGraph &g) { TEST(RoseMerge, 
uncalcLeaves_nonleaf) { Grey grey; - auto build_base = constructBuilder(grey); + CompileContext cc(true, false, get_current_target(), grey); + ReportManager rm(cc.grey); + SomSlotManager ssm(8); // som precision + auto smwr = makeSmallWriteBuilder(1, rm, cc); + BoundaryReports boundary; + auto build_base = makeRoseBuilder(rm, ssm, *smwr, cc, boundary); ASSERT_NE(nullptr, build_base); RoseBuildImpl &build = static_cast(*build_base); diff --git a/unit/internal/rose_mask.cpp b/unit/internal/rose_mask.cpp new file mode 100644 index 00000000..e6be00f3 --- /dev/null +++ b/unit/internal/rose_mask.cpp @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "rose/validate_mask.h" +#include "gtest/gtest.h" + +#define ONES64 0xffffffffffffffffULL + +/* valid_data_mask is flexible, don't need to be fixed in Info */ +struct ValidateMaskTestInfo { + u64a data; + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; +}; + +static const ValidateMaskTestInfo testBasic[] = { + /* data is randomly picked */ + {0x1234abcd4321dcbaULL, 0xff09bbdd7f7ffeffULL, + 0x1200abcd4561dcbbULL, 0xffff00ffULL}, + /* data = "VaLiDaTe" */ + {0x56614c6944615465ULL, 0xe0feffffdf7b5480ULL, + 0x40614c6946615400ULL, 0xff0000ff000000ULL}, + /* data = "\0\0\0MASK\0" */ + {0x4d41534b00ULL, 0xfffffefebfdf002cULL, + 0x5536344c0173002cULL, 0xffffff0000ff00ffULL}, + /* data = "FOo14foo" */ + {0x464f6f3134666f6fULL, 0xdfdffffef8c0f000ULL, + 0x46466f3030406000ULL, 0xff000000000000ULL}, + /* data = "FOo14foo" with different cmp_mask and neg_mask*/ + {0x464f6f3134666f6fULL, 0xdfdffffef8c0f000ULL, + 0x44464f3034606f60ULL, 0xffffff00ffffffffULL}, +}; + +/* + * generate 37 different valid_data_mask + * 8 from 0xff to 0xff00000000000000 + * 7 from 0xffff to 0xffff000000000000 + * ... 
+ * 0xffffffffffffffff and 0 + */ +static int initLegalValidMasks(u64a validMasks[]) { + u64a data = ONES64; + int num = 0; + for (int i = 0; i < 64; i += 8) { + for (int j = 0; j <= i; j += 8) { + validMasks[num] = data << j; + num++; + } + data >>= 8; + } + validMasks[num] = 0; + num++; + return num; +} + +/* + * generate all 256 neg_masks + * including 0, 0xff, 0xff00,..., 0xffffffffffffffff + */ +static int initLegalNegMasks(u64a negMasks[]) { + u64a data = 0; + u64a offset; + int num = 0; + while (data != ONES64) { + negMasks[num] = data; + num++; + offset = (data | (data +1)) ^ data; + data += 0xfeULL * offset + 1; + } + negMasks[num] = data; + num++; + return num; +} + + +/* + * check all legal valid_mask(37 different) for validateMask[] + */ +TEST(ValidateMask, ValidMaskTests) { + u64a validMasks[256]; + int num = initLegalValidMasks(validMasks); + + for (const auto &t : testBasic) { + for (int i = 0; i < num; i++) { + EXPECT_EQ(1, validateMask(t.data, + validMasks[i], + t.and_mask, + t.cmp_mask, + t.neg_mask)); + } + } +} + +/* + * fix neg_mask to 0 and ONES64, + * check output of ValidateMask on different valid_mask, + * for neg_mask = 0, + */ +TEST(ValidateMask, AdvancedValidMaskTests) { + u64a validMasks[256]; + int num = initLegalValidMasks(validMasks); + int bool_result; + for (const auto &t: testBasic) { + for (int i = 0; i < num; i++) { + bool_result = !(validMasks[i] & t.neg_mask); + EXPECT_EQ(bool_result, validateMask(t.data, + validMasks[i], + t.and_mask, + t.cmp_mask, + 0)); + bool_result = (validMasks[i] | t.neg_mask) == t.neg_mask; + EXPECT_EQ(bool_result, validateMask(t.data, + validMasks[i], + t.and_mask, + t.cmp_mask, + ONES64)); + } + } +} + +/* + * test every pair of valid_data_mask and neg_mask + * and compute the expect output by a formula + */ +TEST(ValidateMask, FullTests) { + u64a validMasks[256]; + u64a negMasks[256]; + int vm_num = initLegalValidMasks(validMasks); + int nm_num = initLegalNegMasks(negMasks); + int bool_result; + for (const auto &t: testBasic) { + for (int i = 0; i < vm_num; i++) { + for (int j = 0; j < nm_num; j++) { + /* + * treat t.neg_mask as a truthtable (a negative truthtable) + * we expect validateMask output 1 if and only if + * the truthtable(tt) and neg_mask(nm) looks same + * under "&" operation with valid_data_mask(vdm) + * that is + * output = (tt & vdm) == (nm & vdm) ? 
1 : 0; + */ + bool_result = (t.neg_mask & validMasks[i]) == + (negMasks[j] & validMasks[i]); + EXPECT_EQ(bool_result, validateMask(t.data, + validMasks[i], + t.and_mask, + t.cmp_mask, + negMasks[j])); + } + } + } +} + +/* + * drop the original validateMask[].neg_mask + * and test more neg_mask and valid_mask manually + */ +TEST(ValidateMask, ManualTest_0) { + const auto &t = testBasic[0]; + EXPECT_EQ(1, validateMask(t.data, ONES64 << 8, + t.and_mask, t.cmp_mask, 0xffff0000ULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 16) >> 8, + t.and_mask, t.cmp_mask, 0xffff0000ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 16, + t.and_mask, t.cmp_mask, 0xffffff00ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 24, + t.and_mask, t.cmp_mask, 0xff00ffffULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 32, + t.and_mask, t.cmp_mask, 0xffffffff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 40, + t.and_mask, t.cmp_mask, 0xff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, 0, + t.and_mask, t.cmp_mask, ONES64)); + EXPECT_EQ(1, validateMask(t.data, 0, + t.and_mask, t.cmp_mask, ~t.neg_mask)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 16, + t.and_mask, t.cmp_mask, 0xff0000ffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64, + t.and_mask, t.cmp_mask, 0xffff0000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 32, + t.and_mask, t.cmp_mask, 0xff00ffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 8, + t.and_mask, t.cmp_mask, 0xffffffffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 16, + t.and_mask, t.cmp_mask, 0xff0000ffULL)); +} + +TEST(ValidateMask, ManualTest_1) { + const auto &t = testBasic[1]; + EXPECT_EQ(1, validateMask(t.data, ONES64 << 16, + t.and_mask, t.cmp_mask, 0xff0000ff00ffffULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 32, + t.and_mask, t.cmp_mask, 0xff000000000000ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 32, + t.and_mask, t.cmp_mask, 0xff0000ffff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 56, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 8, + t.and_mask, t.cmp_mask, 0xffff0000ff000000ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 16, + t.and_mask, t.cmp_mask, 0xff000000ULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 32) >> 16, + t.and_mask, t.cmp_mask, 0xff00ff00)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 40, + t.and_mask, t.cmp_mask, 0xff00000000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 48, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 56, + t.and_mask, t.cmp_mask, 0xff00000000000000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 16, + t.and_mask, t.cmp_mask, 0xff0000ffff0000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 8, + t.and_mask, t.cmp_mask, 0xff000000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 16, + t.and_mask, t.cmp_mask, 0xffff000000ULL)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 40) >> 16, + t.and_mask, t.cmp_mask, 0xff000000000000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 8, + t.and_mask, t.cmp_mask, ONES64)); +} + +TEST(ValidateMask, ManualTest_2) { + const auto &t = testBasic[2]; + EXPECT_EQ(1, validateMask(t.data, ONES64 << 24, + t.and_mask, t.cmp_mask, 0xffffff0000000000ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 56, + t.and_mask, t.cmp_mask, 0xff00000000000000ULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 << 56, + t.and_mask, t.cmp_mask, 0xff00ffffff00ffffULL)); + EXPECT_EQ(1, validateMask(t.data, 
0, + t.and_mask, t.cmp_mask, ONES64)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 24, + t.and_mask, t.cmp_mask, 0xff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 32, + t.and_mask, t.cmp_mask, 0xffff00ff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 32) >> 24, + t.and_mask, t.cmp_mask, 0xff0000ULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 32) >> 24, + t.and_mask, t.cmp_mask, 0xff00ffULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 56) >> 40, + t.and_mask, t.cmp_mask, 0xff0000ULL)); + EXPECT_EQ(1, validateMask(t.data, (ONES64 << 56) >> 32, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(1, validateMask(t.data, ONES64 >> 40, + t.and_mask, t.cmp_mask, 0xffffffff00ffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, ONES64, + t.and_mask, t.cmp_mask, ONES64)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 56, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 48, + t.and_mask, t.cmp_mask, 0xff00000000000000ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 << 8, + t.and_mask, t.cmp_mask, 0xffffff00000000ffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 32, + t.and_mask, t.cmp_mask, 0xffff00ULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 32, + t.and_mask, t.cmp_mask, 0xffffffULL)); + EXPECT_EQ(0, validateMask(t.data, ONES64 >> 16, + t.and_mask, t.cmp_mask, 0xff00ffULL)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 32) >> 24, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 32) >> 24, + t.and_mask, t.cmp_mask, 0xffffff00000000ffULL)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 32) >> 24, + t.and_mask, t.cmp_mask, 0xffffff000000ff00ULL)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 56) >> 40, + t.and_mask, t.cmp_mask, 0)); + EXPECT_EQ(0, validateMask(t.data, (ONES64 << 56) >> 48, + t.and_mask, t.cmp_mask, 0xff00ULL)); +} diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index 58e5a61f..614b641d 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,8 +31,7 @@ #include "gtest/gtest.h" #include "util/simd_utils.h" -#include "util/shuffle.h" -#include "util/shuffle_ssse3.h" +#include "nfa/limex_shuffle.h" namespace { @@ -50,34 +49,34 @@ Mask setbit(unsigned int bit) { return cf.simd; } -TEST(Shuffle, ShuffleDynamic32_1) { +TEST(Shuffle, PackedExtract32_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 32; i++) { // shuffle a single 1 bit to the front u32 mask = 1U << i; - EXPECT_EQ(1U, shuffleDynamic32(mask, mask)); - EXPECT_EQ(1U, shuffleDynamic32(~0U, mask)); + EXPECT_EQ(1U, packedExtract32(mask, mask)); + EXPECT_EQ(1U, packedExtract32(~0U, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, shuffleDynamic32(0, mask)); - EXPECT_EQ(0U, shuffleDynamic32(~mask, mask)); + EXPECT_EQ(0U, packedExtract32(0, mask)); + EXPECT_EQ(0U, packedExtract32(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 32); j++) { - EXPECT_EQ(0U, shuffleDynamic32((1U << j), mask)); + EXPECT_EQ(0U, packedExtract32((1U << j), mask)); } } } -TEST(Shuffle, ShuffleDynamic32_2) { +TEST(Shuffle, PackedExtract32_2) { // All 32 bits in mask are on u32 mask = ~0U; - EXPECT_EQ(0U, 
shuffleDynamic32(0, mask)); - EXPECT_EQ(mask, shuffleDynamic32(mask, mask)); + EXPECT_EQ(0U, packedExtract32(0, mask)); + EXPECT_EQ(mask, packedExtract32(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, shuffleDynamic32(1U << i, mask)); + EXPECT_EQ(1U << i, packedExtract32(1U << i, mask)); } } -TEST(Shuffle, ShuffleDynamic32_3) { +TEST(Shuffle, PackedExtract32_3) { // Try setting every second bit u32 mask = 0; for (unsigned int i = 0; i < 32; i += 2) { @@ -85,63 +84,63 @@ TEST(Shuffle, ShuffleDynamic32_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ((1U << 16) - 1, shuffleDynamic32(mask, mask)); - EXPECT_EQ((1U << 16) - 1, shuffleDynamic32(~mask, ~mask)); - EXPECT_EQ(0U, shuffleDynamic32(~mask, mask)); - EXPECT_EQ(0U, shuffleDynamic32(mask, ~mask)); + EXPECT_EQ((1U << 16) - 1, packedExtract32(mask, mask)); + EXPECT_EQ((1U << 16) - 1, packedExtract32(~mask, ~mask)); + EXPECT_EQ(0U, packedExtract32(~mask, mask)); + EXPECT_EQ(0U, packedExtract32(mask, ~mask)); for (unsigned int i = 0; i < 32; i += 2) { - EXPECT_EQ(1U << (i/2), shuffleDynamic32(1U << i, mask)); - EXPECT_EQ(0U, shuffleDynamic32(1U << i, ~mask)); - EXPECT_EQ(1U << (i/2), shuffleDynamic32(1U << (i+1), ~mask)); - EXPECT_EQ(0U, shuffleDynamic32(1U << (i+1), mask)); + EXPECT_EQ(1U << (i/2), packedExtract32(1U << i, mask)); + EXPECT_EQ(0U, packedExtract32(1U << i, ~mask)); + EXPECT_EQ(1U << (i/2), packedExtract32(1U << (i+1), ~mask)); + EXPECT_EQ(0U, packedExtract32(1U << (i+1), mask)); } } -TEST(Shuffle, ShuffleDynamic64_1) { +TEST(Shuffle, PackedExtract64_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 64; i++) { // shuffle a single 1 bit to the front u64a mask = 1ULL << i; - EXPECT_EQ(1U, shuffleDynamic64(mask, mask)); - EXPECT_EQ(1U, shuffleDynamic64(~0ULL, mask)); + EXPECT_EQ(1U, packedExtract64(mask, mask)); + EXPECT_EQ(1U, packedExtract64(~0ULL, mask)); // we should get zero out of these cases - EXPECT_EQ(0U, shuffleDynamic64(0, mask)); - EXPECT_EQ(0U, shuffleDynamic64(~mask, mask)); + EXPECT_EQ(0U, packedExtract64(0, mask)); + EXPECT_EQ(0U, packedExtract64(~mask, mask)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 64); j++) { - EXPECT_EQ(0U, shuffleDynamic64((1ULL << j), mask)); + EXPECT_EQ(0U, packedExtract64((1ULL << j), mask)); } } } -TEST(Shuffle, ShuffleDynamic64_2) { +TEST(Shuffle, PackedExtract64_2) { // Fill first half of mask u64a mask = 0x00000000ffffffffULL; - EXPECT_EQ(0U, shuffleDynamic64(0, mask)); - EXPECT_EQ(0xffffffffU, shuffleDynamic64(mask, mask)); + EXPECT_EQ(0U, packedExtract64(0, mask)); + EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); for (unsigned int i = 0; i < 32; i++) { - EXPECT_EQ(1U << i, shuffleDynamic64(1ULL << i, mask)); + EXPECT_EQ(1U << i, packedExtract64(1ULL << i, mask)); } // Fill second half of mask mask = 0xffffffff00000000ULL; - EXPECT_EQ(0U, shuffleDynamic64(0, mask)); - EXPECT_EQ(0xffffffffU, shuffleDynamic64(mask, mask)); + EXPECT_EQ(0U, packedExtract64(0, mask)); + EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); for (unsigned int i = 32; i < 64; i++) { - EXPECT_EQ(1U << (i - 32), shuffleDynamic64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 32), packedExtract64(1ULL << i, mask)); } // Try one in the middle mask = 0x0000ffffffff0000ULL; - EXPECT_EQ(0U, shuffleDynamic64(0, mask)); - EXPECT_EQ(0xffffffffU, shuffleDynamic64(mask, mask)); + EXPECT_EQ(0U, packedExtract64(0, mask)); + EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); for (unsigned int i = 16; i 
< 48; i++) { - EXPECT_EQ(1U << (i - 16), shuffleDynamic64(1ULL << i, mask)); + EXPECT_EQ(1U << (i - 16), packedExtract64(1ULL << i, mask)); } } -TEST(Shuffle, ShuffleDynamic64_3) { +TEST(Shuffle, PackedExtract64_3) { // Try setting every second bit (note: 32 bits, the max we can shuffle) u64a mask = 0; for (unsigned int i = 0; i < 64; i += 2) { @@ -149,46 +148,69 @@ TEST(Shuffle, ShuffleDynamic64_3) { } // Test both cases (all even bits, all odd bits) - EXPECT_EQ(0xffffffffU, shuffleDynamic64(mask, mask)); - EXPECT_EQ(0xffffffffU, shuffleDynamic64(~mask, ~mask)); - EXPECT_EQ(0U, shuffleDynamic64(~mask, mask)); - EXPECT_EQ(0U, shuffleDynamic64(mask, ~mask)); + EXPECT_EQ(0xffffffffU, packedExtract64(mask, mask)); + EXPECT_EQ(0xffffffffU, packedExtract64(~mask, ~mask)); + EXPECT_EQ(0U, packedExtract64(~mask, mask)); + EXPECT_EQ(0U, packedExtract64(mask, ~mask)); for (unsigned int i = 0; i < 64; i += 2) { - EXPECT_EQ(1U << (i/2), shuffleDynamic64(1ULL << i, mask)); - EXPECT_EQ(0U, shuffleDynamic64(1ULL << i, ~mask)); - EXPECT_EQ(1U << (i/2), shuffleDynamic64(1ULL << (i+1), ~mask)); - EXPECT_EQ(0U, shuffleDynamic64(1ULL << (i+1), mask)); + EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << i, mask)); + EXPECT_EQ(0U, packedExtract64(1ULL << i, ~mask)); + EXPECT_EQ(1U << (i/2), packedExtract64(1ULL << (i+1), ~mask)); + EXPECT_EQ(0U, packedExtract64(1ULL << (i+1), mask)); } } +template static -void build_pshufb_masks_onebit(unsigned int bit, m128 *permute, m128 *compare) { +void build_pshufb_masks_onebit(unsigned int bit, T *permute, T *compare) { + static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256), + "should be valid type"); // permute mask has 0x80 in all bytes except the one we care about memset(permute, 0x80, sizeof(*permute)); memset(compare, 0, sizeof(*compare)); char *pmsk = (char *)permute; char *cmsk = (char *)compare; - pmsk[0] = bit/8; - cmsk[0] = ~(1 << (bit % 8)); + u8 off = (bit >= 128) ? 
0x10 : 0; + pmsk[off] = bit/8; + cmsk[off] = ~(1 << (bit % 8)); } -TEST(Shuffle, ShufflePshufb128_1) { +TEST(Shuffle, PackedExtract128_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 128; i++) { // shuffle a single 1 bit to the front m128 permute, compare; build_pshufb_masks_onebit(i, &permute, &compare); - EXPECT_EQ(1U, shufflePshufb128(setbit(i), permute, compare)); - EXPECT_EQ(1U, shufflePshufb128(ones128(), permute, compare)); + EXPECT_EQ(1U, packedExtract128(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract128(ones128(), permute, compare)); // we should get zero out of these cases - EXPECT_EQ(0U, shufflePshufb128(zeroes128(), permute, compare)); - EXPECT_EQ(0U, shufflePshufb128(not128(setbit(i)), permute, compare)); + EXPECT_EQ(0U, packedExtract128(zeroes128(), permute, compare)); + EXPECT_EQ(0U, packedExtract128(not128(setbit(i)), permute, compare)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 128); j++) { - EXPECT_EQ(0U, shufflePshufb128(setbit(j), permute, compare)); + EXPECT_EQ(0U, packedExtract128(setbit(j), permute, compare)); } } } +#if defined(__AVX2__) +TEST(Shuffle, PackedExtract256_1) { + // Try all possible one-bit masks + for (unsigned int i = 0; i < 256; i++) { + // shuffle a single 1 bit to the front + m256 permute, compare; + build_pshufb_masks_onebit(i, &permute, &compare); + EXPECT_EQ(1U, packedExtract256(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract256(ones256(), permute, compare)); + // we should get zero out of these cases + EXPECT_EQ(0U, packedExtract256(zeroes256(), permute, compare)); + EXPECT_EQ(0U, packedExtract256(not256(setbit(i)), permute, compare)); + // we should get zero out of all the other bit positions + for (unsigned int j = 0; (j != i && j < 256); j++) { + EXPECT_EQ(0U, packedExtract256(setbit(j), permute, compare)); + } + } +} +#endif } // namespace diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index b8d77d37..81495a9c 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -283,9 +283,9 @@ TEST(DoubleShufti, BuildMask1) { lits.insert(make_pair('a', 'B')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -326,9 +326,9 @@ TEST(DoubleShufti, BuildMask2) { lits.insert(make_pair('a','z')); lits.insert(make_pair('B','z')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -354,9 +354,9 @@ TEST(DoubleShufti, BuildMask4) { lits.insert(make_pair('A','z')); lits.insert(make_pair('b','z')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -383,9 +383,9 @@ TEST(DoubleShufti, BuildMask5) { CharReach bytes; bytes.set('X'); - bool rv = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, + bool ret = shuftiBuildDoubleMasks(bytes, lits, &lo1m, &hi1m, &lo2m, &hi2m); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -421,9 +421,9 @@ TEST(DoubleShufti, BuildMask6) { lits.insert(make_pair('A','x')); lits.insert(make_pair('b','x')); - bool 
rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1m, &hi1m, &lo2m, &hi2m); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); u8 *lo1 = (u8 *)&lo1m; u8 *lo2 = (u8 *)&lo2m; @@ -485,9 +485,9 @@ TEST(DoubleShufti, ExecNoMatch1) { lits.insert(make_pair('a','b')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -506,8 +506,8 @@ TEST(DoubleShufti, ExecNoMatch1b) { lits.insert(make_pair('b','a')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -527,8 +527,8 @@ TEST(DoubleShufti, ExecNoMatch2) { lits.insert(make_pair('a','b')); lits.insert(make_pair('B','b')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -548,8 +548,8 @@ TEST(DoubleShufti, ExecNoMatch2b) { lits.insert(make_pair('b','a')); lits.insert(make_pair('b','B')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -568,8 +568,8 @@ TEST(DoubleShufti, ExecNoMatch3) { lits.insert(make_pair('V','e')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -588,8 +588,8 @@ TEST(DoubleShufti, ExecNoMatch3b) { lits.insert(make_pair('e','V')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; @@ -608,8 +608,8 @@ TEST(DoubleShufti, ExecMatch1) { lits.insert(make_pair('a','b')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -629,8 +629,8 @@ TEST(DoubleShufti, ExecMatch2) { lits.insert(make_pair('a','a')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -651,8 +651,8 @@ TEST(DoubleShufti, ExecMatch3) { lits.insert(make_pair('B','a')); lits.insert(make_pair('a','a')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, 
&hi2); + ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbBaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -675,8 +675,8 @@ TEST(DoubleShufti, ExecMatch4) { lits.insert(make_pair('C','a')); lits.insert(make_pair('c','a')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbAaaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -717,8 +717,8 @@ TEST(DoubleShufti, ExecMatch4b) { lits.insert(make_pair('a','C')); lits.insert(make_pair('a','c')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); /* 0123456789012345678901234567890 */ char t1[] = "bbbbbbbbbbbbbbbbbaAaaaaaaaaaaaaaabbbbbbbbbbbbbbbabbbbbbbbbbbb"; @@ -756,8 +756,8 @@ TEST(DoubleShufti, ExecMatch5) { lits.insert(make_pair('a','A')); - bool rv = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -780,8 +780,8 @@ TEST(DoubleShufti, ExecMatchMixed1) { // just one one-byte literal onebyte.set('a'); - bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -804,8 +804,8 @@ TEST(DoubleShufti, ExecMatchMixed2) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; char t2[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -838,8 +838,8 @@ TEST(DoubleShufti, ExecMatchMixed3) { onebyte.set('a'); twobyte.insert(make_pair('x', 'y')); - bool rv = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); - ASSERT_TRUE(rv); + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, &lo1, &hi1, &lo2, &hi2); + ASSERT_TRUE(ret); const int len = 420; char t1[len + 1]; diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index de0f1eea..3c07b2b0 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -32,7 +32,6 @@ #include "util/alloc.h" #include "util/make_unique.h" #include "util/simd_utils.h" -#include "util/simd_utils_ssse3.h" using namespace std; using namespace ue2; @@ -644,50 +643,50 @@ TEST(SimdUtilsTest, variableByteShift128) { char base[] = "0123456789ABCDEF"; m128 in = loadu128(base); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 0), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 1), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 1), variable_byte_shift_m128(in, -1))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 2), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 2), variable_byte_shift_m128(in, -2))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 3), + 
EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 3), variable_byte_shift_m128(in, -3))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 4), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 4), variable_byte_shift_m128(in, -4))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 5), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 5), variable_byte_shift_m128(in, -5))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 6), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 6), variable_byte_shift_m128(in, -6))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 7), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 7), variable_byte_shift_m128(in, -7))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 8), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 8), variable_byte_shift_m128(in, -8))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 9), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 9), variable_byte_shift_m128(in, -9))); - EXPECT_TRUE(!diff128(byteShiftRight128(in, 10), + EXPECT_TRUE(!diff128(rshiftbyte_m128(in, 10), variable_byte_shift_m128(in, -10))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 0), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 1), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 1), variable_byte_shift_m128(in, 1))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 2), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 2), variable_byte_shift_m128(in, 2))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 3), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 3), variable_byte_shift_m128(in, 3))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 4), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 4), variable_byte_shift_m128(in, 4))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 5), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 5), variable_byte_shift_m128(in, 5))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 6), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 6), variable_byte_shift_m128(in, 6))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 7), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 7), variable_byte_shift_m128(in, 7))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 8), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 8), variable_byte_shift_m128(in, 8))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 9), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 9), variable_byte_shift_m128(in, 9))); - EXPECT_TRUE(!diff128(byteShiftLeft128(in, 10), + EXPECT_TRUE(!diff128(lshiftbyte_m128(in, 10), variable_byte_shift_m128(in, 10))); EXPECT_TRUE(!diff128(zeroes128(), variable_byte_shift_m128(in, 16))); diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 30629f71..9fa6743e 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -158,7 +158,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, DEBUG_PRINTF("dequeuing path %s, back %u\n", pathToString(g, *p).c_str(), g[u].index); - NFAGraph::adjacency_iterator ai, ae; + NGHolder::adjacency_iterator ai, ae; for (tie(ai, ae) = adjacent_vertices(u, g); ai != ae; ++ai) { NFAVertex v = *ai; diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index 4d188d78..60ff0a17 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -76,7 +76,7 @@ struct fmstate { fmstate(const NGHolder &g, bool som_in, bool utf8_in, bool aSD_in, const ReportManager 
&rm_in) : num_states(num_vertices(g)), states(num_states), next(num_states), - vertices(num_vertices(g), NFAGraph::null_vertex()), som(som_in), + vertices(num_vertices(g), NGHolder::null_vertex()), som(som_in), utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in), accept(num_states), accept_with_eod(num_states) { // init states