mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch develop into master
This commit is contained in:
commit
3dcd51c272
11
CHANGELOG.md
11
CHANGELOG.md
@ -2,6 +2,17 @@
|
||||
|
||||
This is a list of notable changes to Hyperscan, in reverse chronological order.
|
||||
|
||||
## [4.6.0] 2017-09-22
|
||||
- New API feature: stream state compression. This allows the user to compress
|
||||
and restore state for streams to reduce memory usage.
|
||||
- Many improvements to literal matching performance, including more support
|
||||
for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512).
|
||||
- Compile time improvements, mainly reducing compiler memory allocation.
|
||||
Also results in reduced compile time for some pattern sets.
|
||||
- Bugfix for issue #62: fix error building Hyperscan using older versions of
|
||||
Boost.
|
||||
- Small updates to fix warnings identified by Coverity.
|
||||
|
||||
## [4.5.2] 2017-07-26
|
||||
- Bugfix for issue #57: Treat characters between `\Q.\E` as codepoints in
|
||||
UTF8 mode.
|
||||
|
@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
|
||||
project (hyperscan C CXX)
|
||||
|
||||
set (HS_MAJOR_VERSION 4)
|
||||
set (HS_MINOR_VERSION 5)
|
||||
set (HS_PATCH_VERSION 2)
|
||||
set (HS_MINOR_VERSION 6)
|
||||
set (HS_PATCH_VERSION 0)
|
||||
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
|
||||
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
@ -38,6 +38,7 @@ endif()
|
||||
|
||||
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
|
||||
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")
|
||||
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
# First for the generic no-config case
|
||||
@ -57,6 +58,11 @@ if(CMAKE_GENERATOR STREQUAL Xcode)
|
||||
set(XCODE TRUE)
|
||||
endif()
|
||||
|
||||
# older versions of cmake don't know that Xcode and the Intel compiler support -isystem
|
||||
if (XCODE OR CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem")
|
||||
endif ()
|
||||
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR 1)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||
include_directories(${PROJECT_BINARY_DIR})
|
||||
@ -148,8 +154,9 @@ if(MSVC OR MSVC_IDE)
|
||||
# todo: change these as required
|
||||
set(ARCH_C_FLAGS "/arch:AVX2")
|
||||
set(ARCH_CXX_FLAGS "/arch:AVX2")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS")
|
||||
set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD")
|
||||
endif()
|
||||
string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||
string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
|
||||
@ -248,7 +255,13 @@ else()
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0")
|
||||
endif ()
|
||||
# don't complain about abi
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
||||
endif()
|
||||
|
||||
if (NOT(ARCH_IA32 AND RELEASE_BUILD))
|
||||
@ -256,11 +269,6 @@ else()
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
|
||||
endif()
|
||||
|
||||
if (RELEASE_BUILD)
|
||||
# we don't need the noise of ABI warnings in a release build
|
||||
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi")
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
set(SKYLAKE_FLAG "-xCORE-AVX512")
|
||||
@ -396,18 +404,14 @@ if (CXX_MISSING_DECLARATIONS)
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wmissing-declarations")
|
||||
endif()
|
||||
|
||||
CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS)
|
||||
|
||||
# gcc5 complains about this
|
||||
CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE)
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT XCODE)
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||
else()
|
||||
# cmake doesn't think Xcode supports isystem
|
||||
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}")
|
||||
endif()
|
||||
|
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
set(LINUX TRUE)
|
||||
@ -419,10 +423,10 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
|
||||
if(NOT WIN32)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark")
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -513,6 +517,9 @@ set (hs_exec_SRCS
|
||||
src/crc32.h
|
||||
src/report.h
|
||||
src/runtime.c
|
||||
src/stream_compress.c
|
||||
src/stream_compress.h
|
||||
src/stream_compress_impl.h
|
||||
src/fdr/fdr.c
|
||||
src/fdr/fdr.h
|
||||
src/fdr/fdr_internal.h
|
||||
@ -629,6 +636,7 @@ set (hs_exec_SRCS
|
||||
src/util/masked_move.h
|
||||
src/util/multibit.h
|
||||
src/util/multibit.c
|
||||
src/util/multibit_compress.h
|
||||
src/util/multibit_internal.h
|
||||
src/util/pack_bits.h
|
||||
src/util/popcount.h
|
||||
@ -651,7 +659,7 @@ set (hs_exec_avx2_SRCS
|
||||
)
|
||||
|
||||
|
||||
SET (hs_SRCS
|
||||
SET (hs_compile_SRCS
|
||||
${hs_HEADERS}
|
||||
src/crc32.h
|
||||
src/database.h
|
||||
@ -659,7 +667,6 @@ SET (hs_SRCS
|
||||
src/grey.h
|
||||
src/hs.cpp
|
||||
src/hs_internal.h
|
||||
src/hs_version.c
|
||||
src/hs_version.h
|
||||
src/scratch.h
|
||||
src/state.h
|
||||
@ -735,6 +742,7 @@ SET (hs_SRCS
|
||||
src/nfa/nfa_build_util.h
|
||||
src/nfa/nfa_internal.h
|
||||
src/nfa/nfa_kind.h
|
||||
src/nfa/rdfa.cpp
|
||||
src/nfa/rdfa.h
|
||||
src/nfa/rdfa_graph.cpp
|
||||
src/nfa/rdfa_graph.h
|
||||
@ -960,6 +968,7 @@ SET (hs_SRCS
|
||||
src/rose/rose_build_merge.cpp
|
||||
src/rose/rose_build_merge.h
|
||||
src/rose/rose_build_misc.cpp
|
||||
src/rose/rose_build_misc.h
|
||||
src/rose/rose_build_program.cpp
|
||||
src/rose/rose_build_program.h
|
||||
src/rose/rose_build_resources.h
|
||||
@ -996,9 +1005,13 @@ SET (hs_SRCS
|
||||
src/util/dump_mask.h
|
||||
src/util/fatbit_build.cpp
|
||||
src/util/fatbit_build.h
|
||||
src/util/flat_containers.h
|
||||
src/util/graph.h
|
||||
src/util/graph_range.h
|
||||
src/util/graph_small_color_map.h
|
||||
src/util/hash.h
|
||||
src/util/hash_dynamic_bitset.h
|
||||
src/util/insertion_ordered.h
|
||||
src/util/math.h
|
||||
src/util/multibit_build.cpp
|
||||
src/util/multibit_build.h
|
||||
@ -1016,7 +1029,6 @@ SET (hs_SRCS
|
||||
src/util/small_vector.h
|
||||
src/util/target_info.cpp
|
||||
src/util/target_info.h
|
||||
src/util/ue2_containers.h
|
||||
src/util/ue2_graph.h
|
||||
src/util/ue2string.cpp
|
||||
src/util/ue2string.h
|
||||
@ -1024,6 +1036,7 @@ SET (hs_SRCS
|
||||
src/util/unicode_def.h
|
||||
src/util/unicode_set.h
|
||||
src/util/uniform_ops.h
|
||||
src/util/unordered.h
|
||||
src/util/verify_types.h
|
||||
)
|
||||
|
||||
@ -1076,7 +1089,7 @@ set(hs_dump_SRCS
|
||||
)
|
||||
|
||||
if (DUMP_SUPPORT)
|
||||
set(hs_SRCS ${hs_SRCS} ${hs_dump_SRCS})
|
||||
set(hs_compile_SRCS ${hs_compile_SRCS} ${hs_dump_SRCS})
|
||||
endif()
|
||||
|
||||
# we group things by sublibraries, specifying shared and static and then
|
||||
@ -1099,12 +1112,20 @@ if (NOT FAT_RUNTIME)
|
||||
add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $<TARGET_OBJECTS:hs_exec>)
|
||||
add_library(hs_compile OBJECT ${hs_compile_SRCS})
|
||||
|
||||
add_library(hs STATIC
|
||||
src/hs_version.c
|
||||
src/hs_valid_platform.c
|
||||
$<TARGET_OBJECTS:hs_exec>
|
||||
$<TARGET_OBJECTS:hs_compile>)
|
||||
endif (BUILD_STATIC_LIBS)
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
add_library(hs_exec_shared OBJECT ${hs_exec_SRCS})
|
||||
set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
|
||||
add_library(hs_compile_shared OBJECT ${hs_compile_SRCS})
|
||||
set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
|
||||
endif()
|
||||
|
||||
else (FAT_RUNTIME)
|
||||
@ -1158,10 +1179,11 @@ else (FAT_RUNTIME)
|
||||
$<TARGET_OBJECTS:hs_exec_common>
|
||||
${RUNTIME_LIBS})
|
||||
set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C)
|
||||
add_library(hs_compile OBJECT ${hs_compile_SRCS})
|
||||
|
||||
# we want the static lib for testing
|
||||
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c
|
||||
${hs_SRCS}
|
||||
$<TARGET_OBJECTS:hs_compile>
|
||||
$<TARGET_OBJECTS:hs_exec_common>
|
||||
${RUNTIME_LIBS})
|
||||
|
||||
@ -1169,6 +1191,8 @@ else (FAT_RUNTIME)
|
||||
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
# build shared libs
|
||||
add_library(hs_compile_shared OBJECT ${hs_compile_SRCS})
|
||||
set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
|
||||
add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
|
||||
list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
|
||||
set_target_properties(hs_exec_shared_core2 PROPERTIES
|
||||
@ -1249,10 +1273,10 @@ endif()
|
||||
if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
|
||||
if (NOT FAT_RUNTIME)
|
||||
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
|
||||
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_shared>)
|
||||
else()
|
||||
add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c
|
||||
${hs_SRCS} $<TARGET_OBJECTS:hs_exec_common_shared>
|
||||
$<TARGET_OBJECTS:hs_compile_shared> $<TARGET_OBJECTS:hs_exec_common_shared>
|
||||
${RUNTIME_SHLIBS})
|
||||
|
||||
endif()
|
||||
|
@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in
|
||||
compilation errors.
|
||||
|
||||
The version of PCRE used to validate Hyperscan's interpretation of this syntax
|
||||
is 8.40.
|
||||
is 8.41.
|
||||
|
||||
====================
|
||||
Supported Constructs
|
||||
|
@ -80,6 +80,42 @@ functions for the management of streams:
|
||||
another, resetting the destination stream first. This call avoids the
|
||||
allocation done by :c:func:`hs_copy_stream`.
|
||||
|
||||
==================
|
||||
Stream Compression
|
||||
==================
|
||||
|
||||
A stream object is allocated as a fixed-size region of memory which has been
|
||||
sized to ensure that no memory allocations are required during scan
|
||||
operations. When the system is under memory pressure, it may be useful to reduce
|
||||
the memory consumed by streams that are not expected to be used soon. The
|
||||
Hyperscan API provides calls for translating a stream to and from a compressed
|
||||
representation for this purpose. The compressed representation differs from the
|
||||
full stream object as it does not reserve space for components which are not
|
||||
required given the current stream state. The Hyperscan API functions for this
|
||||
functionality are:
|
||||
|
||||
* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed
|
||||
representation of the stream and returns the number of bytes consumed by the
|
||||
compressed representation. If the buffer is not large enough to hold the
|
||||
compressed representation, :c:member:`HS_INSUFFICIENT_SPACE` is returned along
|
||||
with the required size. This call does not modify the original stream in any
|
||||
way: it may still be written to with :c:func:`hs_scan_stream`, used as part of
|
||||
the various reset calls to reinitialise its state, or
|
||||
:c:func:`hs_close_stream` may be called to free its resources.
|
||||
|
||||
* :c:func:`hs_expand_stream`: creates a new stream based on a buffer containing
|
||||
a compressed representation.
|
||||
|
||||
* :c:func:`hs_reset_and_expand_stream`: constructs a stream based on a buffer
|
||||
containing a compressed representation on top of an existing stream, resetting
|
||||
the existing stream first. This call avoids the allocation done by
|
||||
:c:func:`hs_expand_stream`.
|
||||
|
||||
Note: for performance reasons, it is not recommended to use stream compression
between every call to scan, as it takes time to convert between the compressed
representation and a standard stream.
|
||||
|
||||
|
||||
**********
|
||||
Block Mode
|
||||
**********
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -165,6 +165,7 @@ static bool higher_is_better(Criterion c) {
|
||||
}
|
||||
|
||||
static void print_criterion(Criterion c, double val) {
|
||||
std::ios::fmtflags f(cout.flags());
|
||||
switch (c) {
|
||||
case CRITERION_THROUGHPUT:
|
||||
cout << std::fixed << std::setprecision(3) << val << " Megabits/s";
|
||||
@ -179,6 +180,7 @@ static void print_criterion(Criterion c, double val) {
|
||||
cout << static_cast<size_t>(val) << " bytes";
|
||||
break;
|
||||
}
|
||||
cout.flags(f);
|
||||
}
|
||||
|
||||
// Key for identifying a stream in our pcap input data, using data from its IP
|
||||
@ -596,11 +598,13 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode,
|
||||
size_t bytes = bench.bytes();
|
||||
size_t matches = bench.matches();
|
||||
if (diagnose) {
|
||||
std::ios::fmtflags f(cout.flags());
|
||||
cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time
|
||||
<< " sec, Scanned " << bytes * repeatCount << " bytes, Throughput "
|
||||
<< std::fixed << std::setprecision(3)
|
||||
<< (bytes * 8 * repeatCount) / (scan_time * 1000000)
|
||||
<< " Mbps, Matches " << matches << endl;
|
||||
cout.flags(f);
|
||||
}
|
||||
return (bytes * 8 * repeatCount) / (scan_time * 1000000);
|
||||
}
|
||||
@ -755,10 +759,12 @@ int main(int argc, char **argv) {
|
||||
for (unsigned i = count; i < 16; i++) {
|
||||
cout << " ";
|
||||
}
|
||||
std::ios::fmtflags out_f(cout.flags());
|
||||
cout << "Performance: ";
|
||||
print_criterion(criterion, best);
|
||||
cout << " (" << std::fixed << std::setprecision(3) << (best / score_base)
|
||||
<< "x) after cutting:" << endl;
|
||||
cout.flags(out_f);
|
||||
|
||||
// s now has factor_max signatures
|
||||
for (const auto &found : s) {
|
||||
|
@ -127,6 +127,16 @@ CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
|
||||
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
|
||||
const size_t length, size_t *deserialized_size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
|
||||
char *buf, size_t buf_space, size_t *used_space);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
|
||||
hs_stream_t **stream, const char *buf,size_t buf_size);
|
||||
|
||||
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
|
||||
const char *buf, size_t buf_size, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
|
||||
/** INTERNALS **/
|
||||
|
||||
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "flood_runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "teddy.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "util/arch.h"
|
||||
@ -358,7 +359,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
|
||||
}
|
||||
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
|
||||
confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
|
||||
last_match_id, confVal);
|
||||
last_match_id, confVal, conf, bit);
|
||||
} while (unlikely(!!*conf));
|
||||
}
|
||||
|
||||
@ -725,13 +726,17 @@ static never_inline
|
||||
hwlm_error_t fdr_engine_exec(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control) {
|
||||
assert(ISALIGNED_CL(fdr));
|
||||
|
||||
u32 floodBackoff = FLOOD_BACKOFF_START;
|
||||
u32 last_match_id = INVALID_MATCH_ID;
|
||||
u32 domain_mask_flipped = ~fdr->domainMask;
|
||||
u8 stride = fdr->stride;
|
||||
const u64a *ft =
|
||||
(const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)));
|
||||
const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize);
|
||||
(const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR)));
|
||||
assert(ISALIGNED_CL(ft));
|
||||
const u32 *confBase = (const u32 *)((const u8 *)fdr + fdr->confOffset);
|
||||
assert(ISALIGNED_CL(confBase));
|
||||
struct zone zones[ZONE_MAX];
|
||||
assert(fdr->domain > 8 && fdr->domain < 16);
|
||||
|
||||
@ -798,14 +803,14 @@ static const FDRFUNCTYPE funcs[] = {
|
||||
fdr_engine_exec,
|
||||
NULL, /* old: fast teddy */
|
||||
NULL, /* old: fast teddy */
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks2_pck_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks3_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks3_pck_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks4_fat),
|
||||
ONLY_AVX2(fdr_exec_teddy_avx2_msks4_pck_fat),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks1),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks1_pck),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks2),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks2_pck),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks3),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks3_pck),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks4),
|
||||
ONLY_AVX2(fdr_exec_fat_teddy_msks4_pck),
|
||||
fdr_exec_teddy_msks1,
|
||||
fdr_exec_teddy_msks1_pck,
|
||||
fdr_exec_teddy_msks2,
|
||||
@ -820,8 +825,8 @@ static const FDRFUNCTYPE funcs[] = {
|
||||
static const u8 fake_history[FAKE_HISTORY_SIZE];
|
||||
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups) {
|
||||
size_t start, HWLMCallback cb,
|
||||
struct hs_scratch *scratch, hwlm_group_t groups) {
|
||||
// We guarantee (for safezone construction) that it is safe to read 16
|
||||
// bytes before the end of the history buffer.
|
||||
const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE;
|
||||
@ -833,7 +838,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
0,
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
scratch,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
0
|
||||
};
|
||||
@ -847,7 +852,8 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
size_t start, HWLMCallback cb,
|
||||
struct hs_scratch *scratch,
|
||||
hwlm_group_t groups) {
|
||||
struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
@ -856,7 +862,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
hlen,
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
scratch,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
/* we are guaranteed to always have 16 initialised bytes at the end of
|
||||
* the history buffer (they may be garbage). */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -42,6 +42,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
struct FDR;
|
||||
struct hs_scratch;
|
||||
|
||||
/**
|
||||
* \brief Block-mode scan.
|
||||
@ -49,13 +50,13 @@ struct FDR;
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan.
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param start First offset in buf at which a match may start.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param scratch Scratch supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
*/
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/**
|
||||
@ -66,14 +67,15 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
* \param hlen Length of history buffer (hbuf).
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan (buf).
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param start First offset in buf at which a match may start.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param scratch Scratch supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
*/
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
size_t start, HWLMCallback cb,
|
||||
struct hs_scratch *scratch,
|
||||
hwlm_group_t groups);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -42,7 +42,9 @@
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/math.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/target_info.h"
|
||||
@ -50,6 +52,7 @@
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
@ -61,6 +64,8 @@
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/multi_array.hpp>
|
||||
@ -81,7 +86,6 @@ private:
|
||||
bool make_small;
|
||||
|
||||
u8 *tabIndexToMask(u32 indexInTable);
|
||||
void assignStringsToBuckets();
|
||||
#ifdef DEBUG
|
||||
void dumpMasks(const u8 *defaultMask);
|
||||
#endif
|
||||
@ -90,10 +94,13 @@ private:
|
||||
void createInitialState(FDR *fdr);
|
||||
|
||||
public:
|
||||
FDRCompiler(vector<hwlmLiteral> lits_in, const FDREngineDescription &eng_in,
|
||||
FDRCompiler(vector<hwlmLiteral> lits_in,
|
||||
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
|
||||
const FDREngineDescription &eng_in,
|
||||
bool make_small_in, const Grey &grey_in)
|
||||
: eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
|
||||
lits(move(lits_in)), make_small(make_small_in) {}
|
||||
lits(move(lits_in)), bucketToLits(move(bucketToLits_in)),
|
||||
make_small(make_small_in) {}
|
||||
|
||||
bytecode_ptr<FDR> build();
|
||||
};
|
||||
@ -144,61 +151,139 @@ void FDRCompiler::createInitialState(FDR *fdr) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Lay out FDR structures in bytecode.
|
||||
*
|
||||
* Note that each major structure (header, table, confirm, flood control) is
|
||||
* cacheline-aligned.
|
||||
*/
|
||||
bytecode_ptr<FDR> FDRCompiler::setupFDR() {
|
||||
auto floodTable = setupFDRFloodControl(lits, eng, grey);
|
||||
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
size_t headerSize = sizeof(FDR);
|
||||
size_t tabSize = eng.getTabSizeBytes();
|
||||
|
||||
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
|
||||
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
assert(ISALIGNED_16(tabSize));
|
||||
assert(ISALIGNED_16(confirmTmp.size()));
|
||||
assert(ISALIGNED_16(floodControlTmp.size()));
|
||||
size_t headerSize = ROUNDUP_16(sizeof(FDR));
|
||||
size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() +
|
||||
floodControlTmp.size());
|
||||
// Note: we place each major structure here on a cacheline boundary.
|
||||
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) +
|
||||
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
|
||||
|
||||
DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
|
||||
"total=%zu\n",
|
||||
headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(),
|
||||
headerSize, tabSize, confirmTable.size(), floodTable.size(),
|
||||
size);
|
||||
|
||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
u8 *fdr_base = (u8 *)fdr.get();
|
||||
|
||||
// Write header.
|
||||
fdr->size = size;
|
||||
fdr->engineID = eng.getID();
|
||||
fdr->maxStringLen = verify_u32(maxLen(lits));
|
||||
createInitialState(fdr.get());
|
||||
|
||||
u8 *fdr_base = (u8 *)fdr.get();
|
||||
u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR));
|
||||
copy(tab.begin(), tab.end(), ptr);
|
||||
ptr += tabSize;
|
||||
|
||||
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
|
||||
ptr += confirmTmp.size();
|
||||
|
||||
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
||||
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
|
||||
ptr += floodControlTmp.size();
|
||||
|
||||
/* we are allowing domains 9 to 15 only */
|
||||
assert(eng.bits > 8 && eng.bits < 16);
|
||||
fdr->numStrings = verify_u32(lits.size());
|
||||
assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only
|
||||
fdr->domain = eng.bits;
|
||||
fdr->domainMask = (1 << eng.bits) - 1;
|
||||
fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8);
|
||||
fdr->tabSize = tabSize;
|
||||
fdr->stride = eng.stride;
|
||||
createInitialState(fdr.get());
|
||||
|
||||
// Write table.
|
||||
u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR));
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
copy(tab.begin(), tab.end(), ptr);
|
||||
ptr += ROUNDUP_CL(tabSize);
|
||||
|
||||
// Write confirm structures.
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
fdr->confOffset = verify_u32(ptr - fdr_base);
|
||||
memcpy(ptr, confirmTable.get(), confirmTable.size());
|
||||
ptr += ROUNDUP_CL(confirmTable.size());
|
||||
|
||||
// Write flood control structures.
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
fdr->floodOffset = verify_u32(ptr - fdr_base);
|
||||
memcpy(ptr, floodTable.get(), floodTable.size());
|
||||
ptr += floodTable.size(); // last write, no need to round up
|
||||
|
||||
return fdr;
|
||||
}
|
||||
|
||||
//#define DEBUG_ASSIGNMENT
|
||||
|
||||
static
|
||||
double getScoreUtil(u32 len, u32 count) {
|
||||
return len == 0 ? numeric_limits<double>::max()
|
||||
: our_pow(count, 1.05) * our_pow(len, -3.0);
|
||||
}
|
||||
/**
|
||||
* Utility class for computing:
|
||||
*
|
||||
* score(count, len) = pow(count, 1.05) * pow(len, -3)
|
||||
*
|
||||
* Calling pow() is expensive. This is mitigated by using pre-computed LUTs for
|
||||
* small inputs and a cache for larger ones.
|
||||
*/
|
||||
class Scorer {
|
||||
unordered_map<u32, double> count_factor_cache;
|
||||
|
||||
// LUT: pow(count, 1.05) for small values of count.
|
||||
static const array<double, 100> count_lut;
|
||||
|
||||
double count_factor(u32 count) {
|
||||
if (count < count_lut.size()) {
|
||||
return count_lut[count];
|
||||
}
|
||||
|
||||
auto it = count_factor_cache.find(count);
|
||||
if (it != count_factor_cache.end()) {
|
||||
return it->second;
|
||||
}
|
||||
double r = our_pow(count, 1.05);
|
||||
count_factor_cache.emplace(count, r);
|
||||
return r;
|
||||
}
|
||||
|
||||
// LUT: pow(len, -3) for len in range [0,8].
|
||||
static const array<double, 9> len_lut;
|
||||
|
||||
double len_factor(u32 len) {
|
||||
assert(len <= len_lut.size());
|
||||
return len_lut[len];
|
||||
}
|
||||
|
||||
public:
|
||||
double operator()(u32 len, u32 count) {
|
||||
if (len == 0) {
|
||||
return numeric_limits<double>::max();
|
||||
}
|
||||
return count_factor(count) * len_factor(len);
|
||||
}
|
||||
};
|
||||
|
||||
// Table of pow(count, 1.05) for count = 0..99; the entry at index i holds
// pow(i, 1.05). Laid out four entries per row.
const array<double, 100> Scorer::count_lut{{
    pow(0, 1.05),  pow(1, 1.05),  pow(2, 1.05),  pow(3, 1.05),
    pow(4, 1.05),  pow(5, 1.05),  pow(6, 1.05),  pow(7, 1.05),
    pow(8, 1.05),  pow(9, 1.05),  pow(10, 1.05), pow(11, 1.05),
    pow(12, 1.05), pow(13, 1.05), pow(14, 1.05), pow(15, 1.05),
    pow(16, 1.05), pow(17, 1.05), pow(18, 1.05), pow(19, 1.05),
    pow(20, 1.05), pow(21, 1.05), pow(22, 1.05), pow(23, 1.05),
    pow(24, 1.05), pow(25, 1.05), pow(26, 1.05), pow(27, 1.05),
    pow(28, 1.05), pow(29, 1.05), pow(30, 1.05), pow(31, 1.05),
    pow(32, 1.05), pow(33, 1.05), pow(34, 1.05), pow(35, 1.05),
    pow(36, 1.05), pow(37, 1.05), pow(38, 1.05), pow(39, 1.05),
    pow(40, 1.05), pow(41, 1.05), pow(42, 1.05), pow(43, 1.05),
    pow(44, 1.05), pow(45, 1.05), pow(46, 1.05), pow(47, 1.05),
    pow(48, 1.05), pow(49, 1.05), pow(50, 1.05), pow(51, 1.05),
    pow(52, 1.05), pow(53, 1.05), pow(54, 1.05), pow(55, 1.05),
    pow(56, 1.05), pow(57, 1.05), pow(58, 1.05), pow(59, 1.05),
    pow(60, 1.05), pow(61, 1.05), pow(62, 1.05), pow(63, 1.05),
    pow(64, 1.05), pow(65, 1.05), pow(66, 1.05), pow(67, 1.05),
    pow(68, 1.05), pow(69, 1.05), pow(70, 1.05), pow(71, 1.05),
    pow(72, 1.05), pow(73, 1.05), pow(74, 1.05), pow(75, 1.05),
    pow(76, 1.05), pow(77, 1.05), pow(78, 1.05), pow(79, 1.05),
    pow(80, 1.05), pow(81, 1.05), pow(82, 1.05), pow(83, 1.05),
    pow(84, 1.05), pow(85, 1.05), pow(86, 1.05), pow(87, 1.05),
    pow(88, 1.05), pow(89, 1.05), pow(90, 1.05), pow(91, 1.05),
    pow(92, 1.05), pow(93, 1.05), pow(94, 1.05), pow(95, 1.05),
    pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05),
}};
|
||||
|
||||
// Table of pow(len, -3) for len = 0..8; the entry at index i holds
// pow(i, -3.0). Laid out three entries per row.
const array<double, 9> Scorer::len_lut{{
    pow(0, -3.0), pow(1, -3.0), pow(2, -3.0),
    pow(3, -3.0), pow(4, -3.0), pow(5, -3.0),
    pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
|
||||
|
||||
/**
|
||||
* Returns true if the two given literals should be placed in the same chunk as
|
||||
@ -297,7 +382,10 @@ next_literal:
|
||||
return chunks;
|
||||
}
|
||||
|
||||
void FDRCompiler::assignStringsToBuckets() {
|
||||
static
|
||||
map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
|
||||
vector<hwlmLiteral> &lits,
|
||||
const FDREngineDescription &eng) {
|
||||
const double MAX_SCORE = numeric_limits<double>::max();
|
||||
|
||||
assert(!lits.empty()); // Shouldn't be called with no literals.
|
||||
@ -340,12 +428,14 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
boost::multi_array<pair<double, u32>, 2> t(
|
||||
boost::extents[numChunks][numBuckets]);
|
||||
|
||||
Scorer scorer;
|
||||
|
||||
for (u32 j = 0; j < numChunks; j++) {
|
||||
u32 cnt = 0;
|
||||
for (u32 k = j; k < numChunks; ++k) {
|
||||
cnt += chunks[k].count;
|
||||
}
|
||||
t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0};
|
||||
t[j][0] = {scorer(chunks[j].length, cnt), 0};
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < numBuckets; i++) {
|
||||
@ -353,7 +443,7 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
pair<double, u32> best = {MAX_SCORE, 0};
|
||||
u32 cnt = chunks[j].count;
|
||||
for (u32 k = j + 1; k < numChunks - 1; k++) {
|
||||
auto score = getScoreUtil(chunks[j].length, cnt);
|
||||
auto score = scorer(chunks[j].length, cnt);
|
||||
if (score > best.first) {
|
||||
break; // now worse locally than our best score, give up
|
||||
}
|
||||
@ -381,6 +471,7 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
|
||||
// our best score is in t[0][N_BUCKETS-1] and we can follow the links
|
||||
// to find where our buckets should start and what goes into them
|
||||
vector<vector<LiteralIndex>> buckets;
|
||||
for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
|
||||
u32 j = t[i][n - 1].second;
|
||||
if (j == 0) {
|
||||
@ -391,21 +482,33 @@ void FDRCompiler::assignStringsToBuckets() {
|
||||
u32 first_id = chunks[i].first_id;
|
||||
u32 last_id = chunks[j].first_id;
|
||||
assert(first_id < last_id);
|
||||
u32 bucket = numBuckets - n;
|
||||
UNUSED const auto &first_lit = lits[first_id];
|
||||
UNUSED const auto &last_lit = lits[last_id - 1];
|
||||
DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, "
|
||||
"score %0.4f)\n",
|
||||
first_id, last_id, bucket, last_id - first_id,
|
||||
first_lit.s.length(), last_lit.s.length(),
|
||||
getScoreUtil(first_lit.s.length(), last_id - first_id));
|
||||
DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, "
|
||||
"score %0.4f)\n",
|
||||
first_id, last_id, last_id - first_id,
|
||||
first_lit.s.length(), last_lit.s.length(),
|
||||
scorer(first_lit.s.length(), last_id - first_id));
|
||||
|
||||
auto &bucket_lits = bucketToLits[bucket];
|
||||
for (u32 k = first_id; k < last_id; k++) {
|
||||
bucket_lits.push_back(k);
|
||||
vector<LiteralIndex> litIds;
|
||||
u32 cnt = last_id - first_id;
|
||||
// long literals first for included literals checking
|
||||
for (u32 k = 0; k < cnt; k++) {
|
||||
litIds.push_back(last_id - k - 1);
|
||||
}
|
||||
|
||||
i = j;
|
||||
buckets.push_back(litIds);
|
||||
}
|
||||
|
||||
// reverse bucket id, longer literals come first
|
||||
map<BucketIndex, vector<LiteralIndex>> bucketToLits;
|
||||
size_t bucketCnt = buckets.size();
|
||||
for (size_t i = 0; i < bucketCnt; i++) {
|
||||
bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i]));
|
||||
}
|
||||
|
||||
return bucketToLits;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
@ -426,7 +529,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
||||
const vector<LiteralIndex> &vl,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
SuffixPositionInString pos,
|
||||
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
||||
map<u32, unordered_set<u32>> &m2) {
|
||||
assert(eng.bits < 32);
|
||||
|
||||
u32 distance = 0;
|
||||
@ -497,7 +600,7 @@ void FDRCompiler::setupTab() {
|
||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||
u32 bit = eng.getSchemeBit(b, pos);
|
||||
map<u32, ue2::unordered_set<u32>> m2;
|
||||
map<u32, unordered_set<u32>> m2;
|
||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||
if (done) {
|
||||
clearbit(&defaultMask[0], bit);
|
||||
@ -505,7 +608,7 @@ void FDRCompiler::setupTab() {
|
||||
}
|
||||
for (const auto &elem : m2) {
|
||||
u32 dc = elem.first;
|
||||
const ue2::unordered_set<u32> &mskSet = elem.second;
|
||||
const unordered_set<u32> &mskSet = elem.second;
|
||||
u32 v = ~dc;
|
||||
do {
|
||||
u32 b2 = v & dc;
|
||||
@ -529,24 +632,222 @@ void FDRCompiler::setupTab() {
|
||||
}
|
||||
|
||||
bytecode_ptr<FDR> FDRCompiler::build() {
|
||||
assignStringsToBuckets();
|
||||
setupTab();
|
||||
return setupFDR();
|
||||
}
|
||||
|
||||
static
|
||||
bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
|
||||
const auto &s1 = lit1.s;
|
||||
const auto &s2 = lit2.s;
|
||||
size_t len1 = s1.length();
|
||||
size_t len2 = s2.length();
|
||||
assert(len1 >= len2);
|
||||
|
||||
if (lit1.nocase || lit2.nocase) {
|
||||
return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2,
|
||||
[](char a, char b) { return mytoupper(a) == mytoupper(b); });
|
||||
} else {
|
||||
return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info
|
||||
* of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can
|
||||
* possibly match when lit1 matches. In this case, we can't do bucket
|
||||
* squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1,
|
||||
* we can't squash bucket 1 if we have input like "aaa" as aa can also match.
|
||||
*/
|
||||
static
|
||||
bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
|
||||
/* lit1 is caseless and lit2 is case sensitive */
|
||||
if ((lit1.nocase && !lit2.nocase)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* lit2's group is a subset of lit1 */
|
||||
if (lit1.groups != lit2.groups &&
|
||||
(lit2.groups == (lit1.groups & lit2.groups))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* TODO: narrow down cases for mask check */
|
||||
if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1
|
||||
* shouldn't match at the same offset, otherwise we give up squashing for lit1.
|
||||
* e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate
|
||||
* matches for input "aaa" if lit0 and lit1 both squash lit2.
|
||||
*/
|
||||
static
|
||||
bool checkParentLit(
|
||||
const vector<hwlmLiteral> &lits, u32 pos1,
|
||||
const unordered_set<u32> &parent_map,
|
||||
const unordered_map<u32, unordered_set<u32>> &exception_map) {
|
||||
assert(pos1 < lits.size());
|
||||
const auto &lit1 = lits[pos1];
|
||||
for (const auto pos2 : parent_map) {
|
||||
if (contains(exception_map, pos2)) {
|
||||
const auto &exception_pos = exception_map.at(pos2);
|
||||
if (contains(exception_pos, pos1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* if lit1 isn't an exception of lit2, then we have to do further
|
||||
* exclusive check.
|
||||
* TODO: More mask checks. Note if two literals are group exclusive,
|
||||
* it is possible that they match at the same offset. */
|
||||
assert(pos2 < lits.size());
|
||||
const auto &lit2 = lits[pos2];
|
||||
if (isSuffix(lit2, lit1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Scans group[start..] for literals that are suffixes of lits[id1] (lit1) and
// decides whether lit1 can take one of them as its included literal.
// Each group entry is a (bucket, index into lits) pair.
// Outcomes visible in this function:
//  - a suffix in lit1's own bucket: return without changing anything further;
//  - a suffix for which includedCheck() is true: recorded in
//    exception_map[id1];
//  - a suffix accepted by checkParentLit(): lit1.included_id is set to its id,
//    or reset to INVALID_LIT_ID (and we return) if one was already set;
//  - at the end of a run of entries sharing a bucket, if a literal was
//    accepted: either reset included_id (an exception was seen in that run)
//    or record id1 in parent_map[child_id] and set that bucket's bit in
//    lit1.squash, then return.
static
|
||||
void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
|
||||
size_t start, const vector<pair<u32, u32>> &group,
|
||||
unordered_map<u32, unordered_set<u32>> &parent_map,
|
||||
unordered_map<u32, unordered_set<u32>> &exception_map) {
|
||||
auto &lit1 = lits[id1];
|
||||
DEBUG_PRINTF("b:%u len:%zu\n", bucket1, lit1.s.length());
|
||||
|
||||
size_t cnt = group.size();
|
||||
// included/exception describe the current run of same-bucket entries.
bool included = false;
|
||||
bool exception = false;
|
||||
// index (into lits) of the accepted included literal; ~0U until one is found
u32 child_id = ~0U;
|
||||
for (size_t i = start; i < cnt; i++) {
|
||||
u32 bucket2 = group[i].first;
|
||||
// entries are expected in non-decreasing bucket order relative to bucket1
assert(bucket2 >= bucket1);
|
||||
|
||||
u32 id2 = group[i].second;
|
||||
auto &lit2 = lits[id2];
|
||||
// check if lit2 is a suffix of lit1
|
||||
if (isSuffix(lit1, lit2)) {
|
||||
/* if we have an included literal in the same bucket,
|
||||
* quit and let the included literal do any possible squashing */
|
||||
if (bucket1 == bucket2) {
|
||||
DEBUG_PRINTF("same bucket\n");
|
||||
return;
|
||||
}
|
||||
/* if lit2 is a suffix but doesn't pass included checks for
|
||||
* extra info, we give up squashing */
|
||||
if (includedCheck(lit1, lit2)) {
|
||||
DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
|
||||
exception_map[id1].insert(id2);
|
||||
exception = true;
|
||||
// note: parent_map[id2] creates an empty entry if id2 has no parents yet
} else if (checkParentLit(lits, id1, parent_map[id2],
|
||||
exception_map)) {
|
||||
if (lit1.included_id == INVALID_LIT_ID) {
|
||||
DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
|
||||
lit1.id, lit2.id);
|
||||
lit1.included_id = lit2.id;
|
||||
} else {
|
||||
/* if we have multiple included literals in one bucket,
|
||||
* give up squashing. */
|
||||
DEBUG_PRINTF("multiple included literals\n");
|
||||
lit1.included_id = INVALID_LIT_ID;
|
||||
return;
|
||||
}
|
||||
child_id = id2;
|
||||
included = true;
|
||||
}
|
||||
}
|
||||
|
||||
size_t next = i + 1;
|
||||
// ~0U is a sentinel for "no next entry", so the last entry closes its run
// (assumes no real bucket index equals ~0U)
u32 nextBucket = next < cnt ? group[next].first : ~0U;
|
||||
// end of the run of entries that share bucket2
if (bucket2 != nextBucket) {
|
||||
if (included) {
|
||||
if (exception) {
|
||||
/* give up if we have exception literals
|
||||
* in the same bucket as the included literal. */
|
||||
lit1.included_id = INVALID_LIT_ID;
|
||||
} else {
|
||||
parent_map[child_id].insert(id1);
|
||||
|
||||
lit1.squash |= 1U << bucket2;
|
||||
DEBUG_PRINTF("build squash mask %2x for %u\n",
|
||||
lit1.squash, lit1.id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// nothing accepted in this run: exceptions do not carry over to the next
exception = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr u32 INCLUDED_LIMIT = 1000;
|
||||
|
||||
static
|
||||
void findIncludedLits(vector<hwlmLiteral> &lits,
|
||||
const vector<vector<pair<u32, u32>>> &lastCharMap) {
|
||||
/* Map for finding the positions of literal which includes a literal
|
||||
* in FDR hwlm literal vector. */
|
||||
unordered_map<u32, unordered_set<u32>> parent_map;
|
||||
|
||||
/* Map for finding the positions of exception literals which could
|
||||
* sometimes match if a literal matches in FDR hwlm literal vector. */
|
||||
unordered_map<u32, unordered_set<u32>> exception_map;
|
||||
for (const auto &group : lastCharMap) {
|
||||
size_t cnt = group.size();
|
||||
if (cnt > INCLUDED_LIMIT) {
|
||||
continue;
|
||||
}
|
||||
for (size_t i = 0; i < cnt; i++) {
|
||||
u32 bucket1 = group[i].first;
|
||||
u32 id1 = group[i].second;
|
||||
buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
|
||||
exception_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void addIncludedInfo(
|
||||
vector<hwlmLiteral> &lits, u32 nBuckets,
|
||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
|
||||
vector<vector<pair<u32, u32>>> lastCharMap(256);
|
||||
|
||||
for (BucketIndex b = 0; b < nBuckets; b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
||||
const auto &lit = lits[lit_idx];
|
||||
u8 c = mytoupper(lit.s.back());
|
||||
lastCharMap[c].emplace_back(b, lit_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
findIncludedLits(lits, lastCharMap);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
|
||||
bool make_small, const target_t &target,
|
||||
const Grey &grey, u32 hint) {
|
||||
unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
|
||||
vector<hwlmLiteral> &lits,
|
||||
bool make_small,
|
||||
const target_t &target,
|
||||
const Grey &grey, u32 hint) {
|
||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||
|
||||
if (grey.fdrAllowTeddy) {
|
||||
auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey);
|
||||
if (fdr) {
|
||||
auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
|
||||
target);
|
||||
if (proto) {
|
||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||
return fdr;
|
||||
return proto;
|
||||
} else {
|
||||
DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
|
||||
}
|
||||
@ -564,23 +865,47 @@ bytecode_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
|
||||
des->stride = 1;
|
||||
}
|
||||
|
||||
FDRCompiler fc(lits, *des, make_small, grey);
|
||||
auto bucketToLits = assignStringsToBuckets(lits, *des);
|
||||
addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
|
||||
auto proto =
|
||||
ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
|
||||
make_small);
|
||||
return proto;
|
||||
}
|
||||
|
||||
unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
|
||||
bool make_small, const target_t &target,
|
||||
const Grey &grey) {
|
||||
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
|
||||
HINT_INVALID);
|
||||
}
|
||||
|
||||
static
|
||||
bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
|
||||
const Grey &grey) {
|
||||
|
||||
if (proto.teddyEng) {
|
||||
return teddyBuildTable(proto, grey);
|
||||
}
|
||||
|
||||
FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng),
|
||||
proto.make_small, grey);
|
||||
return fc.build();
|
||||
}
|
||||
|
||||
bytecode_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
|
||||
bool make_small, const target_t &target,
|
||||
const Grey &grey) {
|
||||
return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID);
|
||||
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
|
||||
return fdrBuildTableInternal(proto, grey);
|
||||
}
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
bytecode_ptr<FDR> fdrBuildTableHinted(const vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target,
|
||||
const Grey &grey) {
|
||||
return fdrBuildTableInternal(lits, make_small, target, grey, hint);
|
||||
unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
|
||||
vector<hwlmLiteral> lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target,
|
||||
const Grey &grey) {
|
||||
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
|
||||
hint);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -34,6 +34,7 @@
|
||||
#define FDR_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
|
||||
#include <vector>
|
||||
@ -46,18 +47,23 @@ struct hwlmLiteral;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
bytecode_ptr<FDR> fdrBuildTable(const std::vector<hwlmLiteral> &lits,
|
||||
bool make_small, const target_t &target,
|
||||
const Grey &grey);
|
||||
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey);
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
bytecode_ptr<FDR> fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target, const Grey &grey);
|
||||
|
||||
std::unique_ptr<HWLMProto> fdrBuildProtoHinted(
|
||||
u8 engType,
|
||||
std::vector<hwlmLiteral> lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target,
|
||||
const Grey &grey);
|
||||
#endif
|
||||
|
||||
std::unique_ptr<HWLMProto> fdrBuildProto(
|
||||
u8 engType,
|
||||
std::vector<hwlmLiteral> lits,
|
||||
bool make_small, const target_t &target,
|
||||
const Grey &grey);
|
||||
|
||||
/** \brief Returns size in bytes of the given FDR engine. */
|
||||
size_t fdrSize(const struct FDR *fdr);
|
||||
|
||||
|
@ -57,10 +57,11 @@ class FDREngineDescription;
|
||||
struct hwlmStreamingControl;
|
||||
struct Grey;
|
||||
|
||||
bytecode_ptr<u8> setupFullConfs(const std::vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng,
|
||||
std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
|
||||
bool make_small);
|
||||
bytecode_ptr<u8> setupFullConfs(
|
||||
const std::vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng,
|
||||
const std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
|
||||
bool make_small);
|
||||
|
||||
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -42,12 +42,11 @@ u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
|
||||
#define CONF_TYPE u64a
|
||||
#define CONF_HASH_CALL mul_hash_64
|
||||
|
||||
typedef enum LitInfoFlags {
|
||||
NoFlags = 0,
|
||||
Caseless = 1,
|
||||
NoRepeat = 2,
|
||||
ComplexConfirm = 4
|
||||
} LitInfoFlags;
|
||||
/**
|
||||
* \brief Flag indicating this literal doesn't need to be delivered more than
|
||||
* once, used in LitInfo::flags.
|
||||
*/
|
||||
#define FDR_LIT_FLAG_NOREPEAT 1
|
||||
|
||||
/**
|
||||
* \brief Structure describing a literal, linked to by FDRConfirm.
|
||||
@ -61,12 +60,12 @@ struct LitInfo {
|
||||
hwlm_group_t groups;
|
||||
u32 id; // literal ID as passed in
|
||||
u8 size;
|
||||
u8 flags; /* LitInfoFlags */
|
||||
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
|
||||
u8 next;
|
||||
u8 extended_size;
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
#define FDRC_FLAG_NOREPEAT 2
|
||||
|
||||
/**
|
||||
* \brief FDR confirm header.
|
||||
@ -79,12 +78,8 @@ struct LitInfo {
|
||||
struct FDRConfirm {
|
||||
CONF_TYPE andmsk;
|
||||
CONF_TYPE mult;
|
||||
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
|
||||
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
|
||||
u32 nBits;
|
||||
hwlm_group_t groups;
|
||||
u32 soleLitSize;
|
||||
u32 soleLitCmp;
|
||||
u32 soleLitMsk;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/container.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
@ -47,19 +48,6 @@ namespace ue2 {
|
||||
|
||||
using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;
|
||||
|
||||
// return the number of bytes beyond a length threshold in all strings in lits
|
||||
static
|
||||
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
|
||||
size_t tot = 0;
|
||||
for (const auto &lit : lits) {
|
||||
size_t sz = lit.s.size();
|
||||
if (sz > threshold) {
|
||||
tot += ROUNDUP_N(sz - threshold, 8);
|
||||
}
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
static
|
||||
u64a make_u64a_mask(const vector<u8> &v) {
|
||||
assert(v.size() <= sizeof(u64a));
|
||||
@ -92,19 +80,12 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
LitInfo &info = tmpLitInfo[i];
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.id = lit.id;
|
||||
u8 flags = NoFlags;
|
||||
if (lit.nocase) {
|
||||
flags |= Caseless;
|
||||
}
|
||||
u8 flags = 0;
|
||||
if (lit.noruns) {
|
||||
flags |= NoRepeat;
|
||||
}
|
||||
if (lit.msk.size() > lit.s.size()) {
|
||||
flags |= ComplexConfirm;
|
||||
info.extended_size = verify_u8(lit.msk.size());
|
||||
flags |= FDR_LIT_FLAG_NOREPEAT;
|
||||
}
|
||||
info.flags = flags;
|
||||
info.size = verify_u8(lit.s.size());
|
||||
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
|
||||
info.groups = lit.groups;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
@ -149,7 +130,12 @@ void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
|
||||
static
|
||||
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||
bool make_small, bool make_confirm) {
|
||||
bool make_small) {
|
||||
// Every literal must fit within CONF_TYPE.
|
||||
assert(all_of_in(lits, [](const hwlmLiteral &lit) {
|
||||
return lit.s.size() <= sizeof(CONF_TYPE);
|
||||
}));
|
||||
|
||||
vector<LitInfo> tmpLitInfo(lits.size());
|
||||
CONF_TYPE andmsk;
|
||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||
@ -167,40 +153,6 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||
}
|
||||
|
||||
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
||||
u32 flags = 0;
|
||||
// we use next three variables for 'confirmless' case to speed-up
|
||||
// confirmation process
|
||||
u32 soleLitSize = 0;
|
||||
u32 soleLitCmp = 0;
|
||||
u32 soleLitMsk = 0;
|
||||
|
||||
if (!make_confirm) {
|
||||
flags = FDRC_FLAG_NO_CONFIRM;
|
||||
if (lits[0].noruns) {
|
||||
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
||||
}
|
||||
mult = 0;
|
||||
soleLitSize = lits[0].s.size() - 1;
|
||||
// we can get to this point only in confirmless case;
|
||||
// it means that we have only one literal per FDRConfirm (no packing),
|
||||
// with no literal mask and size of literal is less or equal
|
||||
// to the number of masks of Teddy engine;
|
||||
// maximum number of masks for Teddy is 4, so the size of
|
||||
// literal is definitely less or equal to size of u32
|
||||
assert(lits[0].s.size() <= sizeof(u32));
|
||||
for (u32 i = 0; i < lits[0].s.size(); i++) {
|
||||
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
|
||||
u8 c = lits[0].s[lits[0].s.size() - i - 1];
|
||||
if (lits[0].nocase && ourisalpha(c)) {
|
||||
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
|
||||
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
|
||||
}
|
||||
else {
|
||||
soleLitCmp |= (u32)c << shiftLoc;
|
||||
soleLitMsk |= (u32)0xff << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we can walk the vector and assign elements from the vectors to a
|
||||
// map by hash value
|
||||
@ -276,12 +228,11 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||
#endif
|
||||
|
||||
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
|
||||
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
|
||||
|
||||
// this size can now be a worst-case as we can always be a bit smaller
|
||||
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
|
||||
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
|
||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||
sizeof(LitInfo) * lits.size();
|
||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||
|
||||
auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
|
||||
@ -289,11 +240,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||
|
||||
fdrc->andmsk = andmsk;
|
||||
fdrc->mult = mult;
|
||||
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
|
||||
fdrc->flags = flags;
|
||||
fdrc->soleLitSize = soleLitSize;
|
||||
fdrc->soleLitCmp = soleLitCmp;
|
||||
fdrc->soleLitMsk = soleLitMsk;
|
||||
fdrc->nBits = nBits;
|
||||
|
||||
fdrc->groups = gm;
|
||||
|
||||
@ -345,40 +292,37 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
|
||||
bytecode_ptr<u8>
|
||||
setupFullConfs(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng,
|
||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
|
||||
bool make_small) {
|
||||
bool makeConfirm = true;
|
||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
||||
getTeddyDescription(eng.getID());
|
||||
if (teddyDescr) {
|
||||
makeConfirm = teddyDescr->needConfirm(lits);
|
||||
}
|
||||
|
||||
BC2CONF bc2Conf;
|
||||
u32 totalConfirmSize = 0;
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
if (contains(bucketToLits, b)) {
|
||||
vector<hwlmLiteral> vl;
|
||||
for (const LiteralIndex &lit_idx : bucketToLits[b]) {
|
||||
for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
|
||||
vl.push_back(lits[lit_idx]);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
|
||||
auto fc = getFDRConfirm(vl, make_small, makeConfirm);
|
||||
auto fc = getFDRConfirm(vl, make_small);
|
||||
totalConfirmSize += fc.size();
|
||||
bc2Conf.emplace(b, move(fc));
|
||||
}
|
||||
}
|
||||
|
||||
u32 nBuckets = eng.getNumBuckets();
|
||||
u32 totalConfSwitchSize = nBuckets * sizeof(u32);
|
||||
u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);
|
||||
u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32));
|
||||
u32 totalSize = totalConfSwitchSize + totalConfirmSize;
|
||||
|
||||
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
|
||||
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
|
||||
assert(buf); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
u32 *confBase = (u32 *)buf.get();
|
||||
u8 *ptr = buf.get() + totalConfSwitchSize;
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
|
||||
for (const auto &m : bc2Conf) {
|
||||
const BucketIndex &idx = m.first;
|
||||
|
@ -29,6 +29,7 @@
|
||||
#ifndef FDR_CONFIRM_RUNTIME_H
|
||||
#define FDR_CONFIRM_RUNTIME_H
|
||||
|
||||
#include "scratch.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
@ -41,13 +42,14 @@
|
||||
static really_inline
|
||||
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
|
||||
size_t i, hwlmcb_rv_t *control, u32 *last_match,
|
||||
u64a conf_key) {
|
||||
u64a conf_key, u64a *conf, u8 bit) {
|
||||
assert(i < a->len);
|
||||
assert(i >= a->start_offset);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
const u8 * buf = a->buf;
|
||||
u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult,
|
||||
fdrc->nBitsOrSoleID);
|
||||
fdrc->nBits);
|
||||
u32 start = getConfirmLitIndex(fdrc)[c];
|
||||
if (likely(!start)) {
|
||||
return;
|
||||
@ -56,6 +58,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
const struct LitInfo *li
|
||||
= (const struct LitInfo *)((const u8 *)fdrc + start);
|
||||
|
||||
struct hs_scratch *scratch = a->scratch;
|
||||
assert(!scratch->fdr_conf);
|
||||
scratch->fdr_conf = conf;
|
||||
scratch->fdr_conf_offset = bit;
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(li));
|
||||
@ -64,7 +70,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((*last_match == li->id) && (li->flags & NoRepeat)) {
|
||||
if ((*last_match == li->id) && (li->flags & FDR_LIT_FLAG_NOREPEAT)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -86,99 +92,13 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(li->flags & ComplexConfirm)) {
|
||||
const u8 *loc2 = buf + i - li->extended_size + 1;
|
||||
if (loc2 < buf) {
|
||||
u32 full_overhang = buf - loc2;
|
||||
size_t len_history = a->len_history;
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*last_match = li->id;
|
||||
*control = a->cb(loc - buf, i, li->id, a->ctxt);
|
||||
*control = a->cb(i, li->id, scratch);
|
||||
out:
|
||||
oldNext = li->next; // oldNext is either 0 or an 'adjust' value
|
||||
li++;
|
||||
} while (oldNext);
|
||||
}
|
||||
|
||||
// 'light-weight' confirmation function which is used by 1-mask Teddy;
|
||||
// in the 'confirmless' case it simply calls callback function,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit1(const struct FDRConfirm *fdrc,
|
||||
const struct FDR_Runtime_Args *a, size_t i,
|
||||
hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, control, last_match, conf_key);
|
||||
return;
|
||||
} else {
|
||||
u32 id = fdrc->nBitsOrSoleID;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy
|
||||
// In the 'confirmless' case it makes fast 32-bit comparison,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBitMany(const struct FDRConfirm *fdrc,
|
||||
const struct FDR_Runtime_Args *a, size_t i, CautionReason r,
|
||||
hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (i < a->start_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, control, last_match, conf_key);
|
||||
return;
|
||||
} else {
|
||||
const u32 id = fdrc->nBitsOrSoleID;
|
||||
const u32 len = fdrc->soleLitSize;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (r == VECTORING && len > i - a->start_offset) {
|
||||
if (len > i + a->len_history) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 cmp = (u32)a->buf[i] << 24;
|
||||
|
||||
if (len <= i) {
|
||||
for (u32 j = 1; j <= len; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
} else {
|
||||
for (u32 j = 1; j <= i; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
cmp |= (u32)(a->histBytes >> (40 + i * 8));
|
||||
}
|
||||
|
||||
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i - len, i, id, a->ctxt);
|
||||
}
|
||||
scratch->fdr_conf = NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,10 +30,12 @@
|
||||
|
||||
#include "fdr_compile.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_dump.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cstdio>
|
||||
@ -43,7 +45,7 @@
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using std::unique_ptr;
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
@ -58,33 +60,127 @@ bool fdrIsTeddy(const FDR *fdr) {
|
||||
return !getFdrDescription(engine);
|
||||
}
|
||||
|
||||
void fdrPrintStats(const FDR *fdr, FILE *f) {
|
||||
const bool isTeddy = fdrIsTeddy(fdr);
|
||||
static
|
||||
void dumpLitIndex(const FDRConfirm *fdrc, FILE *f) {
|
||||
const u32 *lit_index = getConfirmLitIndex(fdrc);
|
||||
u32 num_lits = 1U << fdrc->nBits;
|
||||
u32 lits_used = count_if(lit_index, lit_index + num_lits,
|
||||
[](u32 idx) { return idx != 0; });
|
||||
|
||||
if (isTeddy) {
|
||||
fprintf(f, "TEDDY: %u\n", fdr->engineID);
|
||||
} else {
|
||||
fprintf(f, "FDR: %u\n", fdr->engineID);
|
||||
fprintf(f, " load %u/%u (%0.2f%%)\n", lits_used, num_lits,
|
||||
(double)lits_used / (double)(num_lits)*100);
|
||||
}
|
||||
|
||||
static
|
||||
void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms,
|
||||
FILE *f) {
|
||||
const u32 *conf = (const u32 *)((const char *)fdr_base + conf_offset);
|
||||
for (u32 i = 0; i < num_confirms; i++) {
|
||||
const auto *fdrc = (const FDRConfirm *)((const char *)conf + conf[i]);
|
||||
fprintf(f, " confirm %u\n", i);
|
||||
fprintf(f, " andmsk 0x%016llx\n", fdrc->andmsk);
|
||||
fprintf(f, " mult 0x%016llx\n", fdrc->mult);
|
||||
fprintf(f, " nbits %u\n", fdrc->nBits);
|
||||
fprintf(f, " groups 0x%016llx\n", fdrc->groups);
|
||||
dumpLitIndex(fdrc, f);
|
||||
}
|
||||
}
|
||||
|
||||
if (isTeddy) {
|
||||
auto des = getTeddyDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " masks %u\n", des->numMasks);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
static
|
||||
void dumpTeddyReinforced(const u8 *rmsk, const u32 num_tables, FILE *f) {
|
||||
// dump reinforcement masks
|
||||
for (u32 b = 0; b < num_tables; b++) {
|
||||
fprintf(f, " reinforcement table for bucket %u..%u:\n",
|
||||
b * 8, b * 8 + 7);
|
||||
for (u32 i = 0; i <= N_CHARS; i++) {
|
||||
fprintf(f, " 0x%02x: ", i);
|
||||
for (u32 j = 0; j < 8; j++) {
|
||||
u8 val = rmsk[b * ((N_CHARS + 1) * 8) + i * 8 + j];
|
||||
for (u32 k = 0; k < 8; k++) {
|
||||
fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0");
|
||||
}
|
||||
fprintf(f, " ");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
} else {
|
||||
fprintf(f, " domain %u\n", fdr->domain);
|
||||
fprintf(f, " stride %u\n", fdr->stride);
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpTeddyMasks(const u8 *baseMsk, u32 numMasks, u32 maskWidth, FILE *f) {
|
||||
// dump nibble masks
|
||||
fprintf(f, " nibble masks:\n");
|
||||
for (u32 i = 0; i < numMasks * 2; i++) {
|
||||
fprintf(f, " -%d%s: ", 1 + i / 2, (i % 2) ? "hi" : "lo");
|
||||
for (u32 j = 0; j < 16 * maskWidth; j++) {
|
||||
u8 val = baseMsk[i * 16 * maskWidth + j];
|
||||
for (u32 k = 0; k < 8; k++) {
|
||||
fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0");
|
||||
}
|
||||
fprintf(f, " ");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
static
|
||||
void dumpTeddy(const Teddy *teddy, FILE *f) {
|
||||
fprintf(f, "TEDDY: %u\n", teddy->engineID);
|
||||
auto des = getTeddyDescription(teddy->engineID);
|
||||
if (!des) {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f, " strings ???\n");
|
||||
fprintf(f, " masks %u\n", des->numMasks);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
|
||||
fprintf(f, " strings %u\n", teddy->numStrings);
|
||||
fprintf(f, " size %zu bytes\n", fdrSize((const FDR *)teddy));
|
||||
fprintf(f, " max length %u\n", teddy->maxStringLen);
|
||||
fprintf(f, " floodoff %u (%x)\n", teddy->floodOffset,
|
||||
teddy->floodOffset);
|
||||
fprintf(f, "\n");
|
||||
|
||||
u32 maskWidth = des->getNumBuckets() / 8;
|
||||
size_t headerSize = sizeof(Teddy);
|
||||
size_t maskLen = des->numMasks * 16 * 2 * maskWidth;
|
||||
const u8 *teddy_base = (const u8 *)teddy;
|
||||
const u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
|
||||
const u8 *rmsk = baseMsk + ROUNDUP_CL(maskLen);
|
||||
dumpTeddyMasks(baseMsk, des->numMasks, maskWidth, f);
|
||||
dumpTeddyReinforced(rmsk, maskWidth, f);
|
||||
dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f);
|
||||
}
|
||||
|
||||
static
|
||||
void dumpFDR(const FDR *fdr, FILE *f) {
|
||||
fprintf(f, "FDR: %u\n", fdr->engineID);
|
||||
auto des = getFdrDescription(fdr->engineID);
|
||||
if (!des) {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f, " domain %u\n", fdr->domain);
|
||||
fprintf(f, " stride %u\n", fdr->stride);
|
||||
fprintf(f, " strings %u\n", fdr->numStrings);
|
||||
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
|
||||
fprintf(f, " max length %u\n", fdr->maxStringLen);
|
||||
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
|
||||
fprintf(f, "\n");
|
||||
|
||||
dumpConfirms(fdr, fdr->confOffset, des->getNumBuckets(), f);
|
||||
}
|
||||
|
||||
void fdrPrintStats(const FDR *fdr, FILE *f) {
|
||||
if (fdrIsTeddy(fdr)) {
|
||||
dumpTeddy((const Teddy *)fdr, f);
|
||||
} else {
|
||||
dumpFDR(fdr, f);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -30,7 +30,6 @@
|
||||
#define FDR_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
@ -36,6 +36,8 @@
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
|
||||
|
||||
struct hs_scratch;
|
||||
|
||||
typedef enum {
|
||||
NOT_CAUTIOUS, //!< not near a boundary (quantify?)
|
||||
VECTORING //!< potentially vectoring
|
||||
@ -56,7 +58,6 @@ struct FDRFlood {
|
||||
|
||||
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
|
||||
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
|
||||
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
|
||||
};
|
||||
|
||||
/** \brief FDR structure.
|
||||
@ -69,19 +70,18 @@ struct FDR {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 numStrings;
|
||||
u32 confOffset;
|
||||
u32 floodOffset;
|
||||
|
||||
u8 stride; /* stride - how frequeuntly the data is consulted by the first
|
||||
u8 stride; /* stride - how frequently the data is consulted by the first
|
||||
* stage matcher */
|
||||
u8 domain; /* number of bits used to index into main FDR table. This value
|
||||
* is used only for debugging/asserts. */
|
||||
u16 domainMask; /* pre-computed domain mask */
|
||||
u32 tabSize; /* pre-computed hashtable size in bytes */
|
||||
u32 pad;
|
||||
|
||||
m128 start; /* initial start state to use at offset 0. The state has been set
|
||||
* up based on the min length of buckets to reduce the need for
|
||||
* pointless confirms. */
|
||||
m128 start; /* initial start state to use at offset 0. The state has been
|
||||
* set up based on the min length of buckets to reduce the need
|
||||
* for pointless confirms. */
|
||||
};
|
||||
|
||||
/** \brief FDR runtime arguments.
|
||||
@ -97,7 +97,7 @@ struct FDR_Runtime_Args {
|
||||
size_t len_history;
|
||||
size_t start_offset;
|
||||
HWLMCallback cb;
|
||||
void *ctxt;
|
||||
struct hs_scratch *scratch;
|
||||
const u8 *firstFloodDetect;
|
||||
const u64a histBytes;
|
||||
};
|
||||
|
@ -82,11 +82,10 @@ void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
||||
fl.ids[fl.idCount] = lit.id;
|
||||
fl.allGroups |= lit.groups;
|
||||
fl.groups[fl.idCount] = lit.groups;
|
||||
fl.len[fl.idCount] = suffix;
|
||||
// when idCount gets to max_ids this flood no longer happens
|
||||
// only incremented one more time to avoid arithmetic overflow
|
||||
DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
|
||||
c, fl.suffix, fl.idCount, suffix);
|
||||
c, fl.suffix, fl.idCount, suffix);
|
||||
fl.idCount++;
|
||||
}
|
||||
}
|
||||
@ -182,8 +181,7 @@ bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
|
||||
printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
|
||||
"%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
|
||||
for (u32 j = 0; j < fl.idCount; j++) {
|
||||
printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
|
||||
fl.groups[j], fl.len[j]);
|
||||
printf("j is %d fl.groups[j] %016llx\n", j, fl.groups[j]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -94,7 +94,7 @@ const u8 * floodDetect(const struct FDR * fdr,
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
HWLMCallback cb = a->cb;
|
||||
void * ctxt = a->ctxt;
|
||||
struct hs_scratch *scratch = a->scratch;
|
||||
|
||||
const u8 * ptr = *ptrPtr;
|
||||
// tryFloodDetect is never put in places where unconditional
|
||||
@ -196,120 +196,110 @@ const u8 * floodDetect(const struct FDR * fdr,
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
|
||||
t += 4) {
|
||||
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 0, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 2, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 3, fl->ids[0], scratch);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
*control = cb(i + t, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
*control = cb(i + t, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control =
|
||||
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
cb(i + t + 1, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 2, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
|
||||
*control = cb(i + t + 2, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 3, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
|
||||
*control = cb(i + t + 3, fl->ids[1], scratch);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
*control = cb(i + t, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
*control = cb(i + t, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
*control = cb(i + t, fl->ids[2], scratch);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[2], scratch);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// slow generalized loop
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
u32 len3 = fl->len[3] - 1;
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
*control = cb(i + t, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
*control = cb(i + t, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
*control = cb(i + t, fl->ids[2], scratch);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
|
||||
*control = cb(i + t, fl->ids[3], scratch);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
*control = cb(i + t, fl->ids[t2], scratch);
|
||||
}
|
||||
}
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[0], scratch);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[1], scratch);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[2], scratch);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[3], scratch);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
|
||||
*control = cb(i + t + 1, fl->ids[t2], scratch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -320,7 +310,7 @@ const u8 * floodDetect(const struct FDR * fdr,
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
|
||||
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
*control = cb(i + t, fl->ids[t2], scratch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
1209
src/fdr/teddy.c
1209
src/fdr/teddy.c
File diff suppressed because it is too large
Load Diff
@ -73,37 +73,37 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr,
|
||||
|
||||
#if defined(HAVE_AVX2)
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks1_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks2(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks2_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks3(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks3_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks4(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
hwlm_error_t fdr_exec_fat_teddy_msks4_pck(const struct FDR *fdr,
|
||||
const struct FDR_Runtime_Args *a,
|
||||
hwlm_group_t control);
|
||||
|
||||
#endif /* HAVE_AVX2 */
|
||||
|
||||
|
1135
src/fdr/teddy_avx2.c
1135
src/fdr/teddy_avx2.c
File diff suppressed because it is too large
Load Diff
@ -42,10 +42,14 @@
|
||||
#include "teddy_engine_description.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/container.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/small_vector.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
@ -69,38 +73,58 @@ namespace {
|
||||
|
||||
//#define TEDDY_DEBUG
|
||||
|
||||
/** \brief Max number of Teddy masks we use. */
|
||||
static constexpr size_t MAX_NUM_MASKS = 4;
|
||||
|
||||
class TeddyCompiler : noncopyable {
|
||||
const TeddyEngineDescription ŋ
|
||||
const Grey &grey;
|
||||
const vector<hwlmLiteral> &lits;
|
||||
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
|
||||
bool make_small;
|
||||
|
||||
public:
|
||||
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
|
||||
const TeddyEngineDescription &eng_in, bool make_small_in,
|
||||
const Grey &grey_in)
|
||||
: eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) {
|
||||
}
|
||||
: eng(eng_in), grey(grey_in), lits(lits_in),
|
||||
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
|
||||
|
||||
bytecode_ptr<FDR> build();
|
||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||
};
|
||||
|
||||
class TeddySet {
|
||||
/**
|
||||
* \brief Estimate of the max number of literals in a set, used to
|
||||
* minimise allocations.
|
||||
*/
|
||||
static constexpr size_t LITS_PER_SET = 20;
|
||||
|
||||
/** \brief Number of masks. */
|
||||
u32 len;
|
||||
// nibbleSets is a series of bitfields over 16 predicates
|
||||
// that represent the whether shufti nibble set
|
||||
// so for num_masks = 4 we will represent our strings by
|
||||
// 8 u16s in the vector that indicate what a shufti bucket
|
||||
// would have to look like
|
||||
vector<u16> nibbleSets;
|
||||
set<u32> litIds;
|
||||
|
||||
/**
|
||||
* \brief A series of bitfields over 16 predicates that represent the
|
||||
* shufti nibble set.
|
||||
*
|
||||
* So for num_masks = 4 we will represent our strings by 8 u16s in the
|
||||
* vector that indicate what a shufti bucket would have to look like.
|
||||
*/
|
||||
small_vector<u16, MAX_NUM_MASKS * 2> nibbleSets;
|
||||
|
||||
/**
|
||||
* \brief Sorted, unique set of literals. We maintain our own set in a
|
||||
* sorted vector to minimise allocations.
|
||||
*/
|
||||
small_vector<u32, LITS_PER_SET> litIds;
|
||||
|
||||
public:
|
||||
explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
|
||||
const set<u32> & getLits() const { return litIds; }
|
||||
size_t litCount() const { return litIds.size(); }
|
||||
const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }
|
||||
|
||||
bool operator<(const TeddySet & s) const {
|
||||
bool operator<(const TeddySet &s) const {
|
||||
return litIds < s.litIds;
|
||||
}
|
||||
|
||||
@ -116,11 +140,11 @@ public:
|
||||
printf("%u ", id);
|
||||
}
|
||||
printf("\n");
|
||||
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
||||
printf("Flood prone : %s\n", isRunProne() ? "yes" : "no");
|
||||
}
|
||||
#endif
|
||||
|
||||
bool identicalTail(const TeddySet & ts) const {
|
||||
bool identicalTail(const TeddySet &ts) const {
|
||||
return nibbleSets == ts.nibbleSets;
|
||||
}
|
||||
|
||||
@ -131,24 +155,19 @@ public:
|
||||
u8 c = s[s.size() - i - 1];
|
||||
u8 c_hi = (c >> 4) & 0xf;
|
||||
u8 c_lo = c & 0xf;
|
||||
nibbleSets[i*2] = 1 << c_lo;
|
||||
nibbleSets[i * 2] = 1 << c_lo;
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
||||
nibbleSets[i * 2 + 1] =
|
||||
(1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2));
|
||||
} else {
|
||||
nibbleSets[i*2+1] = 1 << c_hi;
|
||||
nibbleSets[i * 2 + 1] = 1 << c_hi;
|
||||
}
|
||||
} else {
|
||||
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
|
||||
nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff;
|
||||
}
|
||||
}
|
||||
litIds.insert(lit_id);
|
||||
}
|
||||
|
||||
void merge(const TeddySet &ts) {
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
nibbleSets[i] |= ts.nibbleSets[i];
|
||||
}
|
||||
litIds.insert(ts.litIds.begin(), ts.litIds.end());
|
||||
litIds.push_back(lit_id);
|
||||
sort_and_unique(litIds);
|
||||
}
|
||||
|
||||
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
|
||||
@ -167,15 +186,15 @@ public:
|
||||
// a small fixed cost + the cost of traversing some sort of followup
|
||||
// (assumption is that the followup is linear)
|
||||
u64a heuristic() const {
|
||||
return probability() * (2+litCount());
|
||||
return probability() * (2 + litCount());
|
||||
}
|
||||
|
||||
bool isRunProne() const {
|
||||
u16 lo_and = 0xffff;
|
||||
u16 hi_and = 0xffff;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
lo_and &= nibbleSets[i*2];
|
||||
hi_and &= nibbleSets[i*2+1];
|
||||
lo_and &= nibbleSets[i * 2];
|
||||
hi_and &= nibbleSets[i * 2 + 1];
|
||||
}
|
||||
// we're not flood-prone if there's no way to get
|
||||
// through with a flood
|
||||
@ -184,10 +203,27 @@ public:
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
|
||||
assert(a.nibbleSets.size() == b.nibbleSets.size());
|
||||
|
||||
TeddySet m(a);
|
||||
|
||||
for (size_t i = 0; i < m.nibbleSets.size(); i++) {
|
||||
m.nibbleSets[i] |= b.nibbleSets[i];
|
||||
}
|
||||
|
||||
m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end());
|
||||
sort_and_unique(m.litIds);
|
||||
|
||||
return m;
|
||||
}
|
||||
};
|
||||
|
||||
bool TeddyCompiler::pack(map<BucketIndex,
|
||||
std::vector<LiteralIndex> > &bucketToLits) {
|
||||
static
|
||||
bool pack(const vector<hwlmLiteral> &lits,
|
||||
const TeddyEngineDescription &eng,
|
||||
map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
|
||||
set<TeddySet> sts;
|
||||
|
||||
for (u32 i = 0; i < lits.size(); i++) {
|
||||
@ -200,7 +236,8 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
#ifdef TEDDY_DEBUG
|
||||
printf("Size %zu\n", sts.size());
|
||||
for (const TeddySet &ts : sts) {
|
||||
printf("\n"); ts.dump();
|
||||
printf("\n");
|
||||
ts.dump();
|
||||
}
|
||||
printf("\n===============================================\n");
|
||||
#endif
|
||||
@ -220,9 +257,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
continue;
|
||||
}
|
||||
|
||||
TeddySet tmpSet(eng.numMasks);
|
||||
tmpSet.merge(s1);
|
||||
tmpSet.merge(s2);
|
||||
TeddySet tmpSet = merge(s1, s2);
|
||||
u64a newScore = tmpSet.heuristic();
|
||||
u64a oldScore = s1.heuristic() + s2.heuristic();
|
||||
if (newScore < oldScore) {
|
||||
@ -250,9 +285,7 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
}
|
||||
|
||||
// do the merge
|
||||
TeddySet nts(eng.numMasks);
|
||||
nts.merge(*m1);
|
||||
nts.merge(*m2);
|
||||
TeddySet nts = merge(*m1, *m2);
|
||||
#ifdef TEDDY_DEBUG
|
||||
printf("Merging\n");
|
||||
printf("m1 = \n");
|
||||
@ -282,66 +315,51 @@ bool TeddyCompiler::pack(map<BucketIndex,
|
||||
return true;
|
||||
}
|
||||
|
||||
bytecode_ptr<FDR> TeddyCompiler::build() {
|
||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
||||
return nullptr;
|
||||
}
|
||||
// this entry has all-zero mask to skip reinforcement
|
||||
#define NO_REINFORCEMENT N_CHARS
|
||||
|
||||
#ifdef TEDDY_DEBUG
|
||||
for (size_t i = 0; i < lits.size(); i++) {
|
||||
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
|
||||
lits[i].nocase ? "caseless" : "caseful");
|
||||
for (size_t j = 0; j < lits[i].s.size(); j++) {
|
||||
printf("%02x", ((u32)lits[i].s[j])&0xff);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
// this means every entry in reinforcement table
|
||||
#define ALL_CHAR_SET N_CHARS
|
||||
|
||||
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
||||
if(eng.needConfirm(lits)) {
|
||||
if (!pack(bucketToLits)) {
|
||||
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
|
||||
lits.size(), eng.getNumBuckets());
|
||||
return nullptr;
|
||||
// each item's reinforcement mask has REINFORCED_MSK_LEN bytes
|
||||
#define REINFORCED_MSK_LEN 8
|
||||
|
||||
// reinforcement table size for each set of 8 buckets
|
||||
#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)
|
||||
|
||||
static
|
||||
void initReinforcedTable(u8 *rmsk) {
|
||||
u64a *mask = (u64a *)rmsk;
|
||||
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
|
||||
}
|
||||
|
||||
static
|
||||
void fillReinforcedMskZero(u8 *rmsk) {
|
||||
u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
|
||||
fill_n(mc, REINFORCED_MSK_LEN, 0x00);
|
||||
}
|
||||
|
||||
static
|
||||
void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
|
||||
assert(j > 0);
|
||||
if (c == ALL_CHAR_SET) {
|
||||
for (size_t i = 0; i < N_CHARS; i++) {
|
||||
u8 *mc = rmsk + i * REINFORCED_MSK_LEN;
|
||||
mc[j - 1] &= ~bmsk;
|
||||
}
|
||||
} else {
|
||||
for (u32 i = 0; i < lits.size(); i++) {
|
||||
bucketToLits[i].push_back(i);
|
||||
}
|
||||
u8 *mc = rmsk + c * REINFORCED_MSK_LEN;
|
||||
mc[j - 1] &= ~bmsk;
|
||||
}
|
||||
u32 maskWidth = eng.getNumBuckets() / 8;
|
||||
}
|
||||
|
||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||
|
||||
auto floodControlTmp = setupFDRFloodControl(lits, eng, grey);
|
||||
auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
size_t size = ROUNDUP_N(sizeof(Teddy) +
|
||||
maskLen +
|
||||
confirmTmp.size() +
|
||||
floodControlTmp.size(),
|
||||
16 * maskWidth);
|
||||
|
||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||
Teddy *teddy = (Teddy *)fdr.get(); // ugly
|
||||
u8 *teddy_base = (u8 *)teddy;
|
||||
|
||||
teddy->size = size;
|
||||
teddy->engineID = eng.getID();
|
||||
teddy->maxStringLen = verify_u32(maxLen(lits));
|
||||
|
||||
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
|
||||
memcpy(ptr, confirmTmp.get(), confirmTmp.size());
|
||||
ptr += confirmTmp.size();
|
||||
|
||||
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
||||
memcpy(ptr, floodControlTmp.get(), floodControlTmp.size());
|
||||
ptr += floodControlTmp.size();
|
||||
|
||||
u8 *baseMsk = teddy_base + sizeof(Teddy);
|
||||
static
|
||||
void fillNibbleMasks(const map<BucketIndex,
|
||||
vector<LiteralIndex>> &bucketToLits,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
u32 numMasks, u32 maskWidth, size_t maskLen,
|
||||
u8 *baseMsk) {
|
||||
memset(baseMsk, 0xff, maskLen);
|
||||
|
||||
for (const auto &b2l : bucketToLits) {
|
||||
const u32 &bucket_id = b2l.first;
|
||||
@ -354,16 +372,18 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
||||
const u32 sz = verify_u32(l.s.size());
|
||||
|
||||
// fill in masks
|
||||
for (u32 j = 0; j < eng.numMasks; j++) {
|
||||
u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
|
||||
u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
|
||||
for (u32 j = 0; j < numMasks; j++) {
|
||||
const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
|
||||
const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
|
||||
const u32 lo_base = msk_id_lo * 16;
|
||||
const u32 hi_base = msk_id_hi * 16;
|
||||
|
||||
// if we don't have a char at this position, mark this bucket as
|
||||
// matching at all 16 locations (every nibble value) in these masks
|
||||
if (j >= sz) {
|
||||
for (u32 n = 0; n < 16; n++) {
|
||||
baseMsk[msk_id_lo * 16 + n] |= bmsk;
|
||||
baseMsk[msk_id_hi * 16 + n] |= bmsk;
|
||||
baseMsk[lo_base + n] &= ~bmsk;
|
||||
baseMsk[hi_base + n] &= ~bmsk;
|
||||
}
|
||||
} else {
|
||||
u8 c = l.s[sz - 1 - j];
|
||||
@ -382,51 +402,173 @@ bytecode_ptr<FDR> TeddyCompiler::build() {
|
||||
|
||||
for (u8 cm = 0; cm < 0x10; cm++) {
|
||||
if ((cm & m_lo) == (cmp_lo & m_lo)) {
|
||||
baseMsk[msk_id_lo * 16 + cm] |= bmsk;
|
||||
baseMsk[lo_base + cm] &= ~bmsk;
|
||||
}
|
||||
if ((cm & m_hi) == (cmp_hi & m_hi)) {
|
||||
baseMsk[msk_id_hi * 16 + cm] |= bmsk;
|
||||
baseMsk[hi_base + cm] &= ~bmsk;
|
||||
}
|
||||
}
|
||||
} else{
|
||||
} else {
|
||||
if (l.nocase && ourisalpha(c)) {
|
||||
u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
|
||||
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
|
||||
baseMsk[msk_id_hi * 16 + (n_hi & cmHalfClear)] |= bmsk;
|
||||
baseMsk[msk_id_hi * 16 + (n_hi | cmHalfSet )] |= bmsk;
|
||||
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
|
||||
baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
|
||||
baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk;
|
||||
} else {
|
||||
baseMsk[msk_id_hi * 16 + n_hi] |= bmsk;
|
||||
baseMsk[hi_base + n_hi] &= ~bmsk;
|
||||
}
|
||||
baseMsk[msk_id_lo * 16 + n_lo] |= bmsk;
|
||||
baseMsk[lo_base + n_lo] &= ~bmsk;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void fillReinforcedTable(const map<BucketIndex,
|
||||
vector<LiteralIndex>> &bucketToLits,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
u8 *rtable_base, const u32 num_tables) {
|
||||
vector<u8 *> tables;
|
||||
for (u32 i = 0; i < num_tables; i++) {
|
||||
tables.push_back(rtable_base + i * RTABLE_SIZE);
|
||||
}
|
||||
|
||||
for (auto t : tables) {
|
||||
initReinforcedTable(t);
|
||||
}
|
||||
|
||||
for (const auto &b2l : bucketToLits) {
|
||||
const u32 &bucket_id = b2l.first;
|
||||
const vector<LiteralIndex> &ids = b2l.second;
|
||||
u8 *rmsk = tables[bucket_id / 8];
|
||||
const u8 bmsk = 1U << (bucket_id % 8);
|
||||
|
||||
for (const LiteralIndex &lit_id : ids) {
|
||||
const hwlmLiteral &l = lits[lit_id];
|
||||
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
|
||||
const u32 sz = verify_u32(l.s.size());
|
||||
|
||||
// fill in reinforced masks
|
||||
for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) {
|
||||
if (sz - 1 < j) {
|
||||
fillReinforcedMsk(rmsk, ALL_CHAR_SET, j, bmsk);
|
||||
} else {
|
||||
u8 c = l.s[sz - 1 - j];
|
||||
if (l.nocase && ourisalpha(c)) {
|
||||
u8 c_up = c & 0xdf;
|
||||
fillReinforcedMsk(rmsk, c_up, j, bmsk);
|
||||
u8 c_lo = c | 0x20;
|
||||
fillReinforcedMsk(rmsk, c_lo, j, bmsk);
|
||||
} else {
|
||||
fillReinforcedMsk(rmsk, c, j, bmsk);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto t : tables) {
|
||||
fillReinforcedMskZero(t);
|
||||
}
|
||||
}
|
||||
|
||||
bytecode_ptr<FDR> TeddyCompiler::build() {
|
||||
u32 maskWidth = eng.getNumBuckets() / 8;
|
||||
|
||||
size_t headerSize = sizeof(Teddy);
|
||||
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
|
||||
size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;
|
||||
|
||||
auto floodTable = setupFDRFloodControl(lits, eng, grey);
|
||||
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
|
||||
|
||||
// Note: we place each major structure here on a cacheline boundary.
|
||||
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
||||
ROUNDUP_CL(reinforcedMaskLen) +
|
||||
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
|
||||
|
||||
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
|
||||
assert(fdr); // otherwise would have thrown std::bad_alloc
|
||||
Teddy *teddy = (Teddy *)fdr.get(); // ugly
|
||||
u8 *teddy_base = (u8 *)teddy;
|
||||
|
||||
// Write header.
|
||||
teddy->size = size;
|
||||
teddy->engineID = eng.getID();
|
||||
teddy->maxStringLen = verify_u32(maxLen(lits));
|
||||
teddy->numStrings = verify_u32(lits.size());
|
||||
|
||||
// Write confirm structures.
|
||||
u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
|
||||
ROUNDUP_CL(reinforcedMaskLen);
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
teddy->confOffset = verify_u32(ptr - teddy_base);
|
||||
memcpy(ptr, confirmTable.get(), confirmTable.size());
|
||||
ptr += ROUNDUP_CL(confirmTable.size());
|
||||
|
||||
// Write flood control structures.
|
||||
assert(ISALIGNED_CL(ptr));
|
||||
teddy->floodOffset = verify_u32(ptr - teddy_base);
|
||||
memcpy(ptr, floodTable.get(), floodTable.size());
|
||||
ptr += floodTable.size();
|
||||
|
||||
// Write teddy masks.
|
||||
u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
|
||||
fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
|
||||
baseMsk);
|
||||
|
||||
// Write reinforcement masks.
|
||||
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
|
||||
fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
|
||||
|
||||
return fdr;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
bool assignStringsToBuckets(
|
||||
const vector<hwlmLiteral> &lits,
|
||||
TeddyEngineDescription &eng,
|
||||
map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
|
||||
assert(eng.numMasks <= MAX_NUM_MASKS);
|
||||
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
|
||||
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef TEDDY_DEBUG
|
||||
for (u32 i = 0; i < eng.numMasks * 2; i++) {
|
||||
for (u32 j = 0; j < 16; j++) {
|
||||
u8 val = baseMsk[i * 16 + j];
|
||||
for (u32 k = 0; k < 8; k++) {
|
||||
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
|
||||
}
|
||||
printf(" ");
|
||||
for (size_t i = 0; i < lits.size(); i++) {
|
||||
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
|
||||
lits[i].nocase ? "caseless" : "caseful");
|
||||
for (size_t j = 0; j < lits[i].s.size(); j++) {
|
||||
printf("%02x", ((u32)lits[i].s[j])&0xff);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
return fdr;
|
||||
if (!pack(lits, eng, bucketToLits)) {
|
||||
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
|
||||
lits.size(), eng.getNumBuckets());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target,
|
||||
const Grey &grey) {
|
||||
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
|
||||
TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
|
||||
proto.make_small, grey);
|
||||
return tc.build();
|
||||
}
|
||||
|
||||
|
||||
unique_ptr<HWLMProto> teddyBuildProtoHinted(
|
||||
u8 engType, const vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint, const target_t &target) {
|
||||
unique_ptr<TeddyEngineDescription> des;
|
||||
if (hint == HINT_INVALID) {
|
||||
des = chooseTeddyEngine(target, lits);
|
||||
@ -436,8 +578,14 @@ bytecode_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
|
||||
if (!des) {
|
||||
return nullptr;
|
||||
}
|
||||
TeddyCompiler tc(lits, *des, make_small, grey);
|
||||
return tc.build();
|
||||
|
||||
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
|
||||
if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return ue2::make_unique<HWLMProto>(engType, move(des), lits,
|
||||
bucketToLits, make_small);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -35,6 +35,7 @@
|
||||
#define TEDDY_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
|
||||
#include <vector>
|
||||
@ -43,15 +44,16 @@ struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class TeddyEngineDescription;
|
||||
struct Grey;
|
||||
struct hwlmLiteral;
|
||||
struct target_t;
|
||||
|
||||
bytecode_ptr<FDR> teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint,
|
||||
const target_t &target,
|
||||
const Grey &grey);
|
||||
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);
|
||||
|
||||
std::unique_ptr<HWLMProto> teddyBuildProtoHinted(
|
||||
u8 engType, const std::vector<hwlmLiteral> &lits,
|
||||
bool make_small, u32 hint, const target_t &target);
|
||||
} // namespace ue2
|
||||
|
||||
#endif // TEDDY_COMPILE_H
|
||||
|
@ -51,18 +51,6 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
||||
return numMasks;
|
||||
}
|
||||
|
||||
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
|
||||
if (packed || lits.size() > getNumBuckets()) {
|
||||
return true;
|
||||
}
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() > numMasks || !lit.msk.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
|
||||
static const TeddyEngineDef defns[] = {
|
||||
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
|
||||
|
@ -55,7 +55,6 @@ public:
|
||||
explicit TeddyEngineDescription(const TeddyEngineDef &def);
|
||||
|
||||
u32 getDefaultFloodSuffixLength() const override;
|
||||
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
|
||||
};
|
||||
|
||||
std::unique_ptr<TeddyEngineDescription>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -26,6 +26,28 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Teddy bytecode layout:
|
||||
* * |-----|
|
||||
* * | | struct Teddy
|
||||
* * |-----|
|
||||
* * | | teddy masks
|
||||
* * | |
|
||||
* * |-----|
|
||||
* * | | reinforcement mask table for bucket 0..7
|
||||
* * | |
|
||||
* * |-----|
|
||||
* * | | reinforcement mask table for bucket 8..15 (FAT teddy)
|
||||
* * | |
|
||||
* * |-----|
|
||||
* * | | confirm
|
||||
* * | |
|
||||
* * | |
|
||||
* * |-----|
|
||||
* * | | flood control
|
||||
* * | |
|
||||
* * |-----|
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_INTERNAL_H
|
||||
#define TEDDY_INTERNAL_H
|
||||
|
||||
@ -36,11 +58,9 @@ struct Teddy {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 numStrings;
|
||||
u32 confOffset;
|
||||
u32 floodOffset;
|
||||
u32 link;
|
||||
u32 pad1;
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -38,8 +38,12 @@
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/uniform_ops.h"
|
||||
|
||||
extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32];
|
||||
#if defined(HAVE_AVX2)
|
||||
extern const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64];
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_64_BIT
|
||||
#define TEDDY_CONF_TYPE u64a
|
||||
@ -110,8 +114,27 @@ void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) {
|
||||
}
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
// *p_mask = load128(p_mask_arr[n] + 16 - m) means:
|
||||
// m bytes of 0xff at the beginning, followed by n bytes of 0x00,
|
||||
// then followed by 0xff in all remaining bytes.
|
||||
// ptr >= lo:
|
||||
// no history.
|
||||
// for end/short zone, ptr==lo and start_offset==0
|
||||
// for start zone, see below
|
||||
// lo ptr hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// -start 0 -start+offset MIN(avail,16)
|
||||
// p_mask ffff..ff0000...........00ffff..........
|
||||
// ptr < lo:
|
||||
// only start zone.
|
||||
// history
|
||||
// ptr lo hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// 0 start start+offset end(<=16)
|
||||
// p_mask ffff.....ffffff..ff0000...........00ffff..........
|
||||
static really_inline
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const size_t start_offset,
|
||||
const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
@ -123,27 +146,34 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
uintptr_t copy_start;
|
||||
uintptr_t copy_len;
|
||||
|
||||
if (ptr >= lo) {
|
||||
if (ptr >= lo) { // short/end/start zone
|
||||
uintptr_t start = (uintptr_t)(ptr - lo);
|
||||
uintptr_t avail = (uintptr_t)(hi - ptr);
|
||||
if (avail >= 16) {
|
||||
*p_mask = load128(p_mask_arr[16] + 16);
|
||||
assert(start_offset - start <= 16);
|
||||
*p_mask = loadu128(p_mask_arr[16 - start_offset + start]
|
||||
+ 16 - start_offset + start);
|
||||
return loadu128(ptr);
|
||||
}
|
||||
*p_mask = load128(p_mask_arr[avail] + 16);
|
||||
assert(start_offset - start <= avail);
|
||||
*p_mask = loadu128(p_mask_arr[avail - start_offset + start]
|
||||
+ 16 - start_offset + start);
|
||||
copy_start = 0;
|
||||
copy_len = avail;
|
||||
} else {
|
||||
} else { // start zone
|
||||
uintptr_t need = MIN((uintptr_t)(lo - ptr),
|
||||
MIN(len_history, nMasks - 1));
|
||||
uintptr_t start = (uintptr_t)(lo - ptr);
|
||||
uintptr_t i;
|
||||
for (i = start - need; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
for (i = start - need; i < start; i++) {
|
||||
u.val8[i] = buf_history[len_history - (start - i)];
|
||||
}
|
||||
uintptr_t end = MIN(16, (uintptr_t)(hi - ptr));
|
||||
*p_mask = loadu128(p_mask_arr[end - start] + 16 - start);
|
||||
copy_start = i;
|
||||
copy_len = end - i;
|
||||
assert(start + start_offset <= end);
|
||||
*p_mask = loadu128(p_mask_arr[end - start - start_offset]
|
||||
+ 16 - start - start_offset);
|
||||
copy_start = start;
|
||||
copy_len = end - start;
|
||||
}
|
||||
|
||||
// Runt block from the buffer.
|
||||
@ -152,6 +182,205 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
#if defined(HAVE_AVX2)
|
||||
/*
|
||||
* \brief Copy a block of [0,31] bytes efficiently.
|
||||
*
|
||||
* This function is a workaround intended to stop some compilers from
|
||||
* synthesizing a memcpy function call out of the copy of a small number of
|
||||
* bytes that we do in vectoredLoad256.
|
||||
*/
|
||||
static really_inline
|
||||
void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) {
|
||||
switch (len) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
*dst = *src;
|
||||
break;
|
||||
case 2:
|
||||
unaligned_store_u16(dst, unaligned_load_u16(src));
|
||||
break;
|
||||
case 3:
|
||||
unaligned_store_u16(dst, unaligned_load_u16(src));
|
||||
dst[2] = src[2];
|
||||
break;
|
||||
case 4:
|
||||
unaligned_store_u32(dst, unaligned_load_u32(src));
|
||||
break;
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
/* Perform copy with two overlapping 4-byte chunks. */
|
||||
unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4));
|
||||
unaligned_store_u32(dst, unaligned_load_u32(src));
|
||||
break;
|
||||
case 8:
|
||||
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
||||
break;
|
||||
case 9:
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
/* Perform copy with two overlapping 8-byte chunks. */
|
||||
unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8));
|
||||
unaligned_store_u64a(dst, unaligned_load_u64a(src));
|
||||
break;
|
||||
case 16:
|
||||
storeu128(dst, loadu128(src));
|
||||
break;
|
||||
default:
|
||||
/* Perform copy with two overlapping 16-byte chunks. */
|
||||
assert(len < 32);
|
||||
storeu128(dst + len - 16, loadu128(src + len - 16));
|
||||
storeu128(dst, loadu128(src));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
// *p_mask = load256(p_mask_arr256[n] + 32 - m) means:
|
||||
// m bytes of 0xff at the beginning, followed by n bytes of 0x00,
|
||||
// then followed by 0xff in all remaining bytes.
|
||||
// ptr >= lo:
|
||||
// no history.
|
||||
// for end/short zone, ptr==lo and start_offset==0
|
||||
// for start zone, see below
|
||||
// lo ptr hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// -start 0 -start+offset MIN(avail,32)
|
||||
// p_mask ffff..ff0000...........00ffff..........
|
||||
// ptr < lo:
|
||||
// only start zone.
|
||||
// history
|
||||
// ptr lo hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// 0 start start+offset end(<=32)
|
||||
// p_mask ffff.....ffffff..ff0000...........00ffff..........
|
||||
static really_inline
|
||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const size_t start_offset,
|
||||
const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
u8 val8[32];
|
||||
m256 val256;
|
||||
} u;
|
||||
u.val256 = zeroes256();
|
||||
|
||||
uintptr_t copy_start;
|
||||
uintptr_t copy_len;
|
||||
|
||||
if (ptr >= lo) { // short/end/start zone
|
||||
uintptr_t start = (uintptr_t)(ptr - lo);
|
||||
uintptr_t avail = (uintptr_t)(hi - ptr);
|
||||
if (avail >= 32) {
|
||||
assert(start_offset - start <= 32);
|
||||
*p_mask = loadu256(p_mask_arr256[32 - start_offset + start]
|
||||
+ 32 - start_offset + start);
|
||||
return loadu256(ptr);
|
||||
}
|
||||
assert(start_offset - start <= avail);
|
||||
*p_mask = loadu256(p_mask_arr256[avail - start_offset + start]
|
||||
+ 32 - start_offset + start);
|
||||
copy_start = 0;
|
||||
copy_len = avail;
|
||||
} else { //start zone
|
||||
uintptr_t need = MIN((uintptr_t)(lo - ptr),
|
||||
MIN(len_history, nMasks - 1));
|
||||
uintptr_t start = (uintptr_t)(lo - ptr);
|
||||
uintptr_t i;
|
||||
for (i = start - need; i < start; i++) {
|
||||
u.val8[i] = buf_history[len_history - (start - i)];
|
||||
}
|
||||
uintptr_t end = MIN(32, (uintptr_t)(hi - ptr));
|
||||
assert(start + start_offset <= end);
|
||||
*p_mask = loadu256(p_mask_arr256[end - start - start_offset]
|
||||
+ 32 - start - start_offset);
|
||||
copy_start = start;
|
||||
copy_len = end - start;
|
||||
}
|
||||
|
||||
// Runt block from the buffer.
|
||||
copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len);
|
||||
|
||||
return u.val256;
|
||||
}
|
||||
#endif // HAVE_AVX2
|
||||
|
||||
#if defined(HAVE_AVX512)
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
// u64a k = ones_u64a << n' >> m'; // m' < n'
|
||||
// *p_mask = set_mask_m512(~k);
|
||||
// means p_mask consists of:
|
||||
// (n' - m') poison bytes "0xff" at the beginning,
|
||||
// followed by (64 - n') valid bytes "0x00",
|
||||
// then followed by the remaining m' poison bytes "0xff".
|
||||
// ptr >= lo:
|
||||
// no history.
|
||||
// for end/short zone, ptr==lo and start_offset==0
|
||||
// for start zone, see below
|
||||
// lo ptr hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// -start 0 -start+offset MIN(avail,64)
|
||||
// p_mask ffff..ff0000...........00ffff..........
|
||||
// ptr < lo:
|
||||
// only start zone.
|
||||
// history
|
||||
// ptr lo hi hi
|
||||
// |----------|-------|----------------|............|
|
||||
// 0 start start+offset end(<=64)
|
||||
// p_mask ffff.....ffffff..ff0000...........00ffff..........
|
||||
static really_inline
|
||||
m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset,
|
||||
const u8 *lo, const u8 *hi, const u8 *hbuf, size_t hlen,
|
||||
const u32 nMasks) {
|
||||
m512 val;
|
||||
|
||||
uintptr_t copy_start;
|
||||
uintptr_t copy_len;
|
||||
|
||||
if (ptr >= lo) { // short/end/start zone
|
||||
uintptr_t start = (uintptr_t)(ptr - lo);
|
||||
uintptr_t avail = (uintptr_t)(hi - ptr);
|
||||
if (avail >= 64) {
|
||||
assert(start_offset - start <= 64);
|
||||
u64a k = ones_u64a << (start_offset - start);
|
||||
*p_mask = set_mask_m512(~k);
|
||||
return loadu512(ptr);
|
||||
}
|
||||
assert(start_offset - start <= avail);
|
||||
u64a k = ones_u64a << (64 - avail + start_offset - start)
|
||||
>> (64 - avail);
|
||||
*p_mask = set_mask_m512(~k);
|
||||
copy_start = 0;
|
||||
copy_len = avail;
|
||||
} else { //start zone
|
||||
uintptr_t need = MIN((uintptr_t)(lo - ptr),
|
||||
MIN(hlen, nMasks - 1));
|
||||
uintptr_t start = (uintptr_t)(lo - ptr);
|
||||
u64a j = 0x7fffffffffffffffULL >> (63 - need) << (start - need);
|
||||
val = loadu_maskz_m512(j, &hbuf[hlen - start]);
|
||||
uintptr_t end = MIN(64, (uintptr_t)(hi - ptr));
|
||||
assert(start + start_offset <= end);
|
||||
u64a k = ones_u64a << (64 - end + start + start_offset) >> (64 - end);
|
||||
*p_mask = set_mask_m512(~k);
|
||||
copy_start = start;
|
||||
copy_len = end - start;
|
||||
}
|
||||
|
||||
assert(copy_len < 64);
|
||||
assert(copy_len > 0);
|
||||
u64a j = ones_u64a >> (64 - copy_len) << copy_start;
|
||||
val = loadu_mask_m512(val, j, ptr);
|
||||
|
||||
return val;
|
||||
}
|
||||
#endif // HAVE_AVX512
|
||||
|
||||
static really_inline
|
||||
u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte,
|
||||
CautionReason reason) {
|
||||
@ -190,63 +419,27 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
|
||||
if (!(fdrc->groups & *control)) {
|
||||
continue;
|
||||
}
|
||||
u64a tmp = 0;
|
||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
||||
confWithBit(fdrc, a, ptr - a->buf + byte, control,
|
||||
last_match, confVal);
|
||||
last_match, confVal, &tmp, 0);
|
||||
} while (unlikely(*conf));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void do_confWithBit1_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
|
||||
const u32 *confBase, CautionReason reason,
|
||||
const struct FDR_Runtime_Args *a, const u8 *ptr,
|
||||
hwlmcb_rv_t *control, u32 *last_match) {
|
||||
do {
|
||||
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
|
||||
u32 byte = bit / bucket + offset;
|
||||
u32 idx = bit % bucket;
|
||||
u32 cf = confBase[idx];
|
||||
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
|
||||
((const u8 *)confBase + cf);
|
||||
if (!(fdrc->groups & *control)) {
|
||||
continue;
|
||||
}
|
||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
||||
confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match,
|
||||
confVal);
|
||||
} while (unlikely(*conf));
|
||||
const m128 *getMaskBase(const struct Teddy *teddy) {
|
||||
return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset,
|
||||
const u32 *confBase, CautionReason reason,
|
||||
const struct FDR_Runtime_Args *a, const u8 *ptr,
|
||||
hwlmcb_rv_t *control, u32 *last_match) {
|
||||
do {
|
||||
u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf);
|
||||
u32 byte = bit / bucket + offset;
|
||||
u32 idx = bit % bucket;
|
||||
u32 cf = confBase[idx];
|
||||
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
|
||||
((const u8 *)confBase + cf);
|
||||
if (!(fdrc->groups & *control)) {
|
||||
continue;
|
||||
}
|
||||
u64a confVal = getConfVal(a, ptr, byte, reason);
|
||||
confWithBitMany(fdrc, a, ptr - a->buf + byte, reason, control,
|
||||
last_match, confVal);
|
||||
} while (unlikely(*conf));
|
||||
const u64a *getReinforcedMaskBase(const struct Teddy *teddy, u8 numMask) {
|
||||
return (const u64a *)((const u8 *)getMaskBase(teddy)
|
||||
+ ROUNDUP_CL(2 * numMask * sizeof(m128)));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const m128 * getMaskBase(const struct Teddy *teddy) {
|
||||
return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) {
|
||||
return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) +
|
||||
(numMask*32));
|
||||
const u32 *getConfBase(const struct Teddy *teddy) {
|
||||
return (const u32 *)((const u8 *)teddy + teddy->confOffset);
|
||||
}
|
||||
|
||||
#endif /* TEDDY_RUNTIME_COMMON_H_ */
|
||||
|
@ -139,6 +139,7 @@ Grey::Grey(void) :
|
||||
limitSmallWriteOutfixSize(1048576), // 1 MB
|
||||
smallWriteMaxPatterns(10000),
|
||||
smallWriteMaxLiterals(10000),
|
||||
smallWriteMergeBatchSize(20),
|
||||
allowTamarama(true), // Tamarama engine
|
||||
tamaChunkSize(100),
|
||||
dumpFlags(0),
|
||||
@ -302,6 +303,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
||||
G_UPDATE(limitSmallWriteOutfixSize);
|
||||
G_UPDATE(smallWriteMaxPatterns);
|
||||
G_UPDATE(smallWriteMaxLiterals);
|
||||
G_UPDATE(smallWriteMergeBatchSize);
|
||||
G_UPDATE(allowTamarama);
|
||||
G_UPDATE(tamaChunkSize);
|
||||
G_UPDATE(limitPatternCount);
|
||||
|
@ -157,6 +157,7 @@ struct Grey {
|
||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
|
||||
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
|
||||
u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch
|
||||
|
||||
// Tamarama engine
|
||||
bool allowTamarama;
|
||||
|
10
src/hs.cpp
10
src/hs.cpp
@ -227,10 +227,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
target_t target_info = platform ? target_t(*platform)
|
||||
: get_current_target();
|
||||
|
||||
CompileContext cc(isStreaming, isVectored, target_info, g);
|
||||
NG ng(cc, elements, somPrecision);
|
||||
|
||||
try {
|
||||
CompileContext cc(isStreaming, isVectored, target_info, g);
|
||||
NG ng(cc, elements, somPrecision);
|
||||
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
// Add this expression to the compiler
|
||||
try {
|
||||
@ -262,7 +262,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
e.hasIndex ? (int)e.index : -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (std::bad_alloc) {
|
||||
catch (const std::bad_alloc &) {
|
||||
*db = nullptr;
|
||||
*comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
@ -399,7 +399,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
*error = generateCompileError(e);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (std::bad_alloc) {
|
||||
catch (std::bad_alloc &) {
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
@ -561,6 +561,18 @@ hs_error_t HS_CDECL hs_valid_platform(void);
|
||||
*/
|
||||
#define HS_ARCH_ERROR (-11)
|
||||
|
||||
/**
|
||||
* Provided buffer was too small.
|
||||
*
|
||||
* This error indicates that there was insufficient space in the buffer. The
|
||||
* call should be repeated with a larger provided buffer.
|
||||
*
|
||||
* Note: in this situation, it is normal for the amount of space required to be
|
||||
* returned in the same manner as the used space would have been returned if the
|
||||
* call was successful.
|
||||
*/
|
||||
#define HS_INSUFFICIENT_SPACE (-12)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
114
src/hs_runtime.h
114
src/hs_runtime.h
@ -321,6 +321,120 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
|
||||
match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Creates a compressed representation of the provided stream in the buffer
|
||||
* provided. This compressed representation can be converted back into a stream
|
||||
* state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
|
||||
* The size of the compressed representation will be placed into @a used_space.
|
||||
*
|
||||
* If there is not sufficient space in the buffer to hold the compressed
|
||||
* representation, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space
|
||||
* will be populated with the amount of space required.
|
||||
*
|
||||
* Note: this function does not close the provided stream, you may continue to
|
||||
* use the stream or to free it with @ref hs_close_stream().
|
||||
*
|
||||
* @param stream
|
||||
* The stream (as created by @ref hs_open_stream()) to be compressed.
|
||||
*
|
||||
* @param buf
|
||||
* Buffer to write the compressed representation into. Note: if the call is
|
||||
* just being used to determine the amount of space required, it is allowed
|
||||
* to pass NULL here and @a buf_space as 0.
|
||||
*
|
||||
* @param buf_space
|
||||
* The number of bytes in @a buf. If buf_space is too small, the call will
|
||||
* fail with @ref HS_INSUFFICIENT_SPACE.
|
||||
*
|
||||
* @param used_space
|
||||
* Pointer to where the amount of used space will be written to. The used
|
||||
* buffer space is always less than or equal to @a buf_space. If the call
|
||||
* fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to
|
||||
* write out the amount of buffer space required.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
|
||||
* buffer is too small.
|
||||
*/
|
||||
hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
|
||||
size_t buf_space, size_t *used_space);
|
||||
|
||||
/**
|
||||
* Decompresses a compressed representation created by @ref hs_compress_stream()
|
||||
* into a new stream.
|
||||
*
|
||||
* Note: @a buf must correspond to a complete compressed representation created
|
||||
* by @ref hs_compress_stream() of a stream that was opened against @a db. It is
|
||||
* not always possible to detect misuse of this API and behaviour is undefined
|
||||
* if these properties are not satisfied.
|
||||
*
|
||||
* @param db
|
||||
* The compiled pattern database that the compressed stream was opened
|
||||
* against.
|
||||
*
|
||||
* @param stream
|
||||
* On success, a pointer to the expanded @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @param buf
|
||||
* A compressed representation of a stream. These compressed forms are
|
||||
* created by @ref hs_compress_stream().
|
||||
*
|
||||
* @param buf_size
|
||||
* The size in bytes of the compressed representation.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
|
||||
hs_stream_t **stream, const char *buf,
|
||||
size_t buf_size);
|
||||
|
||||
/**
|
||||
* Decompresses a compressed representation created by @ref hs_compress_stream()
|
||||
* on top of the 'to' stream. The 'to' stream will first be reset (reporting
|
||||
* any EOD matches if a non-NULL @a onEvent callback handler is provided).
|
||||
*
|
||||
* Note: the 'to' stream must be opened against the same database as the
|
||||
* compressed stream.
|
||||
*
|
||||
* Note: @a buf must correspond to a complete compressed representation created
|
||||
* by @ref hs_compress_stream() of a stream that was opened against @a db. It is
|
||||
* not always possible to detect misuse of this API and behaviour is undefined
|
||||
* if these properties are not satisfied.
|
||||
*
|
||||
* @param to_stream
|
||||
* A pointer to the generated @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @param buf
|
||||
* A compressed representation of a stream. These compressed forms are
|
||||
* created by @ref hs_compress_stream().
|
||||
*
|
||||
* @param buf_size
|
||||
* The size in bytes of the compressed representation.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
|
||||
* allowed to be NULL only if the @a onEvent callback is also NULL.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
|
||||
const char *buf, size_t buf_size,
|
||||
hs_scratch_t *scratch,
|
||||
match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* The block (non-streaming) regular expression scanner.
|
||||
*
|
||||
|
@ -170,7 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
|
||||
hwlm_group_t groups) {
|
||||
assert(t);
|
||||
|
||||
@ -184,25 +184,23 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
||||
|
||||
if (t->type == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("calling noodExec\n");
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
|
||||
ctxt);
|
||||
} else {
|
||||
assert(t->type == HWLM_ENGINE_FDR);
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_block(aa, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
|
||||
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
|
||||
}
|
||||
|
||||
assert(t->type == HWLM_ENGINE_FDR);
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_block(aa, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
|
||||
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups);
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
||||
size_t len, size_t start, HWLMCallback cb,
|
||||
void *ctxt, hwlm_group_t groups) {
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start,
|
||||
HWLMCallback cb, struct hs_scratch *scratch,
|
||||
hwlm_group_t groups) {
|
||||
assert(t);
|
||||
assert(scratch);
|
||||
|
||||
@ -224,24 +222,21 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
||||
// If we've been handed a start offset, we can use a block mode scan at
|
||||
// that offset.
|
||||
if (start) {
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
|
||||
cb, ctxt);
|
||||
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
|
||||
} else {
|
||||
return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
|
||||
ctxt, scratch->fdr_temp_buf,
|
||||
FDR_TEMP_BUF_SIZE);
|
||||
scratch);
|
||||
}
|
||||
} else {
|
||||
// t->type == HWLM_ENGINE_FDR
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
|
||||
start, cb, ctxt, groups);
|
||||
}
|
||||
|
||||
assert(t->type == HWLM_ENGINE_FDR);
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
|
||||
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb,
|
||||
scratch, groups);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -71,14 +71,17 @@ typedef hwlm_group_t hwlmcb_rv_t;
|
||||
* designed for a different architecture). */
|
||||
#define HWLM_ERROR_UNKNOWN 2
|
||||
|
||||
/** \brief Max length of the literal passed to HWLM. */
|
||||
#define HWLM_LITERAL_MAX_LEN 8
|
||||
|
||||
struct hs_scratch;
|
||||
struct HWLM;
|
||||
|
||||
/** \brief The type for an HWLM callback.
|
||||
*
|
||||
* This callback receives a start-of-match offset, an end-of-match offset, the
|
||||
* ID of the match and the context pointer that was passed into \ref
|
||||
* hwlmExec or \ref hwlmExecStreaming.
|
||||
* This callback receives an end-of-match offset, the ID of the match and
|
||||
* the context pointer that was passed into \ref hwlmExec or
|
||||
* \ref hwlmExecStreaming.
|
||||
*
|
||||
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
|
||||
*
|
||||
@ -92,8 +95,8 @@ struct HWLM;
|
||||
* belonging to the literal which was active at the when the end match location
|
||||
* was first reached.
|
||||
*/
|
||||
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
|
||||
void *context);
|
||||
typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id,
|
||||
struct hs_scratch *scratch);
|
||||
|
||||
/** \brief Match strings in table.
|
||||
*
|
||||
@ -104,24 +107,26 @@ typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
|
||||
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
|
||||
* returning \ref HWLM_TERMINATE_MATCHING.
|
||||
*
|
||||
* \p start is the first offset at which a match may start.
|
||||
* \p start is the first offset at which a match may start. Note: match
|
||||
* starts may include masks overhanging the main literal.
|
||||
*
|
||||
* The underlying engine may choose not to report any match which starts before
|
||||
* the first possible match of a literal which is in the initial group mask.
|
||||
*/
|
||||
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback callback, void *context,
|
||||
hwlm_group_t groups);
|
||||
size_t start, HWLMCallback callback,
|
||||
struct hs_scratch *scratch, hwlm_group_t groups);
|
||||
|
||||
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
|
||||
*
|
||||
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
|
||||
* history length and the main buffer.
|
||||
*
|
||||
* \p len is the length of the main buffer to be scanned.
|
||||
*
|
||||
* \p start is an advisory hint representing the first offset at which a match
|
||||
* may start. Some underlying literal matches may not respect it.
|
||||
* may start. Some underlying literal matches may not respect it. Note: match
|
||||
* starts may include masks overhanging the main literal.
|
||||
*
|
||||
* \p scratch is used to access the history buffer, history length and
|
||||
* the main buffer.
|
||||
*
|
||||
* Two buffers/lengths are provided. Matches that occur entirely within
|
||||
* the history buffer will not be reported by this function. The offsets
|
||||
@ -129,10 +134,9 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
|
||||
* match at byte 10 of the main buffer is reported as 10). Matches that start
|
||||
* in the history buffer will have starts reported with 'negative' values.
|
||||
*/
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
|
||||
struct hs_scratch *scratch, size_t len,
|
||||
size_t start, HWLMCallback callback,
|
||||
void *context, hwlm_group_t groups);
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start,
|
||||
HWLMCallback callback,
|
||||
struct hs_scratch *scratch, hwlm_group_t groups);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
@ -41,8 +41,12 @@
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_compile.h"
|
||||
#include "fdr/fdr_compile_internal.h"
|
||||
#include "fdr/fdr_engine_description.h"
|
||||
#include "fdr/teddy_engine_description.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <cassert>
|
||||
@ -53,6 +57,28 @@ using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
|
||||
: engType(engType_in), lits(move(lits_in)) {}
|
||||
|
||||
HWLMProto::HWLMProto(u8 engType_in,
|
||||
unique_ptr<FDREngineDescription> eng_in,
|
||||
vector<hwlmLiteral> lits_in,
|
||||
map<u32, vector<u32>> bucketToLits_in,
|
||||
bool make_small_in)
|
||||
: engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)),
|
||||
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
|
||||
|
||||
HWLMProto::HWLMProto(u8 engType_in,
|
||||
unique_ptr<TeddyEngineDescription> eng_in,
|
||||
vector<hwlmLiteral> lits_in,
|
||||
map<u32, vector<u32>> bucketToLits_in,
|
||||
bool make_small_in)
|
||||
: engType(engType_in), teddyEng(move(eng_in)),
|
||||
lits(move(lits_in)),
|
||||
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
|
||||
|
||||
HWLMProto::~HWLMProto() {}
|
||||
|
||||
static
|
||||
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
|
||||
#ifdef DEBUG
|
||||
@ -89,17 +115,55 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!lits.front().msk.empty()) {
|
||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
const CompileContext &cc,
|
||||
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
|
||||
UNUSED hwlm_group_t expected_groups) {
|
||||
size_t engSize = 0;
|
||||
shared_ptr<void> eng;
|
||||
|
||||
const auto &lits = proto.lits;
|
||||
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
|
||||
|
||||
if (proto.engType == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("build noodle table\n");
|
||||
const hwlmLiteral &lit = lits.front();
|
||||
auto noodle = noodBuildTable(lit);
|
||||
if (noodle) {
|
||||
engSize = noodle.size();
|
||||
}
|
||||
eng = move(noodle);
|
||||
} else {
|
||||
DEBUG_PRINTF("building a new deal\n");
|
||||
auto fdr = fdrBuildTable(proto, cc.grey);
|
||||
if (fdr) {
|
||||
engSize = fdr.size();
|
||||
}
|
||||
eng = move(fdr);
|
||||
}
|
||||
|
||||
if (!eng) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert(engSize);
|
||||
if (engSize > cc.grey.limitLiteralMatcherSize) {
|
||||
throw ResourceLimitError();
|
||||
}
|
||||
|
||||
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
|
||||
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
|
||||
|
||||
h->type = proto.engType;
|
||||
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
unique_ptr<HWLMProto>
|
||||
hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
|
||||
const CompileContext &cc) {
|
||||
assert(!lits.empty());
|
||||
dumpLits(lits);
|
||||
|
||||
@ -129,9 +193,7 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
}
|
||||
}
|
||||
|
||||
u8 engType = 0;
|
||||
size_t engSize = 0;
|
||||
shared_ptr<void> eng;
|
||||
unique_ptr<HWLMProto> proto;
|
||||
|
||||
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
|
||||
|
||||
@ -139,39 +201,17 @@ bytecode_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
|
||||
if (isNoodleable(lits, cc)) {
|
||||
DEBUG_PRINTF("build noodle table\n");
|
||||
engType = HWLM_ENGINE_NOOD;
|
||||
const hwlmLiteral &lit = lits.front();
|
||||
auto noodle = noodBuildTable(lit);
|
||||
if (noodle) {
|
||||
engSize = noodle.size();
|
||||
}
|
||||
eng = move(noodle);
|
||||
proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
|
||||
} else {
|
||||
DEBUG_PRINTF("building a new deal\n");
|
||||
engType = HWLM_ENGINE_FDR;
|
||||
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey);
|
||||
if (fdr) {
|
||||
engSize = fdr.size();
|
||||
proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
|
||||
cc.target_info, cc.grey);
|
||||
if (!proto) {
|
||||
return nullptr;
|
||||
}
|
||||
eng = move(fdr);
|
||||
}
|
||||
|
||||
if (!eng) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert(engSize);
|
||||
if (engSize > cc.grey.limitLiteralMatcherSize) {
|
||||
throw ResourceLimitError();
|
||||
}
|
||||
|
||||
const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize;
|
||||
auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64);
|
||||
|
||||
h->type = engType;
|
||||
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
|
||||
|
||||
return h;
|
||||
return proto;
|
||||
}
|
||||
|
||||
size_t hwlmSize(const HWLM *h) {
|
||||
|
@ -34,9 +34,11 @@
|
||||
#define HWLM_BUILD_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_literal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -44,15 +46,62 @@ struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class FDREngineDescription;
|
||||
class TeddyEngineDescription;
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
struct hwlmLiteral;
|
||||
|
||||
/** \brief Class representing a literal matcher prototype. */
|
||||
struct HWLMProto {
|
||||
/**
|
||||
* \brief Engine type to distinguish noodle from FDR and Teddy.
|
||||
*/
|
||||
u8 engType;
|
||||
|
||||
/**
|
||||
* \brief FDR engine description.
|
||||
*/
|
||||
std::unique_ptr<FDREngineDescription> fdrEng;
|
||||
|
||||
/**
|
||||
* \brief Teddy engine description.
|
||||
*/
|
||||
std::unique_ptr<TeddyEngineDescription> teddyEng;
|
||||
|
||||
/**
|
||||
* \brief HWLM literals passed from Rose.
|
||||
*/
|
||||
std::vector<hwlmLiteral> lits;
|
||||
|
||||
/**
|
||||
* \brief Bucket assignment info in FDR and Teddy
|
||||
*/
|
||||
std::map<u32, std::vector<u32>> bucketToLits;
|
||||
|
||||
/**
|
||||
* \brief Flag to optimise matcher for small size from Rose.
|
||||
*/
|
||||
bool make_small = false;
|
||||
|
||||
HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in);
|
||||
|
||||
HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in,
|
||||
std::vector<hwlmLiteral> lits_in,
|
||||
std::map<u32, std::vector<u32>> bucketToLits_in,
|
||||
bool make_small_in);
|
||||
|
||||
HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in,
|
||||
std::vector<hwlmLiteral> lits_in,
|
||||
std::map<u32, std::vector<u32>> bucketToLits_in,
|
||||
bool make_small_in);
|
||||
|
||||
~HWLMProto();
|
||||
};
|
||||
|
||||
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
|
||||
* literals.
|
||||
*
|
||||
* \param lits The group of literals.
|
||||
* \param make_small Optimise matcher for small size.
|
||||
* \param proto Literal matcher prototype.
|
||||
* \param cc Compile context.
|
||||
* \param expected_groups FIXME: document me!
|
||||
*
|
||||
@ -60,10 +109,13 @@ struct hwlmLiteral;
|
||||
* may result in a nullptr return value, or a std::bad_alloc exception being
|
||||
* thrown.
|
||||
*/
|
||||
bytecode_ptr<HWLM> hwlmBuild(const std::vector<hwlmLiteral> &lits,
|
||||
bool make_small, const CompileContext &cc,
|
||||
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
|
||||
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
|
||||
|
||||
std::unique_ptr<HWLMProto>
|
||||
hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the number of repeated characters on the end of a
|
||||
* literal that will make a literal set of size \a numLiterals suffer
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,16 +38,19 @@
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_dump.h"
|
||||
#include "nfa/accel_dump.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include "util/dump_util.h"
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f) {
|
||||
void hwlmGenerateDumpFiles(const HWLM *h, const string &base) {
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
switch (h->type) {
|
||||
case HWLM_ENGINE_NOOD:
|
||||
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -35,16 +35,16 @@
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Dump some information about the give HWLM structure. */
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f);
|
||||
void hwlmGenerateDumpFiles(const HWLM *h, const std::string &base);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif // DUMP_SUPPORT
|
||||
#endif // HWLM_DUMP_H
|
||||
|
@ -42,12 +42,11 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Max length of the literal passed to HWLM. */
|
||||
#define HWLM_LITERAL_MAX_LEN 8
|
||||
|
||||
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
|
||||
#define HWLM_MASKLEN 8
|
||||
|
||||
#define INVALID_LIT_ID ~0U
|
||||
|
||||
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
|
||||
struct hwlmLiteral {
|
||||
std::string s; //!< \brief The literal itself.
|
||||
@ -67,6 +66,21 @@ struct hwlmLiteral {
|
||||
* can be quashed by the literal matcher. */
|
||||
bool noruns;
|
||||
|
||||
/** \brief included literal id. */
|
||||
u32 included_id = INVALID_LIT_ID;
|
||||
|
||||
/** \brief Squash mask for FDR's confirm mask for included literals.
|
||||
*
|
||||
* In FDR confirm, if we have included literal in another bucket,
|
||||
* we can use this mask to squash the bit for the bucket in FDR confirm
|
||||
* mask and then run programs of included literal directly and avoid
|
||||
* confirm work.
|
||||
*
|
||||
* This value is calculated in FDR compile code once bucket assignment is
|
||||
* completed
|
||||
*/
|
||||
u8 squash = 0;
|
||||
|
||||
/** \brief Set of groups that literal belongs to.
|
||||
*
|
||||
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
|
||||
|
@ -35,14 +35,33 @@
|
||||
|
||||
#include "hwlm_literal.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cstring> // for memcpy
|
||||
#include <vector>
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
u64a make_u64a_mask(const vector<u8> &v) {
|
||||
assert(v.size() <= sizeof(u64a));
|
||||
if (v.size() > sizeof(u64a)) {
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
u64a mask = 0;
|
||||
size_t len = v.size();
|
||||
unsigned char *m = (unsigned char *)&mask;
|
||||
DEBUG_PRINTF("making mask len %zu\n", len);
|
||||
memcpy(m, &v[0], len);
|
||||
return mask;
|
||||
}
|
||||
|
||||
static
|
||||
size_t findNoodFragOffset(const hwlmLiteral &lit) {
|
||||
const auto &s = lit.s;
|
||||
@ -67,30 +86,59 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) {
|
||||
}
|
||||
|
||||
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
|
||||
if (!lit.msk.empty()) {
|
||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||
return nullptr;
|
||||
const auto &s = lit.s;
|
||||
|
||||
size_t mask_len = std::max(s.length(), lit.msk.size());
|
||||
DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size());
|
||||
assert(mask_len <= 8);
|
||||
assert(lit.msk.size() == lit.cmp.size());
|
||||
|
||||
vector<u8> n_msk(mask_len);
|
||||
vector<u8> n_cmp(mask_len);
|
||||
|
||||
for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len;
|
||||
i++, j++) {
|
||||
DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]);
|
||||
n_msk[i] = lit.msk[j];
|
||||
n_cmp[i] = lit.cmp[j];
|
||||
}
|
||||
|
||||
const auto &s = lit.s;
|
||||
size_t noodle_len = sizeof(noodTable) + s.length();
|
||||
auto n = make_zeroed_bytecode_ptr<noodTable>(noodle_len);
|
||||
size_t s_off = mask_len - s.length();
|
||||
for (unsigned i = s_off; i < mask_len; i++) {
|
||||
u8 c = s[i - s_off];
|
||||
u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff;
|
||||
n_msk[i] |= si_msk;
|
||||
n_cmp[i] |= c & si_msk;
|
||||
assert((n_cmp[i] & si_msk) == c);
|
||||
DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i],
|
||||
ourisprint(c) ? (char)c : '.');
|
||||
}
|
||||
|
||||
auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable));
|
||||
assert(n);
|
||||
DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable));
|
||||
|
||||
size_t key_offset = findNoodFragOffset(lit);
|
||||
|
||||
n->id = lit.id;
|
||||
n->len = verify_u32(s.length());
|
||||
n->key_offset = verify_u32(key_offset);
|
||||
n->single = s.length() == 1 ? 1 : 0;
|
||||
n->key_offset = verify_u8(s.length() - key_offset);
|
||||
n->nocase = lit.nocase ? 1 : 0;
|
||||
memcpy(n->str, s.c_str(), s.length());
|
||||
n->key0 = s[key_offset];
|
||||
if (n->single) {
|
||||
n->key1 = 0;
|
||||
} else {
|
||||
n->key1 = s[key_offset + 1];
|
||||
}
|
||||
n->msk = make_u64a_mask(n_msk);
|
||||
n->cmp = make_u64a_mask(n_cmp);
|
||||
n->msk_len = mask_len;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
size_t noodSize(const noodTable *n) {
|
||||
assert(n); // shouldn't call with null
|
||||
return sizeof(*n) + n->len;
|
||||
size_t noodSize(const noodTable *) {
|
||||
return sizeof(noodTable);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
@ -102,13 +150,17 @@ namespace ue2 {
|
||||
|
||||
void noodPrintStats(const noodTable *n, FILE *f) {
|
||||
fprintf(f, "Noodle table\n");
|
||||
fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);
|
||||
fprintf(f, "Key Offset: %u\n", n->key_offset);
|
||||
fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n",
|
||||
n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len),
|
||||
n->msk_len);
|
||||
fprintf(f, "String: ");
|
||||
for (u32 i = 0; i < n->len; i++) {
|
||||
if (isgraph(n->str[i]) && n->str[i] != '\\') {
|
||||
fprintf(f, "%c", n->str[i]);
|
||||
for (u32 i = 0; i < n->msk_len; i++) {
|
||||
const u8 *m = (const u8 *)&n->cmp;
|
||||
if (isgraph(m[i]) && m[i] != '\\') {
|
||||
fprintf(f, "%c", m[i]);
|
||||
} else {
|
||||
fprintf(f, "\\x%02hhx", n->str[i]);
|
||||
fprintf(f, "\\x%02hhx", m[i]);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "hwlm.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/arch.h"
|
||||
#include "util/bitutils.h"
|
||||
@ -39,6 +40,7 @@
|
||||
#include "util/intrinsics.h"
|
||||
#include "util/join.h"
|
||||
#include "util/masked_move.h"
|
||||
#include "util/partial_store.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include <ctype.h>
|
||||
@ -49,7 +51,7 @@
|
||||
struct cb_info {
|
||||
HWLMCallback cb; //!< callback function called on match
|
||||
u32 id; //!< ID to pass to callback on match
|
||||
void *ctx; //!< caller-supplied context to pass to callback
|
||||
struct hs_scratch *scratch; //!< scratch to pass to callback
|
||||
size_t offsetAdj; //!< used in streaming mode
|
||||
};
|
||||
|
||||
@ -83,9 +85,8 @@ struct cb_info {
|
||||
while (unlikely(z)) { \
|
||||
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
|
||||
size_t matchPos = d - buf + pos; \
|
||||
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
|
||||
matchPos); \
|
||||
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||
hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \
|
||||
RETURN_IF_TERMINATED(rv); \
|
||||
} \
|
||||
} while (0)
|
||||
@ -95,9 +96,8 @@ struct cb_info {
|
||||
while (unlikely(z)) { \
|
||||
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
|
||||
size_t matchPos = d - buf + pos - 1; \
|
||||
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
|
||||
noCase, cbi, matchPos); \
|
||||
DEBUG_PRINTF("match pos %zu\n", matchPos); \
|
||||
hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \
|
||||
RETURN_IF_TERMINATED(rv); \
|
||||
} \
|
||||
} while (0)
|
||||
@ -111,21 +111,26 @@ u8 caseClear8(u8 x, bool noCase) {
|
||||
// is used only for single chars with case insensitivity used correctly,
|
||||
// so it can go straight to the callback if we get this far.
|
||||
static really_inline
|
||||
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool is_double, bool noCase,
|
||||
const struct cb_info *cbi, size_t pos) {
|
||||
pos -= keyOffset;
|
||||
if (is_double) {
|
||||
if (pos + keyLen > len) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch
|
||||
return HWLM_SUCCESS;
|
||||
hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len,
|
||||
char single, const struct cb_info *cbi, size_t pos) {
|
||||
if (single) {
|
||||
if (n->msk_len == 1) {
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
pos += cbi->offsetAdj;
|
||||
DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1));
|
||||
hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx);
|
||||
assert(len >= n->msk_len);
|
||||
u64a v =
|
||||
partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len);
|
||||
DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp);
|
||||
if ((v & n->msk) != n->cmp) {
|
||||
/* mask didn't match */
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
match:
|
||||
pos -= cbi->offsetAdj;
|
||||
DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);
|
||||
hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
@ -147,38 +152,43 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, const struct cb_info *cbi) {
|
||||
hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
|
||||
const MASK_TYPE mask1 = getMask(key[0], noCase);
|
||||
const MASK_TYPE mask1 = getMask(n->key0, noCase);
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
|
||||
size_t offset = start + n->msk_len - 1;
|
||||
size_t end = len;
|
||||
assert(offset < end);
|
||||
|
||||
#if !defined(HAVE_AVX512)
|
||||
hwlm_error_t rv;
|
||||
size_t end = len;
|
||||
|
||||
if (len < CHUNKSIZE) {
|
||||
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
|
||||
if (end - offset < CHUNKSIZE) {
|
||||
rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset,
|
||||
end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (len == CHUNKSIZE) {
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, len);
|
||||
if (end - offset == CHUNKSIZE) {
|
||||
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
|
||||
cbi, offset, end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
|
||||
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = len - CHUNKSIZE;
|
||||
uintptr_t s3Start = end - CHUNKSIZE;
|
||||
|
||||
if (s2Start) {
|
||||
if (offset != s2Start) {
|
||||
// first scan out to the fast scan starting point
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, s2Start);
|
||||
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
|
||||
cbi, offset, s2Start);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
@ -186,68 +196,70 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
|
||||
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
|
||||
s2Start, s2End);
|
||||
rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start,
|
||||
s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
// if we are done bail out
|
||||
if (s2End == end) {
|
||||
if (s2End == len) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
|
||||
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
|
||||
cbi, s2End, end);
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len);
|
||||
rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi,
|
||||
s2End, len);
|
||||
|
||||
return rv;
|
||||
#else // HAVE_AVX512
|
||||
return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi);
|
||||
return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset,
|
||||
end);
|
||||
#endif
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
// we stop scanning for the key-fragment when the rest of the key can't
|
||||
// possibly fit in the remaining buffer
|
||||
size_t end = len - keyLen + keyOffset + 2;
|
||||
size_t end = len - n->key_offset + 2;
|
||||
|
||||
// the first place the key can match
|
||||
size_t offset = start + n->msk_len - n->key_offset;
|
||||
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
|
||||
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
|
||||
const MASK_TYPE mask1 = getMask(n->key0, noCase);
|
||||
const MASK_TYPE mask2 = getMask(n->key1, noCase);
|
||||
|
||||
#if !defined(HAVE_AVX512)
|
||||
hwlm_error_t rv;
|
||||
|
||||
if (end - keyOffset < CHUNKSIZE) {
|
||||
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, keyOffset, end);
|
||||
if (end - offset < CHUNKSIZE) {
|
||||
rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
|
||||
offset, end);
|
||||
return rv;
|
||||
}
|
||||
if (end - keyOffset == CHUNKSIZE) {
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, keyOffset,
|
||||
end);
|
||||
if (end - offset == CHUNKSIZE) {
|
||||
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
|
||||
mask2, cbi, offset, end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
|
||||
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
|
||||
uintptr_t s1End = s2Start + 1;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = end - CHUNKSIZE;
|
||||
uintptr_t off = keyOffset;
|
||||
uintptr_t off = offset;
|
||||
|
||||
if (s2Start != keyOffset) {
|
||||
if (s2Start != off) {
|
||||
// first scan out to the fast scan starting point plus one char past to
|
||||
// catch the key on the overlap
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, off,
|
||||
s1End);
|
||||
DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start);
|
||||
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
|
||||
mask2, cbi, off, s1End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
off = s1End;
|
||||
@ -261,8 +273,8 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
|
||||
rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, s2Start, s2End);
|
||||
rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
|
||||
s2Start, s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
off = s2End;
|
||||
}
|
||||
@ -273,130 +285,158 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
|
||||
rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
|
||||
caseMask, mask1, mask2, cbi, off, end);
|
||||
rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1,
|
||||
mask2, cbi, off, end);
|
||||
|
||||
return rv;
|
||||
#else // AVX512
|
||||
return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, keyOffset, end);
|
||||
return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
|
||||
offset, end);
|
||||
#endif // AVX512
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 1, cbi);
|
||||
return scanSingleMain(n, buf, len, start, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 0, cbi);
|
||||
return scanSingleMain(n, buf, len, start, 0, cbi);
|
||||
}
|
||||
|
||||
// Single-character specialisation, used when keyLen = 1
|
||||
static really_inline
|
||||
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
if (!ourisalpha(key[0])) {
|
||||
hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t start, bool noCase, const struct cb_info *cbi) {
|
||||
if (!ourisalpha(n->key0)) {
|
||||
noCase = 0; // force noCase off if we don't have an alphabetic char
|
||||
}
|
||||
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanSingleNoCase(buf, len, key, cbi);
|
||||
return scanSingleNoCase(n, buf, len, start, cbi);
|
||||
} else {
|
||||
return scanSingleCase(buf, len, key, cbi);
|
||||
return scanSingleCase(n, buf, len, start, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi);
|
||||
return scanDoubleMain(n, buf, len, start, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t start,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi);
|
||||
return scanDoubleMain(n, buf, len, start, 0, cbi);
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t start, bool noCase, const struct cb_info *cbi) {
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
return scanDoubleNoCase(n, buf, len, start, cbi);
|
||||
} else {
|
||||
return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
return scanDoubleCase(n, buf, len, start, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
// main entry point for the scan code
|
||||
static really_inline
|
||||
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase, const struct cb_info *cbi) {
|
||||
if (len < keyLen) {
|
||||
hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t start, char single, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
if (len - start < n->msk_len) {
|
||||
// can't find string of length keyLen in a shorter buffer
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (keyLen == 1) {
|
||||
assert(keyOffset == 0);
|
||||
return scanSingle(buf, len, key, noCase, cbi);
|
||||
if (single) {
|
||||
return scanSingle(n, buf, len, start, noCase, cbi);
|
||||
} else {
|
||||
return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
|
||||
return scanDouble(n, buf, len, start, noCase, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Block-mode scanner. */
|
||||
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t offset_adj, HWLMCallback cb, void *ctxt) {
|
||||
size_t start, HWLMCallback cb,
|
||||
struct hs_scratch *scratch) {
|
||||
assert(n && buf);
|
||||
|
||||
struct cb_info cbi = { cb, n->id, ctxt, offset_adj };
|
||||
DEBUG_PRINTF("nood scan of %zu bytes for %*s\n", len, n->len, n->str);
|
||||
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
|
||||
struct cb_info cbi = {cb, n->id, scratch, 0};
|
||||
DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len,
|
||||
(const char *)&n->cmp, buf);
|
||||
|
||||
return scan(n, buf, len, start, n->single, n->nocase, &cbi);
|
||||
}
|
||||
|
||||
/** \brief Streaming-mode scanner. */
|
||||
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
HWLMCallback cb, void *ctxt, u8 *temp_buf,
|
||||
UNUSED size_t temp_buffer_size) {
|
||||
HWLMCallback cb, struct hs_scratch *scratch) {
|
||||
assert(n);
|
||||
|
||||
struct cb_info cbi = {cb, n->id, ctxt, 0};
|
||||
hwlm_error_t rv;
|
||||
if (len + hlen < n->msk_len) {
|
||||
DEBUG_PRINTF("not enough bytes for a match\n");
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (hlen) {
|
||||
struct cb_info cbi = {cb, n->id, scratch, 0};
|
||||
DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen,
|
||||
n->msk_len, (const char *)&n->cmp, buf);
|
||||
|
||||
if (hlen && n->msk_len > 1) {
|
||||
/*
|
||||
* we have history, so build up a buffer from enough of the history
|
||||
* buffer plus what we've been given to scan. Since this is relatively
|
||||
* short, just check against msk+cmp per byte offset for matches.
|
||||
*/
|
||||
assert(hbuf);
|
||||
u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2];
|
||||
memset(temp_buf, 0, sizeof(temp_buf));
|
||||
|
||||
size_t tl1 = MIN(n->len - 1, hlen);
|
||||
size_t tl2 = MIN(n->len - 1, len);
|
||||
size_t temp_len = tl1 + tl2;
|
||||
assert(temp_len < temp_buffer_size);
|
||||
memcpy(temp_buf, hbuf + hlen - tl1, tl1);
|
||||
memcpy(temp_buf + tl1, buf, tl2);
|
||||
assert(n->msk_len);
|
||||
size_t tl1 = MIN((size_t)n->msk_len - 1, hlen);
|
||||
size_t tl2 = MIN((size_t)n->msk_len - 1, len);
|
||||
|
||||
cbi.offsetAdj = -tl1;
|
||||
rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase,
|
||||
&cbi);
|
||||
if (rv == HWLM_TERMINATED) {
|
||||
return HWLM_TERMINATED;
|
||||
assert(tl1 + tl2 <= sizeof(temp_buf));
|
||||
assert(tl1 + tl2 >= n->msk_len);
|
||||
assert(tl1 <= sizeof(u64a));
|
||||
assert(tl2 <= sizeof(u64a));
|
||||
DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2);
|
||||
|
||||
unaligned_store_u64a(temp_buf,
|
||||
partial_load_u64a(hbuf + hlen - tl1, tl1));
|
||||
unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2));
|
||||
|
||||
for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) {
|
||||
u64a v = unaligned_load_u64a(temp_buf + i);
|
||||
if ((v & n->msk) == n->cmp) {
|
||||
size_t m_end = -tl1 + i + n->msk_len - 1;
|
||||
DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i);
|
||||
hwlmcb_rv_t rv = cb(m_end, n->id, scratch);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(buf);
|
||||
|
||||
cbi.offsetAdj = 0;
|
||||
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
|
||||
return scan(n, buf, len, 0, n->single, n->nocase, &cbi);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,16 +41,17 @@ extern "C"
|
||||
#endif
|
||||
|
||||
struct noodTable;
|
||||
struct hs_scratch;
|
||||
|
||||
/** \brief Block-mode scanner. */
|
||||
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t offset_adj, HWLMCallback cb, void *ctxt);
|
||||
size_t start, HWLMCallback cb,
|
||||
struct hs_scratch *scratch);
|
||||
|
||||
/** \brief Streaming-mode scanner. */
|
||||
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
HWLMCallback cb, void *ctxt, u8 *temp_buf,
|
||||
size_t temp_buffer_size);
|
||||
HWLMCallback cb, struct hs_scratch *scratch);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
@ -38,10 +38,11 @@ static really_inline m256 getCaseMask(void) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, bool noCase, m256 caseMask,
|
||||
m256 mask1, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t offset, bool noCase,
|
||||
m256 caseMask, m256 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
const size_t l = end - start;
|
||||
@ -66,11 +67,11 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, size_t keyLen, size_t keyOffset,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
m256 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t offset, bool noCase,
|
||||
m256 caseMask, m256 mask1, m256 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
size_t l = end - start;
|
||||
@ -100,8 +101,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
// alignment boundary if needed and to finish off data that the aligned scan
|
||||
// function can't handle (due to small/unaligned chunk at end)
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m256 caseMask, m256 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
@ -140,11 +141,10 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m256 caseMask, m256 mask1, m256 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m256 caseMask, m256 mask1,
|
||||
m256 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
if (!l) {
|
||||
@ -182,8 +182,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m256 caseMask, m256 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
@ -203,10 +203,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m256 caseMask, m256 mask1, m256 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m256 caseMask, m256 mask1,
|
||||
m256 mask2, const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
||||
|
@ -43,8 +43,8 @@ m512 getCaseMask(void) {
|
||||
// alignment boundary if needed and to finish off data that the aligned scan
|
||||
// function can't handle (due to small/unaligned chunk at end)
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m512 caseMask, m512 mask1,
|
||||
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m512 caseMask, m512 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
@ -73,11 +73,12 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key,
|
||||
hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
bool noCase, m512 caseMask, m512 mask1,
|
||||
const struct cb_info *cbi) {
|
||||
const u8 *d = buf;
|
||||
const u8 *e = buf + len;
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
const u8 *e = buf + end;
|
||||
DEBUG_PRINTF("start %p end %p \n", d, e);
|
||||
assert(d < e);
|
||||
if (d + 64 >= e) {
|
||||
@ -86,8 +87,8 @@ hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key,
|
||||
|
||||
// peel off first part to cacheline boundary
|
||||
const u8 *d1 = ROUNDUP_PTR(d, 64);
|
||||
if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0,
|
||||
d1 - d) == HWLM_TERMINATED) {
|
||||
if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start,
|
||||
d1 - buf) == HWLM_TERMINATED) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
d = d1;
|
||||
@ -106,16 +107,15 @@ tail:
|
||||
DEBUG_PRINTF("d %p e %p \n", d, e);
|
||||
// finish off tail
|
||||
|
||||
return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf,
|
||||
return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf,
|
||||
e - buf);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m512 caseMask, m512 mask1, m512 mask2,
|
||||
const struct cb_info *cbi, u64a *lastz0,
|
||||
size_t start, size_t end) {
|
||||
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m512 caseMask, m512 mask1,
|
||||
m512 mask2, const struct cb_info *cbi,
|
||||
u64a *lastz0, size_t start, size_t end) {
|
||||
DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0);
|
||||
const u8 *d = buf + start;
|
||||
ptrdiff_t scan_len = end - start;
|
||||
@ -142,9 +142,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m512 caseMask, m512 mask1, m512 mask2,
|
||||
hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
bool noCase, m512 caseMask, m512 mask1, m512 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
@ -158,9 +157,8 @@ hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key,
|
||||
|
||||
// peel off first part to cacheline boundary
|
||||
const u8 *d1 = ROUNDUP_PTR(d, 64);
|
||||
if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, &lastz0, start,
|
||||
d1 - buf) == HWLM_TERMINATED) {
|
||||
if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
|
||||
&lastz0, start, d1 - buf) == HWLM_TERMINATED) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
d = d1;
|
||||
@ -188,6 +186,6 @@ tail:
|
||||
DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf);
|
||||
// finish off tail
|
||||
|
||||
return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, &lastz0, d - buf, end);
|
||||
return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
|
||||
&lastz0, d - buf, end);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,8 +38,8 @@ static really_inline m128 getCaseMask(void) {
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m128 caseMask, m128 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
@ -67,10 +67,11 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, bool noCase, m128 caseMask,
|
||||
m128 mask1, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t offset, bool noCase,
|
||||
m128 caseMask, m128 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
const size_t l = end - start;
|
||||
@ -96,11 +97,10 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m128 caseMask, m128 mask1, m128 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m128 caseMask, m128 mask1,
|
||||
m128 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
if (!l) {
|
||||
@ -128,11 +128,11 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, size_t keyLen, size_t keyOffset,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
m128 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, size_t offset, bool noCase,
|
||||
m128 caseMask, m128 mask1, m128 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
size_t l = end - start;
|
||||
@ -158,8 +158,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m128 caseMask, m128 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
@ -179,10 +179,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m128 caseMask, m128 mask1, m128 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
|
||||
size_t len, bool noCase, m128 caseMask, m128 mask1,
|
||||
m128 mask2, const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,18 +30,22 @@
|
||||
* \brief Data structures for Noodle literal matcher engine.
|
||||
*/
|
||||
|
||||
#ifndef NOODLE_INTERNAL_H_25D751C42E34A6
|
||||
#define NOODLE_INTERNAL_H_25D751C42E34A6
|
||||
#ifndef NOODLE_INTERNAL_H
|
||||
#define NOODLE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
struct noodTable {
|
||||
u32 id;
|
||||
u32 len;
|
||||
u32 key_offset;
|
||||
u8 nocase;
|
||||
u8 str[];
|
||||
u64a msk;
|
||||
u64a cmp;
|
||||
u8 msk_len;
|
||||
u8 key_offset;
|
||||
u8 nocase;
|
||||
u8 single;
|
||||
u8 key0;
|
||||
u8 key1;
|
||||
};
|
||||
|
||||
#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */
|
||||
#endif /* NOODLE_INTERNAL_H */
|
||||
|
||||
|
@ -41,6 +41,8 @@
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#define PATHS_LIMIT 500
|
||||
@ -65,6 +67,17 @@ void dump_paths(const Container &paths) {
|
||||
DEBUG_PRINTF("%zu paths\n", paths.size());
|
||||
}
|
||||
|
||||
static
|
||||
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
rv.at(rdfa.alpha_remap[i]).set(i);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
bool is_useful_path(const vector<path> &good, const path &p) {
|
||||
for (const auto &g : good) {
|
||||
@ -98,9 +111,10 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) {
|
||||
}
|
||||
|
||||
static
|
||||
void extend(const raw_dfa &rdfa, const path &p,
|
||||
map<u32, vector<path>> &all, vector<path> &out) {
|
||||
dstate s = rdfa.states[p.dest];
|
||||
void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
|
||||
const path &p, unordered_map<u32, vector<path>> &all,
|
||||
vector<path> &out) {
|
||||
const dstate &s = rdfa.states[p.dest];
|
||||
|
||||
if (!p.reach.empty() && p.reach.back().none()) {
|
||||
out.push_back(p);
|
||||
@ -125,9 +139,9 @@ void extend(const raw_dfa &rdfa, const path &p,
|
||||
}
|
||||
|
||||
flat_map<u32, CharReach> dest;
|
||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||
u32 succ = s.next[rdfa.alpha_remap[i]];
|
||||
dest[succ].set(i);
|
||||
for (u32 i = 0; i < rev_map.size(); i++) {
|
||||
u32 succ = s.next[i];
|
||||
dest[succ] |= rev_map[i];
|
||||
}
|
||||
|
||||
for (const auto &e : dest) {
|
||||
@ -148,13 +162,14 @@ void extend(const raw_dfa &rdfa, const path &p,
|
||||
static
|
||||
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
|
||||
dstate_id_t base, u32 len) {
|
||||
const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
|
||||
vector<path> paths{path(base)};
|
||||
map<u32, vector<path>> all;
|
||||
unordered_map<u32, vector<path>> all;
|
||||
all[base].push_back(path(base));
|
||||
for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
|
||||
vector<path> next_gen;
|
||||
for (const auto &p : paths) {
|
||||
extend(rdfa, p, all, next_gen);
|
||||
extend(rdfa, rev_map, p, all, next_gen);
|
||||
}
|
||||
|
||||
paths = move(next_gen);
|
||||
@ -195,17 +210,6 @@ bool better(const AccelScheme &a, const AccelScheme &b) {
|
||||
return a.cr.count() < b.cr.count();
|
||||
}
|
||||
|
||||
static
|
||||
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
|
||||
vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
|
||||
|
||||
for (u32 i = 0; i < N_CHARS; i++) {
|
||||
rv.at(rdfa.alpha_remap[i]).set(i);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
bool double_byte_ok(const AccelScheme &info) {
|
||||
return !info.double_byte.empty() &&
|
||||
@ -225,16 +229,16 @@ bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
|
||||
}
|
||||
|
||||
static
|
||||
vector<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||
const CharReach &escape) {
|
||||
set<u16> rv;
|
||||
flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
|
||||
const CharReach &escape) {
|
||||
flat_set<u16> rv;
|
||||
CharReach nonexit = ~escape;
|
||||
for (auto i = nonexit.find_first(); i != CharReach::npos;
|
||||
for (auto i = nonexit.find_first(); i != nonexit.npos;
|
||||
i = nonexit.find_next(i)) {
|
||||
rv.insert(rdfa.alpha_remap[i]);
|
||||
}
|
||||
|
||||
return vector<u16>(rv.begin(), rv.end());
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
@ -254,7 +258,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
|
||||
|
||||
u16 top_remap = raw.alpha_remap[TOP];
|
||||
|
||||
ue2::unordered_set<dstate_id_t> seen;
|
||||
std::unordered_set<dstate_id_t> seen;
|
||||
while (true) {
|
||||
seen.insert(s);
|
||||
DEBUG_PRINTF("basis %hu\n", s);
|
||||
@ -288,7 +292,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
|
||||
|
||||
static
|
||||
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
|
||||
const AccelScheme &ei) {
|
||||
const AccelScheme &ei) {
|
||||
DEBUG_PRINTF("looking for region around %hu\n", base);
|
||||
|
||||
set<dstate_id_t> region = {base};
|
||||
|
@ -44,6 +44,8 @@
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
|
@ -31,7 +31,7 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
union AccelAux;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -71,7 +71,7 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
|
||||
void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
|
||||
const Castle *c = (const Castle *)getImplNfa(nfa);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
fprintf(f, "Castle multi-tenant repeat engine\n");
|
||||
fprintf(f, "\n");
|
||||
@ -117,7 +117,6 @@ void nfaExecCastle_dump(const struct NFA *nfa, const string &base) {
|
||||
fprintf(f, "Sub %u:\n", i);
|
||||
dumpTextSubCastle(sub[i], f);
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -48,11 +48,11 @@
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/multibit_build.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "grey.h"
|
||||
|
||||
@ -153,13 +153,11 @@ static
|
||||
void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
|
||||
const CliqueVertex &cv, const set<u32> &group) {
|
||||
u32 id = g[cv].stateId;
|
||||
ue2::unordered_set<u32> neighborId;
|
||||
|
||||
// find neighbors for cv
|
||||
for (const auto &v : adjacent_vertices_range(cv, g)) {
|
||||
if (g[v].stateId != id && contains(group, g[v].stateId)){
|
||||
if (g[v].stateId != id && contains(group, g[v].stateId)) {
|
||||
neighbor.push_back(g[v].stateId);
|
||||
neighborId.insert(g[v].stateId);
|
||||
DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
|
||||
}
|
||||
}
|
||||
@ -772,7 +770,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2,
|
||||
const u32 top = m.first;
|
||||
const PureRepeat &pr = m.second;
|
||||
DEBUG_PRINTF("top %u\n", top);
|
||||
u32 new_top = c1.add(pr);
|
||||
u32 new_top = c1.merge(pr);
|
||||
top_map[top] = new_top;
|
||||
DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
|
||||
}
|
||||
@ -883,7 +881,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) {
|
||||
}
|
||||
|
||||
bool requiresDedupe(const CastleProto &proto,
|
||||
const ue2::flat_set<ReportID> &reports) {
|
||||
const flat_set<ReportID> &reports) {
|
||||
for (const auto &report : reports) {
|
||||
auto it = proto.report_map.find(report);
|
||||
if (it == end(proto.report_map)) {
|
||||
|
@ -39,11 +39,12 @@
|
||||
#include "nfagraph/ng_repeat.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
@ -89,7 +90,7 @@ struct CastleProto {
|
||||
std::map<u32, PureRepeat> repeats;
|
||||
|
||||
/** \brief Mapping from report to associated tops. */
|
||||
ue2::unordered_map<ReportID, flat_set<u32>> report_map;
|
||||
std::unordered_map<ReportID, flat_set<u32>> report_map;
|
||||
|
||||
/**
|
||||
* \brief Next top id to use. Repeats may be removed without top remapping,
|
||||
@ -127,7 +128,9 @@ buildCastle(const CastleProto &proto,
|
||||
const CompileContext &cc, const ReportManager &rm);
|
||||
|
||||
/**
|
||||
* \brief Merge two CastleProto prototypes together, if possible.
|
||||
* \brief Merge two CastleProto prototypes together, if possible. If a
|
||||
* particular repeat from c2 is already in c1, then it will be reused rather
|
||||
* than adding a duplicate repeat.
|
||||
*
|
||||
* Returns true if merge of all repeats in c2 into c1 succeeds, and fills
|
||||
* mapping with the repeat indices.
|
||||
@ -155,7 +158,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2);
|
||||
* of the reports in the given set.
|
||||
*/
|
||||
bool requiresDedupe(const CastleProto &proto,
|
||||
const ue2::flat_set<ReportID> &reports);
|
||||
const flat_set<ReportID> &reports);
|
||||
|
||||
/**
|
||||
* \brief Build an NGHolder from a CastleProto.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -30,11 +30,9 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa
|
||||
// prevent weak vtables for raw_report_info, dfa_build_strat
|
||||
raw_report_info::~raw_report_info() {}
|
||||
|
||||
dfa_build_strat::~dfa_build_strat() {}
|
||||
|
||||
raw_dfa::~raw_dfa() {}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -59,12 +59,13 @@
|
||||
#include "dfa_min.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "mcclellancompile_util.h"
|
||||
#include "rdfa.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/partitioned_set.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
@ -299,6 +300,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_dead(rdfa)) {
|
||||
DEBUG_PRINTF("dfa is empty\n");
|
||||
}
|
||||
|
||||
UNUSED const size_t states_before = rdfa.states.size();
|
||||
|
||||
HopcroftInfo info(rdfa);
|
||||
|
@ -37,11 +37,11 @@
|
||||
#include "nfa_internal.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include "nfa_kind.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/order_check.h"
|
||||
|
||||
#include <map>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,8 +32,10 @@
|
||||
#include "goughcompile_internal.h"
|
||||
#include "grey.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_util.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
@ -66,10 +68,7 @@ string dump_name(const gough_edge_id &e) {
|
||||
|
||||
static
|
||||
void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << ".dot";
|
||||
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
StdioFile f(grey.dumpPath + "gough_" + base + ".dot", "w");
|
||||
|
||||
fprintf(f, "digraph NFA {\n");
|
||||
fprintf(f, "rankdir=LR;\n");
|
||||
@ -94,8 +93,6 @@ void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
dump_name(g[s]).c_str(), dump_name(g[t]).c_str());
|
||||
}
|
||||
fprintf(f, "}\n");
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static
|
||||
@ -133,9 +130,7 @@ set<const GoughSSAVar *> uses(const GoughEdgeProps &ep) {
|
||||
static
|
||||
void dump_var_mapping(const GoughGraph &g, const string &base,
|
||||
const Grey &grey) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_vars.txt";
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
StdioFile f(grey.dumpPath + "gough_" + base + "_vars.txt", "w");
|
||||
for (auto v : vertices_range(g)) {
|
||||
set<const GoughSSAVar *> used = uses(g[v]);
|
||||
if (g[v].vars.empty() && used.empty()) {
|
||||
@ -180,7 +175,6 @@ void dump_var_mapping(const GoughGraph &g, const string &base,
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static
|
||||
@ -220,12 +214,7 @@ void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars,
|
||||
|
||||
static
|
||||
void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
FILE *f;
|
||||
{
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_vars.dot";
|
||||
f = fopen(ss.str().c_str(), "w");
|
||||
}
|
||||
StdioFile f(grey.dumpPath + "gough_" + base + "_vars.dot", "w");
|
||||
fprintf(f, "digraph NFA {\n");
|
||||
fprintf(f, "rankdir=LR;\n");
|
||||
fprintf(f, "size=\"11.5,8\"\n");
|
||||
@ -271,7 +260,6 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void dump(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
@ -317,18 +305,11 @@ void dump_blocks(const map<gough_edge_id, vector<gough_ins>> &blocks,
|
||||
return;
|
||||
}
|
||||
|
||||
FILE *f;
|
||||
{
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_programs.txt";
|
||||
f = fopen(ss.str().c_str(), "w");
|
||||
}
|
||||
StdioFile f(grey.dumpPath + "gough_" + base + "_programs.txt", "w");
|
||||
|
||||
for (const auto &m : blocks) {
|
||||
dump_block(f, m.first, m.second);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -33,9 +33,9 @@
|
||||
#include "mcclellancompile.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
@ -106,10 +106,10 @@ struct GoughSSAVarJoin;
|
||||
struct GoughSSAVar : noncopyable {
|
||||
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
|
||||
virtual ~GoughSSAVar();
|
||||
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
|
||||
const flat_set<GoughSSAVar *> &get_inputs() const {
|
||||
return inputs;
|
||||
}
|
||||
const ue2::flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
|
||||
const flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
|
||||
return outputs;
|
||||
}
|
||||
virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
|
||||
@ -127,8 +127,8 @@ struct GoughSSAVar : noncopyable {
|
||||
clear_outputs();
|
||||
}
|
||||
protected:
|
||||
ue2::flat_set<GoughSSAVar *> inputs;
|
||||
ue2::flat_set<GoughSSAVarWithInputs *> outputs;
|
||||
flat_set<GoughSSAVar *> inputs;
|
||||
flat_set<GoughSSAVarWithInputs *> outputs;
|
||||
friend struct GoughSSAVarWithInputs;
|
||||
friend struct GoughSSAVarMin;
|
||||
friend struct GoughSSAVarJoin;
|
||||
@ -184,16 +184,14 @@ struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
|
||||
|
||||
void add_input(GoughSSAVar *v, GoughEdge prev);
|
||||
|
||||
const ue2::flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input)
|
||||
const;
|
||||
const std::map<GoughSSAVar *, ue2::flat_set<GoughEdge> > &get_input_map()
|
||||
const;
|
||||
const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const;
|
||||
const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const;
|
||||
|
||||
protected:
|
||||
void remove_input_raw(GoughSSAVar *v) override;
|
||||
|
||||
private:
|
||||
std::map<GoughSSAVar *, ue2::flat_set<GoughEdge>> input_map;
|
||||
std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map;
|
||||
};
|
||||
|
||||
struct gough_accel_state_info {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,10 +32,10 @@
|
||||
#include "gough_internal.h"
|
||||
#include "grey.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
@ -235,7 +235,7 @@ void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
|
||||
if (contains(aux.containing_v, def)) {
|
||||
def_v = aux.containing_v.at(def);
|
||||
}
|
||||
ue2::unordered_set<GoughVertex> done;
|
||||
unordered_set<GoughVertex> done;
|
||||
while (!pending_vertex.empty()) {
|
||||
GoughVertex current = *pending_vertex.begin();
|
||||
pending_vertex.erase(current);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -353,22 +353,14 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
|
||||
|
||||
void nfaExecGough16_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == GOUGH_NFA_16);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
nfaExecGough16_dumpText(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
nfaExecGough16_dumpDot(nfa, f);
|
||||
fclose(f);
|
||||
nfaExecGough16_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||
nfaExecGough16_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
void nfaExecGough8_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == GOUGH_NFA_8);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
nfaExecGough8_dumpText(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
nfaExecGough8_dumpDot(nfa, f);
|
||||
fclose(f);
|
||||
nfaExecGough8_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||
nfaExecGough8_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -71,47 +71,40 @@ void nfaExecLbrDot_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa);
|
||||
assert(nfa->type == LBR_NFA_DOT);
|
||||
const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
lbrDumpCommon(&ld->common, f);
|
||||
fprintf(f, "DOT model\n");
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa);
|
||||
assert(nfa->type == LBR_NFA_VERM);
|
||||
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
|
||||
StdioFile f(base + ".txt", "w");
|
||||
lbrDumpCommon(&lv->common, f);
|
||||
fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c);
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa);
|
||||
assert(nfa->type == LBR_NFA_NVERM);
|
||||
const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
|
||||
StdioFile f(base + ".txt", "w");
|
||||
lbrDumpCommon(&lv->common, f);
|
||||
fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c);
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa);
|
||||
assert(nfa->type == LBR_NFA_SHUF);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa);
|
||||
lbrDumpCommon(&ls->common, f);
|
||||
@ -122,14 +115,13 @@ void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) {
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa);
|
||||
assert(nfa->type == LBR_NFA_TRUF);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa);
|
||||
lbrDumpCommon(&lt->common, f);
|
||||
@ -140,7 +132,6 @@ void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) {
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count());
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -53,11 +53,13 @@
|
||||
#include "util/charreach.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph_small_color_map.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/unordered.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
@ -96,18 +98,20 @@ struct precalcAccel {
|
||||
};
|
||||
|
||||
struct limex_accel_info {
|
||||
ue2::unordered_set<NFAVertex> accelerable;
|
||||
unordered_set<NFAVertex> accelerable;
|
||||
map<NFAStateSet, precalcAccel> precalc;
|
||||
ue2::unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> accel_map;
|
||||
unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
|
||||
unordered_map<NFAVertex, AccelScheme> accel_map;
|
||||
};
|
||||
|
||||
static
|
||||
map<NFAVertex, NFAStateSet>
|
||||
reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
unordered_map<NFAVertex, NFAStateSet>
|
||||
reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in,
|
||||
const NGHolder &g,
|
||||
const unordered_map<NFAVertex, u32> &state_ids,
|
||||
const u32 num_states) {
|
||||
map<NFAVertex, NFAStateSet> out;
|
||||
unordered_map<NFAVertex, NFAStateSet> out;
|
||||
out.reserve(in.size());
|
||||
|
||||
vector<u32> indexToState(num_vertices(g), NO_STATE);
|
||||
for (const auto &m : state_ids) {
|
||||
@ -137,18 +141,20 @@ reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
|
||||
|
||||
struct build_info {
|
||||
build_info(NGHolder &hi,
|
||||
const ue2::unordered_map<NFAVertex, u32> &states_in,
|
||||
const unordered_map<NFAVertex, u32> &states_in,
|
||||
const vector<BoundedRepeatData> &ri,
|
||||
const map<NFAVertex, NFAStateSet> &rsmi,
|
||||
const map<NFAVertex, NFAStateSet> &smi,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &rsmi,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &smi,
|
||||
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
|
||||
bool dai, bool sci, const CompileContext &cci,
|
||||
u32 nsi)
|
||||
: h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi),
|
||||
do_accel(dai), stateCompression(sci), cc(cci),
|
||||
bool dai, bool sci, const CompileContext &cci, u32 nsi)
|
||||
: h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi),
|
||||
zombies(zi), do_accel(dai), stateCompression(sci), cc(cci),
|
||||
num_states(nsi) {
|
||||
for (const auto &br : repeats) {
|
||||
insert(&tugs, br.tug_triggers);
|
||||
for (auto v : br.tug_triggers) {
|
||||
assert(state_ids.at(v) != NO_STATE);
|
||||
tugs.set(state_ids.at(v));
|
||||
}
|
||||
br_cyclic[br.cyclic] =
|
||||
BoundedRepeatSummary(br.repeatMin, br.repeatMax);
|
||||
}
|
||||
@ -160,15 +166,15 @@ struct build_info {
|
||||
}
|
||||
|
||||
NGHolder &h;
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
||||
const unordered_map<NFAVertex, u32> &state_ids;
|
||||
const vector<BoundedRepeatData> &repeats;
|
||||
|
||||
// Squash maps; state sets are indexed by state_id.
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
const map<u32, set<NFAVertex>> &tops;
|
||||
ue2::unordered_set<NFAVertex> tugs;
|
||||
NFAStateSet tugs;
|
||||
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
|
||||
const set<NFAVertex> &zombies;
|
||||
bool do_accel;
|
||||
@ -238,7 +244,7 @@ bool isLimitedTransition(int from, int to, int maxshift) {
|
||||
|
||||
// Fill a bit mask
|
||||
template<class Mask>
|
||||
void maskFill(Mask &m, char c) {
|
||||
void maskFill(Mask &m, u8 c) {
|
||||
memset(&m, c, sizeof(m));
|
||||
}
|
||||
|
||||
@ -478,7 +484,7 @@ bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g,
|
||||
static
|
||||
void nfaFindAccelSchemes(const NGHolder &g,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> *out) {
|
||||
unordered_map<NFAVertex, AccelScheme> *out) {
|
||||
vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
|
||||
|
||||
NFAVertex sds_or_proxy = get_sds_or_proxy(g);
|
||||
@ -503,8 +509,8 @@ void nfaFindAccelSchemes(const NGHolder &g,
|
||||
}
|
||||
|
||||
struct fas_visitor : public boost::default_bfs_visitor {
|
||||
fas_visitor(const ue2::unordered_map<NFAVertex, AccelScheme> &am_in,
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> *out_in)
|
||||
fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in,
|
||||
unordered_map<NFAVertex, AccelScheme> *out_in)
|
||||
: accel_map(am_in), out(out_in) {}
|
||||
|
||||
void discover_vertex(NFAVertex v, const NGHolder &) {
|
||||
@ -515,13 +521,13 @@ struct fas_visitor : public boost::default_bfs_visitor {
|
||||
throw this; /* done */
|
||||
}
|
||||
}
|
||||
const ue2::unordered_map<NFAVertex, AccelScheme> &accel_map;
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> *out;
|
||||
const unordered_map<NFAVertex, AccelScheme> &accel_map;
|
||||
unordered_map<NFAVertex, AccelScheme> *out;
|
||||
};
|
||||
|
||||
static
|
||||
void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> *accel_map) {
|
||||
unordered_map<NFAVertex, AccelScheme> *accel_map) {
|
||||
/* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
|
||||
* else should be ditched. We use a simple BFS to choose accel states near
|
||||
* the start. */
|
||||
@ -541,14 +547,12 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
|
||||
tempEdges.push_back(e); // Remove edge later.
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, AccelScheme> out;
|
||||
unordered_map<NFAVertex, AccelScheme> out;
|
||||
|
||||
try {
|
||||
vector<boost::default_color_type> colour(num_vertices(g));
|
||||
boost::breadth_first_search(g, g.start,
|
||||
visitor(fas_visitor(*accel_map, &out))
|
||||
.color_map(make_iterator_property_map(colour.begin(),
|
||||
get(vertex_index, g))));
|
||||
visitor(fas_visitor(*accel_map, &out))
|
||||
.color_map(make_small_color_map(g)));
|
||||
} catch (fas_visitor *) {
|
||||
; /* found max accel_states */
|
||||
}
|
||||
@ -983,16 +987,18 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v,
|
||||
return idx;
|
||||
}
|
||||
|
||||
using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>;
|
||||
|
||||
static
|
||||
u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
|
||||
unordered_map<vector<ReportID>, u32> &reportListCache) {
|
||||
ReportListCache &reports_cache) {
|
||||
assert(!r.empty());
|
||||
|
||||
vector<ReportID> my_reports(begin(r), end(r));
|
||||
my_reports.push_back(MO_INVALID_IDX); // sentinel
|
||||
|
||||
auto cache_it = reportListCache.find(my_reports);
|
||||
if (cache_it != end(reportListCache)) {
|
||||
auto cache_it = reports_cache.find(my_reports);
|
||||
if (cache_it != end(reports_cache)) {
|
||||
u32 offset = cache_it->second;
|
||||
DEBUG_PRINTF("reusing cached report list at %u\n", offset);
|
||||
return offset;
|
||||
@ -1008,13 +1014,12 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports,
|
||||
|
||||
u32 offset = verify_u32(reports.size());
|
||||
insert(&reports, reports.end(), my_reports);
|
||||
reportListCache.emplace(move(my_reports), offset);
|
||||
reports_cache.emplace(move(my_reports), offset);
|
||||
return offset;
|
||||
}
|
||||
|
||||
static
|
||||
void buildAcceptsList(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
void buildAcceptsList(const build_info &args, ReportListCache &reports_cache,
|
||||
vector<NFAVertex> &verts, vector<NFAAccept> &accepts,
|
||||
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
|
||||
if (verts.empty()) {
|
||||
@ -1052,8 +1057,7 @@ void buildAcceptsList(const build_info &args,
|
||||
}
|
||||
|
||||
static
|
||||
void buildAccepts(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
void buildAccepts(const build_info &args, ReportListCache &reports_cache,
|
||||
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
|
||||
vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod,
|
||||
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
|
||||
@ -1120,7 +1124,7 @@ u32 uncompressedStateSize(u32 num_states) {
|
||||
|
||||
static
|
||||
u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
const unordered_map<NFAVertex, u32> &state_ids) {
|
||||
// Shrink state requirement to enough to fit the compressed largest reach.
|
||||
vector<u32> allreach(N_CHARS, 0);
|
||||
|
||||
@ -1191,7 +1195,7 @@ bool hasSquashableInitDs(const build_info &args) {
|
||||
|
||||
static
|
||||
bool hasInitDsStates(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
const unordered_map<NFAVertex, u32> &state_ids) {
|
||||
if (state_ids.at(h.startDs) != NO_STATE) {
|
||||
return true;
|
||||
}
|
||||
@ -1359,17 +1363,16 @@ struct ExceptionProto {
|
||||
};
|
||||
|
||||
static
|
||||
u32 buildExceptionMap(const build_info &args,
|
||||
unordered_map<vector<ReportID>, u32> &reports_cache,
|
||||
const ue2::unordered_set<NFAEdge> &exceptional,
|
||||
u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
|
||||
const unordered_set<NFAEdge> &exceptional,
|
||||
map<ExceptionProto, vector<u32>> &exceptionMap,
|
||||
vector<ReportID> &reportList) {
|
||||
const NGHolder &h = args.h;
|
||||
const u32 num_states = args.num_states;
|
||||
u32 exceptionCount = 0;
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> pos_trigger;
|
||||
ue2::unordered_map<NFAVertex, u32> tug_trigger;
|
||||
unordered_map<NFAVertex, u32> pos_trigger;
|
||||
unordered_map<NFAVertex, u32> tug_trigger;
|
||||
|
||||
for (u32 i = 0; i < args.repeats.size(); i++) {
|
||||
const BoundedRepeatData &br = args.repeats[i];
|
||||
@ -1518,18 +1521,14 @@ u32 depth_to_u32(const depth &d) {
|
||||
}
|
||||
|
||||
static
|
||||
bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e,
|
||||
const build_info &args, u32 maxShift) {
|
||||
NFAVertex from = source(e, h);
|
||||
NFAVertex to = target(e, h);
|
||||
u32 f = args.state_ids.at(from);
|
||||
u32 t = args.state_ids.at(to);
|
||||
if (!isLimitedTransition(f, t, maxShift)) {
|
||||
bool isExceptionalTransition(u32 from, u32 to, const build_info &args,
|
||||
u32 maxShift) {
|
||||
if (!isLimitedTransition(from, to, maxShift)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// All transitions out of a tug trigger are exceptional.
|
||||
if (contains(args.tugs, from)) {
|
||||
if (args.tugs.test(from)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -1545,7 +1544,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) {
|
||||
if (from == NO_STATE || to == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) {
|
||||
if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) {
|
||||
shiftMask |= (1UL << (to - from));
|
||||
}
|
||||
}
|
||||
@ -1574,7 +1573,7 @@ int getLimexScore(const build_info &args, u32 nShifts) {
|
||||
if (from == NO_STATE || to == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
if (isExceptionalTransition(h, e, args, maxVarShift)) {
|
||||
if (isExceptionalTransition(from, to, args, maxVarShift)) {
|
||||
exceptionalStates.set(from);
|
||||
}
|
||||
}
|
||||
@ -1615,9 +1614,7 @@ bool cannotDie(const build_info &args, const set<NFAVertex> &tops) {
|
||||
// top, looking for a cyclic path consisting of vertices of dot reach. If
|
||||
// one exists, than the NFA cannot die after this top is triggered.
|
||||
|
||||
vector<boost::default_color_type> colours(num_vertices(h));
|
||||
auto colour_map = boost::make_iterator_property_map(colours.begin(),
|
||||
get(vertex_index, h));
|
||||
auto colour_map = make_small_color_map(h);
|
||||
|
||||
struct CycleFound {};
|
||||
struct CannotDieVisitor : public boost::default_dfs_visitor {
|
||||
@ -1848,10 +1845,9 @@ struct Factory {
|
||||
maskSetBit(limex->repeatCyclicMask, cyclic);
|
||||
}
|
||||
/* also include tugs in repeat cyclic mask */
|
||||
for (NFAVertex v : args.tugs) {
|
||||
u32 v_state = args.state_ids.at(v);
|
||||
assert(v_state != NO_STATE);
|
||||
maskSetBit(limex->repeatCyclicMask, v_state);
|
||||
for (size_t i = args.tugs.find_first(); i != args.tugs.npos;
|
||||
i = args.tugs.find_next(i)) {
|
||||
maskSetBit(limex->repeatCyclicMask, i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1872,7 +1868,7 @@ struct Factory {
|
||||
// We check for exceptional transitions here, as we don't want tug
|
||||
// trigger transitions emitted as limited transitions (even if they
|
||||
// could be in this model).
|
||||
if (!isExceptionalTransition(h, e, args, maxShift)) {
|
||||
if (!isExceptionalTransition(from, to, args, maxShift)) {
|
||||
u32 shift = to - from;
|
||||
if ((shiftMask & (1UL << shift)) == 0UL) {
|
||||
shiftMask |= (1UL << shift);
|
||||
@ -1896,7 +1892,7 @@ struct Factory {
|
||||
|
||||
static
|
||||
void findExceptionalTransitions(const build_info &args,
|
||||
ue2::unordered_set<NFAEdge> &exceptional,
|
||||
unordered_set<NFAEdge> &exceptional,
|
||||
u32 maxShift) {
|
||||
const NGHolder &h = args.h;
|
||||
|
||||
@ -1907,7 +1903,7 @@ struct Factory {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isExceptionalTransition(h, e, args, maxShift)) {
|
||||
if (isExceptionalTransition(from, to, args, maxShift)) {
|
||||
exceptional.insert(e);
|
||||
}
|
||||
}
|
||||
@ -2171,9 +2167,9 @@ struct Factory {
|
||||
|
||||
// We track report lists that have already been written into the global
|
||||
// list in case we can reuse them.
|
||||
unordered_map<vector<ReportID>, u32> reports_cache;
|
||||
ReportListCache reports_cache;
|
||||
|
||||
ue2::unordered_set<NFAEdge> exceptional;
|
||||
unordered_set<NFAEdge> exceptional;
|
||||
u32 shiftCount = findBestNumOfVarShifts(args);
|
||||
assert(shiftCount);
|
||||
u32 maxShift = findMaxVarShift(args, shiftCount);
|
||||
@ -2377,10 +2373,10 @@ MAKE_LIMEX_TRAITS(512)
|
||||
// Some sanity tests, called by an assertion in generate().
|
||||
static UNUSED
|
||||
bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
const unordered_map<NFAVertex, u32> &state_ids,
|
||||
u32 num_states) {
|
||||
ue2::unordered_set<u32> seen;
|
||||
ue2::unordered_set<NFAVertex> top_starts;
|
||||
unordered_set<u32> seen;
|
||||
unordered_set<NFAVertex> top_starts;
|
||||
for (const auto &vv : tops | map_values) {
|
||||
insert(&top_starts, vv);
|
||||
}
|
||||
@ -2427,7 +2423,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
|
||||
#endif // NDEBUG
|
||||
|
||||
static
|
||||
u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
|
||||
u32 rv = 0;
|
||||
for (const auto &m : state_ids) {
|
||||
DEBUG_PRINTF("state %u\n", m.second);
|
||||
@ -2440,14 +2436,14 @@ u32 max_state(const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
}
|
||||
|
||||
bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies, bool do_accel,
|
||||
bool stateCompression, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies, bool do_accel,
|
||||
bool stateCompression, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
const u32 num_states = max_state(states) + 1;
|
||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||
|
||||
@ -2510,13 +2506,13 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
}
|
||||
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies,
|
||||
const CompileContext &cc) {
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies,
|
||||
const CompileContext &cc) {
|
||||
const u32 num_states = max_state(states) + 1;
|
||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||
|
||||
|
@ -34,15 +34,16 @@
|
||||
#ifndef LIMEX_COMPILE_H
|
||||
#define LIMEX_COMPILE_H
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_squash.h" // for NFAStateSet
|
||||
#include "ue2common.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
@ -69,16 +70,16 @@ struct CompileContext;
|
||||
* graph.
|
||||
*/
|
||||
bytecode_ptr<NFA> generate(NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
bool do_accel,
|
||||
bool stateCompression,
|
||||
u32 hint,
|
||||
const CompileContext &cc);
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
bool do_accel,
|
||||
bool stateCompression,
|
||||
u32 hint,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* \brief For a given graph, count the number of accelerable states it has.
|
||||
@ -87,13 +88,13 @@ bytecode_ptr<NFA> generate(NGHolder &g,
|
||||
* implementable.
|
||||
*/
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
const CompileContext &cc);
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -487,25 +487,24 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename limex_type>
|
||||
static
|
||||
void dumpLimexDot(const NFA *nfa, const limex_type *limex, FILE *f) {
|
||||
dumpDotPreamble(f);
|
||||
u32 state_count = nfa->nPositions;
|
||||
dumpVertexDotInfo(limex, state_count, f, limex_labeller<limex_type>(limex));
|
||||
for (u32 i = 0; i < state_count; i++) {
|
||||
dumpLimDotInfo(limex, i, f);
|
||||
dumpExDotInfo(limex, i, f);
|
||||
}
|
||||
dumpDotTrailer(f);
|
||||
}
|
||||
|
||||
#define LIMEX_DUMP_FN(size) \
|
||||
void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \
|
||||
auto limex = (const LimExNFA##size *)getImplNfa(nfa); \
|
||||
\
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \
|
||||
dumpLimexText(limex, f); \
|
||||
fclose(f); \
|
||||
\
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w"); \
|
||||
dumpDotPreamble(f); \
|
||||
u32 state_count = nfa->nPositions; \
|
||||
dumpVertexDotInfo(limex, state_count, f, \
|
||||
limex_labeller<LimExNFA##size>(limex)); \
|
||||
for (u32 i = 0; i < state_count; i++) { \
|
||||
dumpLimDotInfo(limex, i, f); \
|
||||
dumpExDotInfo(limex, i, f); \
|
||||
} \
|
||||
dumpDotTrailer(f); \
|
||||
fclose(f); \
|
||||
dumpLimexText(limex, StdioFile(base + ".txt", "w")); \
|
||||
dumpLimexDot(nfa, limex, StdioFile(base + ".dot", "w")); \
|
||||
}
|
||||
|
||||
LIMEX_DUMP_FN(32)
|
||||
|
@ -46,7 +46,7 @@
|
||||
#include "util/make_unique.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
@ -288,11 +288,12 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
|
||||
|
||||
raw_report_list rrl(s.reports, rm, remap_reports);
|
||||
DEBUG_PRINTF("non empty r\n");
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports.push_back(rev[rrl]);
|
||||
auto it = rev.find(rrl);
|
||||
if (it != rev.end()) {
|
||||
reports.push_back(it->second);
|
||||
} else {
|
||||
DEBUG_PRINTF("adding to rl %zu\n", ri->size());
|
||||
rev[rrl] = ri->size();
|
||||
rev.emplace(rrl, ri->size());
|
||||
reports.push_back(ri->size());
|
||||
ri->rl.push_back(rrl);
|
||||
}
|
||||
@ -306,13 +307,14 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
|
||||
|
||||
DEBUG_PRINTF("non empty r eod\n");
|
||||
raw_report_list rrl(s.reports_eod, rm, remap_reports);
|
||||
if (rev.find(rrl) != rev.end()) {
|
||||
reports_eod.push_back(rev[rrl]);
|
||||
auto it = rev.find(rrl);
|
||||
if (it != rev.end()) {
|
||||
reports_eod.push_back(it->second);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
|
||||
rev[rrl] = ri->size();
|
||||
rev.emplace(rrl, ri->size());
|
||||
reports_eod.push_back(ri->size());
|
||||
ri->rl.push_back(rrl);
|
||||
}
|
||||
@ -325,10 +327,9 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
|
||||
*arbReport = 0;
|
||||
}
|
||||
|
||||
|
||||
/* if we have only a single report id generated from all accepts (not eod)
|
||||
* we can take some short cuts */
|
||||
set<ReportID> reps;
|
||||
flat_set<ReportID> reps;
|
||||
|
||||
for (u32 rl_index : reports) {
|
||||
if (rl_index == MO_INVALID_IDX) {
|
||||
@ -897,7 +898,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
||||
}
|
||||
|
||||
u32 self_loop_width = 0;
|
||||
const dstate curr_raw = info.states[curr_id];
|
||||
const dstate &curr_raw = info.states[curr_id];
|
||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||
if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
|
||||
self_loop_width++;
|
||||
@ -914,33 +915,6 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
||||
info.extra[curr_id].shermanState = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calls accessible outside this module.
|
||||
*/
|
||||
|
||||
u16 raw_dfa::getImplAlphaSize() const {
|
||||
return alpha_size - N_SPECIAL_SYMBOL;
|
||||
}
|
||||
|
||||
void raw_dfa::stripExtraEodReports(void) {
|
||||
/* if a state generates a given report as a normal accept - then it does
|
||||
* not also need to generate an eod report for it */
|
||||
for (dstate &ds : states) {
|
||||
for (const ReportID &report : ds.reports) {
|
||||
ds.reports_eod.erase(report);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool raw_dfa::hasEodReports(void) const {
|
||||
for (const dstate &ds : states) {
|
||||
if (!ds.reports_eod.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
|
||||
symbol_t alphasize = raw.getImplAlphaSize();
|
||||
@ -964,7 +938,8 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||
const CompileContext &cc,
|
||||
bool trust_daddy_states,
|
||||
set<dstate_id_t> *accel_states) {
|
||||
u16 total_daddy = 0;
|
||||
assert(!is_dead(raw));
|
||||
|
||||
dfa_info info(strat);
|
||||
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
|
||||
|
||||
@ -974,21 +949,24 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
|
||||
}
|
||||
|
||||
bool has_eod_reports = raw.hasEodReports();
|
||||
bool any_cyclic_near_anchored_state = is_cyclic_near(raw,
|
||||
raw.start_anchored);
|
||||
|
||||
for (u32 i = 0; i < info.size(); i++) {
|
||||
find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state,
|
||||
trust_daddy_states, cc.grey);
|
||||
total_daddy += info.extra[i].daddytaken;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||
info.size() * info.impl_alpha_size, info.size(),
|
||||
info.impl_alpha_size);
|
||||
|
||||
bytecode_ptr<NFA> nfa;
|
||||
if (!using8bit) {
|
||||
u16 total_daddy = 0;
|
||||
bool any_cyclic_near_anchored_state
|
||||
= is_cyclic_near(raw, raw.start_anchored);
|
||||
|
||||
for (u32 i = 0; i < info.size(); i++) {
|
||||
find_better_daddy(info, i, using8bit,
|
||||
any_cyclic_near_anchored_state,
|
||||
trust_daddy_states, cc.grey);
|
||||
total_daddy += info.extra[i].daddytaken;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
|
||||
info.size() * info.impl_alpha_size, info.size(),
|
||||
info.impl_alpha_size);
|
||||
|
||||
nfa = mcclellanCompile16(info, cc, accel_states);
|
||||
} else {
|
||||
nfa = mcclellanCompile8(info, cc, accel_states);
|
||||
|
@ -33,7 +33,6 @@
|
||||
#include "rdfa.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
@ -30,12 +30,11 @@
|
||||
|
||||
#include "rdfa.h"
|
||||
#include "util/container.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/hash.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <deque>
|
||||
|
||||
#include <boost/functional/hash/hash.hpp>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -127,13 +126,11 @@ u32 remove_leading_dots(raw_dfa &raw) {
|
||||
static never_inline
|
||||
u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
|
||||
vector<u32> &dist = *dist_in;
|
||||
dist.clear();
|
||||
dist.resize(raw.states.size(), ~0U);
|
||||
dist.assign(raw.states.size(), ~0U);
|
||||
|
||||
assert(raw.start_anchored != DEAD_STATE);
|
||||
|
||||
deque<dstate_id_t> to_visit;
|
||||
to_visit.push_back(raw.start_anchored);
|
||||
deque<dstate_id_t> to_visit = { raw.start_anchored };
|
||||
dist[raw.start_anchored] = 0;
|
||||
|
||||
u32 last_d = 0;
|
||||
@ -148,8 +145,7 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
|
||||
assert(d >= last_d);
|
||||
assert(d != ~0U);
|
||||
|
||||
for (u32 j = 0; j < raw.alpha_size; j++) {
|
||||
dstate_id_t t = raw.states[s].next[j];
|
||||
for (dstate_id_t t : raw.states[s].next) {
|
||||
if (t == DEAD_STATE) {
|
||||
continue;
|
||||
}
|
||||
@ -187,7 +183,21 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) {
|
||||
}
|
||||
}
|
||||
|
||||
return changed;
|
||||
if (!changed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// We may have cleared all reports from the DFA, in which case it should
|
||||
// become empty.
|
||||
if (all_of_in(raw.states, [](const dstate &ds) {
|
||||
return ds.reports.empty() && ds.reports_eod.empty();
|
||||
})) {
|
||||
DEBUG_PRINTF("no reports left at all, dfa is dead\n");
|
||||
raw.start_anchored = DEAD_STATE;
|
||||
raw.start_floating = DEAD_STATE;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
set<ReportID> all_reports(const raw_dfa &rdfa) {
|
||||
@ -218,22 +228,18 @@ bool has_non_eod_accepts(const raw_dfa &rdfa) {
|
||||
}
|
||||
|
||||
size_t hash_dfa_no_reports(const raw_dfa &rdfa) {
|
||||
using boost::hash_combine;
|
||||
using boost::hash_range;
|
||||
|
||||
size_t v = 0;
|
||||
hash_combine(v, rdfa.alpha_size);
|
||||
hash_combine(v, hash_range(begin(rdfa.alpha_remap), end(rdfa.alpha_remap)));
|
||||
hash_combine(v, rdfa.alpha_remap);
|
||||
|
||||
for (const auto &ds : rdfa.states) {
|
||||
hash_combine(v, hash_range(begin(ds.next), end(ds.next)));
|
||||
hash_combine(v, ds.next);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
size_t hash_dfa(const raw_dfa &rdfa) {
|
||||
using boost::hash_combine;
|
||||
size_t v = 0;
|
||||
hash_combine(v, hash_dfa_no_reports(rdfa));
|
||||
hash_combine(v, all_reports(rdfa));
|
||||
@ -272,4 +278,9 @@ bool can_die_early(const raw_dfa &raw, u32 age_limit) {
|
||||
return can_die_early(raw, raw.start_anchored, visited, age_limit);
|
||||
}
|
||||
|
||||
bool is_dead(const raw_dfa &rdfa) {
|
||||
return rdfa.start_anchored == DEAD_STATE &&
|
||||
rdfa.start_floating == DEAD_STATE;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -59,6 +59,13 @@ size_t hash_dfa(const raw_dfa &rdfa);
|
||||
|
||||
bool can_die_early(const raw_dfa &raw, u32 age_limit);
|
||||
|
||||
/**
|
||||
* \brief Returns true if this DFA cannot match, i.e. both its anchored and
|
||||
* floating start states are DEAD_STATE.
|
||||
*/
|
||||
bool is_dead(const raw_dfa &rdfa);
|
||||
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -442,22 +442,14 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) {
|
||||
|
||||
void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == MCCLELLAN_NFA_16);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
nfaExecMcClellan16_dumpText(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
nfaExecMcClellan16_dumpDot(nfa, f);
|
||||
fclose(f);
|
||||
nfaExecMcClellan16_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||
nfaExecMcClellan16_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == MCCLELLAN_NFA_8);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
nfaExecMcClellan8_dumpText(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
nfaExecMcClellan8_dumpDot(nfa, f);
|
||||
fclose(f);
|
||||
nfaExecMcClellan8_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||
nfaExecMcClellan8_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -45,13 +45,14 @@
|
||||
#include "util/compare.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/unordered.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
@ -383,6 +384,8 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
|
||||
#define MAX_SHENG_STATES 16
|
||||
#define MAX_SHENG_LEAKINESS 0.05
|
||||
|
||||
using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;
|
||||
|
||||
/**
|
||||
* Returns the proportion of strings of length 'depth' which will leave the
|
||||
* sheng region when starting at state 'u'.
|
||||
@ -390,8 +393,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
|
||||
static
|
||||
double leakiness(const RdfaGraph &g, dfa_info &info,
|
||||
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
|
||||
u32 depth,
|
||||
unordered_map<pair<RdfaVertex, u32>, double> &cache) {
|
||||
u32 depth, LeakinessCache &cache) {
|
||||
double rv = 0;
|
||||
if (contains(cache, make_pair(u, depth))) {
|
||||
return cache[make_pair(u, depth)];
|
||||
@ -426,7 +428,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info,
|
||||
static
|
||||
double leakiness(const RdfaGraph &g, dfa_info &info,
|
||||
const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
|
||||
unordered_map<pair<RdfaVertex, u32>, double> cache;
|
||||
LeakinessCache cache;
|
||||
double rv = leakiness(g, info, sheng_states, u, 8, cache);
|
||||
return rv;
|
||||
}
|
||||
@ -738,7 +740,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
|
||||
assert(info.is_normal(currState.daddy));
|
||||
|
||||
u32 self_loop_width = 0;
|
||||
const dstate curr_raw = info.states[curr_id];
|
||||
const dstate &curr_raw = info.states[curr_id];
|
||||
for (unsigned i = 0; i < N_CHARS; i++) {
|
||||
if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
|
||||
self_loop_width++;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -394,22 +394,14 @@ void dump_text_8(const NFA *nfa, FILE *f) {
|
||||
|
||||
void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == MCSHENG_NFA_16);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
dump_text_16(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
dump_dot_16(nfa, f);
|
||||
fclose(f);
|
||||
dump_text_16(nfa, StdioFile(base + ".txt", "w"));
|
||||
dump_dot_16(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == MCSHENG_NFA_8);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
dump_text_8(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
dump_dot_8(nfa, f);
|
||||
fclose(f);
|
||||
dump_text_8(nfa, StdioFile(base + ".txt", "w"));
|
||||
dump_dot_8(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -132,7 +132,7 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) {
|
||||
void nfaExecMpv_dump(const NFA *nfa, const string &base) {
|
||||
const mpv *m = (const mpv *)getImplNfa(nfa);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
fprintf(f, "Puff the Magic Engines\n");
|
||||
fprintf(f, "\n");
|
||||
@ -154,7 +154,6 @@ void nfaExecMpv_dump(const NFA *nfa, const string &base) {
|
||||
}
|
||||
|
||||
dumpTextReverse(nfa, f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
55
src/nfa/rdfa.cpp
Normal file
55
src/nfa/rdfa.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rdfa.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// prevent weak vtables
|
||||
raw_dfa::~raw_dfa() {}
|
||||
|
||||
void raw_dfa::stripExtraEodReports(void) {
|
||||
/* if a state generates a given report as a normal accept - then it does
|
||||
* not also need to generate an eod report for it */
|
||||
for (dstate &ds : states) {
|
||||
for (const ReportID &report : ds.reports) {
|
||||
ds.reports_eod.erase(report);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool raw_dfa::hasEodReports(void) const {
|
||||
for (const dstate &ds : states) {
|
||||
if (!ds.reports_eod.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,7 +32,7 @@
|
||||
#include "nfa_kind.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
@ -81,7 +81,7 @@ struct raw_dfa {
|
||||
explicit raw_dfa(nfa_kind k) : kind(k) {}
|
||||
virtual ~raw_dfa();
|
||||
|
||||
u16 getImplAlphaSize() const;
|
||||
u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; }
|
||||
virtual void stripExtraEodReports(void);
|
||||
bool hasEodReports(void) const;
|
||||
};
|
||||
|
@ -36,9 +36,10 @@
|
||||
#include "nfagraph/ng_mcclellan_internal.h"
|
||||
#include "util/container.h"
|
||||
#include "util/determinise.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
@ -53,8 +54,8 @@ namespace {
|
||||
|
||||
class Automaton_Merge {
|
||||
public:
|
||||
typedef vector<u16> StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
using StateSet = vector<u16>;
|
||||
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
|
||||
|
||||
Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2,
|
||||
const ReportManager *rm_in, const Grey &grey_in)
|
||||
@ -289,7 +290,7 @@ unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
|
||||
auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
|
||||
|
||||
Automaton_Merge autom(d1, d2, rm, grey);
|
||||
if (!determinise(autom, rdfa->states, max_states)) {
|
||||
if (determinise(autom, rdfa->states, max_states)) {
|
||||
rdfa->start_anchored = autom.start_anchored;
|
||||
rdfa->start_floating = autom.start_floating;
|
||||
rdfa->alpha_size = autom.alphasize;
|
||||
@ -374,7 +375,7 @@ unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
|
||||
|
||||
DEBUG_PRINTF("merging dfa\n");
|
||||
|
||||
if (determinise(n, rdfa->states, max_states)) {
|
||||
if (!determinise(n, rdfa->states, max_states)) {
|
||||
DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
@ -33,7 +33,10 @@
|
||||
#include "rdfa.h"
|
||||
#include "util/bytecode_ptr.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
|
||||
struct NFA;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -41,7 +41,6 @@
|
||||
#include "util/dump_util.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
@ -267,12 +266,8 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) {
|
||||
|
||||
void nfaExecSheng_dump(const NFA *nfa, const string &base) {
|
||||
assert(nfa->type == SHENG_NFA);
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
nfaExecSheng_dumpText(nfa, f);
|
||||
fclose(f);
|
||||
f = fopen_or_throw((base + ".dot").c_str(), "w");
|
||||
nfaExecSheng_dumpDot(nfa, f);
|
||||
fclose(f);
|
||||
nfaExecSheng_dumpText(nfa, StdioFile(base + ".txt", "w"));
|
||||
nfaExecSheng_dumpDot(nfa, StdioFile(base + ".dot", "w"));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -33,7 +33,7 @@
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -35,7 +35,7 @@
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
|
@ -27,7 +27,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Tamarama: container engine for exclusve engines, dump code.
|
||||
* \brief Tamarama: container engine for exclusive engines, dump code.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
@ -54,7 +54,7 @@ namespace ue2 {
|
||||
void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) {
|
||||
const Tamarama *t = (const Tamarama *)getImplNfa(nfa);
|
||||
|
||||
FILE *f = fopen_or_throw((base + ".txt").c_str(), "w");
|
||||
StdioFile f(base + ".txt", "w");
|
||||
|
||||
fprintf(f, "Tamarama container engine\n");
|
||||
fprintf(f, "\n");
|
||||
@ -63,7 +63,6 @@ void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) {
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fprintf(f, "\n");
|
||||
fclose(f);
|
||||
|
||||
const u32 *subOffset =
|
||||
(const u32 *)((const char *)t + sizeof(struct Tamarama) +
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -32,12 +32,15 @@
|
||||
* truffle is always able to represent an entire character class, providing a
|
||||
* backstop to other acceleration engines.
|
||||
*/
|
||||
|
||||
#include "trufflecompile.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/simd_types.h"
|
||||
|
||||
#include "util/dump_mask.h"
|
||||
#include <cstring>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -44,7 +44,6 @@
|
||||
#include "util/graph.h"
|
||||
#include "util/noncopyable.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
|
@ -220,6 +220,52 @@ vector<NFAEdge> findShellEdges(const NGHolder &g,
|
||||
return shell_edges;
|
||||
}
|
||||
|
||||
template<typename GetAdjRange>
|
||||
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
|
||||
GetAdjRange adj_range_func) {
|
||||
if (shell.empty()) {
|
||||
DEBUG_PRINTF("no shell\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NFAVertex exit_vertex = NGHolder::null_vertex();
|
||||
for (auto u : shell) {
|
||||
for (auto v : adj_range_func(u, g)) {
|
||||
if (contains(shell, v)) {
|
||||
continue;
|
||||
}
|
||||
if (!exit_vertex) {
|
||||
exit_vertex = v;
|
||||
continue;
|
||||
}
|
||||
if (exit_vertex == v) {
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if all edges out of vertices in the head shell lead to at most a single
|
||||
* outside vertex, or the inverse for the tail shell.
|
||||
*/
|
||||
static
|
||||
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell,
|
||||
const flat_set<NFAVertex> &tail_shell) {
|
||||
if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) {
|
||||
DEBUG_PRINTF("head shell has only one path through it\n");
|
||||
return true;
|
||||
}
|
||||
if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) {
|
||||
DEBUG_PRINTF("tail shell has only one path into it\n");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Common code called by calc- and recalc- below. Splits the given holder into
|
||||
* one or more connected components, adding them to the comps deque.
|
||||
@ -250,16 +296,25 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
|
||||
return;
|
||||
}
|
||||
|
||||
// Find edges connecting the head and tail shells directly.
|
||||
vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell);
|
||||
|
||||
DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
|
||||
head_shell.size(), tail_shell.size(), shell_edges.size());
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
|
||||
// If there are no shell edges and only one path out of the head shell or
|
||||
// into the tail shell, we aren't going to find more than one component.
|
||||
if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) {
|
||||
DEBUG_PRINTF("single component\n");
|
||||
comps.push_back(std::move(g));
|
||||
return;
|
||||
}
|
||||
|
||||
unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
|
||||
auto ug = createUnGraph(*g, true, true, old2new);
|
||||
|
||||
// Construct reverse mapping.
|
||||
ue2::unordered_map<NFAUndirectedVertex, NFAVertex> new2old;
|
||||
unordered_map<NFAUndirectedVertex, NFAVertex> new2old;
|
||||
for (const auto &m : old2new) {
|
||||
new2old.emplace(m.second, m.first);
|
||||
}
|
||||
@ -301,7 +356,7 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
|
||||
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
|
||||
unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
|
||||
for (auto &vv : verts) {
|
||||
// Shells are in every component.
|
||||
vv.insert(vv.end(), begin(head_shell), end(head_shell));
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -62,9 +62,11 @@
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/graph_small_color_map.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
@ -123,17 +125,17 @@ class SearchVisitor : public boost::default_dfs_visitor {
|
||||
|
||||
} // namespace
|
||||
|
||||
template<class Graph>
|
||||
template<class Graph, class ColorMap>
|
||||
static
|
||||
bool searchForward(const Graph &g, const CharReach &reach,
|
||||
ColorMap &colours,
|
||||
const flat_set<typename Graph::vertex_descriptor> &s,
|
||||
typename Graph::vertex_descriptor w) {
|
||||
map<NFAVertex, boost::default_color_type> colours;
|
||||
colours.fill(small_color::white);
|
||||
try {
|
||||
depth_first_visit(g, w, SearchVisitor(reach),
|
||||
make_assoc_property_map(colours),
|
||||
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
|
||||
} catch (SearchFailed&) {
|
||||
depth_first_visit(g, w, SearchVisitor(reach), colours,
|
||||
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
|
||||
} catch (SearchFailed &) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -162,6 +164,9 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
|
||||
|
||||
typedef typename Graph::vertex_descriptor vertex_descriptor;
|
||||
|
||||
// Colour map used for depth_first_visit().
|
||||
auto colours = make_small_color_map(g);
|
||||
|
||||
// precalc successors of v.
|
||||
flat_set<vertex_descriptor> succ_v;
|
||||
insert(&succ_v, adjacent_vertices(v, g));
|
||||
@ -200,7 +205,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
|
||||
|
||||
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
|
||||
|
||||
if (!searchForward(g, reach, s, w)) {
|
||||
if (!searchForward(g, reach, colours, s, w)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -234,6 +239,8 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
|
||||
}
|
||||
|
||||
bool removeCyclicPathRedundancy(NGHolder &g) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
// Forward pass.
|
||||
bool f_changed = cyclicPathRedundancyPass(g, g);
|
||||
if (f_changed) {
|
||||
|
@ -34,17 +34,18 @@
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph_small_color_map.h"
|
||||
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/breadth_first_search.hpp>
|
||||
#include <boost/graph/dag_shortest_paths.hpp>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/breadth_first_search.hpp>
|
||||
#include <boost/graph/filtered_graph.hpp>
|
||||
#include <boost/graph/property_maps/constant_property_map.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
#include <boost/graph/topological_sort.hpp>
|
||||
#include <boost/graph/property_maps/constant_property_map.hpp>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
|
||||
using namespace std;
|
||||
@ -137,13 +138,15 @@ vector<bool> findLoopReachable(const Graph &g,
|
||||
EdgeSet deadEdges;
|
||||
BackEdges<EdgeSet> be(deadEdges);
|
||||
|
||||
depth_first_search(g, visitor(be).root_vertex(src));
|
||||
auto colors = make_small_color_map(g);
|
||||
|
||||
depth_first_search(g, be, colors, src);
|
||||
auto af = make_bad_edge_filter(&deadEdges);
|
||||
auto acyclic_g = make_filtered_graph(g, af);
|
||||
|
||||
vector<Vertex> topoOrder; /* actually reverse topological order */
|
||||
topoOrder.reserve(deadNodes.size());
|
||||
topological_sort(acyclic_g, back_inserter(topoOrder));
|
||||
topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors));
|
||||
|
||||
for (const auto &e : deadEdges) {
|
||||
size_t srcIdx = g[source(e, g)].index;
|
||||
@ -204,14 +207,16 @@ void calcDepthFromSource(const GraphT &g,
|
||||
visitor(make_bfs_visitor(record_distances(
|
||||
make_iterator_property_map(dMin.begin(),
|
||||
min_index_map),
|
||||
boost::on_tree_edge()))));
|
||||
boost::on_tree_edge())))
|
||||
.color_map(make_small_color_map(mindist_g)));
|
||||
|
||||
auto max_index_map = get(vertex_index, maxdist_g);
|
||||
|
||||
dag_shortest_paths(maxdist_g, srcVertex,
|
||||
distance_map(make_iterator_property_map(dMax.begin(),
|
||||
max_index_map))
|
||||
.weight_map(make_constant_property<EdgeT>(-1)));
|
||||
.weight_map(make_constant_property<EdgeT>(-1))
|
||||
.color_map(make_small_color_map(maxdist_g)));
|
||||
|
||||
for (size_t i = 0; i < numVerts; i++) {
|
||||
if (dMin[i] > DIST_UNREACHABLE) {
|
||||
|
@ -36,7 +36,6 @@
|
||||
#include "ue2common.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
|
||||
#include <boost-patched/graph/reverse_graph.hpp>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -36,15 +36,14 @@
|
||||
#define NG_DOMINATORS_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
|
||||
std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include "smallwrite/smallwrite_dump.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/dump_util.h"
|
||||
#include "util/report.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2string.h"
|
||||
@ -175,7 +176,7 @@ public:
|
||||
: g(g_in), rm(&rm_in) {}
|
||||
|
||||
NFAWriter(const GraphT &g_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &region_map_in)
|
||||
const unordered_map<NFAVertex, u32> &region_map_in)
|
||||
: g(g_in), region_map(&region_map_in) {}
|
||||
|
||||
void operator()(ostream& os, const VertexT& v) const {
|
||||
@ -253,7 +254,7 @@ public:
|
||||
private:
|
||||
const GraphT &g;
|
||||
const ReportManager *rm = nullptr;
|
||||
const ue2::unordered_map<NFAVertex, u32> *region_map = nullptr;
|
||||
const unordered_map<NFAVertex, u32> *region_map = nullptr;
|
||||
};
|
||||
}
|
||||
|
||||
@ -277,7 +278,7 @@ void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm) {
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &region_map) {
|
||||
const unordered_map<NFAVertex, u32> &region_map) {
|
||||
typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
|
||||
typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
|
||||
ofstream os(name);
|
||||
@ -331,7 +332,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
|
||||
}
|
||||
|
||||
void dumpHolderImpl(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &region_map,
|
||||
const unordered_map<NFAVertex, u32> &region_map,
|
||||
unsigned int stageNumber, const char *stageName,
|
||||
const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
@ -348,14 +349,7 @@ void dumpSmallWrite(const RoseEngine *rose, const Grey &grey) {
|
||||
}
|
||||
|
||||
const struct SmallWriteEngine *smwr = getSmallWrite(rose);
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "smallwrite.txt";
|
||||
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
smwrDumpText(smwr, f);
|
||||
fclose(f);
|
||||
|
||||
smwrDumpText(smwr, StdioFile(grey.dumpPath + "smallwrite.txt", "w"));
|
||||
smwrDumpNFA(smwr, false, grey.dumpPath);
|
||||
}
|
||||
|
||||
@ -420,9 +414,7 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) {
|
||||
return;
|
||||
}
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "internal_reports.txt";
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
StdioFile f(grey.dumpPath + "internal_reports.txt", "w");
|
||||
const vector<Report> &reports = rm.reports();
|
||||
for (size_t i = 0; i < reports.size(); i++) {
|
||||
const Report &report = reports[i];
|
||||
@ -461,7 +453,6 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) {
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -36,7 +36,8 @@
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h" // for graph types
|
||||
#include "ue2common.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
#include <fstream>
|
||||
@ -75,7 +76,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
|
||||
|
||||
// Variant that takes a region map as well.
|
||||
void dumpHolderImpl(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> &region_map,
|
||||
const std::unordered_map<NFAVertex, u32> &region_map,
|
||||
unsigned int stageNumber, const char *stageName,
|
||||
const Grey &grey);
|
||||
|
||||
@ -123,7 +124,7 @@ void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
|
||||
|
||||
UNUSED static inline
|
||||
void dumpHolder(UNUSED const NGHolder &h,
|
||||
UNUSED const ue2::unordered_map<NFAVertex, u32> &region_map,
|
||||
UNUSED const std::unordered_map<NFAVertex, u32> &region_map,
|
||||
UNUSED unsigned int stageNumber, UNUSED const char *name,
|
||||
UNUSED const Grey &grey) {
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,8 +38,8 @@
|
||||
#include "parser/position.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
@ -181,6 +181,28 @@ bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
|
||||
return dead_count;
|
||||
}
|
||||
|
||||
static
|
||||
bool checkReportsRev(const NGHolder &g, NFAVertex v,
|
||||
const set<NFAVertex> &happy) {
|
||||
if (g[v].reports.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second);
|
||||
|
||||
/* an edge to accept takes priority over eod only accept */
|
||||
NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod;
|
||||
|
||||
flat_set<ReportID> happy_reports;
|
||||
for (NFAVertex u : happy) {
|
||||
if (edge(u, accept, g).second) {
|
||||
insert(&happy_reports, g[u].reports);
|
||||
}
|
||||
}
|
||||
|
||||
return is_subset_of(g[v].reports, happy_reports);
|
||||
}
|
||||
|
||||
/** \brief Redundant self-loop removal (reverse version).
|
||||
*
|
||||
* A self loop on a vertex v can be removed if:
|
||||
@ -233,7 +255,8 @@ bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
|
||||
happy.insert(u);
|
||||
}
|
||||
|
||||
if (!happy.empty() && checkVerticesRev(g, sad, happy)) {
|
||||
if (!happy.empty() && checkVerticesRev(g, sad, happy)
|
||||
&& checkReportsRev(g, v, happy)) {
|
||||
dead_count++;
|
||||
remove_edge(v, v, g);
|
||||
}
|
||||
|
@ -37,9 +37,10 @@
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
@ -121,16 +122,9 @@ public:
|
||||
vertex_flags == b.vertex_flags && rs == b.rs;
|
||||
}
|
||||
|
||||
friend size_t hash_value(const ClassInfo &c) {
|
||||
size_t val = 0;
|
||||
boost::hash_combine(val, c.rs);
|
||||
boost::hash_combine(val, c.vertex_flags);
|
||||
boost::hash_combine(val, c.cr);
|
||||
boost::hash_combine(val, c.adjacent_cr);
|
||||
boost::hash_combine(val, c.node_type);
|
||||
boost::hash_combine(val, c.depth.d1);
|
||||
boost::hash_combine(val, c.depth.d2);
|
||||
return val;
|
||||
size_t hash() const {
|
||||
return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
|
||||
depth.d2);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -319,7 +313,7 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
|
||||
const size_t num_verts = infos.size();
|
||||
|
||||
vector<VertexInfoSet> classes;
|
||||
unordered_map<ClassInfo, unsigned> classinfomap;
|
||||
ue2_unordered_map<ClassInfo, unsigned> classinfomap;
|
||||
|
||||
// assume we will have lots of classes, so we don't waste time resizing
|
||||
// these structures.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -35,7 +35,7 @@
|
||||
#define NG_EXECUTE_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -48,7 +48,7 @@ namespace ue2 {
|
||||
|
||||
static
|
||||
bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
|
||||
ue2::flat_set<ReportID> *reports) {
|
||||
flat_set<ReportID> *reports) {
|
||||
DEBUG_PRINTF("looking for a mask pattern\n");
|
||||
set<NFAVertex> s_succ;
|
||||
insert(&s_succ, adjacent_vertices(g.start, g));
|
||||
@ -117,7 +117,7 @@ bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ue2::flat_set<ReportID> reports;
|
||||
flat_set<ReportID> reports;
|
||||
bool anchored = false;
|
||||
vector<CharReach> mask;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -40,10 +40,12 @@
|
||||
#include "util/bitfield.h"
|
||||
#include "util/container.h"
|
||||
#include "util/determinise.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/hash_dynamic_bitset.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/unordered.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
@ -236,7 +238,7 @@ public:
|
||||
|
||||
struct Big_Traits {
|
||||
using StateSet = dynamic_bitset<>;
|
||||
using StateMap = map<StateSet, dstate_id_t>;
|
||||
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
|
||||
|
||||
static StateSet init_states(u32 num) {
|
||||
return StateSet(num);
|
||||
@ -257,7 +259,7 @@ public:
|
||||
|
||||
struct Graph_Traits {
|
||||
using StateSet = bitfield<NFA_STATE_LIMIT>;
|
||||
using StateMap = ue2::unordered_map<StateSet, dstate_id_t>;
|
||||
using StateMap = unordered_map<StateSet, dstate_id_t>;
|
||||
|
||||
static StateSet init_states(UNUSED u32 num) {
|
||||
assert(num <= NFA_STATE_LIMIT);
|
||||
@ -284,8 +286,8 @@ public:
|
||||
|
||||
class Automaton_Haig_Merge {
|
||||
public:
|
||||
typedef vector<u16> StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
using StateSet = vector<u16>;
|
||||
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
|
||||
|
||||
explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
|
||||
: nfas(in.begin(), in.end()), dead(in.size()) {
|
||||
@ -514,11 +516,11 @@ bool doHaig(const NGHolder &g, som_type som,
|
||||
raw_som_dfa *rdfa) {
|
||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||
a fight */
|
||||
typedef typename Auto::StateSet StateSet;
|
||||
using StateSet = typename Auto::StateSet;
|
||||
vector<StateSet> nfa_state_map;
|
||||
Auto n(g, som, triggers, unordered_som);
|
||||
try {
|
||||
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return false;
|
||||
}
|
||||
@ -720,15 +722,14 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
|
||||
}
|
||||
}
|
||||
|
||||
typedef Automaton_Haig_Merge::StateSet StateSet;
|
||||
using StateSet = Automaton_Haig_Merge::StateSet;
|
||||
vector<StateSet> nfa_state_map;
|
||||
auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
|
||||
NODE_START,
|
||||
dfas[0]->stream_som_loc_width);
|
||||
|
||||
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
|
||||
if (rv) {
|
||||
DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit);
|
||||
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -40,7 +40,7 @@
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_kind.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/ue2_graph.h"
|
||||
|
||||
namespace ue2 {
|
||||
@ -67,7 +67,7 @@ struct NFAGraphEdgeProps {
|
||||
|
||||
/** \brief For graphs that will be implemented as multi-top engines, this
|
||||
* specifies the top events. Only used on edges from the start vertex. */
|
||||
ue2::flat_set<u32> tops;
|
||||
flat_set<u32> tops;
|
||||
|
||||
/** \brief Flags associated with assertions. */
|
||||
u32 assert_flags = 0;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -39,13 +39,9 @@
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <boost/functional/hash/hash.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -200,11 +196,11 @@ u64a hash_holder(const NGHolder &g) {
|
||||
size_t rv = 0;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
boost::hash_combine(rv, g[v].index);
|
||||
boost::hash_combine(rv, g[v].char_reach);
|
||||
hash_combine(rv, g[v].index);
|
||||
hash_combine(rv, g[v].char_reach);
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
boost::hash_combine(rv, g[w].index);
|
||||
hash_combine(rv, g[w].index);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -346,24 +346,4 @@ bytecode_ptr<NFA> constructLBR(const NGHolder &g,
|
||||
return constructLBR(proto, triggers, cc, rm);
|
||||
}
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey) {
|
||||
if (!grey.allowLbr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
DEBUG_PRINTF("not pure bounded repeat\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (repeat.reports.size() != 1) {
|
||||
DEBUG_PRINTF("too many reports\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -66,9 +66,6 @@ constructLBR(const CastleProto &proto,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const CompileContext &cc, const ReportManager &rm);
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_LBR_H
|
||||
|
@ -53,11 +53,13 @@
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/flat_containers.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
@ -73,8 +75,8 @@ namespace ue2 {
|
||||
// Only used in assertions.
|
||||
static
|
||||
bool sanityCheckGraph(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
ue2::unordered_set<u32> seen_states;
|
||||
const unordered_map<NFAVertex, u32> &state_ids) {
|
||||
unordered_set<u32> seen_states;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
// Non-specials should have non-empty reachability.
|
||||
@ -115,10 +117,9 @@ bool sanityCheckGraph(const NGHolder &g,
|
||||
#endif
|
||||
|
||||
static
|
||||
void findSquashStates(const NGHolder &g,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
map<NFAVertex, NFAStateSet> &squashMap) {
|
||||
squashMap = findSquashers(g);
|
||||
unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
|
||||
const vector<BoundedRepeatData> &repeats) {
|
||||
auto squashMap = findSquashers(g);
|
||||
filterSquashers(g, squashMap);
|
||||
|
||||
/* We also filter out the cyclic states representing bounded repeats, as
|
||||
@ -128,6 +129,8 @@ void findSquashStates(const NGHolder &g,
|
||||
squashMap.erase(br.cyclic);
|
||||
}
|
||||
}
|
||||
|
||||
return squashMap;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -468,7 +471,7 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
|
||||
static
|
||||
set<NFAVertex> findZombies(const NGHolder &h,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
const unordered_map<NFAVertex, u32> &state_ids,
|
||||
const CompileContext &cc) {
|
||||
set<NFAVertex> zombies;
|
||||
if (!cc.grey.allowZombies) {
|
||||
@ -516,7 +519,7 @@ set<NFAVertex> findZombies(const NGHolder &h,
|
||||
}
|
||||
|
||||
static
|
||||
void reverseStateOrdering(ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) {
|
||||
vector<NFAVertex> ordering;
|
||||
for (auto &e : state_ids) {
|
||||
if (e.second == NO_STATE) {
|
||||
@ -569,7 +572,7 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm,
|
||||
const map<u32, u32> &fixed_depth_tops,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
bool impl_test_only, const CompileContext &cc,
|
||||
ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
unordered_map<NFAVertex, u32> &state_ids,
|
||||
vector<BoundedRepeatData> &repeats,
|
||||
map<u32, set<NFAVertex>> &tops) {
|
||||
assert(is_triggered(h_in) || fixed_depth_tops.empty());
|
||||
@ -637,7 +640,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||
assert(rm);
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> state_ids;
|
||||
unordered_map<NFAVertex, u32> state_ids;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<u32, set<NFAVertex>> tops;
|
||||
unique_ptr<NGHolder> h
|
||||
@ -657,12 +660,12 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||
br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
|
||||
}
|
||||
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
// build map of squashed and squashers
|
||||
if (cc.grey.squashNFA) {
|
||||
findSquashStates(*h, repeats, squashMap);
|
||||
squashMap = findSquashStates(*h, repeats);
|
||||
|
||||
if (rm && cc.grey.highlanderSquash) {
|
||||
reportSquashMap = findHighlanderSquashers(*h, *rm);
|
||||
@ -734,8 +737,8 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
|
||||
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
|
||||
set<NFAVertex> zombies;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||
zombies, false, false, hint, cc);
|
||||
@ -785,7 +788,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
|
||||
* resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
|
||||
* implement it as an NFA. */
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> state_ids;
|
||||
unordered_map<NFAVertex, u32> state_ids;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<u32, set<NFAVertex>> tops;
|
||||
unique_ptr<NGHolder> h
|
||||
@ -832,7 +835,7 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
||||
const map<u32, u32> fixed_depth_tops; // empty
|
||||
const map<u32, vector<vector<CharReach>>> triggers; // empty
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> state_ids;
|
||||
unordered_map<NFAVertex, u32> state_ids;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<u32, set<NFAVertex>> tops;
|
||||
unique_ptr<NGHolder> h
|
||||
@ -848,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
||||
|
||||
// Should have no bearing on accel calculation, so we leave these empty.
|
||||
const set<NFAVertex> zombies;
|
||||
const map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
const map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
|
||||
tops, zombies, cc);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user